Spaces:
Runtime error
Runtime error
File size: 888 Bytes
c731612 a803cdd c731612 dc50cde c68949e c731612 ca1c8f9 c731612 4fc3024 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
import download_and_extract_zip
def gen_splits(chunk_size: int = 7500, chunk_overlap: int = 500):
    """Download the document archive, load its PDFs and split them into chunks.

    The archive location is read from the ``URL`` environment variable and
    handed to ``download_and_extract_zip.download_and_extract_zip`` together
    with the current working directory. Every ``.pdf`` file (case-insensitive
    extension match) found directly inside ``<cwd>/Model_TS_Full`` is then
    loaded with ``PyPDFLoader`` and the resulting documents are split with a
    ``RecursiveCharacterTextSplitter``.

    Args:
        chunk_size: Value passed as ``chunk_size`` to the text splitter.
        chunk_overlap: Value passed as ``chunk_overlap`` to the text splitter.

    Returns:
        The result of ``text_splitter.split_documents`` over all loaded PDFs.
    """
    # NOTE(review): os.getenv returns None when URL is unset; how the download
    # helper reacts to that is not visible here -- confirm before relying on it.
    url = os.getenv('URL')
    destination_folder = os.getcwd()
    download_and_extract_zip.download_and_extract_zip(url, destination_folder)

    # The cwd is looked up again after the download, as the original code did.
    pdf_dir = os.path.join(os.getcwd(), 'Model_TS_Full')

    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # the loaded documents (and therefore the splits) stable across runs.
    pdf_paths = [
        os.path.join(pdf_dir, name)
        for name in sorted(os.listdir(pdf_dir))
        if name.lower().endswith('.pdf')
    ]

    docs = []
    for pdf_path in pdf_paths:
        docs.extend(PyPDFLoader(pdf_path).load())

    # Splitting documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(docs)