originws-app / document_handler.py
Maurizio Dipierro
working cmd
6c94128
raw
history blame
733 Bytes
import os
import pickle
from langchain_community.document_loaders.sitemap import SitemapLoader
# File name of the pickle file used to store loaded documents.
# NOTE(review): not referenced by the functions visible in this module;
# presumably callers pass it as ``file_path`` to the save/load helpers — confirm.
docs_file_path = 'sitemap_docs.pkl'
def save_documents_to_disk(docs, file_path):
    """Save the documents to a file using pickle.

    The data is pickled into a sibling ``<file_path>.tmp`` file first and only
    moved over ``file_path`` once the dump has completed. A failure part-way
    through (unpicklable object, full disk, interrupt) therefore leaves any
    previously saved file untouched instead of truncating it.

    Args:
        docs: Any picklable object to persist.
        file_path: Destination path (``str``, ``bytes`` or ``os.PathLike``).

    Raises:
        OSError: If the temporary file cannot be written or moved into place.
        Exception: Whatever ``pickle.dump`` raises for objects that cannot be
            pickled; the error is re-raised after cleanup.
    """
    target = os.fspath(file_path)
    # Keep the temporary file next to the target so os.replace stays on the
    # same filesystem; match the str/bytes flavour of the given path.
    tmp_path = target + (b'.tmp' if isinstance(target, bytes) else '.tmp')
    try:
        with open(tmp_path, 'wb') as file:
            pickle.dump(docs, file)
        os.replace(tmp_path, target)
    except BaseException:
        # Cleanup only: drop the partial temporary file, then re-raise the
        # original error unchanged (including KeyboardInterrupt).
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
def load_documents_from_disk(file_path):
    """Load the documents from a file if it exists.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``None`` when there is no file at
        ``file_path``.

    Raises:
        OSError: If the file exists but cannot be opened (other than
            ``FileNotFoundError``, which is reported as ``None``).
        Exception: Whatever ``pickle.load`` raises for a corrupt or
            truncated file; such errors are not swallowed.
    """
    # Open directly instead of checking os.path.exists() first: the file can
    # disappear between the check and the open, so a missing file is handled
    # where it is actually detected.
    try:
        file = open(file_path, 'rb')
    except FileNotFoundError:
        return None
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only use this on files this application wrote itself, never on
    # untrusted input.
    with file:
        return pickle.load(file)
def load_documents_from_sitemap(sitemap_url):
    """Return the documents produced by a SitemapLoader for ``sitemap_url``.

    A ``SitemapLoader`` is constructed with ``web_path=sitemap_url`` and the
    result of its ``load()`` call is returned unchanged.
    """
    return SitemapLoader(web_path=sitemap_url).load()