originws-app / document_handler.py
Maurizio Dipierro
origin working
cd65ba5
raw
history blame contribute delete
696 Bytes
import os
import pickle
from langchain_community.document_loaders.sitemap import SitemapLoader
def save_documents_to_disk(docs, file_path):
    """Save the documents to a file using pickle.

    The data is pickled into a sibling temporary file (``file_path`` plus a
    ``.tmp`` suffix) that is then moved over ``file_path`` with
    ``os.replace``. If pickling or writing fails, the temporary file is
    removed and any file already present at ``file_path`` is left untouched,
    so a failed save never leaves a truncated pickle behind.

    Args:
        docs: Any picklable object to persist.
        file_path: Destination path (``str``, ``bytes`` or ``os.PathLike``).

    Raises:
        OSError: If the temporary file cannot be written or moved into place.
        Exception: Whatever ``pickle.dump`` raises for an unpicklable object
            is re-raised unchanged.
    """
    target = os.fspath(file_path)
    # Keep the suffix type consistent with the path type (str vs. bytes).
    suffix = b".tmp" if isinstance(target, bytes) else ".tmp"
    tmp_path = target + suffix
    try:
        with open(tmp_path, 'wb') as file:
            pickle.dump(docs, file)
        # Same-directory rename: the destination is only replaced once the
        # pickle has been written out completely.
        os.replace(tmp_path, target)
    except BaseException:
        # Best-effort cleanup; the temp file may never have been created.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
def load_documents_from_disk(file_path):
    """Load the documents from a file if it exists.

    Args:
        file_path: Path of a pickle file.

    Returns:
        The unpickled object, or ``None`` when no file exists at
        ``file_path``.

    Raises:
        OSError: For failures other than a missing file (e.g. the path is a
            directory or is not readable).
        Exception: Whatever ``pickle.load`` raises for a corrupt or
            truncated file.

    Warning:
        ``pickle.load`` can execute arbitrary code embedded in the file.
        Only call this on files written by a trusted source.
    """
    # Open directly instead of checking existence first: a separate
    # exists() check can go stale before open() runs.
    try:
        file = open(file_path, 'rb')
    except FileNotFoundError:
        return None
    with file:
        return pickle.load(file)
def load_documents_from_sitemap(sitemap_url):
    """Fetch documents for a sitemap via ``SitemapLoader``.

    Args:
        sitemap_url: Passed to ``SitemapLoader`` as its ``web_path``.

    Returns:
        Whatever ``SitemapLoader.load()`` returns for that sitemap.
    """
    return SitemapLoader(web_path=sitemap_url).load()