Spaces:
Sleeping
Sleeping
import os | |
import pickle | |
from langchain_community.document_loaders.sitemap import SitemapLoader | |
def save_documents_to_disk(docs, file_path):
    """Serialize ``docs`` to ``file_path`` with pickle.

    The file is opened in binary write mode, so any existing file at
    ``file_path`` is overwritten.

    Args:
        docs: Any picklable object (e.g. the list of loaded documents).
        file_path: Destination path for the pickle file.

    Raises:
        OSError: If the file cannot be opened for writing.
        pickle.PicklingError: If ``docs`` cannot be pickled.
    """
    with open(file_path, 'wb') as file:
        pickle.dump(docs, file)
def load_documents_from_disk(file_path):
    """Load previously pickled documents from ``file_path``.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``None`` when ``file_path`` does not exist.

    Raises:
        OSError: If the path exists but cannot be opened (e.g. permissions).
        pickle.UnpicklingError, EOFError: If the file is not a valid pickle.

    Warning:
        ``pickle.load`` can execute arbitrary code while deserializing.
        Only point this at files the application itself wrote.
    """
    # Attempt the open directly instead of checking existence first: a
    # separate exists() check can race with the file being removed.
    try:
        file = open(file_path, 'rb')
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError covers a path whose parent component is a
        # regular file, which is likewise "no such file" for the caller.
        return None
    # Unpickling stays outside the try so that errors raised while loading
    # the data are not mistaken for a missing file.
    with file:
        return pickle.load(file)
def load_documents_from_sitemap(sitemap_url):
    """Load documents from a sitemap URL using ``SitemapLoader``.

    Args:
        sitemap_url: Location of the sitemap, passed to ``SitemapLoader``
            as its ``web_path``.

    Returns:
        Whatever ``SitemapLoader.load()`` returns for that sitemap
        (presumably a list of LangChain documents; confirm against the
        installed langchain_community version).
    """
    sitemap_loader = SitemapLoader(web_path=sitemap_url)
    return sitemap_loader.load()