Spaces:
Sleeping
Sleeping
File size: 1,129 Bytes
5a9839d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
import requests
import os
from langchain.document_loaders import PyMuPDFLoader
# First document: remote source and the local path it is stored under
url_1 = "https://example.com/Blueprint-for-an-AI-Bill-of-Rights.pdf"
file_path_1 = "data/Blueprint-for-an-AI-Bill-of-Rights.pdf"

# Second document: remote source and the local path it is stored under
url_2 = "https://example.com/NIST.AI.600-1.pdf"
file_path_2 = "data/NIST.AI.600-1.pdf"
# Function to download a file from a URL
def download_pdf(url, file_path, timeout=30):
    """Download ``url`` to ``file_path`` unless that file already exists.

    Args:
        url: Address to fetch with an HTTP GET request.
        file_path: Local destination. Its parent directory is created when
            missing.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the script forever.

    Returns:
        None. The result is the file written at ``file_path``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        OSError: If the directory or file cannot be written.
    """
    # Check if the file already exists to avoid re-downloading
    if os.path.exists(file_path):
        print(f"{file_path} already exists, skipping download.")
        return

    print(f"Downloading {file_path} from {url}...")

    # open() does not create missing directories, so make sure the parent
    # exists first. dirname is "" for a bare file name.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    response = requests.get(url, timeout=timeout)
    # Without this check an error page body would be saved as the "PDF" and
    # then treated as a valid cached copy by the exists check above.
    response.raise_for_status()

    # Write next to the target and rename afterwards, so a failed write never
    # leaves a truncated file at file_path that looks like a finished download.
    partial_path = f"{file_path}.part"
    with open(partial_path, 'wb') as f:
        f.write(response.content)
    os.replace(partial_path, file_path)
# Download the PDFs from the URLs (files already on disk are not fetched again)
download_pdf(url_1, file_path_1)
download_pdf(url_2, file_path_2)

# Load the PDFs using PyMuPDFLoader
loader_1 = PyMuPDFLoader(file_path_1)
documents_1 = loader_1.load()
loader_2 = PyMuPDFLoader(file_path_2)
documents_2 = loader_2.load()