Spaces:
Paused
Paused
File size: 1,122 Bytes
952909f 2624a11 7bc489f 952909f 7bc489f d056c3f 7bc489f 952909f 2624a11 952909f 7bc489f 2624a11 e5beda5 2624a11 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader, WebBaseLoader
from langchain_community.vectorstores import Qdrant
import os
def process_file(file_or_url):
    """Load a web page or an uploaded file into a list of documents.

    Args:
        file_or_url: Either a string starting with ``http://`` or
            ``https://``, or an upload object exposing ``path`` (a string)
            and ``content`` (bytes-like, since it is written in binary mode).

    Returns:
        The list of documents produced by the selected loader:
        ``WebBaseLoader`` for URLs, ``PyMuPDFLoader`` when ``path`` ends
        with ``.pdf``, and ``TextLoader`` for everything else.
    """
    # Bind the result list before any branch touches it; previously the URL
    # branch extended it before it existed and raised UnboundLocalError.
    documents = []

    if isinstance(file_or_url, str) and file_or_url.startswith(('http://', 'https://')):
        # A URL string has no .path/.content, so finish here instead of
        # falling through to the upload handling below.
        documents.extend(WebBaseLoader(file_or_url).load())
        return documents

    # Write the upload to disk because the loaders below read from a path.
    # NOTE(review): this file is never removed afterwards -- confirm whether
    # the caller is expected to clean it up.
    temp_file = "./" + file_or_url.path
    with open(temp_file, "wb") as file:
        file.write(file_or_url.content)

    # Only the loader class differs between the two file kinds.
    if file_or_url.path.endswith(".pdf"):
        loader = PyMuPDFLoader(temp_file)
    else:
        loader = TextLoader(temp_file)
    documents.extend(loader.load())
    return documents
def add_to_qdrant(documents, embeddings, qdrant_client, collection_name):
    """Index ``documents`` into a Qdrant collection.

    Calls ``Qdrant.from_documents`` with the given documents and embeddings,
    the URL read from ``qdrant_client.url``, gRPC preferred, and the target
    ``collection_name``. The object returned by that call is discarded, so
    this function returns ``None``.
    """
    build_store = Qdrant.from_documents
    connection_options = {
        "url": qdrant_client.url,
        "prefer_grpc": True,
        "collection_name": collection_name,
    }
    build_store(documents, embeddings, **connection_options)