Spaces:
Build error
Build error
import os | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.embeddings import SentenceTransformerEmbeddings | |
from langchain.document_loaders import DirectoryLoader | |
from langchain.document_loaders import PyPDFLoader | |
from langchain.vectorstores import Qdrant | |
from qdrant_client import QdrantClient | |
# Ingestion script: load every PDF under data/, split it into overlapping
# chunks, embed the chunks and upload them to the "vector_db" Qdrant collection.

# Connection settings are read once so that the client below and the
# vector-store upload target the same Qdrant instance.
# NOTE(review): the fallback URL looks like a placeholder -- set QDRANT_URL
# in the environment for a real deployment.
QDRANT_URL = os.getenv(
    "QDRANT_URL",
    "https://QDRANT_URL.europe-west3-0.gcp.cloud.qdrant.io",
)
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

embeddings = SentenceTransformerEmbeddings(
    model_name="NeuML/pubmedbert-base-embeddings"
)

# Stand-alone client kept at module level for any direct Qdrant calls made
# after this script section (e.g. inspecting or querying the collection).
client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
    prefer_grpc=False,
)

# Recursively pick up every PDF below data/ and parse it with PyPDFLoader.
loader = DirectoryLoader(
    'data/',
    glob="**/*.pdf",
    show_progress=True,
    loader_cls=PyPDFLoader,
)
documents = loader.load()

# 1000-character chunks with a 100-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_documents(documents)

if not texts:
    # Nothing to index: stop with a clear message instead of handing an
    # empty batch to the embedding model and the vector store.
    raise RuntimeError("No PDF content found under 'data/'; nothing to index.")

# Qdrant.from_documents creates its own client from connection keyword
# arguments (url, api_key, prefer_grpc, ...); it has no `client` parameter,
# so the connection settings are passed explicitly here.
qdrant = Qdrant.from_documents(
    texts,
    embeddings,
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
    prefer_grpc=False,
    collection_name="vector_db",
)