"""Ingest the PDF files under ``data/`` into a Qdrant collection.

Pipeline: load every PDF below ``data/``, split the pages into overlapping
character chunks, embed each chunk with the PubMedBERT sentence-transformer
model and upload the vectors to the ``vector_db`` collection.

Connection settings are read from the environment:

* ``QDRANT_URL``     -- cluster URL (falls back to the placeholder below).
* ``QDRANT_API_KEY`` -- API key for the cluster (``None`` when unset).
"""
import os

from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient

# Connection settings are resolved once so the standalone client and the
# vector store below are guaranteed to target the same cluster.
QDRANT_URL = os.getenv(
    "QDRANT_URL", "https://QDRANT_URL.europe-west3-0.gcp.cloud.qdrant.io"
)
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
COLLECTION_NAME = "vector_db"

embeddings = SentenceTransformerEmbeddings(
    model_name="NeuML/pubmedbert-base-embeddings"
)

# Kept as a module-level name for any code that imports it from this script;
# the vector store below opens its own connection (see note there).
client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
    prefer_grpc=False,
)

# Recursively pick up every PDF below data/ and parse it with PyPDFLoader.
loader = DirectoryLoader(
    "data/",
    glob="**/*.pdf",
    show_progress=True,
    loader_cls=PyPDFLoader,
)
documents = loader.load()

# 1000-character chunks with a 100-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_documents(documents)

# With nothing to embed there is nothing to index; stop with a clear message
# rather than failing somewhere inside the vector-store constructor.
if not texts:
    raise SystemExit("No PDF content found under 'data/'; nothing to index.")

# Qdrant.from_documents has no `client` parameter: it builds its own client
# from the connection keyword arguments. Passing a pre-built client does not
# carry the URL / API key over, so the settings are passed explicitly here.
qdrant = Qdrant.from_documents(
    texts,
    embeddings,
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
    prefer_grpc=False,
    collection_name=COLLECTION_NAME,
)

#print("Vector DB Successfully Created!")