"""Index a PDF from ``Data/`` into a Qdrant collection.

The script runs top to bottom with no entry-point guard:

1. Build a sentence-transformer embedding model.
2. Load the PDF matched by the glob under ``Data/``.
3. Split the loaded pages into overlapping text chunks.
4. Embed the chunks and store them in the ``patent_database`` collection
   on the Qdrant server at ``http://localhost:6333/``.
"""

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Qdrant

# Embedding model used to vectorise every chunk before it is stored.
embeddings = SentenceTransformerEmbeddings(model_name='BAAI/bge-large-en')
print(embeddings)

# NOTE: the original had a stray line-continuation backslash after this call,
# which glued the following statement onto it and broke parsing.
loader = DirectoryLoader(
    'Data/',
    glob='110106081.pdf',
    show_progress=True,
    loader_cls=PyPDFLoader,
)
documents = loader.load()

# Fail early with a clear message instead of an opaque error from the
# vector store when the glob matched nothing.
if not documents:
    raise SystemExit(
        "No documents loaded from 'Data/' matching '110106081.pdf'; "
        "nothing to index."
    )

# Overlapping chunks so text cut at a boundary still appears whole in one
# of the two neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
texts = text_splitter.split_documents(documents)

url = "http://localhost:6333/"
qdrant = Qdrant.from_documents(
    texts,
    embeddings,
    url=url,
    prefer_grpc=False,  # connect via the URL above without preferring gRPC
    collection_name="patent_database",
)

print("Vector Database created")