# NOTE: page-scrape residue, not part of the script: "Spaces: Sleeping Sleeping"
# Ingestion script: embed one PDF and store its chunks in a Qdrant collection.
#
# Steps, in order:
#   1. Build a sentence-transformer embedding model.
#   2. Load the PDF from the Data/ directory.
#   3. Split the loaded documents into overlapping chunks.
#   4. Embed the chunks and write them to the Qdrant server.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Qdrant
from langchain_community.embeddings import SentenceTransformerEmbeddings

# Embedding model used for every chunk written to the collection below.
embeddings = SentenceTransformerEmbeddings(model_name='BAAI/bge-large-en')
print(embeddings)

# The glob names a single file, so only that PDF under Data/ is picked up.
# (The original line ended with a stray "\" continuation, which joined the
# next statement onto this call and made the file unparseable.)
loader = DirectoryLoader(
    'Data/',
    glob='110106081.pdf',
    show_progress=True,
    loader_cls=PyPDFLoader,
)
documents = loader.load()

# Overlapping chunks (size 1000, overlap 100) so text cut at a chunk boundary
# still appears intact in a neighbouring chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_documents(documents)

# Qdrant endpoint; prefer_grpc=False below selects the HTTP API at this URL.
url = "http://localhost:6333/"
qdrant = Qdrant.from_documents(
    texts,
    embeddings,
    url=url,
    prefer_grpc=False,
    collection_name="patent_database",
)
print("Vector Database created")