rag-tool

Running

Chris4K committed on Nov 24, 2023

Commit

1f5e9cb

1 Parent(s): 046c1e4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ from langchain.vectorstores import Chroma
 from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
 # Use Hugging Face Inference API embeddings
 inference_api_key = os.environ['HF']
 api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
@@ -13,24 +13,19 @@ api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
 )
 # Load and process the PDF files
-loader = PyPDFLoader("./new_papers/ReACT.pdf")
 documents = loader.load()
 print("-----------")
 print(documents)
 print("-----------")
 # Load the document, split it into chunks, embed each chunk, and load it into the vector store.
-text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=50)
 vdocuments = text_splitter.split_documents(documents)
 # Create Chroma vector store for API embeddings
 api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
-print(api_db.similarity_search("What is react"))
 # Define the PDF retrieval function
 def pdf_retrieval(query):
     # Run the query through the retriever

 from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
 # Use Hugging Face Inference API embeddings
 inference_api_key = os.environ['HF']
 api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
 )
 # Load and process the PDF files
+loader = PyPDFLoader("/content/ReACT.pdf")
 documents = loader.load()
 print("-----------")
 print(documents)
 print("-----------")
 # Load the document, split it into chunks, embed each chunk, and load it into the vector store.
+text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
 vdocuments = text_splitter.split_documents(documents)
 # Create Chroma vector store for API embeddings
 api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
 # Define the PDF retrieval function
 def pdf_retrieval(query):
     # Run the query through the retriever