Chris4K committed on
Commit
1f5e9cb
·
1 Parent(s): 046c1e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -8
app.py CHANGED
@@ -4,7 +4,7 @@ from langchain.vectorstores import Chroma
4
  from langchain.document_loaders import PyPDFLoader
5
  from langchain.text_splitter import CharacterTextSplitter
6
  from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
7
-
8
  # Use Hugging Face Inference API embeddings
9
  inference_api_key = os.environ['HF']
10
  api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
@@ -13,24 +13,19 @@ api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
13
  )
14
 
15
  # Load and process the PDF files
16
- loader = PyPDFLoader("./new_papers/ReACT.pdf")
17
  documents = loader.load()
18
  print("-----------")
19
  print(documents)
20
  print("-----------")
21
 
22
  # Load the document, split it into chunks, embed each chunk, and load it into the vector store.
23
- text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=50)
24
  vdocuments = text_splitter.split_documents(documents)
25
 
26
-
27
-
28
-
29
  # Create Chroma vector store for API embeddings
30
  api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
31
 
32
- print(api_db.similarity_search("What is react"))
33
-
34
  # Define the PDF retrieval function
35
  def pdf_retrieval(query):
36
  # Run the query through the retriever
 
4
  from langchain.document_loaders import PyPDFLoader
5
  from langchain.text_splitter import CharacterTextSplitter
6
  from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
7
+
8
  # Use Hugging Face Inference API embeddings
9
  inference_api_key = os.environ['HF']
10
  api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
 
13
  )
14
 
15
  # Load and process the PDF files
16
+ loader = PyPDFLoader("/content/ReACT.pdf")
17
  documents = loader.load()
18
  print("-----------")
19
  print(documents)
20
  print("-----------")
21
 
22
  # Load the document, split it into chunks, embed each chunk, and load it into the vector store.
23
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
24
  vdocuments = text_splitter.split_documents(documents)
25
 
 
 
 
26
  # Create Chroma vector store for API embeddings
27
  api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
28
 
 
 
29
  # Define the PDF retrieval function
30
  def pdf_retrieval(query):
31
  # Run the query through the retriever