himel06 committed on
Commit
b1118ff
·
1 Parent(s): 19ab709

Python File

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import sys
3
  import streamlit as st
4
- from PyPDF2 import PdfReader # PyPDF2 for PDF text extraction
5
  import faiss # FAISS for similarity search
6
  import numpy as np
7
  from langchain_community.llms import Replicate
@@ -13,7 +13,7 @@ from langchain_huggingface import HuggingFaceEmbeddings
13
  os.environ['REPLICATE_API_TOKEN'] = "r8_TN8tlsE4jjj9WISWhBKx7NqzHLAGwvq3pJOUj"
14
 
15
  def extract_text_with_pypdf2(file):
16
- reader = PdfReader(file)
17
  text = ""
18
  for page in reader.pages:
19
  text += page.extract_text()
@@ -29,7 +29,7 @@ if uploaded_file is not None:
29
 
30
  # Split the text into smaller chunks for processing
31
  text_splitter = CharacterTextSplitter(
32
- separator="\n",
33
  chunk_size=800,
34
  chunk_overlap=200,
35
  length_function=len
@@ -56,7 +56,7 @@ if uploaded_file is not None:
56
  combined_prompt = f"Question: {query}\n\nRelevant Documents:\n"
57
  combined_prompt += "\n\n".join([doc.page_content for doc in docs])
58
  # Get the response from the Llama model
59
- response = llm(combined_prompt)
60
  return response
61
 
62
  st.write("PDF successfully uploaded and processed. You can now ask questions about its content.")
 
1
  import os
2
  import sys
3
  import streamlit as st
4
+ import PyPDF2 # PyPDF2 for PDF text extraction
5
  import faiss # FAISS for similarity search
6
  import numpy as np
7
  from langchain_community.llms import Replicate
 
13
  os.environ['REPLICATE_API_TOKEN'] = "r8_TN8tlsE4jjj9WISWhBKx7NqzHLAGwvq3pJOUj"
14
 
15
  def extract_text_with_pypdf2(file):
16
+ reader = PyPDF2.PdfReader(file)
17
  text = ""
18
  for page in reader.pages:
19
  text += page.extract_text()
 
29
 
30
  # Split the text into smaller chunks for processing
31
  text_splitter = CharacterTextSplitter(
32
+ separator=" ", # Using space as a separator for more control
33
  chunk_size=800,
34
  chunk_overlap=200,
35
  length_function=len
 
56
  combined_prompt = f"Question: {query}\n\nRelevant Documents:\n"
57
  combined_prompt += "\n\n".join([doc.page_content for doc in docs])
58
  # Get the response from the Llama model
59
+ response = llm.invoke(combined_prompt) # Updated method call
60
  return response
61
 
62
  st.write("PDF successfully uploaded and processed. You can now ask questions about its content.")