himel06 committed on
Commit
b1118ff
·
1 Parent(s): 19ab709

Python File

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import sys
3
  import streamlit as st
4
- from PyPDF2 import PdfReader # PyPDF2 for PDF text extraction
5
  import faiss # FAISS for similarity search
6
  import numpy as np
7
  from langchain_community.llms import Replicate
@@ -13,7 +13,7 @@ from langchain_huggingface import HuggingFaceEmbeddings
13
  os.environ['REPLICATE_API_TOKEN'] = "r8_TN8tlsE4jjj9WISWhBKx7NqzHLAGwvq3pJOUj"
14
 
15
  def extract_text_with_pypdf2(file):
16
- reader = PdfReader(file)
17
  text = ""
18
  for page in reader.pages:
19
  text += page.extract_text()
@@ -29,7 +29,7 @@ if uploaded_file is not None:
29
 
30
  # Split the text into smaller chunks for processing
31
  text_splitter = CharacterTextSplitter(
32
- separator="\n",
33
  chunk_size=800,
34
  chunk_overlap=200,
35
  length_function=len
@@ -56,7 +56,7 @@ if uploaded_file is not None:
56
  combined_prompt = f"Question: {query}\n\nRelevant Documents:\n"
57
  combined_prompt += "\n\n".join([doc.page_content for doc in docs])
58
  # Get the response from the Llama model
59
- response = llm(combined_prompt)
60
  return response
61
 
62
  st.write("PDF successfully uploaded and processed. You can now ask questions about its content.")
 
1
  import os
2
  import sys
3
  import streamlit as st
4
+ import PyPDF2 # PyPDF2 for PDF text extraction
5
  import faiss # FAISS for similarity search
6
  import numpy as np
7
  from langchain_community.llms import Replicate
 
13
  os.environ['REPLICATE_API_TOKEN'] = "r8_TN8tlsE4jjj9WISWhBKx7NqzHLAGwvq3pJOUj"
14
 
15
  def extract_text_with_pypdf2(file):
16
+ reader = PyPDF2.PdfReader(file)
17
  text = ""
18
  for page in reader.pages:
19
  text += page.extract_text()
 
29
 
30
  # Split the text into smaller chunks for processing
31
  text_splitter = CharacterTextSplitter(
32
+ separator=" ", # Using space as a separator for more control
33
  chunk_size=800,
34
  chunk_overlap=200,
35
  length_function=len
 
56
  combined_prompt = f"Question: {query}\n\nRelevant Documents:\n"
57
  combined_prompt += "\n\n".join([doc.page_content for doc in docs])
58
  # Get the response from the Llama model
59
+ response = llm.invoke(combined_prompt) # Updated method call
60
  return response
61
 
62
  st.write("PDF successfully uploaded and processed. You can now ask questions about its content.")