hamzaherry committed on
Commit
b5f9e6b
·
verified ·
1 Parent(s): 472cb47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -22
app.py CHANGED
@@ -5,8 +5,14 @@ from PyPDF2 import PdfReader
5
  from sentence_transformers import SentenceTransformer
6
  from groq import Groq
7
  from dotenv import load_dotenv
 
 
8
 
9
-
 
 
 
 
10
 
11
  # Initialize Groq client
12
  client = Groq(api_key="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")
@@ -18,16 +24,27 @@ model = SentenceTransformer("all-MiniLM-L6-v2")
18
  dimension = 384 # Embedding size for the Sentence Transformer model
19
  index = faiss.IndexFlatL2(dimension)
20
 
21
- # Function to process PDF and create embeddings
22
- def process_pdf(pdf_file):
23
- pdf_reader = PdfReader(pdf_file)
24
- text = ""
25
- for page in pdf_reader.pages:
26
- text += page.extract_text()
27
- chunks = [text[i:i + 500] for i in range(0, len(text), 500)] # Chunk into 500-char blocks
28
- embeddings = model.encode(chunks)
29
- index.add(embeddings)
30
- return chunks, embeddings
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  # Function to query FAISS and generate a response
33
  def query_model(query):
@@ -50,15 +67,10 @@ def query_model(query):
50
 
51
  # Streamlit app
52
  st.title("RAG-based PDF Question Answering")
53
- st.write("Upload a PDF and ask questions based on its content.")
54
-
55
- uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
56
- if uploaded_file:
57
- stored_chunks, _ = process_pdf(uploaded_file)
58
- st.success("PDF processed and embeddings created.")
59
 
60
- query = st.text_input("Ask a question:")
61
- if query:
62
- answer = query_model(query)
63
- st.write("### Answer:")
64
- st.write(answer)
 
5
  from sentence_transformers import SentenceTransformer
6
  from groq import Groq
7
  from dotenv import load_dotenv
8
+ import requests
9
+ from io import BytesIO
10
 
11
+ # Predefined Google Drive links
12
+ PDF_LINKS = [
13
+ "https://drive.google.com/uc?id=1JPf0XvDhn8QoDOlZDrxCOpu4WzKFESNz",
14
+ # Add more Google Drive links here
15
+ ]
16
 
17
  # Initialize Groq client
18
  client = Groq(api_key="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")
 
24
  dimension = 384 # Embedding size for the Sentence Transformer model
25
  index = faiss.IndexFlatL2(dimension)
26
 
27
+ # Store chunks globally
28
+ stored_chunks = []
29
+
30
+ # Function to download and extract the PDF content
31
+ def download_and_process_pdf(link):
32
+ response = requests.get(link)
33
+ if response.status_code == 200:
34
+ pdf_reader = PdfReader(BytesIO(response.content))
35
+ text = ""
36
+ for page in pdf_reader.pages:
37
+ text += page.extract_text()
38
+ chunks = [text[i:i + 500] for i in range(0, len(text), 500)] # Chunk into 500-char blocks
39
+ embeddings = model.encode(chunks)
40
+ index.add(embeddings)
41
+ stored_chunks.extend(chunks)
42
+ else:
43
+ print(f"Failed to download PDF from link: {link}")
44
+
45
+ # Process all predefined links
46
+ for link in PDF_LINKS:
47
+ download_and_process_pdf(link)
48
 
49
  # Function to query FAISS and generate a response
50
  def query_model(query):
 
67
 
68
  # Streamlit app
69
  st.title("RAG-based PDF Question Answering")
70
+ st.write("Preloaded documents from Google Drive are ready for querying.")
 
 
 
 
 
71
 
72
+ query = st.text_input("Ask a question:")
73
+ if query:
74
+ answer = query_model(query)
75
+ st.write("### Answer:")
76
+ st.write(answer)