engr-awaisjamal committed on
Commit
d910f7b
·
verified ·
1 Parent(s): b8da310

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -35
app.py CHANGED
@@ -8,7 +8,8 @@ from groq import Groq
8
 
9
  # Set up Groq client
10
  client = Groq(
11
- api_key="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")
 
12
 
13
  # Streamlit app
14
  st.title("RAG-based PDF QA Application")
@@ -18,42 +19,55 @@ uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")
18
 
19
  if uploaded_file:
20
  # Step 2: Extract text from PDF
21
- pdf_reader = PdfReader(uploaded_file)
22
- text = "\n".join(page.extract_text() for page in pdf_reader.pages if page.extract_text())
23
-
24
- # Step 3: Split text into chunks
25
- text_splitter = RecursiveCharacterTextSplitter(
26
- chunk_size=1000, chunk_overlap=200
27
- )
28
- chunks = text_splitter.split_text(text)
29
-
30
- # Step 4: Generate embeddings
31
- st.text("Generating embeddings...")
32
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
33
- vector_db = FAISS.from_texts(chunks, embeddings)
34
-
35
- st.success("Embeddings generated and stored in vector database.")
36
-
37
- # Step 5: User interaction
38
- query = st.text_input("Ask a question based on the uploaded document:")
39
- if query:
40
- # Retrieve relevant chunks from vector DB
41
- docs = vector_db.similarity_search(query, k=3)
42
- context = "\n".join(doc.page_content for doc in docs)
43
-
44
- # Use Groq API for response generation
45
- chat_completion = client.chat.completions.create(
46
- messages=[
47
- {"role": "system", "content": "You are a helpful assistant."},
48
- {"role": "user", "content": query},
49
- {"role": "assistant", "content": context},
50
- ],
51
- model="llama3-8b-8192",
52
- stream=False,
53
  )
 
 
 
54
 
55
- answer = chat_completion.choices[0].message.content
56
- st.text_area("Answer:", value=answer, height=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  # Footer
59
  st.caption("Powered by Open Source Models and Groq API.")
 
 
8
 
9
  # Set up Groq client
10
  client = Groq(
11
+ api_key="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"
12
+ )
13
 
14
  # Streamlit app
15
  st.title("RAG-based PDF QA Application")
 
19
 
20
  if uploaded_file:
21
  # Step 2: Extract text from PDF
22
+ try:
23
+ pdf_reader = PdfReader(uploaded_file)
24
+ text = "\n".join(
25
+ page.extract_text() for page in pdf_reader.pages if page.extract_text()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  )
27
+ except Exception as e:
28
+ st.error(f"Failed to read PDF: {e}")
29
+ text = ""
30
 
31
+ if text:
32
+ # Step 3: Split text into chunks
33
+ text_splitter = RecursiveCharacterTextSplitter(
34
+ chunk_size=1000, chunk_overlap=200
35
+ )
36
+ chunks = text_splitter.split_text(text)
37
+
38
+ # Step 4: Generate embeddings
39
+ st.text("Generating embeddings...")
40
+ try:
41
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
42
+ vector_db = FAISS.from_texts(chunks, embeddings)
43
+ st.success("Embeddings generated and stored in vector database.")
44
+ except Exception as e:
45
+ st.error(f"Error generating embeddings: {e}")
46
+
47
+ # Step 5: User interaction
48
+ query = st.text_input("Ask a question based on the uploaded document:")
49
+ if query:
50
+ try:
51
+ # Retrieve relevant chunks from vector DB
52
+ docs = vector_db.similarity_search(query, k=3)
53
+ context = "\n".join(doc.page_content for doc in docs)
54
+
55
+ # Use Groq API for response generation
56
+ chat_completion = client.chat.completions.create(
57
+ messages=[
58
+ {"role": "system", "content": "You are a helpful assistant."},
59
+ {"role": "user", "content": query},
60
+ {"role": "assistant", "content": context},
61
+ ],
62
+ model="llama3-8b-8192",
63
+ stream=False,
64
+ )
65
+
66
+ answer = chat_completion.choices[0].message.content
67
+ st.text_area("Answer:", value=answer, height=200)
68
+ except Exception as e:
69
+ st.error(f"Error processing query: {e}")
70
 
71
  # Footer
72
  st.caption("Powered by Open Source Models and Groq API.")
73
+