Spaces:

engr-awaisjamal
/

RAG-based-PDF-QA-Application

Sleeping

App Files Files Community

engr-awaisjamal committed on Dec 28, 2024

Commit

b22814b

verified ·

1 Parent(s): 7ec8da4

Create app.py

Browse files

Files changed (1) hide show

app.py +60 -0

app.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import os
+import streamlit as st
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from groq import Groq
+# Set up Groq client
+client = Groq(
+    api_key="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"),
+)
+# Streamlit app
+st.title("RAG-based PDF QA Application")
+# Step 1: Upload PDF document
+uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")
+if uploaded_file:
+    # Step 2: Extract text from PDF
+    pdf_reader = PdfReader(uploaded_file)
+    text = "\n".join(page.extract_text() for page in pdf_reader.pages if page.extract_text())
+    # Step 3: Split text into chunks
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000, chunk_overlap=200
+    )
+    chunks = text_splitter.split_text(text)
+    # Step 4: Generate embeddings
+    st.text("Generating embeddings...")
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    vector_db = FAISS.from_texts(chunks, embeddings)
+    st.success("Embeddings generated and stored in vector database.")
+    # Step 5: User interaction
+    query = st.text_input("Ask a question based on the uploaded document:")
+    if query:
+        # Retrieve relevant chunks from vector DB
+        docs = vector_db.similarity_search(query, k=3)
+        context = "\n".join(doc.page_content for doc in docs)
+        # Use Groq API for response generation
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": query},
+                {"role": "assistant", "content": context},
+            ],
+            model="llama3-8b-8192",
+            stream=False,
+        )
+        answer = chat_completion.choices[0].message.content
+        st.text_area("Answer:", value=answer, height=200)
+# Footer
+st.caption("Powered by Open Source Models and Groq API.")