Spaces:

hamzaherry
/

RAG-BASED-APP-QA

Sleeping

App Files Files Community

hamzaherry committed on Dec 23, 2024

Commit

472cb47

verified ·

1 Parent(s): 0de1a00

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -0

app.py CHANGED Viewed

	@@ -0,0 +1,64 @@

+import os
+import faiss
+import streamlit as st
+from PyPDF2 import PdfReader
+from sentence_transformers import SentenceTransformer
+from groq import Groq
+from dotenv import load_dotenv
+# Initialize Groq client
+client = Groq(api_key="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")
+# Load Sentence Transformer model
+model = SentenceTransformer("all-MiniLM-L6-v2")
+# Initialize FAISS
+dimension = 384  # Embedding size for the Sentence Transformer model
+index = faiss.IndexFlatL2(dimension)
+# Function to process PDF and create embeddings
+def process_pdf(pdf_file):
+    pdf_reader = PdfReader(pdf_file)
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text()
+    chunks = [text[i:i + 500] for i in range(0, len(text), 500)]  # Chunk into 500-char blocks
+    embeddings = model.encode(chunks)
+    index.add(embeddings)
+    return chunks, embeddings
+# Function to query FAISS and generate a response
+def query_model(query):
+    query_vector = model.encode([query])
+    _, indices = index.search(query_vector, k=3)  # Top 3 similar chunks
+    response_chunks = [stored_chunks[idx] for idx in indices[0]]
+    context = " ".join(response_chunks)
+    # Groq API call
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": f"Context: {context}\n\nQuery: {query}",
+            }
+        ],
+        model="llama3-8b-8192",
+    )
+    return chat_completion.choices[0].message.content
+# Streamlit app
+st.title("RAG-based PDF Question Answering")
+st.write("Upload a PDF and ask questions based on its content.")
+uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
+if uploaded_file:
+    stored_chunks, _ = process_pdf(uploaded_file)
+    st.success("PDF processed and embeddings created.")
+    query = st.text_input("Ask a question:")
+    if query:
+        answer = query_model(query)
+        st.write("### Answer:")
+        st.write(answer)