# engr-awaisjamal's picture
# Update app.py
# d910f7b verified
import os
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from groq import Groq
# Set up Groq client
# SECURITY: the API key must never be committed to source control. It is read
# from the GROQ_API_KEY environment variable instead. Any key that was
# previously hard-coded in this file should be treated as leaked and revoked.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    # Fail early with an actionable message rather than a stack trace later.
    st.error(
        "GROQ_API_KEY is not set. Define it as an environment variable "
        "(or deployment secret) and reload the app."
    )
    st.stop()
client = Groq(api_key=_groq_api_key)
# Streamlit app
st.title("RAG-based PDF QA Application")


def _extract_pdf_text(pdf_file):
    """Return the text of all pages of *pdf_file*, joined by newlines.

    Pages that yield no text are skipped. Each page is extracted exactly once.
    Any exception raised by PyPDF2 propagates to the caller.
    """
    page_texts = (page.extract_text() for page in PdfReader(pdf_file).pages)
    return "\n".join(page_text for page_text in page_texts if page_text)


def _build_vector_db(text):
    """Split *text* into overlapping chunks and index them in a FAISS store.

    Uses 1000-character chunks with 200 characters of overlap and the
    ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(text)
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_texts(chunks, embeddings)


def _answer_query(vector_db, query):
    """Answer *query* with the Groq chat model, grounded in retrieved chunks.

    The three chunks most similar to the query are retrieved from *vector_db*
    and supplied to the model inside the user prompt. Passing them as a
    trailing ``assistant`` message would present the context as something the
    model itself had already said instead of as reference material.
    """
    docs = vector_db.similarity_search(query, k=3)
    context = "\n".join(doc.page_content for doc in docs)
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant. Answer the user's question "
                    "using only the provided context. If the context does not "
                    "contain the answer, say so."
                ),
            },
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {query}",
            },
        ],
        model="llama3-8b-8192",
        stream=False,
    )
    return chat_completion.choices[0].message.content


# NOTE(review): Streamlit re-executes this script on every widget interaction,
# so the index below is rebuilt each time a question is submitted. Consider
# caching it if the installed Streamlit version supports that — TODO confirm.

# Step 1: Upload PDF document
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")
if uploaded_file:
    # Step 2: Extract text from PDF
    try:
        text = _extract_pdf_text(uploaded_file)
    except Exception as e:
        st.error(f"Failed to read PDF: {e}")
        text = ""
    else:
        if not text.strip():
            # Tell the user why nothing else appears instead of failing silently.
            st.warning("No extractable text was found in this PDF.")

    if text.strip():
        # Steps 3 and 4: Split text into chunks and generate embeddings
        st.text("Generating embeddings...")
        try:
            vector_db = _build_vector_db(text)
        except Exception as e:
            st.error(f"Error generating embeddings: {e}")
            vector_db = None
        else:
            st.success("Embeddings generated and stored in vector database.")

        # Step 5: User interaction (only offered when the index actually exists)
        if vector_db is not None:
            query = st.text_input("Ask a question based on the uploaded document:")
            if query:
                try:
                    answer = _answer_query(vector_db, query)
                except Exception as e:
                    st.error(f"Error processing query: {e}")
                else:
                    st.text_area("Answer:", value=answer, height=200)

# Footer
st.caption("Powered by Open Source Models and Groq API.")