Spaces:

engr-awaisjamal
/

RAG-based-PDF-QA-Application

Sleeping

App Files Files Community

RAG-based-PDF-QA-Application / app.py

engr-awaisjamal

Update app.py

d910f7b verified 3 months ago

raw

history blame contribute delete

2.6 kB

	import os
	import streamlit as st
	from PyPDF2 import PdfReader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import FAISS
	from groq import Groq

	# Set up Groq client.
	# The API key is read from the GROQ_API_KEY environment variable so the
	# secret never lives in the source file.
	# SECURITY: a key was previously hard-coded at this spot and published with
	# the file; treat it as compromised and revoke/rotate it.
	_groq_api_key = os.environ.get("GROQ_API_KEY")
	if not _groq_api_key:
	    # Fail early with a readable message instead of an opaque client error
	    # on the first question.
	    st.error("GROQ_API_KEY environment variable is not set.")
	    st.stop()
	client = Groq(api_key=_groq_api_key)

	# Streamlit app
	st.title("RAG-based PDF QA Application")


	def _extract_pdf_text(pdf_file):
	    """Return the text of every page of *pdf_file*, joined by newlines.

	    Pages for which ``extract_text()`` yields nothing are skipped.
	    """
	    reader = PdfReader(pdf_file)
	    # Extract each page exactly once; the result is reused for both the
	    # emptiness check and the join.
	    page_texts = (page.extract_text() for page in reader.pages)
	    return "\n".join(page_text for page_text in page_texts if page_text)


	def _build_vector_db(text):
	    """Split *text* into overlapping chunks and index them in a FAISS store."""
	    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
	    chunks = splitter.split_text(text)
	    embeddings = HuggingFaceEmbeddings(
	        model_name="sentence-transformers/all-MiniLM-L6-v2"
	    )
	    return FAISS.from_texts(chunks, embeddings)


	def _answer_question(vector_db, question):
	    """Answer *question* using the 3 chunks of *vector_db* most similar to it."""
	    docs = vector_db.similarity_search(question, k=3)
	    context = "\n".join(doc.page_content for doc in docs)

	    # The retrieved context is sent together with the question in the user
	    # turn. Sending it as an "assistant" message after the question would
	    # present it to the model as its own previous reply.
	    prompt = (
	        "Answer the question using the context below.\n\n"
	        f"Context:\n{context}\n\n"
	        f"Question: {question}"
	    )
	    chat_completion = client.chat.completions.create(
	        messages=[
	            {"role": "system", "content": "You are a helpful assistant."},
	            {"role": "user", "content": prompt},
	        ],
	        model="llama3-8b-8192",
	        stream=False,
	    )
	    return chat_completion.choices[0].message.content


	# Step 1: Upload PDF document
	uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

	if uploaded_file:
	    # Step 2: Extract text from PDF
	    try:
	        text = _extract_pdf_text(uploaded_file)
	    except Exception as e:
	        st.error(f"Failed to read PDF: {e}")
	        text = ""
	    else:
	        if not text:
	            # e.g. a scanned/image-only PDF: tell the user why nothing happens.
	            st.warning("No extractable text was found in this PDF.")

	    if text:
	        # Steps 3-4: Split text into chunks and generate embeddings
	        # NOTE(review): Streamlit re-runs the script on every interaction, so
	        # the index is rebuilt for each question; consider caching it.
	        st.text("Generating embeddings...")
	        try:
	            vector_db = _build_vector_db(text)
	            st.success("Embeddings generated and stored in vector database.")
	        except Exception as e:
	            st.error(f"Error generating embeddings: {e}")
	            # Keep the name bound so the question step can be skipped cleanly
	            # instead of failing later with a NameError.
	            vector_db = None

	        # Step 5: User interaction (only when an index is available)
	        if vector_db is not None:
	            query = st.text_input("Ask a question based on the uploaded document:")
	            if query:
	                try:
	                    answer = _answer_question(vector_db, query)
	                    st.text_area("Answer:", value=answer, height=200)
	                except Exception as e:
	                    st.error(f"Error processing query: {e}")

	# Footer
	st.caption("Powered by Open Source Models and Groq API.")