Spaces:

hamzaherry
/

RAG-BASED-APP-QA

Sleeping

App Files Files Community

RAG-BASED-APP-QA / app.py

hamzaherry

Update app.py

472cb47 verified 6 months ago

raw

history blame

1.98 kB

	import os
	import faiss
	import streamlit as st
	from PyPDF2 import PdfReader
	from sentence_transformers import SentenceTransformer
	from groq import Groq
	from dotenv import load_dotenv



	# Load variables from a local .env file (if one exists) so that secrets can
	# live outside the source tree.
	load_dotenv()

	# Initialize Groq client.
	# The API key is read from the GROQ_API_KEY environment variable instead of
	# being hard-coded: a key committed to a public repository is exposed to
	# everyone and must be treated as compromised (revoke and rotate it).
	client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

	# Load Sentence Transformer model
	model = SentenceTransformer("all-MiniLM-L6-v2")

	# Initialize FAISS
	# Ask the model for its embedding size rather than repeating a magic number,
	# so the index stays correct if the model name above is changed.
	dimension = model.get_sentence_embedding_dimension()
	index = faiss.IndexFlatL2(dimension)

	# Function to process PDF and create embeddings
	def process_pdf(pdf_file, chunk_size=500):
	    """Extract a PDF's text, split it into fixed-size chunks, and index them.

	    Args:
	        pdf_file: The PDF source handed to ``PdfReader`` (the app passes the
	            Streamlit upload object).
	        chunk_size: Number of characters per chunk. Defaults to 500.

	    Returns:
	        A ``(chunks, embeddings)`` tuple: the list of text chunks and the
	        result of ``model.encode(chunks)``. If no text could be extracted,
	        both elements are empty lists and the index is left untouched.
	    """
	    pdf_reader = PdfReader(pdf_file)
	    # `or ""` guards against pages that yield no text (empty or None), and
	    # join avoids repeated string concatenation in a loop.
	    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
	    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
	    if not chunks:
	        # Nothing to embed; do not hand an empty batch to the encoder/index.
	        return [], []
	    embeddings = model.encode(chunks)
	    # Vectors are appended to the shared module-level index.
	    index.add(embeddings)
	    return chunks, embeddings

	# Function to query FAISS and generate a response
	def query_model(query, top_k=3):
	    """Answer a question using the most similar indexed chunks as context.

	    Args:
	        query: The user's question.
	        top_k: Number of nearest chunks to retrieve. Defaults to 3.

	    Returns:
	        The message content of the first choice in the Groq chat completion.
	    """
	    query_vector = model.encode([query])
	    _, indices = index.search(query_vector, k=top_k)
	    # FAISS fills missing neighbours with -1 when the index holds fewer than
	    # top_k vectors; unfiltered, -1 would silently pick the last chunk again.
	    response_chunks = [
	        stored_chunks[idx]
	        for idx in indices[0]
	        if 0 <= idx < len(stored_chunks)
	    ]
	    context = " ".join(response_chunks)

	    # Groq API call
	    chat_completion = client.chat.completions.create(
	        messages=[
	            {
	                "role": "user",
	                "content": f"Context: {context}\n\nQuery: {query}",
	            }
	        ],
	        model="llama3-8b-8192",
	    )
	    return chat_completion.choices[0].message.content

	# Streamlit app
	st.title("RAG-based PDF Question Answering")
	st.write("Upload a PDF and ask questions based on its content.")

	uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
	if uploaded_file:
	    stored_chunks, _ = process_pdf(uploaded_file)
	    if stored_chunks:
	        st.success("PDF processed and embeddings created.")

	        # The question box lives inside the upload branch: query_model reads
	        # stored_chunks, which only exists once a PDF has been processed.
	        query = st.text_input("Ask a question:")
	        if query:
	            answer = query_model(query)
	            st.write("### Answer:")
	            st.write(answer)
	    else:
	        # No chunks means there is nothing to retrieve answers from.
	        st.warning("No extractable text was found in this PDF.")