Spaces:

hamzaherry
/

RAG-BASED-APP-QA

Sleeping

App Files Files Community

RAG-BASED-APP-QA / app.py

hamzaherry

Update app.py

b5f9e6b verified 7 months ago

raw

history blame contribute delete

2.31 kB

	import os
	import faiss
	import streamlit as st
	from PyPDF2 import PdfReader
	from sentence_transformers import SentenceTransformer
	from groq import Groq
	from dotenv import load_dotenv
	import requests
	from io import BytesIO

	# Direct-download Google Drive URLs of the PDFs that are indexed at start-up.
	PDF_LINKS = [
	    "https://drive.google.com/uc?id=1JPf0XvDhn8QoDOlZDrxCOpu4WzKFESNz",
	    # Add more Google Drive links here
	]

	# Initialize Groq client.
	# The API key is read from the environment (optionally populated from a
	# local .env file by load_dotenv) instead of being committed to source
	# control. Any key that was ever published in this file must be treated as
	# compromised and revoked.
	# NOTE(review): with GROQ_API_KEY unset this passes None; the Groq
	# constructor is expected to reject a missing key -- confirm.
	load_dotenv()
	client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

	# Load Sentence Transformer model
	model = SentenceTransformer("all-MiniLM-L6-v2")

	# Initialize FAISS with the embedding width reported by the model itself, so
	# the index cannot silently go out of sync if the model name above changes
	# (the original hard-coded 384 for all-MiniLM-L6-v2).
	dimension = model.get_sentence_embedding_dimension()
	index = faiss.IndexFlatL2(dimension)

	# Chunk texts, kept in insertion order: chunks are appended here right after
	# their vectors are added to `index`, so position i matches FAISS id i.
	stored_chunks = []

	# Function to download and extract the PDF content
	def download_and_process_pdf(link, chunk_size=500):
	    """Download one PDF, chunk its text and add the chunks to the index.

	    The extracted text is cut into consecutive ``chunk_size``-character
	    blocks; each block is embedded with ``model``, added to ``index`` and
	    appended to ``stored_chunks`` in the same order.

	    Args:
	        link: URL the PDF is fetched from with an HTTP GET.
	        chunk_size: Number of characters per chunk (must be positive).

	    Returns:
	        None. Download failures and PDFs without extractable text are
	        reported with ``print`` and leave the index untouched.
	    """
	    try:
	        # Without a timeout a stalled connection would block start-up forever.
	        response = requests.get(link, timeout=30)
	    except requests.RequestException as exc:
	        print(f"Failed to download PDF from link: {link} ({exc})")
	        return
	    if response.status_code != 200:
	        print(f"Failed to download PDF from link: {link}")
	        return

	    pdf_reader = PdfReader(BytesIO(response.content))
	    # `or ""` guards against pages for which no text is returned; join avoids
	    # repeated string concatenation on large documents.
	    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
	    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
	    if not chunks:
	        # Nothing to embed (e.g. an image-only PDF); skip instead of handing
	        # an empty batch to the encoder and the index.
	        print(f"No extractable text in PDF from link: {link}")
	        return

	    embeddings = model.encode(chunks)
	    index.add(embeddings)
	    # Extend only after index.add succeeded so ids and chunks stay aligned.
	    stored_chunks.extend(chunks)

	# Build the index at start-up: fetch and embed each configured PDF in turn.
	for link in PDF_LINKS:
	    download_and_process_pdf(link)

	# Function to query FAISS and generate a response
	def query_model(query, top_k=3):
	    """Answer ``query`` using the most similar stored chunks as context.

	    The query is embedded with ``model``, the nearest chunks are looked up
	    in ``index`` and joined into a context string, and that context plus
	    the query are sent to the Groq chat completion API.

	    Args:
	        query: The user's question.
	        top_k: Maximum number of chunks to retrieve as context.

	    Returns:
	        The text of the first completion choice, or a fixed notice when
	        no document content has been indexed.
	    """
	    if index.ntotal == 0:
	        # Nothing was indexed (e.g. every download failed); there is no
	        # context to retrieve, so do not index into an empty chunk list.
	        return "No document content is available to answer this question."

	    query_vector = model.encode([query])
	    # Never ask for more neighbours than the index holds.
	    _, indices = index.search(query_vector, k=min(top_k, index.ntotal))
	    # FAISS reports missing neighbours as -1, which as a Python list index
	    # would silently pick the LAST chunk; keep only valid positions.
	    response_chunks = [
	        stored_chunks[idx]
	        for idx in indices[0]
	        if 0 <= idx < len(stored_chunks)
	    ]
	    context = " ".join(response_chunks)

	    # Groq API call
	    chat_completion = client.chat.completions.create(
	        messages=[
	            {
	                "role": "user",
	                "content": f"Context: {context}\n\nQuery: {query}",
	            }
	        ],
	        model="llama3-8b-8192",
	    )
	    return chat_completion.choices[0].message.content

	# Streamlit app
	st.title("RAG-based PDF Question Answering")
	st.write("Preloaded documents from Google Drive are ready for querying.")

	query = st.text_input("Ask a question:")
	# Ignore empty and whitespace-only input so a stray space does not trigger
	# an embedding pass and a remote LLM call.
	if query and query.strip():
	    answer = query_model(query.strip())
	    st.write("### Answer:")
	    st.write(answer)