Spaces:
Sleeping
Sleeping
import os | |
import faiss | |
import streamlit as st | |
from PyPDF2 import PdfReader | |
from sentence_transformers import SentenceTransformer | |
from groq import Groq | |
from dotenv import load_dotenv | |
# Initialize Groq client.
# The API key is read from the GROQ_API_KEY environment variable (optionally
# populated from a local .env file by load_dotenv) instead of being
# hard-coded, so the secret never lives in source control.
# NOTE: any key that was previously committed in this file should be treated
# as compromised and revoked.
load_dotenv()
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load Sentence Transformer model used for both chunk and query embeddings.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Initialize FAISS: a flat (exact) L2-distance index over the embeddings.
dimension = 384  # Embedding size for the Sentence Transformer model
index = faiss.IndexFlatL2(dimension)
# Function to process PDF and create embeddings
def process_pdf(pdf_file, chunk_size=500):
    """Extract a PDF's text, split it into chunks, embed them and index them.

    Args:
        pdf_file: Anything ``PdfReader`` accepts (here: the Streamlit upload).
        chunk_size: Number of characters per chunk; must be positive.
            Defaults to 500, the original fixed block size.

    Returns:
        A ``(chunks, embeddings)`` tuple: the list of text chunks and the
        embeddings produced by ``model.encode`` for them, in the same order.

    Raises:
        ValueError: If no text could be extracted from the PDF, since there
            would be nothing to embed or search.

    Side effects:
        Adds the embeddings to the module-level FAISS ``index``.
    """
    pdf_reader = PdfReader(pdf_file)
    # Guard against pages that yield no text (None or ""), e.g. scanned
    # images, and join once instead of repeated string concatenation.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    if not text:
        # Without this check the failure surfaces later inside index.add
        # as an opaque shape error on an empty embedding array.
        raise ValueError("No extractable text found in the PDF.")

    # Fixed-width, non-overlapping character windows.
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    embeddings = model.encode(chunks)
    index.add(embeddings)
    return chunks, embeddings
# Function to query FAISS and generate a response
def query_model(query, k=3):
    """Answer a question using the most similar indexed chunks as context.

    Args:
        query: The user's question as a string.
        k: Maximum number of nearest chunks to retrieve (default 3).

    Returns:
        The text content of the first choice returned by the Groq chat
        completion.

    Notes:
        Reads the module-level ``stored_chunks`` list, which must line up
        positionally with the vectors stored in ``index``.
    """
    query_vector = model.encode([query])
    _, indices = index.search(query_vector, k=k)  # Top-k similar chunks

    # FAISS pads the result with -1 when fewer than k vectors are indexed.
    # Used as a list index, -1 would silently select the last chunk, so
    # drop any id that does not refer to a real stored chunk.
    response_chunks = [
        stored_chunks[idx]
        for idx in indices[0]
        if 0 <= idx < len(stored_chunks)
    ]
    context = " ".join(response_chunks)

    # Groq API call
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"Context: {context}\n\nQuery: {query}",
            }
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content
# Streamlit app: page header, PDF upload, then the question/answer flow.
st.title("RAG-based PDF Question Answering")
st.write("Upload a PDF and ask questions based on its content.")

uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])

if uploaded_file:
    # Only the chunk list is kept; query_model reads it through the
    # module-level name stored_chunks.
    stored_chunks = process_pdf(uploaded_file)[0]
    st.success("PDF processed and embeddings created.")

    # The question box only appears once a PDF has been processed.
    query = st.text_input("Ask a question:")
    if query:
        # Compute the answer first so the heading is shown only on success.
        answer = query_model(query)
        st.write("### Answer:")
        st.write(answer)