|
import hashlib
import os

import streamlit as st
from groq import Groq
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
|
|
|
|
|
# Groq API client shared by the rest of the script.
#
# The API key is read from the GROQ_API_KEY environment variable so that no
# secret lives in source control. Any key that was ever committed to this file
# must be treated as leaked and revoked in the Groq console.
# NOTE(review): when the variable is unset this passes api_key=None; the Groq
# SDK is expected to raise a descriptive error at construction time - confirm.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)
|
|
|
|
|
# Sentence-transformers model used to embed the document chunks.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# st.session_state key under which the (file digest, vector store) pair is kept.
_INDEX_STATE_KEY = "rag_index"


def _extract_pdf_text(pdf_file) -> str:
    """Return the text of every page of *pdf_file*, joined by newlines.

    Pages for which ``extract_text()`` returns nothing are skipped. Each page
    is extracted exactly once.
    """
    reader = PdfReader(pdf_file)
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(page_text for page_text in page_texts if page_text)


def _build_vector_db(text: str) -> FAISS:
    """Split *text* into overlapping chunks and index them in a FAISS store."""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    )
    chunks = text_splitter.split_text(text)
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    return FAISS.from_texts(chunks, embeddings)


def _answer_question(vector_db: FAISS, query: str) -> str:
    """Answer *query* with the Groq chat model, grounded in retrieved chunks.

    The three chunks most similar to the query are fetched from *vector_db*
    and sent together with the question in the user message, so the model
    treats them as reference material rather than as its own earlier reply.
    """
    docs = vector_db.similarity_search(query, k=3)
    context = "\n".join(doc.page_content for doc in docs)

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant. Answer the question using "
                    "only the provided context. If the context does not "
                    "contain the answer, say so."
                ),
            },
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {query}",
            },
        ],
        model="llama3-8b-8192",
        stream=False,
    )
    return chat_completion.choices[0].message.content


st.title("RAG-based PDF QA Application")

uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

if uploaded_file:
    # Streamlit re-runs the whole script on every widget interaction. Keying
    # the index by a digest of the file content lets a rerun triggered by
    # typing a question reuse the existing index instead of re-parsing the
    # PDF and re-embedding every chunk.
    file_digest = hashlib.sha256(uploaded_file.getvalue()).hexdigest()
    cached_digest, vector_db = st.session_state.get(
        _INDEX_STATE_KEY, (None, None)
    )

    if cached_digest != file_digest:
        vector_db = None

        try:
            text = _extract_pdf_text(uploaded_file)
        except Exception as e:
            st.error(f"Failed to read PDF: {e}")
            text = None  # None marks "read failed", "" marks "no text found"

        if text:
            st.text("Generating embeddings...")
            try:
                vector_db = _build_vector_db(text)
            except Exception as e:
                st.error(f"Error generating embeddings: {e}")
            else:
                # Cache only a successfully built index so failures are
                # retried on the next rerun.
                st.session_state[_INDEX_STATE_KEY] = (file_digest, vector_db)
                st.success("Embeddings generated and stored in vector database.")
        elif text is not None:
            st.warning("No extractable text was found in the uploaded PDF.")

    # Only offer the question box once an index actually exists; otherwise a
    # query would fail on a missing vector store.
    if vector_db is not None:
        query = st.text_input("Ask a question based on the uploaded document:")
        if query:
            try:
                answer = _answer_question(vector_db, query)
                st.text_area("Answer:", value=answer, height=200)
            except Exception as e:
                st.error(f"Error processing query: {e}")

st.caption("Powered by Open Source Models and Groq API.")
|
|
|
|