shah1zil committed on
Commit
2c73555
·
verified ·
1 Parent(s): c773831

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""RAG-based PDF query app: upload a PDF, index its text, and query it via Groq."""

# Standard library
import os
import pickle

# Third-party
import fitz  # PyMuPDF
import streamlit as st
from groq import Groq
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Page header
st.title("RAG-based PDF Query App")
st.write("Upload a PDF, extract its content, and query it using Groq API.")

# Upload widget — yields None until the user selects a file
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
17
if uploaded_file is not None:

    def extract_text_from_pdf(uploaded_file):
        """Return the concatenated text of every page of the uploaded PDF.

        The Streamlit UploadedFile is read fully into memory and parsed with
        PyMuPDF; the document handle is closed explicitly to release resources.
        """
        doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
        try:
            return "".join(page.get_text() for page in doc)
        finally:
            doc.close()  # don't leak the PyMuPDF document across reruns

    pdf_text = extract_text_from_pdf(uploaded_file)
    st.success("PDF uploaded and extracted successfully!")

    def chunk_text(text, chunk_size=500, overlap=50):
        """Split *text* into overlapping chunks suitable for embedding."""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=overlap
        )
        return text_splitter.split_text(text)

    chunks = chunk_text(pdf_text)

    # Embed the chunks and build an in-memory FAISS index.
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vector_store = FAISS.from_texts(chunks, embedding_model)

    # Persist the index so the search branch below can reload it.
    with open("faiss_index.pkl", "wb") as f:
        pickle.dump(vector_store, f)

    st.success("Document processed and stored in vector database!")

    # Query section
    query = st.text_input("Enter your query:")

    if st.button("Search"):
        if query:
            # NOTE(review): unpickling is acceptable only because this file was
            # written by this same script above; never pickle.load untrusted data.
            with open("faiss_index.pkl", "rb") as f:
                vector_store = pickle.load(f)

            # Retrieve the 3 most similar chunks as context for the LLM.
            docs = vector_store.similarity_search(query, k=3)
            context = "\n".join(doc.page_content for doc in docs)

            # BUG FIX: the original referenced an undefined name GROQ_API_KEY,
            # raising NameError on every search. Read the key from the
            # environment (the unused `os` import confirms this intent) and
            # fail with a clear message instead of a traceback.
            groq_api_key = os.environ.get("GROQ_API_KEY")
            if not groq_api_key:
                st.error("GROQ_API_KEY environment variable is not set.")
                st.stop()

            client = Groq(api_key=groq_api_key)

            response = client.chat.completions.create(
                messages=[{"role": "user", "content": context + "\n\n" + query}],
                model="llama-3.3-70b-versatile",
            )

            st.subheader("Response:")
            st.write(response.choices[0].message.content)
        else:
            st.warning("Please enter a query to search.")