Abdullah-Basar committed on
Commit
bddbd8b
·
verified ·
1 Parent(s): 137e603

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""RAG-based PDF question-answering app: Streamlit UI + FAISS retrieval + Groq LLM."""
import os
import io

import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
from groq import Groq

# Load environment variables from a local .env file, if present.
load_dotenv()

# SECURITY FIX: the API key was hard-coded in source (and therefore leaked in
# version control). Read it from the environment instead; set GROQ_API_KEY in
# .env or the deployment environment. The previously committed key must be
# revoked/rotated on the Groq dashboard.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Initialize the Groq client. If GROQ_API_KEY is unset this will fail at
# request time with a clear authentication error rather than silently using
# a leaked credential.
client = Groq(api_key=GROQ_API_KEY)

# Load the sentence-embedding model used for both chunk and query embeddings.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
19
+
20
# Streamlit UI
st.set_page_config(page_title="RAG-Based Application", layout="wide")
st.title("RAG-Based Application")
st.sidebar.header("Upload Your PDF")

uploaded_file = st.sidebar.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    try:
        # Extract text from the PDF. PdfReader accepts a file-like object.
        st.write("Extracting text from the PDF...")
        reader = PdfReader(io.BytesIO(uploaded_file.read()))
        # BUG FIX: extract_text() can return None for image-only/scanned
        # pages, which would make "".join(...) raise TypeError. Coerce each
        # page's result to "" before joining.
        text = "".join([(page.extract_text() or "") for page in reader.pages])

        if not text.strip():
            st.error("The uploaded PDF contains no text. Please upload a valid document.")
            st.stop()

        # Split the text into fixed-size character chunks for retrieval.
        st.write("Processing the PDF into chunks...")
        chunk_size = 500
        chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

        # Embed every chunk; ensure a 2-D array even for a single chunk,
        # since FAISS expects shape (n_vectors, dimension).
        st.write("Creating embeddings for text chunks...")
        embeddings = embedding_model.encode(chunks)
        if len(embeddings.shape) == 1:
            embeddings = embeddings.reshape(1, -1)

        # Build an exact L2 (flat) FAISS index over the chunk embeddings.
        st.write("Storing embeddings in FAISS...")
        dimension = embeddings.shape[1]
        index = faiss.IndexFlatL2(dimension)
        index.add(embeddings)
        st.write(f"Stored {len(chunks)} chunks in FAISS.")

        # Question-answering loop: retrieve the single nearest chunk and
        # hand it to the LLM as context.
        st.subheader("Ask a Question")
        user_query = st.text_input("Enter your question:")
        if user_query:
            query_embedding = embedding_model.encode([user_query])
            distances, indices = index.search(query_embedding, k=1)
            best_chunk = chunks[indices[0][0]]

            # Use the Groq API to answer the question from the retrieved context.
            st.write("Interacting with the LLM...")
            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": f"Using this context: {best_chunk}, answer the following question: {user_query}",
                    }
                ],
                model="llama3-8b-8192",
            )

            # Display the response
            st.subheader("LLM Response")
            st.write(chat_completion.choices[0].message.content)
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user instead of
        # crashing the Streamlit app.
        st.error(f"An error occurred: {e}")