Spaces:

Abdullah-Basar
/

RAG-BASED-APP

Sleeping

App Files Files Community

Abdullah-Basar committed on Dec 23, 2024

Commit

bddbd8b

verified ·

1 Parent(s): 137e603

Create app.py

Browse files

Files changed (1) hide show

app.py +80 -0

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import os
+import io
+import streamlit as st
+from dotenv import load_dotenv
+from PyPDF2 import PdfReader
+from sentence_transformers import SentenceTransformer
+import faiss
+from groq import Groq
+# Load environment variables
+load_dotenv()
+GROQ_API_KEY = "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"
+# Initialize Groq client
+client = Groq(api_key=GROQ_API_KEY)
+# Load the embedding model
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+# Streamlit UI
+st.set_page_config(page_title="RAG-Based Application", layout="wide")
+st.title("RAG-Based Application")
+st.sidebar.header("Upload Your PDF")
+uploaded_file = st.sidebar.file_uploader("Upload a PDF file", type=["pdf"])
+if uploaded_file is not None:
+    try:
+        # Extract text from PDF
+        st.write("Extracting text from the PDF...")
+        reader = PdfReader(io.BytesIO(uploaded_file.read()))
+        text = "".join([page.extract_text() for page in reader.pages])
+        if not text.strip():
+            st.error("The uploaded PDF contains no text. Please upload a valid document.")
+            st.stop()
+        # Split the text into chunks
+        st.write("Processing the PDF into chunks...")
+        chunk_size = 500
+        chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
+        # Create embeddings for the chunks
+        st.write("Creating embeddings for text chunks...")
+        embeddings = embedding_model.encode(chunks)
+        if len(embeddings.shape) == 1:
+            embeddings = embeddings.reshape(1, -1)
+        # Store embeddings in FAISS
+        st.write("Storing embeddings in FAISS...")
+        dimension = embeddings.shape[1]
+        index = faiss.IndexFlatL2(dimension)
+        index.add(embeddings)
+        st.write(f"Stored {len(chunks)} chunks in FAISS.")
+        # Ask a question
+        st.subheader("Ask a Question")
+        user_query = st.text_input("Enter your question:")
+        if user_query:
+            query_embedding = embedding_model.encode([user_query])
+            distances, indices = index.search(query_embedding, k=1)
+            best_chunk = chunks[indices[0][0]]
+            # Use Groq API to interact with the LLM
+            st.write("Interacting with the LLM...")
+            chat_completion = client.chat.completions.create(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": f"Using this context: {best_chunk}, answer the following question: {user_query}",
+                    }
+                ],
+                model="llama3-8b-8192",
+            )
+            # Display the response
+            st.subheader("LLM Response")
+            st.write(chat_completion.choices[0].message.content)
+    except Exception as e:
+        st.error(f"An error occurred: {e}")