Spaces:

Shahabmoin
/

RAG-Based-Chatbot

Sleeping

App Files Files Community

Shahabmoin committed on Dec 23, 2024

Commit

45975ce

verified ·

1 Parent(s): 3d6b54f

Create app.py

Browse files

Files changed (1) hide show

app.py +78 -0

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import os
+import tempfile
+
+import faiss
+import PyPDF2
+import streamlit as st
+from groq import Groq
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import FAISS
+# Initialize the Groq API client.
+# The key is read from the GROQ_API_KEY environment variable (on Hugging Face
+# Spaces, configure it as a secret) and must never be committed to source.
+# NOTE: an earlier revision passed the literal key as the *name* of the
+# environment variable, which both exposed it and made the lookup return None;
+# that key is public in the commit history and should be revoked.
+client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+# Function to extract text from PDF
+def extract_text_from_pdf(pdf_path):
+    """Return the concatenated text of every page in the PDF at *pdf_path*.
+
+    Args:
+        pdf_path: Filesystem path of the PDF file to read.
+
+    Returns:
+        The text of all pages joined in page order with no separator. A page
+        for which the extractor yields nothing contributes an empty string.
+    """
+    with open(pdf_path, "rb") as file:
+        reader = PyPDF2.PdfReader(file)
+        # `or ""` keeps a page with no extractable text (e.g. a scanned image,
+        # should the extractor return None for it) from raising TypeError;
+        # join avoids rebuilding the string once per page.
+        return "".join(page.extract_text() or "" for page in reader.pages)
+# Function to create chunks and embeddings using LangChain
+def process_text_with_langchain(text, chunk_size=500, chunk_overlap=50):
+    """Split *text* into overlapping chunks and index them in a FAISS store.
+
+    Args:
+        text: The full document text to index.
+        chunk_size: Maximum size of each chunk passed to the splitter
+            (defaults to the previously hard-coded 500).
+        chunk_overlap: Overlap between consecutive chunks passed to the
+            splitter (defaults to the previously hard-coded 50).
+
+    Returns:
+        A ``(vectorstore, chunks)`` tuple: the FAISS vector store built from
+        the chunks, and the list of chunk strings themselves.
+
+    Raises:
+        ValueError: If *text* produces no chunks (for example, it is empty).
+    """
+    # Split text into chunks
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size, chunk_overlap=chunk_overlap
+    )
+    chunks = text_splitter.split_text(text)
+    if not chunks:
+        # An index cannot be built from zero texts; fail here with a clear
+        # message instead of an opaque error from inside the vector store.
+        raise ValueError("No text chunks to index: the document text is empty.")
+    # Create embeddings and FAISS index
+    embeddings = HuggingFaceEmbeddings()
+    vectorstore = FAISS.from_texts(chunks, embeddings)
+    return vectorstore, chunks
+# Function to query FAISS index
+def query_faiss_index(query, vectorstore, k=3):
+    """Return the text of the *k* stored chunks most similar to *query*.
+
+    Args:
+        query: The question or search string.
+        vectorstore: An object exposing ``similarity_search(query, k=...)``
+            that returns documents with a ``page_content`` attribute.
+        k: Number of chunks to retrieve (defaults to the previously
+            hard-coded 3).
+
+    Returns:
+        A list of ``page_content`` strings, in the order the store returned them.
+    """
+    docs = vectorstore.similarity_search(query, k=k)
+    return [doc.page_content for doc in docs]
+# Function to interact with Groq LLM
+def ask_groq(query):
+    """Send *query* to the Groq chat API as one user message and return the reply.
+
+    Args:
+        query: The full prompt text to send as the user message.
+
+    Returns:
+        The ``content`` of the first choice's message in the completion.
+    """
+    user_message = {"role": "user", "content": query}
+    completion = client.chat.completions.create(
+        messages=[user_message],
+        model="llama3-8b-8192",
+        stream=False,  # request the whole answer in a single response
+    )
+    first_choice = completion.choices[0]
+    return first_choice.message.content
+# Streamlit app
+# Flow: upload a PDF -> extract its text -> chunk and index it -> answer
+# questions by retrieving the most similar chunks and passing them to the LLM.
+st.title("RAG-Based Chatbot")
+uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
+if uploaded_file is not None:
+    # Streamlit re-executes this script on every interaction (including each
+    # submitted question), so the index is kept in session_state and rebuilt
+    # only when a different file is uploaded.
+    file_key = (uploaded_file.name, uploaded_file.size)
+    if st.session_state.get("indexed_file_key") != file_key:
+        st.info("Processing the PDF...")
+        # A unique temp file avoids concurrent sessions overwriting one shared
+        # "uploaded_file.pdf"; getvalue() returns the whole upload regardless
+        # of the current stream position.
+        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
+            tmp.write(uploaded_file.getvalue())
+            pdf_path = tmp.name
+        try:
+            text = extract_text_from_pdf(pdf_path)
+        finally:
+            # Remove the temp copy whether or not extraction succeeded.
+            os.remove(pdf_path)
+        if not text.strip():
+            # Nothing to index (e.g. a scanned, image-only PDF).
+            st.error("No extractable text was found in this PDF.")
+            st.stop()
+        vectorstore, chunks = process_text_with_langchain(text)
+        st.session_state["vectorstore"] = vectorstore
+        st.session_state["indexed_file_key"] = file_key
+    vectorstore = st.session_state["vectorstore"]
+    st.success("PDF processed and indexed successfully!")
+    query = st.text_input("Ask a question about the document")
+    if query:
+        st.info("Searching relevant chunks...")
+        relevant_chunks = query_faiss_index(query, vectorstore)
+        context = "\n".join(relevant_chunks)
+        st.info("Getting response from the language model...")
+        response = ask_groq(f"Context: {context}\n\nQuestion: {query}")
+        st.success(response)