# NOTE: the lines below replaced non-Python scrape residue (Hugging Face Space
# page header, file-size note, commit hash 45975ce, and a line-number gutter)
# that would otherwise be a syntax error at the top of this file.
import os
import PyPDF2
import faiss
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from groq import Groq
# Initialize the Groq API client.
# SECURITY/BUG FIX: the original passed a literal API key as the *name* looked
# up in os.environ, so environ.get() always returned None (and the secret was
# committed to source). Read the key from the GROQ_API_KEY environment variable
# instead; rotate the previously leaked key.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def extract_text_from_pdf(pdf_path):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        A single string with the text of all pages concatenated.
        Pages with no extractable text contribute an empty string.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # extract_text() may return None (e.g. image-only pages); guard with
        # `or ""` so concatenation never raises TypeError. Joining once is
        # also linear instead of quadratic string +=.
        return "".join((page.extract_text() or "") for page in reader.pages)
def process_text_with_langchain(text):
    """Chunk raw text and index the chunks in an in-memory FAISS store.

    Args:
        text: The full document text to index.

    Returns:
        A tuple ``(vectorstore, chunks)``: the FAISS vector store built over
        the chunks, and the list of chunk strings themselves.
    """
    # 500-character chunks with a 50-character overlap keep neighboring
    # context shared between adjacent chunks.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_text(text)
    vectorstore = FAISS.from_texts(chunks, HuggingFaceEmbeddings())
    return vectorstore, chunks
def query_faiss_index(query, vectorstore, k=3):
    """Return the text of the chunks most similar to *query*.

    Args:
        query: Natural-language search string.
        vectorstore: An object exposing ``similarity_search(query, k=...)``
            returning documents with a ``page_content`` attribute.
        k: Number of top matches to return (default 3, matching the
            original hard-coded behavior).

    Returns:
        List of the ``page_content`` strings of the top-``k`` matches.
    """
    docs = vectorstore.similarity_search(query, k=k)
    return [doc.page_content for doc in docs]
def ask_groq(query):
    """Send a single-turn user prompt to the Groq LLM and return its reply.

    Args:
        query: The full prompt text (context plus question).

    Returns:
        The assistant's reply content as a string.
    """
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        stream=False,
        messages=[{"role": "user", "content": query}],
    )
    return response.choices[0].message.content
# --- Streamlit app ---------------------------------------------------------
# FIX: the scraped source lost all indentation; the nesting below is
# reconstructed so that querying only happens after a PDF has been uploaded
# and indexed (`vectorstore` is bound only inside the upload branch).
st.title("RAG-Based Chatbot")

uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
if uploaded_file is not None:
    # Persist the upload to disk so PyPDF2 can open it by path.
    with open("uploaded_file.pdf", "wb") as f:
        f.write(uploaded_file.read())
    st.info("Processing the PDF...")
    text = extract_text_from_pdf("uploaded_file.pdf")
    vectorstore, chunks = process_text_with_langchain(text)
    st.success("PDF processed and indexed successfully!")

    query = st.text_input("Ask a question about the document")
    if query:
        st.info("Searching relevant chunks...")
        relevant_chunks = query_faiss_index(query, vectorstore)
        # Retrieved chunks become the grounding context for the LLM prompt.
        context = "\n".join(relevant_chunks)
        st.info("Getting response from the language model...")
        response = ask_groq(f"Context: {context}\n\nQuestion: {query}")
        st.success(response)