# NOTE: the lines below replaced non-Python scrape residue (Hugging Face Space
# page header, file-size note, commit hash 45975ce, and a line-number gutter)
# that would otherwise be a syntax error at the top of this file.
import os
import PyPDF2
import faiss
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from groq import Groq
# Initialize the Groq API client.
# SECURITY/BUG FIX: the original passed a literal API key as the *name* looked
# up in os.environ, so environ.get() always returned None (and the secret was
# committed to source). Read the key from the GROQ_API_KEY environment variable
# instead; rotate the previously leaked key.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def extract_text_from_pdf(pdf_path):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        A single string with the text of all pages concatenated.
        Pages with no extractable text contribute an empty string.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # extract_text() may return None (e.g. image-only pages); guard with
        # `or ""` so concatenation never raises TypeError. Joining once is
        # also linear instead of quadratic string +=.
        return "".join((page.extract_text() or "") for page in reader.pages)
def process_text_with_langchain(text):
    """Chunk raw text and index the chunks in an in-memory FAISS store.

    Args:
        text: The full document text to index.

    Returns:
        A tuple ``(vectorstore, chunks)``: the FAISS vector store built over
        the chunks, and the list of chunk strings themselves.
    """
    # 500-character chunks with a 50-character overlap keep neighboring
    # context shared between adjacent chunks.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_text(text)
    vectorstore = FAISS.from_texts(chunks, HuggingFaceEmbeddings())
    return vectorstore, chunks
def query_faiss_index(query, vectorstore, k=3):
    """Return the text of the chunks most similar to *query*.

    Args:
        query: Natural-language search string.
        vectorstore: An object exposing ``similarity_search(query, k=...)``
            returning documents with a ``page_content`` attribute.
        k: Number of top matches to return (default 3, matching the
            original hard-coded behavior).

    Returns:
        List of the ``page_content`` strings of the top-``k`` matches.
    """
    docs = vectorstore.similarity_search(query, k=k)
    return [doc.page_content for doc in docs]
def ask_groq(query):
    """Send a single-turn user prompt to the Groq LLM and return its reply.

    Args:
        query: The full prompt text (context plus question).

    Returns:
        The assistant's reply content as a string.
    """
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        stream=False,
        messages=[{"role": "user", "content": query}],
    )
    return response.choices[0].message.content
# --- Streamlit app ---------------------------------------------------------
# FIX: the scraped source lost all indentation; the nesting below is
# reconstructed so that querying only happens after a PDF has been uploaded
# and indexed (`vectorstore` is bound only inside the upload branch).
st.title("RAG-Based Chatbot")

uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
if uploaded_file is not None:
    # Persist the upload to disk so PyPDF2 can open it by path.
    with open("uploaded_file.pdf", "wb") as f:
        f.write(uploaded_file.read())
    st.info("Processing the PDF...")
    text = extract_text_from_pdf("uploaded_file.pdf")
    vectorstore, chunks = process_text_with_langchain(text)
    st.success("PDF processed and indexed successfully!")

    query = st.text_input("Ask a question about the document")
    if query:
        st.info("Searching relevant chunks...")
        relevant_chunks = query_faiss_index(query, vectorstore)
        # Retrieved chunks become the grounding context for the LLM prompt.
        context = "\n".join(relevant_chunks)
        st.info("Getting response from the language model...")
        response = ask_groq(f"Context: {context}\n\nQuestion: {query}")
        st.success(response)