import os
from groq import Groq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PyPDF2 import PdfReader
import streamlit as st
from tempfile import NamedTemporaryFile
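
# Dependency sketch (package names inferred from the imports above, not
# pinned by the repo itself):
#   pip install streamlit groq PyPDF2 faiss-cpu sentence-transformers \
#       langchain langchain-community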
# Initialize Groq client (expects the GROQ_API_KEY environment variable)
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file_path):
    pdf_reader = PdfReader(pdf_file_path)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for image-only pages
        text += page.extract_text() or ""
    return text
# Function to split text into overlapping chunks
def chunk_text(text, chunk_size=500, chunk_overlap=50):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return text_splitter.split_text(text)
# Function to create embeddings and store them in FAISS
def create_embeddings_and_store(chunks):
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_db = FAISS.from_texts(chunks, embedding=embeddings)
    return vector_db
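
# Note (sketch, not wired up here): FAISS.from_texts builds an in-memory
# index, so embeddings are recomputed for every session. LangChain's
# vector_db.save_local(...) / FAISS.load_local(...) could persist it instead.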
# Function to query the vector database and interact with Groq
def query_vector_db(query, vector_db):
    # Retrieve the most relevant documents (top 3 by similarity)
    docs = vector_db.similarity_search(query, k=3)
    context = "\n".join([doc.page_content for doc in docs])
    # Interact with Groq API, grounding the answer in the retrieved context
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": f"Use the following context:\n{context}"},
            {"role": "user", "content": query},
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content
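
# Sketch only: Groq's chat endpoint follows the OpenAI-style schema, so
# decoding parameters such as temperature or max_tokens could be passed to
# create(...) above; they are left at the service defaults here.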
# Streamlit app
st.title("Interactive PDF Reader and Chat")

# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

if uploaded_file:
    # Persist the upload to a temporary file so PyPDF2 can read it by path
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(uploaded_file.read())
        pdf_path = temp_file.name

    # Extract text, chunk it, and create embeddings (once per session)
    if "vector_db" not in st.session_state:
        text = extract_text_from_pdf(pdf_path)
        chunks = chunk_text(text)
        st.session_state.vector_db = create_embeddings_and_store(chunks)

    # Initialize chat history if not already done
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    # Display chat history
    for i, chat in enumerate(st.session_state.chat_history):
        st.write(f"**Query {i+1}:** {chat['query']}")
        st.write(f"**Response:** {chat['response']}")
        st.write("---")

    # Add a fresh query input, keyed by the current chat length
    query_key = f"query_{len(st.session_state.chat_history) + 1}"
    user_query = st.text_input("Enter your query:", key=query_key)

    if user_query:
        # Generate response
        response = query_vector_db(user_query, st.session_state.vector_db)
        # Append query and response to the chat history
        st.session_state.chat_history.append({"query": user_query, "response": response})
        # Rerun so the new exchange shows up in the history above
        # (st.rerun() replaces the deprecated experimental rerun helpers)
        st.rerun()
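
# Launch note: run this file with the standard Streamlit entry point,
# e.g. `streamlit run app.py`.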