#ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc
import os
import chromadb
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from PyPDF2 import PdfReader
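# NOTE: assumed runtime dependencies (not pinned anywhere in this script):
#   pip install streamlit chromadb PyPDF2 langchain langchain-huggingface \
#       langchain-chroma langchain-groq sentence-transformers
# sentence-transformers is pulled in for the default HuggingFaceEmbeddings model.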
# Clear ChromaDB cache to fix tenant issue
chromadb.api.client.SharedSystemClient.clear_system_cache()
# Ensure required environment variables are set
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Please configure it in Hugging Face Spaces secrets.")
    st.stop()
# Function to process PDFs and set up the vectorstore
def process_and_store_pdfs(uploaded_files):
    texts = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            page_text = page.extract_text()
            # Skip pages with no extractable text (e.g. scanned images)
            if page_text:
                texts.append(page_text)
    # Embed the page texts and store them in an in-memory Chroma vectorstore
    embeddings = HuggingFaceEmbeddings()
    vectorstore = Chroma.from_texts(texts, embedding=embeddings)
    return vectorstore
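# Optional refinement (not part of the original script): page-level texts can be
# long, so a common alternative is to chunk them before embedding, e.g. with
# LangChain's RecursiveCharacterTextSplitter:
#   from langchain_text_splitters import RecursiveCharacterTextSplitter
#   splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
#   chunks = splitter.split_text("\n".join(texts))
#   vectorstore = Chroma.from_texts(chunks, embedding=embeddings)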
# Function to set up the chat chain
def chat_chain(vectorstore):
    llm = ChatGroq(
        model="llama-3.1-70b-versatile",
        temperature=0,
        groq_api_key=GROQ_API_KEY,
    )
    retriever = vectorstore.as_retriever()
    # Buffer memory keeps the running conversation; the chain writes its reply
    # under "answer" and reads/writes history under "chat_history".
    memory = ConversationBufferMemory(
        output_key="answer",
        memory_key="chat_history",
        return_messages=True,
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        verbose=True,
        return_source_documents=True,
    )
    return chain
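# The chain returns a dict; because return_source_documents=True it contains the
# "answer" plus the retrieved "source_documents", e.g. (illustrative only):
#   result = chain({"question": "What are these documents about?"})
#   result["answer"], result["source_documents"]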
# Streamlit UI configuration
st.set_page_config(
    page_title="Multi Doc Chat",
    page_icon="📚",
    layout="centered"
)
st.title("Chat with Your Docs 📚")
# File uploader for PDFs
uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"])
# Process PDFs and initialize the vectorstore once per session, so documents are
# not re-embedded and the chat memory is not reset on every Streamlit rerun
if uploaded_files and "conversational_chain" not in st.session_state:
    with st.spinner("Processing files..."):
        vectorstore = process_and_store_pdfs(uploaded_files)
        st.session_state.vectorstore = vectorstore
        st.session_state.conversational_chain = chat_chain(vectorstore)
    st.success("Files successfully processed! You can now chat with your documents.")
# Initialize chat history
if "chat_history" not in st.session_state:
st.session_state.chat_history = []
# Display chat history
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# User input
if "conversational_chain" in st.session_state:
user_input = st.chat_input("Ask AI...")
if user_input:
st.session_state.chat_history.append({"role": "user", "content": user_input})
with st.chat_message("user"):
st.markdown(user_input)
with st.chat_message("assistant"):
# Generate response
response = st.session_state.conversational_chain({"question": user_input})
assistant_response = response["answer"]
st.markdown(assistant_response)
st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
else:
st.info("Please upload PDF files to start chatting.")