File size: 3,288 Bytes
107ed40 b766313 fad1562 b766313 fad1562 c320ec9 fad1562 5d13b89 fad1562 c320ec9 fad1562 c320ec9 5d13b89 c320ec9 5d13b89 fad1562 5d13b89 b766313 5d13b89 b766313 5d13b89 b766313 5d13b89 b766313 5d13b89 b766313 fad1562 c320ec9 fad1562 c320ec9 fad1562 c320ec9 fad1562 5d13b89 b766313 fad1562 5d13b89 c320ec9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
#ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc
import os
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from PyPDF2 import PdfReader
# Ensure required environment variables are set.
# Fail fast at startup: ChatGroq cannot be constructed without an API key,
# so surface a clear message instead of a later stack trace.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Please configure it in Hugging Face Spaces secrets.")
    st.stop()
# Function to process PDFs and set up the vectorstore
def process_and_store_pdfs(uploaded_files):
    """Extract text from every page of the uploaded PDFs and index it in Chroma.

    Args:
        uploaded_files: iterable of file-like objects (Streamlit UploadedFile)
            containing PDF data.

    Returns:
        A Chroma vectorstore built from the non-empty page texts, embedded
        with the default HuggingFace embedding model.

    Raises:
        ValueError: if no extractable text was found in any of the files
            (e.g. all pages are scanned images).
    """
    texts = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            # extract_text() may return None or "" for image-only/scanned
            # pages; skip those so Chroma never receives empty documents.
            text = page.extract_text()
            if text and text.strip():
                texts.append(text)
    if not texts:
        raise ValueError("No extractable text found in the uploaded PDF(s).")
    # Combine and embed the texts
    embeddings = HuggingFaceEmbeddings()
    vectorstore = Chroma.from_texts(texts, embedding=embeddings)
    return vectorstore
# Function to set up the chat chain
def chat_chain(vectorstore):
    """Build a conversational retrieval chain over *vectorstore*.

    Wires a Groq-hosted Llama model to the vectorstore's retriever and
    attaches a conversation buffer memory so follow-up questions keep
    context across turns.
    """
    groq_llm = ChatGroq(
        model="llama-3.1-70b-versatile",
        temperature=0,
        groq_api_key=GROQ_API_KEY,
    )
    doc_retriever = vectorstore.as_retriever()
    # output_key="answer" is required because the chain returns source
    # documents too; the memory must know which key to persist.
    buffer_memory = ConversationBufferMemory(
        llm=groq_llm,
        memory_key="chat_history",
        output_key="answer",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=groq_llm,
        retriever=doc_retriever,
        chain_type="stuff",
        memory=buffer_memory,
        verbose=True,
        return_source_documents=True,
    )
# Streamlit UI configuration
# NOTE(review): "π" looks like a mis-encoded emoji (mojibake) rather than an
# intentional pi icon — confirm the intended page icon with the author.
st.set_page_config(
    page_title="Multi Doc Chat",
    page_icon="π",
    layout="centered"
)
st.title("π Multi Documents Chatbot")
# File uploader for PDFs; accepts multiple files, restricted to .pdf.
uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"])
# Process PDFs and initialize the vectorstore.
# Streamlit re-runs this entire script on every interaction, so without a
# guard the PDFs are re-embedded and chat_chain() is rebuilt on every chat
# message — which also wipes the chain's ConversationBufferMemory. Key the
# processed state on the uploaded file names so work happens only when the
# selection actually changes.
if uploaded_files:
    file_key = tuple(f.name for f in uploaded_files)
    if st.session_state.get("processed_files") != file_key:
        with st.spinner("Processing files..."):
            vectorstore = process_and_store_pdfs(uploaded_files)
            st.session_state.vectorstore = vectorstore
            st.session_state.conversational_chain = chat_chain(vectorstore)
            st.session_state.processed_files = file_key
        st.success("Files successfully processed! You can now chat with your documents.")
# Initialize chat history on first run; it survives reruns via session_state.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
# Re-render the full chat history on every rerun (Streamlit redraws the
# whole page per interaction).
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# User input: only offer the chat box once a chain exists (i.e. after PDFs
# have been processed); otherwise prompt the user to upload first.
if "conversational_chain" in st.session_state:
    user_input = st.chat_input("Ask AI...")
    if user_input:
        # Persist and echo the user's question.
        st.session_state.chat_history.append({"role": "user", "content": user_input})
        with st.chat_message("user"):
            st.markdown(user_input)
        with st.chat_message("assistant"):
            # Generate response from the retrieval chain.
            result = st.session_state.conversational_chain({"question": user_input})
            answer = result["answer"]
            st.markdown(answer)
            st.session_state.chat_history.append({"role": "assistant", "content": answer})
else:
    st.info("Please upload PDF files to start chatting.")
|