#ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc

import os
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from PyPDF2 import PdfReader
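
# Assumed dependencies (versions are not pinned in the source):
#   pip install streamlit langchain langchain-huggingface langchain-chroma \
#       langchain-groq PyPDF2 sentence-transformers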

# Ensure required environment variables are set
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Please configure it in Hugging Face Spaces secrets.")
    st.stop()
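# On Hugging Face Spaces, secrets defined in the Space settings are exposed to
# the app as environment variables, which is what os.getenv reads above.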

# Function to process PDFs and set up the vectorstore
def process_and_store_pdfs(uploaded_files):
    texts = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            text = page.extract_text()
            if text:  # skip pages with no extractable text (e.g. scanned images)
                texts.append(text)

    # Embed the page texts and index them in Chroma
    # (HuggingFaceEmbeddings defaults to sentence-transformers/all-MiniLM-L6-v2)
    embeddings = HuggingFaceEmbeddings()
    vectorstore = Chroma.from_texts(texts, embedding=embeddings)
    return vectorstore
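
# NOTE: pages are embedded whole, with no further chunking. If pages are long,
# a text splitter would give the retriever finer-grained chunks. A minimal
# sketch (assuming the langchain_text_splitters package is installed):
#
#   from langchain_text_splitters import RecursiveCharacterTextSplitter
#   splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
#   texts = [chunk for t in texts for chunk in splitter.split_text(t)]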

# Function to set up the chat chain
def chat_chain(vectorstore):
    llm = ChatGroq(model="llama-3.1-70b-versatile", 
                   temperature=0, 
                   groq_api_key=GROQ_API_KEY)
    retriever = vectorstore.as_retriever()
    # ConversationBufferMemory stores the raw messages verbatim and does not
    # call an LLM itself; output_key tells it which chain output to record
    memory = ConversationBufferMemory(
        output_key="answer",
        memory_key="chat_history",
        return_messages=True
    )

    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        verbose=True,
        return_source_documents=True
    )
    return chain
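
# For reference: ConversationalRetrievalChain condenses chat_history plus the
# new question into a standalone query, retrieves matching chunks from the
# vectorstore, and "stuffs" them into a single prompt (chain_type="stuff").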

# Streamlit UI configuration
st.set_page_config(
    page_title="Multi Doc Chat",
    page_icon="📚",
    layout="centered"
)

st.title("📚 Multi-Document Chatbot")

# File uploader for PDFs
uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"])

# Process PDFs and build the vectorstore once per upload; Streamlit reruns the
# whole script on every interaction, so guard against re-embedding the PDFs
# on each chat message
if uploaded_files and "vectorstore" not in st.session_state:
    with st.spinner("Processing files..."):
        vectorstore = process_and_store_pdfs(uploaded_files)
        st.session_state.vectorstore = vectorstore
        st.session_state.conversational_chain = chat_chain(vectorstore)
    st.success("Files successfully processed! You can now chat with your documents.")

# Initialize chat history
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Display chat history
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# User input
if "conversational_chain" in st.session_state:
    user_input = st.chat_input("Ask AI...")
    if user_input:
        st.session_state.chat_history.append({"role": "user", "content": user_input})

        with st.chat_message("user"):
            st.markdown(user_input)

        with st.chat_message("assistant"):
            # Generate response (.invoke() is the current call style; calling
            # the chain directly is deprecated in recent LangChain releases)
            response = st.session_state.conversational_chain.invoke({"question": user_input})
            assistant_response = response["answer"]
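            # response["source_documents"] also holds the retrieved chunks
            # (return_source_documents=True), though they are not displayed here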

            st.markdown(assistant_response)
            st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
else:
    st.info("Please upload PDF files to start chatting.")
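
# Run locally with:  streamlit run app.py
# (the filename app.py is an assumption; on Hugging Face Spaces the entry
# point is typically app.py)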