|
|
|
|
|
import os |
|
import chromadb |
|
from chromadb import Client, Settings |
|
import streamlit as st |
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
from langchain_chroma import Chroma |
|
from langchain_groq import ChatGroq |
|
from langchain.memory import ConversationBufferMemory |
|
from langchain.chains import ConversationalRetrievalChain |
|
from PyPDF2 import PdfReader |
|
|
|
|
|
# Clear chromadb's shared system-client cache at import time.
# NOTE(review): presumably this keeps Streamlit reruns from tripping over a
# stale cached Chroma system — confirm against the chromadb version in use.
chromadb.api.client.SharedSystemClient.clear_system_cache()

# The Groq credential comes from the environment. If it is absent or empty,
# show an error and halt the script before any UI or model code runs.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    st.error("GROQ_API_KEY is not set. Please configure it in Hugging Face Spaces secrets.")
    st.stop()
|
|
|
|
|
def process_and_store_pdfs(uploaded_files):
    """Extract page text from the given PDFs and index it in a Chroma store.

    Each page that yields text becomes one entry in the vector store. Pages
    with no extractable text are skipped so that blank entries are never
    embedded.

    Args:
        uploaded_files: Iterable of PDF sources accepted by ``PdfReader``
            (here, the file objects returned by the Streamlit uploader).

    Returns:
        The ``Chroma`` vector store built from the extracted page texts.

    Raises:
        ValueError: If none of the PDFs contain any extractable text, for
            example scanned documents without a text layer.
    """
    texts = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            page_text = page.extract_text()
            # Skip None and whitespace-only results; keep real text unmodified.
            if page_text and page_text.strip():
                texts.append(page_text)

    if not texts:
        # Fail early with a readable message instead of letting the vector
        # store reject an empty batch further down.
        raise ValueError("No extractable text was found in the uploaded PDF files.")

    embeddings = HuggingFaceEmbeddings()
    vectorstore = Chroma.from_texts(texts, embedding=embeddings)
    return vectorstore
|
|
|
|
|
def chat_chain(vectorstore):
    """Create a conversational retrieval chain backed by ``vectorstore``.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``; its retriever
            supplies the documents passed to the chain.

    Returns:
        A ``ConversationalRetrievalChain`` using a Groq chat model, a buffer
        memory stored under ``"chat_history"``, and source documents included
        in its output.
    """
    # NOTE(review): confirm this model id is still served by Groq.
    groq_llm = ChatGroq(
        model="llama-3.1-70b-versatile",
        temperature=0,
        groq_api_key=GROQ_API_KEY,
    )

    # output_key names which chain output the memory records; the chain also
    # returns source documents alongside the answer.
    # NOTE(review): the llm argument looks unused by a plain buffer memory —
    # kept as in the original; verify before removing.
    memory_options = {
        "llm": groq_llm,
        "output_key": "answer",
        "memory_key": "chat_history",
        "return_messages": True,
    }
    conversation_memory = ConversationBufferMemory(**memory_options)

    return ConversationalRetrievalChain.from_llm(
        llm=groq_llm,
        retriever=vectorstore.as_retriever(),
        chain_type="stuff",
        memory=conversation_memory,
        verbose=True,
        return_source_documents=True,
    )
|
|
|
|
|
# Browser-tab metadata and the page heading.
# NOTE(review): the "π" glyph in the icon and title looks like a garbled
# emoji — confirm the intended character; it is reproduced unchanged here.
st.set_page_config(page_title="Multi Doc Chat", page_icon="π", layout="centered")

st.title("Chat with Your Docsπ")
|
|
|
|
|
uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"])

if uploaded_files:
    # Streamlit re-executes this script on every interaction and the uploader
    # keeps returning the same files. Rebuilding unconditionally would
    # re-embed the PDFs and replace the chain (and its conversation memory)
    # on every chat message, so only rebuild when the selection changes.
    upload_signature = tuple((pdf.name, pdf.size) for pdf in uploaded_files)

    if st.session_state.get("processed_upload_signature") != upload_signature:
        with st.spinner("Processing files..."):
            vectorstore = process_and_store_pdfs(uploaded_files)
            st.session_state.vectorstore = vectorstore
            st.session_state.conversational_chain = chat_chain(vectorstore)
        # Recorded only after a successful build so a failed attempt is
        # retried on the next run.
        st.session_state.processed_upload_signature = upload_signature

    st.success("Files successfully processed! You can now chat with your documents.")
|
|
|
|
|
# Make sure the visible transcript exists for this session; it is a list of
# {"role": ..., "content": ...} dicts.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

# Replay the transcript so earlier turns stay on screen after each rerun.
for entry in st.session_state["chat_history"]:
    with st.chat_message(entry["role"]):
        st.markdown(entry["content"])
|
|
|
|
|
# A chain exists only after PDFs have been processed; until then, prompt the
# user to upload instead of showing the chat box.
if "conversational_chain" in st.session_state:
    user_input = st.chat_input("Ask AI...")

    if user_input:
        # Record and echo the user's turn.
        st.session_state.chat_history.append({"role": "user", "content": user_input})
        with st.chat_message("user"):
            st.markdown(user_input)

        with st.chat_message("assistant"):
            # Calling the chain object directly is deprecated in LangChain;
            # invoke() is the supported entry point and takes the same input.
            response = st.session_state.conversational_chain.invoke({"question": user_input})
            assistant_response = response["answer"]
            st.markdown(assistant_response)

        # Keep the assistant's reply in the transcript replayed on reruns.
        st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
else:
    st.info("Please upload PDF files to start chatting.")