DrishtiSharma committed on
Commit bd82d31 · verified · 1 Parent(s): a92a1de

Create interim.py

Files changed (1): interim.py (+105, -0)
interim.py ADDED
@@ -0,0 +1,105 @@
+ # ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc
+
+ import os
+ import chromadb
+ import streamlit as st
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from langchain_groq import ChatGroq
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
+ from PyPDF2 import PdfReader
+
+ # Clear ChromaDB's cached system client so Streamlit reruns don't hit the stale-tenant error
+ chromadb.api.client.SharedSystemClient.clear_system_cache()
+
+ # Ensure the required environment variable is set
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+ if not GROQ_API_KEY:
+     st.error("GROQ_API_KEY is not set. Please configure it in Hugging Face Spaces secrets.")
+     st.stop()
+
+ # Function to process PDFs and set up the vectorstore
+ def process_and_store_pdfs(uploaded_files):
+     texts = []
+     for uploaded_file in uploaded_files:
+         reader = PdfReader(uploaded_file)
+         for page in reader.pages:
+             # extract_text() can return None for image-only pages; skip those
+             text = page.extract_text()
+             if text:
+                 texts.append(text)
+
+     # Embed the page texts and store them in a Chroma collection
+     embeddings = HuggingFaceEmbeddings()
+     vectorstore = Chroma.from_texts(texts, embedding=embeddings)
+     return vectorstore
+
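Note: HuggingFaceEmbeddings() with no arguments falls back to its default sentence-transformers model, and each PDF page is embedded as a single document. A common refinement, sketched below under the assumption that the langchain-text-splitters package is installed (it is not part of this commit), is to split pages into overlapping chunks before embedding so retrieval returns tighter passages:

    from langchain_text_splitters import RecursiveCharacterTextSplitter

    # Hypothetical chunking step, not in interim.py: split the extracted page
    # texts into ~1000-character pieces with overlap before calling from_texts.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_text("\n".join(texts))
    vectorstore = Chroma.from_texts(chunks, embedding=HuggingFaceEmbeddings())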
+ # Function to set up the chat chain
+ def chat_chain(vectorstore):
+     llm = ChatGroq(model="llama-3.1-70b-versatile",
+                    temperature=0,
+                    groq_api_key=GROQ_API_KEY)
+     retriever = vectorstore.as_retriever()
+     memory = ConversationBufferMemory(
+         llm=llm,
+         output_key="answer",
+         memory_key="chat_history",
+         return_messages=True
+     )
+
+     chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         retriever=retriever,
+         chain_type="stuff",
+         memory=memory,
+         verbose=True,
+         return_source_documents=True
+     )
+     return chain
+
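For reference, one turn through the chain built above behaves roughly as follows: ConversationalRetrievalChain condenses the new question together with the buffered chat_history into a standalone query, retrieves matching documents, and "stuffs" them into a single prompt (chain_type="stuff"). A minimal sketch of calling it directly, using only the keys this code already configures:

    # Sketch of a single turn with the chain returned by chat_chain():
    result = chain({"question": "What is this document about?"})
    answer = result["answer"]              # matches memory's output_key
    sources = result["source_documents"]   # present because return_source_documents=True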
+ # Streamlit UI configuration
+ st.set_page_config(
+     page_title="Multi Doc Chat",
+     page_icon="📚",
+     layout="centered"
+ )
+
+ st.title("Chat with Your Docs 📚")
+
+ # File uploader for PDFs
+ uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"])
+
+ # Process PDFs and initialize the vectorstore
+ if uploaded_files:
+     with st.spinner("Processing files..."):
+         vectorstore = process_and_store_pdfs(uploaded_files)
+         st.session_state.vectorstore = vectorstore
+         st.session_state.conversational_chain = chat_chain(vectorstore)
+     st.success("Files successfully processed! You can now chat with your documents.")
+
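One caveat with the block above: Streamlit reruns the whole script on every interaction, so the PDFs are re-read and re-embedded on each rerun while files remain uploaded. A possible guard, hypothetical and not part of this commit, is to build the vectorstore only once per session:

    # Hypothetical rerun guard: only embed when no vectorstore exists yet.
    if uploaded_files and "vectorstore" not in st.session_state:
        with st.spinner("Processing files..."):
            st.session_state.vectorstore = process_and_store_pdfs(uploaded_files)
            st.session_state.conversational_chain = chat_chain(st.session_state.vectorstore)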
+ # Initialize chat history
+ if "chat_history" not in st.session_state:
+     st.session_state.chat_history = []
+
+ # Display chat history
+ for message in st.session_state.chat_history:
+     with st.chat_message(message["role"]):
+         st.markdown(message["content"])
+
+ # User input
+ if "conversational_chain" in st.session_state:
+     user_input = st.chat_input("Ask AI...")
+     if user_input:
+         st.session_state.chat_history.append({"role": "user", "content": user_input})
+
+         with st.chat_message("user"):
+             st.markdown(user_input)
+
+         with st.chat_message("assistant"):
+             # Generate response
+             response = st.session_state.conversational_chain({"question": user_input})
+             assistant_response = response["answer"]
+
+             st.markdown(assistant_response)
+             st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
+ else:
+     st.info("Please upload PDF files to start chatting.")
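A small forward-compatibility note: calling the chain object directly, as response = chain({"question": ...}) does here, relies on Chain.__call__, which newer LangChain releases deprecate in favor of .invoke with the same dict payload:

    # Equivalent call in newer LangChain versions (same input/output keys):
    response = st.session_state.conversational_chain.invoke({"question": user_input})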