segestic committed
Commit 8c99fd1 · verified · 1 parent: 602dcae

Upload 3 files

Files changed (3)
  1. main.py +122 -0
  2. requirements.txt +11 -0
  3. util.py +101 -0
main.py ADDED
@@ -0,0 +1,122 @@
+ from util import *
+ from streamlit_option_menu import option_menu
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_groq import ChatGroq
+ from dotenv import load_dotenv
+
+ # --- PAGE CONFIGURATION ---
+ st.set_page_config(page_title="Doc Chat", page_icon=":robot_face:", layout="centered")
+
+ # --- SETUP SESSION STATE VARIABLES ---
+ if "vector_store" not in st.session_state:
+     st.session_state.vector_store = False
+ if "response" not in st.session_state:
+     st.session_state.response = None
+ if "prompt_activation" not in st.session_state:
+     st.session_state.prompt_activation = False
+ if "conversation" not in st.session_state:
+     st.session_state.conversation = None
+ if "chat_history" not in st.session_state:
+     st.session_state.chat_history = None
+ if "prompt" not in st.session_state:
+     st.session_state.prompt = False
+
+ load_dotenv()
+
+ # --- SIDEBAR CONFIGURATION ---
+ st.sidebar.header('Configuration')
+ groq_api_key = sidebar_api_key_configuration()
+ model = sidebar_groq_model_selection()
+
+ # --- MAIN PAGE CONFIGURATION ---
+ st.title("Doc Chat :robot_face:")
+ st.write("*Interrogate Documents :books:, Ignite Insights: AI at Your Service*")
+ st.write(':blue[***Powered by Groq AI Inference Technology***]')
+
+ # ---- NAVIGATION MENU -----
+ selected = option_menu(
+     menu_title=None,
+     options=["Doc Chat", "Reference", "About"],
+     icons=["robot", "bi-file-text-fill", "app"],  # https://icons.getbootstrap.com
+     orientation="horizontal",
+ )
+
+ llm = ChatGroq(groq_api_key=groq_api_key, model_name=model)
+
+ prompt = ChatPromptTemplate.from_template(
+     """
+     Answer the question based on the provided context only. If the question is not within the context, do not try
+     to answer it; instead respond that the asked question is out of context or something similar.
+     Please provide the most accurate response based on the question.
+     <context>
+     {context}
+     Questions: {input}
+     """
+ )
+ # ----- SETUP Doc Chat MENU ------
+ if selected == "Doc Chat":
+     st.subheader("Upload PDF(s)")
+     pdf_docs = st.file_uploader("Upload your PDFs", type=['pdf'], accept_multiple_files=True,
+                                 disabled=not st.session_state.prompt_activation, label_visibility='collapsed')
+     process = st.button("Process", type="primary", key="process", disabled=not pdf_docs)
+
+     if process:
+         with st.spinner("Processing ..."):
+             st.session_state.vector_store = create_vectorstore(pdf_docs)
+             st.session_state.prompt = True
+             st.success('Database is ready')
+
+     st.divider()
+
+     if "messages" not in st.session_state:
+         st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
+
+     for msg in st.session_state.messages:
+         st.chat_message(msg["role"]).write(msg["content"])
+
+     container = st.container(border=True)
+     if question := st.chat_input(placeholder='Enter your question related to the uploaded document',
+                                  disabled=not st.session_state.prompt):
+         st.session_state.messages.append({"role": "user", "content": question})
+         st.chat_message("user").write(question)
+
+         with st.spinner('Processing...'):
+             st.session_state.response = get_llm_response(llm, prompt, question)
+             st.session_state.messages.append({"role": "assistant", "content": st.session_state.response['answer']})
+             st.chat_message("assistant").write(st.session_state.response['answer'])
+
+ # ----- SETUP REFERENCE MENU ------
+ if selected == "Reference":
+     st.title("Reference & Context")
+     if st.session_state.response is not None:
+         for i, doc in enumerate(st.session_state.response["context"]):
+             with st.expander(f'Reference # {i + 1}'):
+                 st.write(doc.page_content)
+
+ # ----- SETUP ABOUT MENU ------
+ if selected == "About":
+     with st.expander("About this App"):
+         st.markdown('''This app allows you to chat with your PDF documents. It has the following functionality:
+
+ - Allows you to chat with multiple PDF documents
+ - Supports Groq AI inference technology
+ - Displays the response context and document references
+         ''')
+     with st.expander("Which Large Language Models are supported by this App?"):
+         st.markdown('''This app supports the following LLMs, as supported by Groq:
+
+ - Chat Models -- Groq
+     - Llama3-8b-8192
+     - Llama3-70b-8192
+     - Mixtral-8x7b-32768
+     - Gemma-7b-it
+         ''')
+     with st.expander("Which library is used for the vectorstore?"):
+         st.markdown('''This app uses FAISS for similarity search and as the vectorstore.
+         ''')
+     with st.expander("Whom to contact regarding this app?"):
+         st.markdown('''Contact [Sree Narayanan]([email protected])
+         ''')
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ pypdf
+ langchain
+ langchain-core
+ langchain-groq
+ langchain-community
+ streamlit
+ streamlit-option-menu
+ python-dotenv
+ boto3
+ faiss-cpu
+ gpt4all
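
util.py reads both API keys from the environment via python-dotenv (`load_dotenv()`), so a local `.env` file along these lines is presumably expected. The variable names come from util.py; the values shown are placeholders, not real keys:

```
# .env (placeholder values, not real keys)
# Groq key: per sidebar_api_key_configuration, it starts with 'gsk_' and is 56 characters long
GROQ_API_KEY=gsk_your_groq_key_here
# Hugging Face Inference API key used by HuggingFaceInferenceAPIEmbeddings
INFERENCE_API_KEY=your_hf_inference_key_here
```

With the dependencies installed (`pip install -r requirements.txt`), the app starts with Streamlit's standard entry point: `streamlit run main.py`.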
util.py ADDED
@@ -0,0 +1,101 @@
+ from pypdf import PdfReader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
+ from langchain_community.embeddings.ollama import OllamaEmbeddings
+ from langchain_community.embeddings.bedrock import BedrockEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from langchain.chains.combine_documents import create_stuff_documents_chain
+ from langchain.chains import create_retrieval_chain
+ from dotenv import load_dotenv
+ import streamlit as st
+ import os
+
+ load_dotenv()
+
+
+ # Function to get the Groq API key from the environment (loaded from .env)
+ def get_api_key():
+     try:
+         groq_api_key = os.getenv("GROQ_API_KEY", "")
+         return groq_api_key
+     except Exception as e:
+         print(e)
+
+
+ # Function to get the Hugging Face Inference API key from the environment (loaded from .env)
+ def get_inference_api_key():
+     try:
+         inference_api_key = os.getenv("INFERENCE_API_KEY", "")
+         return inference_api_key
+     except Exception as e:
+         print(e)
+
+
+ # Function for API configuration at sidebar
+ def sidebar_api_key_configuration():
+     groq_api_key = get_api_key()
+     if groq_api_key == '':
+         st.sidebar.warning('Enter the API Key(s) 🗝️')
+         st.session_state.prompt_activation = False
+     elif groq_api_key.startswith('gsk_') and len(groq_api_key) == 56:
+         st.sidebar.success("Let's Proceed!", icon='👉')
+         st.session_state.prompt_activation = True
+     else:
+         st.sidebar.warning('Please enter the correct API Key 🗝️!', icon='⚠️')
+         st.session_state.prompt_activation = False
+     return groq_api_key
+
+
+ def sidebar_groq_model_selection():
+     st.sidebar.subheader("Model Selection")
+     model = st.sidebar.selectbox('Select the Model', ('Llama3-8b-8192', 'Llama3-70b-8192', 'Mixtral-8x7b-32768',
+                                                       'Gemma-7b-it'), label_visibility="collapsed")
+     return model
+
+
+ # Read PDF data
+ def read_pdf_data(pdf_docs):
+     text = ""
+     for pdf in pdf_docs:
+         pdf_reader = PdfReader(pdf)
+         for page in pdf_reader.pages:
+             text += page.extract_text() or ""  # extract_text() may return None for image-only pages
+     return text
+
+
+ # Split data into chunks
+ def split_data(text):
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+     text_chunks = text_splitter.split_text(text)
+     return text_chunks
+
+
+ def get_embedding_function():
+     # embeddings = BedrockEmbeddings(
+     #     credentials_profile_name="default", region_name="us-east-1"
+     # )
+     # embeddings = OllamaEmbeddings(model="nomic-embed-text")
+     inference_api_key = get_inference_api_key()
+
+     embeddings = HuggingFaceInferenceAPIEmbeddings(
+         api_key=inference_api_key, model_name="sentence-transformers/all-MiniLM-L6-v2"
+     )
+     return embeddings
+
+
+ # Create vectorstore
+ def create_vectorstore(pdf_docs):
+     raw_text = read_pdf_data(pdf_docs)        # Get PDF text
+     text_chunks = split_data(raw_text)        # Get the text chunks
+     embeddings = get_embedding_function()     # Get the embedding function
+     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+     return vectorstore
+
+
+ # Get the LLM's response to the user's question
+ def get_llm_response(llm, prompt, question):
+     document_chain = create_stuff_documents_chain(llm, prompt)
+     retrieval_chain = create_retrieval_chain(st.session_state.vector_store.as_retriever(), document_chain)
+     response = retrieval_chain.invoke({'input': question})
+     return response
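
For reference, `create_retrieval_chain` returns a dict whose `answer` and `context` keys main.py reads in the chat and Reference tabs. Below is a minimal standalone sketch of the same flow, assuming `llm`, `prompt`, and `vectorstore` objects built as in main.py and `create_vectorstore` above; the question string is hypothetical:

```python
# Standalone sketch of the retrieval flow in get_llm_response (hypothetical inputs;
# assumes llm, prompt, and vectorstore are constructed as in main.py / util.py).
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm, prompt)  # stuffs retrieved docs into {context}
retrieval_chain = create_retrieval_chain(vectorstore.as_retriever(), document_chain)

response = retrieval_chain.invoke({"input": "What is the main topic of the document?"})
print(response["answer"])          # rendered in the chat tab
for doc in response["context"]:    # rendered in the Reference tab
    print(doc.page_content[:200])
```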