Sbnos committed on
Commit
1caab63
·
verified ·
1 Parent(s): 1bc894f

mainfile cgpt fin

Browse files
Files changed (1) hide show
  1. app.py +100 -224
app.py CHANGED
@@ -1,270 +1,146 @@
1
  import streamlit as st
2
  import os
3
- from langchain.vectorstores import Chroma
4
- from langchain.embeddings import HuggingFaceBgeEmbeddings
 
 
 
5
  from langchain_together import Together
6
- from langchain import hub
7
- from operator import itemgetter
8
- from langchain.schema.runnable import RunnableParallel
9
- from langchain.schema import format_document
10
- from typing import List, Tuple
11
- from langchain.chains import LLMChain
12
- from langchain.chains import RetrievalQA
13
- from langchain.schema.output_parser import StrOutputParser
14
- from langchain.memory import StreamlitChatMessageHistory
15
- from langchain.memory import ConversationBufferMemory
16
- from langchain.chains import ConversationalRetrievalChain
17
- from langchain.memory import ConversationSummaryMemory
18
- from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
19
- from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
20
-
21
-
22
- # Load the embedding function
23
- model_name = "BAAI/bge-base-en"
24
- encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity
25
-
26
- embedding_function = HuggingFaceBgeEmbeddings(
27
- model_name=model_name,
28
- encode_kwargs=encode_kwargs
29
- )
30
-
31
- # Load the ChromaDB vector store
32
- # persist_directory="./mrcpchromadb/"
33
- # vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="mrcppassmednotes")
34
-
35
-
36
-
37
 
38
- # Load the LLM
39
  llm = Together(
40
- model="mistralai/Mixtral-8x22B-Instruct-v0.1",
41
  temperature=0.2,
42
  top_k=12,
 
43
  together_api_key=os.environ['pilotikval']
44
  )
45
 
46
- # Load the summarizeLLM
47
- llmc = Together(
48
- model="mistralai/Mixtral-8x22B-Instruct-v0.1",
49
- temperature=0.2,
50
- top_k=3,
51
- together_api_key=os.environ['pilotikval']
52
- )
53
-
54
- msgs = StreamlitChatMessageHistory(key="langchain_messages")
55
- memory = ConversationBufferMemory(chat_memory=msgs)
56
-
57
 
 
 
58
 
59
- DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
60
-
61
- def _combine_documents(
62
- docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
63
- ):
64
- doc_strings = [format_document(doc, document_prompt) for doc in docs]
65
- return document_separator.join(doc_strings)
66
-
67
-
68
-
69
- chistory = []
70
-
71
- def store_chat_history(role: str, content: str):
72
- # Append the new message to the chat history
73
- chistory.append({"role": role, "content": content})
74
 
 
 
 
 
75
 
76
  # Define the Streamlit app
77
  def app():
78
-
79
-
80
-
81
  with st.sidebar:
82
-
83
  st.title("dochatter")
84
- # Create a dropdown selection box
85
  option = st.selectbox(
86
  'Which retriever would you like to use?',
87
  ('General Medicine', 'RespiratoryFishman', 'RespiratoryMurray', 'MedMRCP2', 'OldMedicine')
88
  )
89
- # Depending on the selected option, choose the appropriate retriever
90
- if option == 'RespiratoryFishman':
91
- persist_directory="./respfishmandbcud/"
92
- vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="fishmannotescud")
93
- retriever = vectordb.as_retriever(search_kwargs={"k": 5})
94
- retriever = retriever # replace with your actual retriever
95
-
96
- if option == 'RespiratoryMurray':
97
- persist_directory="./respmurray/"
98
- vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="respmurraynotes")
99
- retriever = vectordb.as_retriever(search_kwargs={"k": 5})
100
- retriever = retriever
101
 
102
- if option == 'MedMRCP2':
103
- persist_directory="./medmrcp2store/"
104
- vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="medmrcp2notes")
105
- retriever = vectordb.as_retriever(search_kwargs={"k": 5})
106
- retriever = retriever
107
-
108
- if option == 'General Medicine':
109
- persist_directory="./oxfordmedbookdir/"
110
- vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="oxfordmed")
111
- retriever = vectordb.as_retriever(search_kwargs={"k": 7})
112
- retriever = retriever
113
-
114
- else:
115
- persist_directory="./mrcpchromadb/"
116
- vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="mrcppassmednotes")
117
- retriever = vectordb.as_retriever(search_kwargs={"k": 5})
118
- retriever = retriever # replace with your actual retriever
119
- retriever = retriever # replace with your actual retriever
120
-
121
- #template = """You are an AI chatbot having a conversation with a human. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
122
- #{context}
123
- #{history}
124
- #Human: {human_input}
125
- #AI: """
126
- #prompt = PromptTemplate(input_variables=["history", "question"], template=template)
127
- #template = st.text_area("Template", value=template, height=180)
128
- #prompt2 = ChatPromptTemplate.from_template(template)
 
 
 
 
 
 
 
 
 
 
 
129
 
130
-
131
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  # Session State
134
- # Store LLM generated responses
135
  if "messages" not in st.session_state.keys():
136
  st.session_state.messages = [{"role": "assistant", "content": "How may I help you?"}]
137
-
138
-
139
-
140
-
141
-
142
-
143
-
144
-
145
-
146
-
147
- ## Retry lets go
148
-
149
- _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question which contains the themes of the conversation. Do not write the question. Do not write the answer.
150
- Chat History:
151
- {chat_history}
152
- Follow Up Input: {question}
153
- Standalone question:"""
154
- CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
155
-
156
- template = """You are helping a doctor. Answer with what you know from the context provided. Please be as detailed and thorough. Answer the question based on the following context:
157
- {context}
158
- Question: {question}
159
- """
160
- ANSWER_PROMPT = ChatPromptTemplate.from_template(template)
161
-
162
-
163
- _inputs = RunnableParallel(
164
- standalone_question=RunnablePassthrough.assign(
165
- chat_history=lambda x: chistory
166
- )
167
- | CONDENSE_QUESTION_PROMPT
168
- | llmc
169
- | StrOutputParser(),
170
- )
171
- _context = {
172
- "context": itemgetter("standalone_question") | retriever | _combine_documents,
173
- "question": lambda x: x["standalone_question"],
174
- }
175
- conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | llm
176
-
177
-
178
-
179
-
180
-
181
 
182
-
183
-
184
-
185
-
186
-
187
  st.header("Ask Away!")
188
- # Display the messages
189
  for message in st.session_state.messages:
190
  with st.chat_message(message["role"]):
191
  st.write(message["content"])
192
- store_chat_history(message["role"], message["content"])
193
 
194
- # prompt = hub.pull("rlm/rag-prompt")
195
-
196
-
197
-
198
-
199
  prompts2 = st.chat_input("Say something")
200
 
201
- # Implement using different book sources, if statements
202
-
203
-
204
-
205
-
206
-
207
-
208
-
209
-
210
-
211
-
212
-
213
-
214
-
215
-
216
  if prompts2:
217
  st.session_state.messages.append({"role": "user", "content": prompts2})
218
  with st.chat_message("user"):
219
  st.write(prompts2)
220
-
221
-
222
 
223
  if st.session_state.messages[-1]["role"] != "assistant":
224
  with st.chat_message("assistant"):
225
  with st.spinner("Thinking..."):
226
- response = conversational_qa_chain.invoke(
227
- {
228
- "question": prompts2,
229
- "chat_history": chistory,
230
- }
231
- )
232
- st.write(response)
233
- message = {"role": "assistant", "content": response}
234
- st.session_state.messages.append(message)
235
-
236
-
237
-
238
-
239
-
240
-
241
-
242
-
243
-
244
-
245
-
246
-
247
- # Create a button to submit the question
248
-
249
-
250
-
251
-
252
-
253
-
254
-
255
-
256
-
257
-
258
-
259
-
260
-
261
-
262
-
263
-
264
-
265
-
266
- # Initialize history
267
- history = []
268
 
269
  if __name__ == '__main__':
270
- app()
 
1
  import streamlit as st
2
  import os
3
+ import asyncio
4
+ import bs4
5
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
6
+ from langchain.chains.combine_documents import create_stuff_documents_chain
7
+ from langchain_community.vectorstores import Chroma
8
  from langchain_together import Together
9
+ from langchain_community.chat_message_histories import StreamlitChatMessageHistory
10
+ from langchain_community.document_loaders import WebBaseLoader
11
+ from langchain_core.chat_history import BaseChatMessageHistory
12
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
13
+ from langchain_core.runnables.history import RunnableWithMessageHistory
14
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
15
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ # Initialize the LLM
18
  llm = Together(
19
+ model="mistralai/Mixtral-8x7B-Instruct-v0.1",
20
  temperature=0.2,
21
  top_k=12,
22
+ max_tokens=22048,
23
  together_api_key=os.environ['pilotikval']
24
  )
25
 
26
+ # Registry of chat histories, keyed by session id (filled by get_session_history)
27
+ store = {}
 
 
 
 
 
 
 
 
 
28
 
29
+ model_name = "BAAI/bge-base-en"
30
+ encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity
31
 
32
+ embedding_function = HuggingFaceBgeEmbeddings(
33
+ model_name=model_name,
34
+ encode_kwargs=encode_kwargs
35
+ )
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ def get_session_history(session_id: str) -> BaseChatMessageHistory:
38
+ if session_id not in store:
39
+ store[session_id] = StreamlitChatMessageHistory(key=session_id)
40
+ return store[session_id]
41
 
42
  # Define the Streamlit app
43
  def app():
 
 
 
44
  with st.sidebar:
 
45
  st.title("dochatter")
 
46
  option = st.selectbox(
47
  'Which retriever would you like to use?',
48
  ('General Medicine', 'RespiratoryFishman', 'RespiratoryMurray', 'MedMRCP2', 'OldMedicine')
49
  )
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ # Define retrievers based on option
52
+ persist_directory = {
53
+ 'General Medicine': "./oxfordmedbookdir/",
54
+ 'RespiratoryFishman': "./respfishmandbcud/",
55
+ 'RespiratoryMurray': "./respmurray/",
56
+ 'MedMRCP2': "./medmrcp2store/",
57
+ 'OldMedicine': "./mrcpchromadb/"
58
+ }.get(option, "./mrcpchromadb/")
59
+
60
+ collection_name = {
61
+ 'General Medicine': "oxfordmed",
62
+ 'RespiratoryFishman': "fishmannotescud",
63
+ 'RespiratoryMurray': "respmurraynotes",
64
+ 'MedMRCP2': "medmrcp2notes",
65
+ 'OldMedicine': "mrcppassmednotes"
66
+ }.get(option, "mrcppassmednotes")
67
+
68
+ vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function, collection_name=collection_name)
69
+ retriever = vectordb.as_retriever(search_kwargs={"k": 5})
70
+
71
+ # Define the prompt templates
72
+ contextualize_q_system_prompt = (
73
+ "Given a chat history and the latest user question "
74
+ "which might reference context in the chat history, "
75
+ "formulate a standalone question which can be understood "
76
+ "without the chat history. Do NOT answer the question, "
77
+ "just reformulate it if needed and otherwise return it as is."
78
+ )
79
+ contextualize_q_prompt = ChatPromptTemplate.from_messages(
80
+ [
81
+ ("system", contextualize_q_system_prompt),
82
+ MessagesPlaceholder("chat_history"),
83
+ ("human", "{input}"),
84
+ ]
85
+ )
86
+ history_aware_retriever = create_history_aware_retriever(
87
+ llm, retriever, contextualize_q_prompt
88
+ )
89
 
90
+ system_prompt = (
91
+ "You are an assistant for question-answering tasks. "
92
+ "Use the following pieces of retrieved context to answer "
93
+ "the question. If you don't know the answer, say that you "
94
+ "don't know."
95
+ "\n\n"
96
+ "{context}"
97
+ )
98
+ qa_prompt = ChatPromptTemplate.from_messages(
99
+ [
100
+ ("system", system_prompt),
101
+ MessagesPlaceholder("chat_history"),
102
+ ("human", "{input}"),
103
+ ]
104
+ )
105
+ question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
106
+ rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
107
+
108
+ # Statefully manage chat history
109
+ conversational_rag_chain = RunnableWithMessageHistory(
110
+ rag_chain,
111
+ get_session_history,
112
+ input_messages_key="input",
113
+ history_messages_key="chat_history",
114
+ output_messages_key="answer",
115
+ )
116
 
117
  # Session State
 
118
  if "messages" not in st.session_state.keys():
119
  st.session_state.messages = [{"role": "assistant", "content": "How may I help you?"}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
 
 
 
 
 
121
  st.header("Ask Away!")
 
122
  for message in st.session_state.messages:
123
  with st.chat_message(message["role"]):
124
  st.write(message["content"])
 
125
 
 
 
 
 
 
126
  prompts2 = st.chat_input("Say something")
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  if prompts2:
129
  st.session_state.messages.append({"role": "user", "content": prompts2})
130
  with st.chat_message("user"):
131
  st.write(prompts2)
 
 
132
 
133
  if st.session_state.messages[-1]["role"] != "assistant":
134
  with st.chat_message("assistant"):
135
  with st.spinner("Thinking..."):
136
+ final_response = conversational_rag_chain.invoke(
137
+ {
138
+ "input": prompts2,
139
+ },
140
+ config={"configurable": {"session_id": "current_session"}}
141
+ )
142
+ st.write(final_response['answer'])
143
+ st.session_state.messages.append({"role": "assistant", "content": final_response['answer']})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
  if __name__ == '__main__':
146
+ app()