Spaces:

Sbnos
/

medchat2

Running on CPU Upgrade

App Files Files Community

Sbnos committed on Jul 13, 2024

Commit

1caab63

verified ·

1 Parent(s): 1bc894f

mainfile cgpt fin

Browse files

Files changed (1) hide show

app.py +100 -224

app.py CHANGED Viewed

@@ -1,270 +1,146 @@
 import streamlit as st
 import os
-from langchain.vectorstores import Chroma
-from langchain.embeddings import HuggingFaceBgeEmbeddings
 from langchain_together import Together
-from langchain import hub
-from operator import itemgetter
-from langchain.schema.runnable import RunnableParallel
-from langchain.schema import format_document
-from typing import List, Tuple
-from langchain.chains import LLMChain
-from langchain.chains import RetrievalQA
-from langchain.schema.output_parser import StrOutputParser
-from langchain.memory import StreamlitChatMessageHistory
-from langchain.memory import ConversationBufferMemory
-from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationSummaryMemory
-from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
-from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
-# Load the embedding function
-model_name = "BAAI/bge-base-en"
-encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity
-embedding_function = HuggingFaceBgeEmbeddings(
-    model_name=model_name,
-    encode_kwargs=encode_kwargs
-)
-# Load the ChromaDB vector store
-# persist_directory="./mrcpchromadb/"
-# vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="mrcppassmednotes")
-# Load the LLM
 llm = Together(
-    model="mistralai/Mixtral-8x22B-Instruct-v0.1",
     temperature=0.2,
     top_k=12,
     together_api_key=os.environ['pilotikval']
 )
-# Load the summarizeLLM
-llmc = Together(
-    model="mistralai/Mixtral-8x22B-Instruct-v0.1",
-    temperature=0.2,
-    top_k=3,
-    together_api_key=os.environ['pilotikval']
-)
-msgs = StreamlitChatMessageHistory(key="langchain_messages")
-memory = ConversationBufferMemory(chat_memory=msgs)
-DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
-def _combine_documents(
-        docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
-    ):
-        doc_strings = [format_document(doc, document_prompt) for doc in docs]
-        return document_separator.join(doc_strings)
-chistory = []
-def store_chat_history(role: str, content: str):
-    # Append the new message to the chat history
-    chistory.append({"role": role, "content": content})
 # Define the Streamlit app
 def app():
     with st.sidebar:
         st.title("dochatter")
-        # Create a dropdown selection box
         option = st.selectbox(
             'Which retriever would you like to use?',
             ('General Medicine', 'RespiratoryFishman', 'RespiratoryMurray', 'MedMRCP2', 'OldMedicine')
         )
-        # Depending on the selected option, choose the appropriate retriever
-        if option == 'RespiratoryFishman':
-            persist_directory="./respfishmandbcud/"
-            vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="fishmannotescud")
-            retriever = vectordb.as_retriever(search_kwargs={"k": 5})
-            retriever = retriever # replace with your actual retriever
-        if option == 'RespiratoryMurray':
-            persist_directory="./respmurray/"
-            vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="respmurraynotes")
-            retriever = vectordb.as_retriever(search_kwargs={"k": 5})
-            retriever = retriever
-        if option == 'MedMRCP2':
-            persist_directory="./medmrcp2store/"
-            vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="medmrcp2notes")
-            retriever = vectordb.as_retriever(search_kwargs={"k": 5})
-            retriever = retriever
-        if option == 'General Medicine':
-            persist_directory="./oxfordmedbookdir/"
-            vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="oxfordmed")
-            retriever = vectordb.as_retriever(search_kwargs={"k": 7})
-            retriever = retriever
-        else:
-            persist_directory="./mrcpchromadb/"
-            vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function,collection_name="mrcppassmednotes")
-            retriever = vectordb.as_retriever(search_kwargs={"k": 5})
-            retriever = retriever # replace with your actual retriever
-            retriever = retriever  # replace with your actual retriever
-        #template = """You are an AI chatbot having a conversation with a human. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
-        #{context}
-        #{history}
-        #Human: {human_input}
-        #AI: """
-        #prompt = PromptTemplate(input_variables=["history", "question"], template=template)
-        #template = st.text_area("Template", value=template, height=180)
-        #prompt2 = ChatPromptTemplate.from_template(template)
     # Session State
-    # Store LLM generated responses
     if "messages" not in st.session_state.keys():
         st.session_state.messages = [{"role": "assistant", "content": "How may I help you?"}]
-    ## Retry lets go
-    _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question which contains the themes of the conversation. Do not write the question. Do not write the answer.
-    Chat History:
-    {chat_history}
-    Follow Up Input: {question}
-    Standalone question:"""
-    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
-    template = """You are helping a doctor. Answer with what you know from the context provided. Please be as detailed and thorough. Answer the question based on the following context:
-    {context}
-    Question: {question}
-    """
-    ANSWER_PROMPT = ChatPromptTemplate.from_template(template)
-    _inputs = RunnableParallel(
-        standalone_question=RunnablePassthrough.assign(
-            chat_history=lambda x: chistory
-        )
-        | CONDENSE_QUESTION_PROMPT
-        | llmc
-        | StrOutputParser(),
-    )
-    _context = {
-        "context": itemgetter("standalone_question") | retriever | _combine_documents,
-        "question": lambda x: x["standalone_question"],
-    }
-    conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | llm
     st.header("Ask Away!")
-    # Display the messages
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.write(message["content"])
-            store_chat_history(message["role"], message["content"])
-    # prompt = hub.pull("rlm/rag-prompt")
     prompts2 = st.chat_input("Say something")
-    # Implement using different book sources, if statements
     if prompts2:
         st.session_state.messages.append({"role": "user", "content": prompts2})
         with st.chat_message("user"):
             st.write(prompts2)
     if st.session_state.messages[-1]["role"] != "assistant":
         with st.chat_message("assistant"):
             with st.spinner("Thinking..."):
-                response = conversational_qa_chain.invoke(
-                {
-                    "question": prompts2,
-                    "chat_history": chistory,
-                }
-            )
-                st.write(response)
-        message = {"role": "assistant", "content": response}
-        st.session_state.messages.append(message)
-    # Create a button to submit the question
-# Initialize history
-history = []
 if __name__ == '__main__':
-    app()

 import streamlit as st
 import os
+import asyncio
+import bs4
+from langchain.chains import create_history_aware_retriever, create_retrieval_chain
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_community.vectorstores import Chroma
 from langchain_together import Together
+from langchain_community.chat_message_histories import StreamlitChatMessageHistory
+from langchain_community.document_loaders import WebBaseLoader
+from langchain_core.chat_history import BaseChatMessageHistory
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.runnables.history import RunnableWithMessageHistory
+from langchain.embeddings import HuggingFaceBgeEmbeddings
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+# Initialize the LLM. This single Together-hosted model is passed both to the
+# history-aware retriever and to the answer chain built in app().
 llm = Together(
+    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
     temperature=0.2,
     top_k=12,
+    # NOTE(review): 22048 looks like a typo for 2048 -- confirm against the
+    # model's context window, which the prompt has to share with this budget.
+    max_tokens=22048,
+    # The API key is read from the 'pilotikval' environment variable; if it
+    # is not set this lookup raises KeyError when the module is executed.
     together_api_key=os.environ['pilotikval']
 )
+# Module-level cache of chat-history objects keyed by session id; it is
+# read and filled by get_session_history.
+store = {}
+# Embedding model handed to Chroma in app() when a vector store is opened.
+model_name = "BAAI/bge-base-en"
+encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity
+embedding_function = HuggingFaceBgeEmbeddings(
+    model_name=model_name,
+    encode_kwargs=encode_kwargs
+)
+def get_session_history(session_id: str) -> BaseChatMessageHistory:
+    """Return the chat history for ``session_id``, creating it on first use.
+
+    Histories are cached in the module-level ``store`` dict. A missing entry
+    is created as a ``StreamlitChatMessageHistory`` whose key is the session
+    id, stored, and returned.
+    """
+    try:
+        return store[session_id]
+    except KeyError:
+        history = StreamlitChatMessageHistory(key=session_id)
+        store[session_id] = history
+        return history
 # Define the Streamlit app
 def app():
+    """Render the "dochatter" chat page and answer questions with a RAG chain.
+
+    The sidebar picks which persisted Chroma collection to retrieve from
+    (5 documents per query). A history-aware retriever uses the LLM and the
+    chat history to contextualize the latest question before retrieval, and
+    a stuff-documents chain answers from the retrieved context.
+
+    Two histories are kept: ``st.session_state.messages`` holds the
+    transcript drawn on the page, while the history fed to the chain is
+    managed by ``RunnableWithMessageHistory`` through ``get_session_history``
+    under the fixed session id ``"current_session"``.
+    """
     with st.sidebar:
         st.title("dochatter")
         option = st.selectbox(
             'Which retriever would you like to use?',
             ('General Medicine', 'RespiratoryFishman', 'RespiratoryMurray', 'MedMRCP2', 'OldMedicine')
         )
+        # One table maps each option to (persist_directory, collection_name)
+        # so a store's directory and its collection cannot drift apart.
+        sources = {
+            'General Medicine': ("./oxfordmedbookdir/", "oxfordmed"),
+            'RespiratoryFishman': ("./respfishmandbcud/", "fishmannotescud"),
+            'RespiratoryMurray': ("./respmurray/", "respmurraynotes"),
+            'MedMRCP2': ("./medmrcp2store/", "medmrcp2notes"),
+            'OldMedicine': ("./mrcpchromadb/", "mrcppassmednotes"),
+        }
+        # Any unrecognised option falls back to the 'OldMedicine' store.
+        persist_directory, collection_name = sources.get(option, sources['OldMedicine'])
+        vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function, collection_name=collection_name)
+        retriever = vectordb.as_retriever(search_kwargs={"k": 5})
+    # Prompt used to turn a follow-up question into a standalone one
+    contextualize_q_system_prompt = (
+        "Given a chat history and the latest user question "
+        "which might reference context in the chat history, "
+        "formulate a standalone question which can be understood "
+        "without the chat history. Do NOT answer the question, "
+        "just reformulate it if needed and otherwise return it as is."
+    )
+    contextualize_q_prompt = ChatPromptTemplate.from_messages(
+        [
+            ("system", contextualize_q_system_prompt),
+            MessagesPlaceholder("chat_history"),
+            ("human", "{input}"),
+        ]
+    )
+    history_aware_retriever = create_history_aware_retriever(
+        llm, retriever, contextualize_q_prompt
+    )
+    # Prompt used to answer from the retrieved documents ({context})
+    system_prompt = (
+        "You are an assistant for question-answering tasks. "
+        "Use the following pieces of retrieved context to answer "
+        "the question. If you don't know the answer, say that you "
+        "don't know."
+        "\n\n"
+        "{context}"
+    )
+    qa_prompt = ChatPromptTemplate.from_messages(
+        [
+            ("system", system_prompt),
+            MessagesPlaceholder("chat_history"),
+            ("human", "{input}"),
+        ]
+    )
+    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
+    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
+    # Statefully manage chat history
+    conversational_rag_chain = RunnableWithMessageHistory(
+        rag_chain,
+        get_session_history,
+        input_messages_key="input",
+        history_messages_key="chat_history",
+        output_messages_key="answer",
+    )
     # Session State
+    if "messages" not in st.session_state:
         st.session_state.messages = [{"role": "assistant", "content": "How may I help you?"}]
     st.header("Ask Away!")
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.write(message["content"])
     prompts2 = st.chat_input("Say something")
     if prompts2:
         st.session_state.messages.append({"role": "user", "content": prompts2})
         with st.chat_message("user"):
             st.write(prompts2)
+    last_message = st.session_state.messages[-1]
+    if last_message["role"] != "assistant":
+        # Answer the pending user message itself rather than `prompts2`:
+        # on a rerun where nothing was typed (e.g. the previous call failed
+        # and the user then touched the sidebar) `prompts2` is None while an
+        # unanswered user message is still last in the transcript.
+        question = last_message["content"]
         with st.chat_message("assistant"):
             with st.spinner("Thinking..."):
+                final_response = conversational_rag_chain.invoke(
+                    {
+                        "input": question,
+                    },
+                    config={"configurable": {"session_id": "current_session"}}
+                )
+                st.write(final_response['answer'])
+        st.session_state.messages.append({"role": "assistant", "content": final_response['answer']})
+# Run the app when this file is executed as a script.
 if __name__ == '__main__':
+    app()