Spaces:

HengJay
/

snomed-ct-assistant

Running on CPU Upgrade

App Files Files Community

HengJay committed on Jun 11, 2024

Commit

6f77775

1 Parent(s): a5cfcc4

Test with OpenAI API - JSON mode

Browse files

Files changed (2) hide show

SNOMED-CT_Assistant.py +73 -47
pages/Vector DB of SNOMED-CT.py +1 -1

SNOMED-CT_Assistant.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import os
 import random
 import streamlit as st
 from openai import OpenAI
 from dotenv import load_dotenv
 import pandas as pd
@@ -13,7 +15,7 @@ import pandas as pd
 st.set_page_config(layout="wide")
-remote = True
 if remote:
     with st.sidebar:
@@ -29,11 +31,15 @@ st.caption("👩‍⚕️ A smart medical assistant with SNOMED-CT knowledge.")
 # System prompt
 system_prompt = """You are a medical expert with rich experience in SNOMED-CT professional knowledge.
-You are skilled at assisting medical professionals and answering questions in the medical field. You are patient, helpful and professional.
 Please refuse to answer inquiries and requests unrelated to the medical field, in order to maintain professionalism in medicine.
 As an experienced professional, you possess deep expertise in the field of SNOMED CT Entity Linking.
 You have a thorough understanding of the relevant workflows and critical aspects involved, encompassing:
-- Processing electronic medical records (EHRs), Adept handling of electronic medical record (EMR) data processing
 - Entity Identification, Proficient entity recognition capabilities, identifying and extracting relevant medical concepts from unstructured text
 - Skilled Entity Mapping, accurately linking identified entities to their corresponding SNOMED CT concepts
 - Seamless integration and output of clinical terminology, ensuring the accurate representation and utilization of standardized medical language
@@ -46,7 +52,11 @@ You have a thorough understanding of the relevant workflows and critical aspects
 Here is the practical entity linking process example:
 - the input text in EHRs: "Patient referred for a biopsy to investigate potential swelling in upper larynx."
 - the identified entity: "biopsy", "larynx"
-- the mapped SNOMED CT concepts id & descriptions: "274317003 | Laryngoscopic biopsy larynx (procedure)", "4596009 | Laryngeal structure (body structure)"
 List out as many potential SNOMED entities as possible from the original medical text description,
 including Diseases, Diagnoses, Clinical Findings (like Signs and Symptoms),
@@ -58,14 +68,9 @@ Patients' Occupations, Patients' Social Contexts (e.g., religion and ethnicity),
 Numbers or units related symbols are not included in this range and can be ignored.
 Output Format Requirements (Must follow):
-- Present the results in a tabular format with the following 3 columns only: "Identified Entity", "SNOMED CT Concept IDs", and "SNOMED CT Descriptions". Do not arbitrarily replace the column names, as that would lead to unclear output.
-- The table should be easy to read and understand, with each row displaying the identified medical entity, its corresponding SNOMED CT concept ID, and the full SNOMED CT description.
-- Ensure the formatting and organization of the table is clean and professional, optimized for the user's ease of reference.
-Your comprehensive knowledge and mastery of these key components make you an invaluable asset in the realm of biomedical natural language processing and knowledge extraction.
-With your specialized expertise, you are able to navigate the complexities of SNOMED CT Entity Linking with ease, delivering accurate and reliable results that support various healthcare and research applications.
-When answering questions, except for the use of English for medical-related terminology,  always respond in Traditional Chinese (zh-TW).
-If there are any SNOMED-CT related medical professional terms, please provide the original text in parentheses afterwards."""
 # Func: generate random med text
@@ -84,28 +89,61 @@ def random_med_text(text_df):
 # Func: Gen Medical Prompt Example
-def generate_med_prompt(medical_text):
-    return f"""請協助我做電子病歷 (Electronic Health Record, EHR) 的 SNOMED-CT Entity Linking 的處理， 這是原本的病歷文字:  \n {medical_text} \n """
-# test_prompt = """請協助我做 EHR 的 SNOMED CT Entity Linking 的處理， 這是原本的病歷文字:
-# "Patient referred for a biopsy to investigate potential swelling in upper larynx."
-# ，首先做 Entity Identification，列出醫學相關術語片段，接著做 Entity Mapping，將對應的 SNOMED CT 術語列出。
-# 輸出格式用表格，欄位是 "identified entity", "SNOMED CT concept ids", "SNOMED CT descriptions"。"""
 client = OpenAI(api_key=openai_api_key)
 model_tag = "gpt-3.5-turbo"
-def chat_input(prompt):
-    # with st.sidebar:
-    # st.write("You are talking with: ", model_tag)
-    st.session_state.messages.append({"role": "user", "content": prompt})
-    st.chat_message("user").write(prompt)
     with st.spinner("Thinking..."):
-        response = client.chat.completions.create(
-            model=model_tag, messages=st.session_state.messages, temperature=0.5)
-        msg = response.choices[0].message.content
         st.session_state.messages.append({"role": "assistant", "content": msg})
         st.chat_message("assistant").write(msg)
 if "messages" not in st.session_state:
     st.session_state["messages"] = [{"role": "system", "content": system_prompt},
@@ -116,35 +154,23 @@ for msg in st.session_state.messages:
         continue
     st.chat_message(msg["role"]).write(msg["content"])
-if prompt := st.chat_input():
     if not openai_api_key:
         st.info("Please add your OpenAI API key to continue.")
         st.stop()
-    chat_input(prompt)
-    # st.session_state.messages.append({"role": "user", "content": prompt})
-    # st.chat_message("user").write(prompt)
-    # with st.spinner("Thinking..."):
-    #     response = client.chat.completions.create(model="gpt-3.5-turbo", messages=st.session_state.messages)
-    #     msg = response.choices[0].message.content
-    #     st.session_state.messages.append({"role": "assistant", "content": msg})
-    #     st.chat_message("assistant").write(msg)
 if st.sidebar.button("Example Input",type="primary"):
-    med_prompt = generate_med_prompt("Patient referred for a biopsy to investigate potential swelling in upper larynx.")
-    chat_input(med_prompt)
 if st.sidebar.button("Random Input",type="primary"):
     index, human, med_text, response = random_med_text(raw_text_df)
     response = response.replace(",","  \n")
-    med_prompt = generate_med_prompt(med_text)
-    chat_input(med_prompt)
     st.sidebar.write(f"[Random Text](https://huggingface.co/datasets/JaimeML/snomed-entity-challenge) Index: {index}")
     st.sidebar.markdown(f"Ref Entity:  \n  {response}")
-# model_tag = st.sidebar.selectbox(
-#     "Which model do you want to chat with?",
-#     ("gpt-4o", "gpt-3.5-turbo")
-# )

 import os
 import random
+import json
 import streamlit as st
+import chromadb
 from openai import OpenAI
 from dotenv import load_dotenv
 import pandas as pd
 st.set_page_config(layout="wide")
+remote = False
 if remote:
     with st.sidebar:
 # System prompt
 system_prompt = """You are a medical expert with rich experience in SNOMED-CT professional knowledge.
+You are skilled at assisting medical professionals and answering questions in the medical field.
+You are patient, helpful and professional.
+Your comprehensive knowledge and mastery of these key components make you an invaluable asset in the realm of biomedical natural language processing and knowledge extraction.
+With your specialized expertise, you are able to navigate the complexities of SNOMED CT Entity Linking with ease, delivering accurate and reliable results that support various healthcare and research applications.
 Please refuse to answer inquiries and requests unrelated to the medical field, in order to maintain professionalism in medicine.
 As an experienced professional, you possess deep expertise in the field of SNOMED CT Entity Linking.
 You have a thorough understanding of the relevant workflows and critical aspects involved, encompassing:
+- Adept handling of electronic medical record (EMR) data processing
 - Entity Identification, Proficient entity recognition capabilities, identifying and extracting relevant medical concepts from unstructured text
 - Skilled Entity Mapping, accurately linking identified entities to their corresponding SNOMED CT concepts
 - Seamless integration and output of clinical terminology, ensuring the accurate representation and utilization of standardized medical language
 Here is the practical entity linking process example:
 - the input text in EHRs: "Patient referred for a biopsy to investigate potential swelling in upper larynx."
 - the identified entity: "biopsy", "larynx"
+- response the identified entities with JSON format: {"identified_entity" : ["biopsy", "larynx"]}
+- During Entity Identification processing, if the original medical text data clearly contains commonly used medical abbreviations, convert the abbreviations into their full names, and provide the original abbreviations in parentheses for easy reference.
+- For example: "The patient has the multiple disease, including T2D, CAD, HTN, CKD etc. decreased T3 and T4 levels."
+- T2D: "Type 2 Diabetes Mellitus", CAD: "Coronary Artery Disease", HTN: "Hypertension", CKD: "Chronic Kidney Disease", T3: "Triiodothyronine", T4: "Thyroxine"
+- Respond with full names in JSON format: {"identified_entity" : ["Type 2 Diabetes Mellitus (T2D)", "Coronary Artery Disease (CAD)", "Hypertension (HTN)", "Chronic Kidney Disease (CKD)", "Triiodothyronine (T3)", "Thyroxine (T4)"]}
 List out as many potential SNOMED entities as possible from the original medical text description,
 including Diseases, Diagnoses, Clinical Findings (like Signs and Symptoms),
 Numbers or units related symbols are not included in this range and can be ignored.
 Output Format Requirements (Must follow):
+- As default, only process "Entity Identification", and find out the entity related to SNOMED CT terms.
+- Present the results in JSON format, like:  {"identified_entity" : ["biopsy", "larynx"]}
+"""
 # Func: generate random med text
 # Func: Gen Medical Prompt Example
def generate_entity_identification_prompt(medical_text: str) -> str:
    """Build the user prompt requesting SNOMED-CT entity identification.

    Args:
        medical_text: Raw medical text (EHR snippet) to embed in the prompt.

    Returns:
        The fixed instruction sentence followed by ``medical_text`` on its
        own line. Spacing around the newlines is kept exactly as the prompt
        was originally authored.
    """
    return (
        'Help me to do "SNOMED-CT Entity Identification" process with raw '
        "medical text (Electronic Health Record, EHR):  \n "
        f"{medical_text} \n "
    )
def generate_entity_mapping_prompt(entity, query_result_dict) -> str:
    """Build the user prompt requesting SNOMED-CT entity mapping.

    Args:
        entity: The identified entity text to be mapped.
        query_result_dict: Vector-search candidates for ``entity``; it is
            interpolated with its default string representation.

    Returns:
        A prompt naming the entity, listing the candidates, and asking for a
        five-column table ("Identified Entity", "Distance", "IDs",
        "SNOMED CT Concept IDs", "SNOMED CT Descriptions").
    """
    return (
        'Help me to do "SNOMED-CT Entity Mapping" process with entity: '
        f"{entity} and query result \n {query_result_dict} \n "
        ", output with table format, including 5 columns: "
        '"Identified Entity", "Distance", "IDs", '
        '"SNOMED CT Concept IDs", "SNOMED CT Descriptions"  \n '
    )
+# Chroma DB Client
+chroma_client = chromadb.PersistentClient(path="snomed_ct_id_term_1410k")
+collection = chroma_client.get_or_create_collection(name="snomed_ct_id_term")
# Func: query chroma_db
def query_chroma_db(query_text, query_number):
    """Query the module-level Chroma ``collection`` with a single text.

    Args:
        query_text: Text to search for; sent as a one-element ``query_texts``
            list, so the result holds exactly one result group.
        query_number: Number of results to request (``n_results``).

    Returns:
        The raw result of ``collection.query`` with distances, metadatas and
        documents requested.
    """
    return collection.query(
        query_texts=[query_text],
        n_results=query_number,
        include=["distances", "metadatas", "documents"],
    )
# Func: chroma_db_result to dict
def get_dict_from_chroma_results(results):
    """Flatten the first result group of a Chroma query into a plain dict.

    Args:
        results: Mapping with ``'ids'``, ``'metadatas'``, ``'distances'`` and
            ``'documents'`` keys, each holding a list whose first element is
            the per-query result list. Every metadata entry must contain a
            ``'concept_id'`` key.

    Returns:
        A dict with keys ``'ids'``, ``'concept_ids'``, ``'distances'`` and
        ``'documents'`` (in that order); concept ids are converted to ``str``.

    Raises:
        KeyError: If an expected key (including ``'concept_id'``) is missing.
    """
    # Only index 0 is read: the result of a single-text query is expected.
    return {
        'ids': results['ids'][0],
        'concept_ids': [
            str(metadata['concept_id']) for metadata in results['metadatas'][0]
        ],
        'distances': results['distances'][0],
        'documents': results['documents'][0],
    }
+# OpenAI Client Configuration
 client = OpenAI(api_key=openai_api_key)
 model_tag = "gpt-3.5-turbo"
# Chat Session with OpenAI API
def _parse_identified_entities(raw_reply):
    """Extract the ``identified_entity`` list from the model's JSON reply.

    Returns ``None`` when the reply is not a JSON object holding a list under
    ``"identified_entity"``; otherwise the non-blank string entries of it.
    """
    try:
        entities = json.loads(raw_reply)["identified_entity"]
    except (json.JSONDecodeError, KeyError, TypeError):
        # Invalid JSON, missing key, non-object JSON, or a None reply.
        return None
    if not isinstance(entities, list):
        return None
    return [item for item in entities if isinstance(item, str) and item.strip()]


def chat_input(prompt, med_text):
    """Run entity identification on ``med_text``, then map each entity.

    The medical text is recorded and shown as the user message, the model is
    asked (in JSON mode) for the identified entities, and for every entity
    the vector DB is queried and a mapping request is sent to the model.
    Every exchange is appended to ``st.session_state.messages``, so later
    requests include the earlier mapping prompts and replies.

    Args:
        prompt: Entity-identification prompt built by the caller.
            NOTE(review): this value is not sent to the model; only
            ``med_text`` is. Confirm whether that is intentional.
        med_text: Raw medical text entered or selected by the user.
    """
    st.session_state.messages.append({"role": "user", "content": med_text})
    st.chat_message("user").write(med_text)

    with st.spinner("Thinking..."):
        entity_identification_response = client.chat.completions.create(
            model=model_tag,
            response_format={"type": "json_object"},
            messages=st.session_state.messages,
            temperature=0.5,
        )
        msg = entity_identification_response.choices[0].message.content

        entity_list = _parse_identified_entities(msg)
        if entity_list is None:
            # Report the unusable reply instead of crashing the app with a
            # JSON/Key error; the reply is not added to the chat history.
            st.error(f"Could not read identified entities from the model reply: {msg}")
            return
        print("entity list: ", entity_list)

        st.session_state.messages.append({"role": "assistant", "content": msg})
        st.chat_message("assistant").write(msg)

        for entity in entity_list:
            print("entity: ", entity)
            results = query_chroma_db(entity, 10)
            results_dict = get_dict_from_chroma_results(results)
            entity_mapping_prompt = generate_entity_mapping_prompt(entity, results_dict)
            st.session_state.messages.append({"role": "user", "content": entity_mapping_prompt})

            # No JSON mode here: the mapping prompt asks for a table.
            entity_mapping_response = client.chat.completions.create(
                model=model_tag,
                messages=st.session_state.messages,
                temperature=0.5,
            )
            mapping_msg = entity_mapping_response.choices[0].message.content
            st.session_state.messages.append({"role": "assistant", "content": mapping_msg})
            st.chat_message("assistant").write(mapping_msg)
 if "messages" not in st.session_state:
     st.session_state["messages"] = [{"role": "system", "content": system_prompt},
         continue
     st.chat_message(msg["role"]).write(msg["content"])
+if user_input := st.chat_input():
     if not openai_api_key:
         st.info("Please add your OpenAI API key to continue.")
         st.stop()
+    entity_identification_prompt = generate_entity_identification_prompt(user_input)
+    chat_input(entity_identification_prompt, user_input)
 if st.sidebar.button("Example Input",type="primary"):
+    med_text = "Patient referred for a biopsy to investigate potential swelling in upper larynx."
+    entity_identification_prompt = generate_entity_identification_prompt(med_text)
+    chat_input(entity_identification_prompt, med_text)
 if st.sidebar.button("Random Input",type="primary"):
     index, human, med_text, response = random_med_text(raw_text_df)
     response = response.replace(",","  \n")
+    entity_identification_prompt = generate_entity_identification_prompt(med_text)
+    chat_input(entity_identification_prompt, med_text)
     st.sidebar.write(f"[Random Text](https://huggingface.co/datasets/JaimeML/snomed-entity-challenge) Index: {index}")
     st.sidebar.markdown(f"Ref Entity:  \n  {response}")

pages/Vector DB of SNOMED-CT.py CHANGED Viewed

@@ -16,7 +16,7 @@ st.set_page_config(layout="wide")
 st.title("📚 Semantic Search with Vector Database of SNOMED-CT 💡")
 st.caption("🔍 Search any SNOMED-CT relate decription & concept with natural language.")
 st.sidebar.title("🔍 Search Setting")
-query_number = st.sidebar.slider("Query Numbers", 5, 30, 10)
 st.markdown("##### ➡️⌨️ Please input some medical description here, e.g. \"insomnia two nights a week.\", \"COPD\", \"Degenerative Joint Disease\"")
 query_text = st.text_input("Input: any medical description snippet","Type-2 Diabetes")

 st.title("📚 Semantic Search with Vector Database of SNOMED-CT 💡")
 st.caption("🔍 Search any SNOMED-CT relate decription & concept with natural language.")
 st.sidebar.title("🔍 Search Setting")
+query_number = st.sidebar.slider("Query Numbers", 10, 30, 10)
 st.markdown("##### ➡️⌨️ Please input some medical description here, e.g. \"insomnia two nights a week.\", \"COPD\", \"Degenerative Joint Disease\"")
 query_text = st.text_input("Input: any medical description snippet","Type-2 Diabetes")