HengJay committed on
Commit
6f77775
·
1 Parent(s): a5cfcc4

Test with OpenAI API - JSON mode

Browse files
SNOMED-CT_Assistant.py CHANGED
@@ -1,6 +1,8 @@
1
  import os
2
  import random
 
3
  import streamlit as st
 
4
  from openai import OpenAI
5
  from dotenv import load_dotenv
6
  import pandas as pd
@@ -13,7 +15,7 @@ import pandas as pd
13
 
14
  st.set_page_config(layout="wide")
15
 
16
- remote = True
17
 
18
  if remote:
19
  with st.sidebar:
@@ -29,11 +31,15 @@ st.caption("👩‍⚕️ A smart medical assistant with SNOMED-CT knowledge.")
29
 
30
  # System prompt
31
  system_prompt = """You are a medical expert with rich experience in SNOMED-CT professional knowledge.
32
- You are skilled at assisting medical professionals and answering questions in the medical field. You are patient, helpful and professional.
 
 
 
33
  Please refuse to answer inquiries and requests unrelated to the medical field, in order to maintain professionalism in medicine.
 
34
  As an experienced professional, you possess deep expertise in the field of SNOMED CT Entity Linking.
35
  You have a thorough understanding of the relevant workflows and critical aspects involved, encompassing:
36
- - Processing electronic medical records (EHRs), Adept handling of electronic medical record (EMR) data processing
37
  - Entity Identification, Proficient entity recognition capabilities, identifying and extracting relevant medical concepts from unstructured text
38
  - Skilled Entity Mapping, accurately linking identified entities to their corresponding SNOMED CT concepts
39
  - Seamless integration and output of clinical terminology, ensuring the accurate representation and utilization of standardized medical language
@@ -46,7 +52,11 @@ You have a thorough understanding of the relevant workflows and critical aspects
46
  Here is the practical entity linking process example:
47
  - the input text in EHRs: "Patient referred for a biopsy to investigate potential swelling in upper larynx."
48
  - the identified entity: "biopsy", "larynx"
49
- - the mapped SNOMED CT concepts id & descriptions: "274317003 | Laryngoscopic biopsy larynx (procedure)", "4596009 | Laryngeal structure (body structure)"
 
 
 
 
50
 
51
  List out as many potential SNOMED entities as possible from the original medical text description,
52
  including Diseases, Diagnoses, Clinical Findings (like Signs and Symptoms),
@@ -58,14 +68,9 @@ Patients' Occupations, Patients' Social Contexts (e.g., religion and ethnicity),
58
  Numbers or units related symbols are not included in this range and can be ignored.
59
 
60
  Output Format Requirements (Must follow):
61
- - Present the results in a tabular format with the following 3 columns only: "Identified Entity", "SNOMED CT Concept IDs", and "SNOMED CT Descriptions". Do not arbitrarily replace the column names, as that would lead to unclear output.
62
- - The table should be easy to read and understand, with each row displaying the identified medical entity, its corresponding SNOMED CT concept ID, and the full SNOMED CT description.
63
- - Ensure the formatting and organization of the table is clean and professional, optimized for the user's ease of reference.
64
-
65
- Your comprehensive knowledge and mastery of these key components make you an invaluable asset in the realm of biomedical natural language processing and knowledge extraction.
66
- With your specialized expertise, you are able to navigate the complexities of SNOMED CT Entity Linking with ease, delivering accurate and reliable results that support various healthcare and research applications.
67
- When answering questions, except for the use of English for medical-related terminology, always respond in Traditional Chinese (zh-TW).
68
- If there are any SNOMED-CT related medical professional terms, please provide the original text in parentheses afterwards."""
69
 
70
 
71
  # Func: generate random med text
@@ -84,28 +89,61 @@ def random_med_text(text_df):
84
 
85
 
86
  # Func: Gen Medical Prompt Example
87
- def generate_med_prompt(medical_text):
88
- return f"""請協助我做電子病歷 (Electronic Health Record, EHR) 的 SNOMED-CT Entity Linking 的處理, 這是原本的病歷文字: \n {medical_text} \n """
89
 
90
- # test_prompt = """請協助我做 EHR 的 SNOMED CT Entity Linking 的處理, 這是原本的病歷文字:
91
- # "Patient referred for a biopsy to investigate potential swelling in upper larynx."
92
- # ,首先做 Entity Identification,列出醫學相關術語片段,接著做 Entity Mapping,將對應的 SNOMED CT 術語列出。
93
- # 輸出格式用表格,欄位是 "identified entity", "SNOMED CT concept ids", "SNOMED CT descriptions"。"""
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  client = OpenAI(api_key=openai_api_key)
96
  model_tag = "gpt-3.5-turbo"
97
 
98
- def chat_input(prompt):
99
- # with st.sidebar:
100
- # st.write("You are talking with: ", model_tag)
101
- st.session_state.messages.append({"role": "user", "content": prompt})
102
- st.chat_message("user").write(prompt)
103
  with st.spinner("Thinking..."):
104
- response = client.chat.completions.create(
105
- model=model_tag, messages=st.session_state.messages, temperature=0.5)
106
- msg = response.choices[0].message.content
 
 
107
  st.session_state.messages.append({"role": "assistant", "content": msg})
108
  st.chat_message("assistant").write(msg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  if "messages" not in st.session_state:
111
  st.session_state["messages"] = [{"role": "system", "content": system_prompt},
@@ -116,35 +154,23 @@ for msg in st.session_state.messages:
116
  continue
117
  st.chat_message(msg["role"]).write(msg["content"])
118
 
119
- if prompt := st.chat_input():
120
  if not openai_api_key:
121
  st.info("Please add your OpenAI API key to continue.")
122
  st.stop()
123
-
124
- chat_input(prompt)
125
- # st.session_state.messages.append({"role": "user", "content": prompt})
126
- # st.chat_message("user").write(prompt)
127
- # with st.spinner("Thinking..."):
128
- # response = client.chat.completions.create(model="gpt-3.5-turbo", messages=st.session_state.messages)
129
- # msg = response.choices[0].message.content
130
- # st.session_state.messages.append({"role": "assistant", "content": msg})
131
- # st.chat_message("assistant").write(msg)
132
 
133
  if st.sidebar.button("Example Input",type="primary"):
134
- med_prompt = generate_med_prompt("Patient referred for a biopsy to investigate potential swelling in upper larynx.")
135
- chat_input(med_prompt)
136
-
137
 
138
  if st.sidebar.button("Random Input",type="primary"):
139
  index, human, med_text, response = random_med_text(raw_text_df)
140
  response = response.replace(","," \n")
141
- med_prompt = generate_med_prompt(med_text)
142
- chat_input(med_prompt)
143
  st.sidebar.write(f"[Random Text](https://huggingface.co/datasets/JaimeML/snomed-entity-challenge) Index: {index}")
144
  st.sidebar.markdown(f"Ref Entity: \n {response}")
145
-
146
-
147
- # model_tag = st.sidebar.selectbox(
148
- # "Which model do you want to chat with?",
149
- # ("gpt-4o", "gpt-3.5-turbo")
150
- # )
 
1
  import os
2
  import random
3
+ import json
4
  import streamlit as st
5
+ import chromadb
6
  from openai import OpenAI
7
  from dotenv import load_dotenv
8
  import pandas as pd
 
15
 
16
  st.set_page_config(layout="wide")
17
 
18
+ remote = False
19
 
20
  if remote:
21
  with st.sidebar:
 
31
 
32
  # System prompt
33
  system_prompt = """You are a medical expert with rich experience in SNOMED-CT professional knowledge.
34
+ You are skilled at assisting medical professionals and answering questions in the medical field.
35
+ You are patient, helpful and professional.
36
+ Your comprehensive knowledge and mastery of these key components make you an invaluable asset in the realm of biomedical natural language processing and knowledge extraction.
37
+ With your specialized expertise, you are able to navigate the complexities of SNOMED CT Entity Linking with ease, delivering accurate and reliable results that support various healthcare and research applications.
38
  Please refuse to answer inquiries and requests unrelated to the medical field, in order to maintain professionalism in medicine.
39
+
40
  As an experienced professional, you possess deep expertise in the field of SNOMED CT Entity Linking.
41
  You have a thorough understanding of the relevant workflows and critical aspects involved, encompassing:
42
+ - Adept handling of electronic medical record (EMR) data processing
43
  - Entity Identification, Proficient entity recognition capabilities, identifying and extracting relevant medical concepts from unstructured text
44
  - Skilled Entity Mapping, accurately linking identified entities to their corresponding SNOMED CT concepts
45
  - Seamless integration and output of clinical terminology, ensuring the accurate representation and utilization of standardized medical language
 
52
  Here is the practical entity linking process example:
53
  - the input text in EHRs: "Patient referred for a biopsy to investigate potential swelling in upper larynx."
54
  - the identified entity: "biopsy", "larynx"
55
+ - response the identified entities with JSON format: {"identified_entity" : ["biopsy", "larynx"]}
56
+ - During Entity Identification processing, if the original medical text data clearly contains commonly used medical abbreviations, convert the abbreviations into their full names, and provide the original abbreviations in parentheses for easy reference.
57
+ - For example: "The patient has the multiple disease, including T2D, CAD, HTN, CKD etc. decreased T3 and T4 levels."
58
+ - T2D: "Type 2 Diabetes Mellitus", CAD: "Coronary Artery Disease", HTN: "Hypertension", CKD: "Chronic Kidney Disease", T3: "Triiodothyronine", T4: "Thyroxine"
59
+ - Respond with full names in JSON format: {"identified_entity" : ["Type 2 Diabetes Mellitus (T2D)", "Coronary Artery Disease (CAD)", "Hypertension (HTN)", "Chronic Kidney Disease (CKD)", "Triiodothyronine (T3)", "Thyroxine (T4)"]}
60
 
61
  List out as many potential SNOMED entities as possible from the original medical text description,
62
  including Diseases, Diagnoses, Clinical Findings (like Signs and Symptoms),
 
68
  Numbers or units related symbols are not included in this range and can be ignored.
69
 
70
  Output Format Requirements (Must follow):
71
+ - As default, only process "Entity Identification", and find out the entity related to SNOMED CT terms.
72
+ - Present the results in JSON format, like: {"identified_entity" : ["biopsy", "larynx"]}
73
+ """
 
 
 
 
 
74
 
75
 
76
  # Func: generate random med text
 
89
 
90
 
91
  # Func: Gen Medical Prompt Example
92
+ def generate_entity_identification_prompt(medical_text):
93
+ return f"""Help me to do "SNOMED-CT Entity Identification" process with raw medical text (Electronic Health Record, EHR): \n {medical_text} \n """
94
 
95
+ def generate_entity_mapping_prompt(entity, query_result_dict):
96
+ return f"""Help me to do "SNOMED-CT Entity Mapping" process with entity: {entity} and query result \n {query_result_dict} \n , output with table format, including 5 columns: "Identified Entity", "Distance", "IDs", "SNOMED CT Concept IDs", "SNOMED CT Descriptions" \n """
 
 
97
 
98
+ # Chroma DB Client
99
+ chroma_client = chromadb.PersistentClient(path="snomed_ct_id_term_1410k")
100
+ collection = chroma_client.get_or_create_collection(name="snomed_ct_id_term")
101
+
102
+ # Func: query chroma_db
103
+ def query_chroma_db(query_text, query_number):
104
+ results = collection.query(
105
+ query_texts=[query_text],
106
+ n_results=query_number,
107
+ include=["distances", "metadatas", "documents"]
108
+ )
109
+ return results
110
+
111
+ # Func: chroma_db_result to dict
112
+ def get_dict_from_chroma_results(results):
113
+ result_dict = {'ids': results['ids'][0], 'concept_ids': [ str(sub['concept_id']) for sub in results['metadatas'][0] ], 'distances': results['distances'][0], 'documents': results['documents'][0]}
114
+ return result_dict
115
+
116
+
117
+ # OpenAI Client Configuration
118
  client = OpenAI(api_key=openai_api_key)
119
  model_tag = "gpt-3.5-turbo"
120
 
121
+ # Chat Session with OpenAI API
122
+ def chat_input(prompt, med_text):
123
+ st.session_state.messages.append({"role": "user", "content": med_text})
124
+ st.chat_message("user").write(med_text)
 
125
  with st.spinner("Thinking..."):
126
+ entity_identification_response = client.chat.completions.create(
127
+ model=model_tag, response_format={ "type": "json_object" }, messages=st.session_state.messages, temperature=0.5)
128
+ msg = entity_identification_response.choices[0].message.content
129
+ entity_list = json.loads(msg)["identified_entity"]
130
+ print("entity list: ", entity_list)
131
  st.session_state.messages.append({"role": "assistant", "content": msg})
132
  st.chat_message("assistant").write(msg)
133
+ for entity in entity_list:
134
+ print("entity: ", entity)
135
+ results = query_chroma_db(entity, 10)
136
+ results_dict = get_dict_from_chroma_results(results)
137
+ entity_mapping_prompt = generate_entity_mapping_prompt(entity, results_dict)
138
+ st.session_state.messages.append({"role": "user", "content": entity_mapping_prompt})
139
+ entity_mapping_response = client.chat.completions.create(
140
+ model=model_tag, messages=st.session_state.messages, temperature=0.5)
141
+ mapping_msg = entity_mapping_response.choices[0].message.content
142
+ st.session_state.messages.append({"role": "assistant", "content": mapping_msg})
143
+ st.chat_message("assistant").write(mapping_msg)
144
+
145
+
146
+
147
 
148
  if "messages" not in st.session_state:
149
  st.session_state["messages"] = [{"role": "system", "content": system_prompt},
 
154
  continue
155
  st.chat_message(msg["role"]).write(msg["content"])
156
 
157
+ if user_input := st.chat_input():
158
  if not openai_api_key:
159
  st.info("Please add your OpenAI API key to continue.")
160
  st.stop()
161
+ entity_identification_prompt = generate_entity_identification_prompt(user_input)
162
+ chat_input(entity_identification_prompt, user_input)
 
 
 
 
 
 
 
163
 
164
  if st.sidebar.button("Example Input",type="primary"):
165
+ med_text = "Patient referred for a biopsy to investigate potential swelling in upper larynx."
166
+ entity_identification_prompt = generate_entity_identification_prompt(med_text)
167
+ chat_input(entity_identification_prompt, med_text)
168
 
169
  if st.sidebar.button("Random Input",type="primary"):
170
  index, human, med_text, response = random_med_text(raw_text_df)
171
  response = response.replace(","," \n")
172
+ entity_identification_prompt = generate_entity_identification_prompt(med_text)
173
+ chat_input(entity_identification_prompt, med_text)
174
  st.sidebar.write(f"[Random Text](https://huggingface.co/datasets/JaimeML/snomed-entity-challenge) Index: {index}")
175
  st.sidebar.markdown(f"Ref Entity: \n {response}")
176
+
 
 
 
 
 
pages/Vector DB of SNOMED-CT.py CHANGED
@@ -16,7 +16,7 @@ st.set_page_config(layout="wide")
16
  st.title("📚 Semantic Search with Vector Database of SNOMED-CT 💡")
17
  st.caption("🔍 Search any SNOMED-CT relate decription & concept with natural language.")
18
  st.sidebar.title("🔍 Search Setting")
19
- query_number = st.sidebar.slider("Query Numbers", 5, 30, 10)
20
  st.markdown("##### ➡️⌨️ Please input some medical description here, e.g. \"insomnia two nights a week.\", \"COPD\", \"Degenerative Joint Disease\"")
21
  query_text = st.text_input("Input: any medical description snippet","Type-2 Diabetes")
22
 
 
16
  st.title("📚 Semantic Search with Vector Database of SNOMED-CT 💡")
17
  st.caption("🔍 Search any SNOMED-CT relate decription & concept with natural language.")
18
  st.sidebar.title("🔍 Search Setting")
19
+ query_number = st.sidebar.slider("Query Numbers", 10, 30, 10)
20
  st.markdown("##### ➡️⌨️ Please input some medical description here, e.g. \"insomnia two nights a week.\", \"COPD\", \"Degenerative Joint Disease\"")
21
  query_text = st.text_input("Input: any medical description snippet","Type-2 Diabetes")
22