adithiyyha committed on
Commit
bba5f79
·
verified ·
1 Parent(s): 70be421

Update icd9_ui.py

Browse files
Files changed (1) hide show
  1. icd9_ui.py +215 -38
icd9_ui.py CHANGED
@@ -136,12 +136,106 @@
136
  # st.write(f"- {code}: {description}")
137
  # else:
138
  # st.error("Please enter a medical summary.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  import torch
140
  import pandas as pd
141
  import streamlit as st
 
142
  from transformers import LongformerTokenizer, LongformerForSequenceClassification
 
 
 
143
 
144
- # Load the fine-tuned model and tokenizer
145
  model_path = "./clinical_longformer"
146
  tokenizer = LongformerTokenizer.from_pretrained(model_path)
147
  model = LongformerForSequenceClassification.from_pretrained(model_path)
@@ -149,17 +243,8 @@ model.eval() # Set the model to evaluation mode
149
 
150
  # Load the ICD-9 descriptions from CSV into a dictionary
151
  icd9_desc_df = pd.read_csv("D_ICD_DIAGNOSES.csv") # Adjust the path to your CSV file
152
- icd9_desc_df['ICD9_CODE'] = icd9_desc_df['ICD9_CODE'].astype(str) # Ensure ICD9_CODE is string type
153
- icd9_descriptions = dict(zip(icd9_desc_df['ICD9_CODE'].str.replace('.', ''), icd9_desc_df['LONG_TITLE'])) # Remove decimals for matching
154
-
155
- # Load the ICD-9 to ICD-10 mapping
156
- icd9_to_icd10 = {}
157
- with open("2015_I9gem.txt", "r") as file:
158
- for line in file:
159
- parts = line.strip().split()
160
- if len(parts) == 3:
161
- icd9, icd10, _ = parts
162
- icd9_to_icd10[icd9] = icd10
163
 
164
  # ICD-9 code columns used during training
165
  icd9_columns = [
@@ -171,7 +256,7 @@ icd9_columns = [
171
  '995.92', 'V15.82', 'V45.81', 'V45.82', 'V58.61'
172
  ]
173
 
174
- # Function for making predictions and mapping to ICD-10
175
  def predict_icd9(texts, tokenizer, model, threshold=0.5):
176
  inputs = tokenizer(
177
  texts,
@@ -180,7 +265,7 @@ def predict_icd9(texts, tokenizer, model, threshold=0.5):
180
  max_length=512,
181
  return_tensors="pt"
182
  )
183
-
184
  with torch.no_grad():
185
  outputs = model(
186
  input_ids=inputs["input_ids"],
@@ -189,40 +274,132 @@ def predict_icd9(texts, tokenizer, model, threshold=0.5):
189
  logits = outputs.logits
190
  probabilities = torch.sigmoid(logits)
191
  predictions = (probabilities > threshold).int()
192
-
193
  predicted_icd9 = []
194
  for pred in predictions:
195
  codes = [icd9_columns[i] for i, val in enumerate(pred) if val == 1]
196
  predicted_icd9.append(codes)
197
-
198
- # Fetch descriptions and map to ICD-10 codes
199
  predictions_with_desc = []
200
  for codes in predicted_icd9:
201
- code_with_desc = []
202
- for code in codes:
203
- icd9_stripped = code.replace('.', '')
204
- icd10_code = icd9_to_icd10.get(icd9_stripped, "Mapping not found")
205
- icd9_desc = icd9_descriptions.get(icd9_stripped, "Description not found")
206
- code_with_desc.append((code, icd9_desc, icd10_code))
207
  predictions_with_desc.append(code_with_desc)
208
-
209
  return predictions_with_desc
210
 
211
  # Streamlit UI
212
- st.title("ICD-9 to ICD-10 Code Prediction")
213
- st.sidebar.header("Model Options")
214
- threshold = st.sidebar.slider("Prediction Threshold", 0.0, 1.0, 0.5, 0.01)
215
-
216
- st.write("### Enter Medical Summary")
217
- input_text = st.text_area("Medical Summary", placeholder="Enter clinical notes here...")
218
-
219
- if st.button("Predict"):
220
- if input_text.strip():
221
- predictions = predict_icd9([input_text], tokenizer, model, threshold)
222
- st.write("### Predicted ICD-9 and ICD-10 Codes with Descriptions")
223
- for icd9_code, description, icd10_code in predictions[0]:
224
- st.write(f"- ICD-9: {icd9_code} ({description}) -> ICD-10: {icd10_code}")
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  else:
226
- st.error("Please enter a medical summary.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
 
 
136
  # st.write(f"- {code}: {description}")
137
  # else:
138
  # st.error("Please enter a medical summary.")
139
+ # import torch
140
+ # import pandas as pd
141
+ # import streamlit as st
142
+ # from transformers import LongformerTokenizer, LongformerForSequenceClassification
143
+
144
+ # # Load the fine-tuned model and tokenizer
145
+ # model_path = "./clinical_longformer"
146
+ # tokenizer = LongformerTokenizer.from_pretrained(model_path)
147
+ # model = LongformerForSequenceClassification.from_pretrained(model_path)
148
+ # model.eval() # Set the model to evaluation mode
149
+
150
+ # # Load the ICD-9 descriptions from CSV into a dictionary
151
+ # icd9_desc_df = pd.read_csv("D_ICD_DIAGNOSES.csv") # Adjust the path to your CSV file
152
+ # icd9_desc_df['ICD9_CODE'] = icd9_desc_df['ICD9_CODE'].astype(str) # Ensure ICD9_CODE is string type
153
+ # icd9_descriptions = dict(zip(icd9_desc_df['ICD9_CODE'].str.replace('.', ''), icd9_desc_df['LONG_TITLE'])) # Remove decimals for matching
154
+
155
+ # # Load the ICD-9 to ICD-10 mapping
156
+ # icd9_to_icd10 = {}
157
+ # with open("2015_I9gem.txt", "r") as file:
158
+ # for line in file:
159
+ # parts = line.strip().split()
160
+ # if len(parts) == 3:
161
+ # icd9, icd10, _ = parts
162
+ # icd9_to_icd10[icd9] = icd10
163
+
164
+ # # ICD-9 code columns used during training
165
+ # icd9_columns = [
166
+ # '038.9', '244.9', '250.00', '272.0', '272.4', '276.1', '276.2', '285.1', '285.9',
167
+ # '287.5', '305.1', '311', '36.15', '37.22', '37.23', '38.91', '38.93', '39.61',
168
+ # '39.95', '401.9', '403.90', '410.71', '412', '414.01', '424.0', '427.31', '428.0',
169
+ # '486', '496', '507.0', '511.9', '518.81', '530.81', '584.9', '585.9', '599.0',
170
+ # '88.56', '88.72', '93.90', '96.04', '96.6', '96.71', '96.72', '99.04', '99.15',
171
+ # '995.92', 'V15.82', 'V45.81', 'V45.82', 'V58.61'
172
+ # ]
173
+
174
+ # # Function for making predictions and mapping to ICD-10
175
+ # def predict_icd9(texts, tokenizer, model, threshold=0.5):
176
+ # inputs = tokenizer(
177
+ # texts,
178
+ # padding="max_length",
179
+ # truncation=True,
180
+ # max_length=512,
181
+ # return_tensors="pt"
182
+ # )
183
+
184
+ # with torch.no_grad():
185
+ # outputs = model(
186
+ # input_ids=inputs["input_ids"],
187
+ # attention_mask=inputs["attention_mask"]
188
+ # )
189
+ # logits = outputs.logits
190
+ # probabilities = torch.sigmoid(logits)
191
+ # predictions = (probabilities > threshold).int()
192
+
193
+ # predicted_icd9 = []
194
+ # for pred in predictions:
195
+ # codes = [icd9_columns[i] for i, val in enumerate(pred) if val == 1]
196
+ # predicted_icd9.append(codes)
197
+
198
+ # # Fetch descriptions and map to ICD-10 codes
199
+ # predictions_with_desc = []
200
+ # for codes in predicted_icd9:
201
+ # code_with_desc = []
202
+ # for code in codes:
203
+ # icd9_stripped = code.replace('.', '')
204
+ # icd10_code = icd9_to_icd10.get(icd9_stripped, "Mapping not found")
205
+ # icd9_desc = icd9_descriptions.get(icd9_stripped, "Description not found")
206
+ # code_with_desc.append((code, icd9_desc, icd10_code))
207
+ # predictions_with_desc.append(code_with_desc)
208
+
209
+ # return predictions_with_desc
210
+
211
+ # # Streamlit UI
212
+ # st.title("ICD-9 to ICD-10 Code Prediction")
213
+ # st.sidebar.header("Model Options")
214
+ # threshold = st.sidebar.slider("Prediction Threshold", 0.0, 1.0, 0.5, 0.01)
215
+
216
+ # st.write("### Enter Medical Summary")
217
+ # input_text = st.text_area("Medical Summary", placeholder="Enter clinical notes here...")
218
+
219
+ # if st.button("Predict"):
220
+ # if input_text.strip():
221
+ # predictions = predict_icd9([input_text], tokenizer, model, threshold)
222
+ # st.write("### Predicted ICD-9 and ICD-10 Codes with Descriptions")
223
+ # for icd9_code, description, icd10_code in predictions[0]:
224
+ # st.write(f"- ICD-9: {icd9_code} ({description}) -> ICD-10: {icd10_code}")
225
+ # else:
226
+ # st.error("Please enter a medical summary.")
227
+
228
+ import os
229
  import torch
230
  import pandas as pd
231
  import streamlit as st
232
+ from PIL import Image
233
  from transformers import LongformerTokenizer, LongformerForSequenceClassification
234
+ from phi.agent import Agent
235
+ from phi.model.google import Gemini
236
+ from phi.tools.duckduckgo import DuckDuckGo
237
 
238
+ # Load the fine-tuned ICD-9 model and tokenizer
239
  model_path = "./clinical_longformer"
240
  tokenizer = LongformerTokenizer.from_pretrained(model_path)
241
  model = LongformerForSequenceClassification.from_pretrained(model_path)
 
243
 
244
  # Load the ICD-9 descriptions from CSV into a dictionary
245
  icd9_desc_df = pd.read_csv("D_ICD_DIAGNOSES.csv") # Adjust the path to your CSV file
246
+ icd9_desc_df['ICD9_CODE'] = icd9_desc_df['ICD9_CODE'].astype(str) # Ensure ICD9_CODE is string type for matching
247
+ icd9_descriptions = dict(zip(icd9_desc_df['ICD9_CODE'].str.replace('.', ''), icd9_desc_df['LONG_TITLE'])) # Remove decimals in ICD9 code for matching
 
 
 
 
 
 
 
 
 
248
 
249
  # ICD-9 code columns used during training
250
  icd9_columns = [
 
256
  '995.92', 'V15.82', 'V45.81', 'V45.82', 'V58.61'
257
  ]
258
 
259
+ # Function for making ICD-9 predictions
260
  def predict_icd9(texts, tokenizer, model, threshold=0.5):
261
  inputs = tokenizer(
262
  texts,
 
265
  max_length=512,
266
  return_tensors="pt"
267
  )
268
+
269
  with torch.no_grad():
270
  outputs = model(
271
  input_ids=inputs["input_ids"],
 
274
  logits = outputs.logits
275
  probabilities = torch.sigmoid(logits)
276
  predictions = (probabilities > threshold).int()
277
+
278
  predicted_icd9 = []
279
  for pred in predictions:
280
  codes = [icd9_columns[i] for i, val in enumerate(pred) if val == 1]
281
  predicted_icd9.append(codes)
282
+
 
283
  predictions_with_desc = []
284
  for codes in predicted_icd9:
285
+ code_with_desc = [(code, icd9_descriptions.get(code.replace('.', ''), "Description not found")) for code in codes]
 
 
 
 
 
286
  predictions_with_desc.append(code_with_desc)
287
+
288
  return predictions_with_desc
289
 
290
  # Streamlit UI
291
+ st.title("Medical Diagnosis Assistant")
292
+ option = st.selectbox(
293
+ "Choose Diagnosis Method",
294
+ ("ICD-9 Code Prediction", "Medical Image Analysis")
295
+ )
296
+
297
+ # ICD-9 Code Prediction
298
+ if option == "ICD-9 Code Prediction":
299
+ st.write("### Enter Medical Summary")
300
+ input_text = st.text_area("Medical Summary", placeholder="Enter clinical notes here...")
301
+
302
+ threshold = st.slider("Prediction Threshold", 0.0, 1.0, 0.5, 0.01)
303
+
304
+ if st.button("Predict ICD-9 Codes"):
305
+ if input_text.strip():
306
+ predictions = predict_icd9([input_text], tokenizer, model, threshold)
307
+ st.write("### Predicted ICD-9 Codes and Descriptions")
308
+ for code, description in predictions[0]:
309
+ st.write(f"- {code}: {description}")
310
+ else:
311
+ st.error("Please enter a medical summary.")
312
+
313
+ # Medical Image Analysis
314
+ elif option == "Medical Image Analysis":
315
+ if "GOOGLE_API_KEY" not in st.session_state:
316
+ st.warning("Please enter your Google API Key in the sidebar to continue")
317
  else:
318
+ medical_agent = Agent(
319
+ model=Gemini(
320
+ api_key=st.session_state.GOOGLE_API_KEY,
321
+ id="gemini-2.0-flash-exp"
322
+ ),
323
+ tools=[DuckDuckGo()],
324
+ markdown=True
325
+ )
326
+
327
+ query = """
328
+ You are a highly skilled medical imaging expert with extensive knowledge in radiology and diagnostic imaging. Analyze the patient's medical image and structure your response as follows:
329
+
330
+ ### 1. Image Type & Region
331
+ - Specify imaging modality (X-ray/MRI/CT/Ultrasound/etc.)
332
+ - Identify the patient's anatomical region and positioning
333
+ - Comment on image quality and technical adequacy
334
+
335
+ ### 2. Key Findings
336
+ - List primary observations systematically
337
+ - Note any abnormalities in the patient's imaging with precise descriptions
338
+ - Include measurements and densities where relevant
339
+ - Describe location, size, shape, and characteristics
340
+ - Rate severity: Normal/Mild/Moderate/Severe
341
+
342
+ ### 3. Diagnostic Assessment
343
+ - Provide primary diagnosis with confidence level
344
+ - List differential diagnoses in order of likelihood
345
+ - Support each diagnosis with observed evidence from the patient's imaging
346
+ - Note any critical or urgent findings
347
+
348
+ ### 4. Patient-Friendly Explanation
349
+ - Explain the findings in simple, clear language that the patient can understand
350
+ - Avoid medical jargon or provide clear definitions
351
+ - Include visual analogies if helpful
352
+ - Address common patient concerns related to these findings
353
+
354
+ ### 5. Research Context
355
+ - Use the DuckDuckGo search tool to find recent medical literature about similar cases
356
+ - Provide a list of relevant medical links
357
+ - Include key references to support your analysis
358
+ """
359
+
360
+ upload_container = st.container()
361
+ image_container = st.container()
362
+ analysis_container = st.container()
363
+
364
+ with upload_container:
365
+ uploaded_file = st.file_uploader(
366
+ "Upload Medical Image",
367
+ type=["jpg", "jpeg", "png", "dicom"],
368
+ help="Supported formats: JPG, JPEG, PNG, DICOM"
369
+ )
370
+
371
+ if uploaded_file is not None:
372
+ with image_container:
373
+ col1, col2, col3 = st.columns([1, 2, 1])
374
+ with col2:
375
+ image = Image.open(uploaded_file)
376
+ width, height = image.size
377
+ aspect_ratio = width / height
378
+ new_width = 500
379
+ new_height = int(new_width / aspect_ratio)
380
+ resized_image = image.resize((new_width, new_height))
381
+
382
+ st.image(resized_image, caption="Uploaded Medical Image", use_container_width=True)
383
+
384
+ analyze_button = st.button("🔍 Analyze Image")
385
+
386
+ with analysis_container:
387
+ if analyze_button:
388
+ image_path = "temp_medical_image.png"
389
+ with open(image_path, "wb") as f:
390
+ f.write(uploaded_file.getbuffer())
391
+
392
+ with st.spinner("🔄 Analyzing image... Please wait."):
393
+ try:
394
+ response = medical_agent.run(query, images=[image_path])
395
+ st.markdown("### 📋 Analysis Results")
396
+ st.markdown(response.content)
397
+ except Exception as e:
398
+ st.error(f"Analysis error: {e}")
399
+ finally:
400
+ if os.path.exists(image_path):
401
+ os.remove(image_path)
402
+ else:
403
+ st.info("👆 Please upload a medical image to begin analysis")
404
 
405