Bonosa2 committed on
Commit
bd86d31
·
verified ·
1 Parent(s): a599403

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +192 -58
app.py CHANGED
@@ -5,7 +5,6 @@ import traceback
5
  import pandas as pd
6
  import torch
7
  import gradio as gr
8
- import gc
9
  from transformers import (
10
  logging,
11
  AutoProcessor,
@@ -13,6 +12,7 @@ from transformers import (
13
  AutoModelForImageTextToText
14
  )
15
  from sklearn.model_selection import train_test_split
 
16
 
17
  # ─── Silence irrelevant warnings ───────────────────────────────────────────────
18
  logging.set_verbosity_error()
@@ -35,6 +35,7 @@ tokenizer = AutoTokenizer.from_pretrained(
35
  def generate_and_export():
36
  try:
37
  # 1) Lazy‑load the full FP16 model
 
38
  model = AutoModelForImageTextToText.from_pretrained(
39
  MODEL_ID,
40
  trust_remote_code=True,
@@ -43,25 +44,37 @@ def generate_and_export():
43
  device_map="auto"
44
  )
45
  device = next(model.parameters()).device
 
46
 
47
- # 2) Text→SOAP helper
48
- def to_soap(text: str) -> str:
 
 
 
 
 
 
 
 
 
49
  inputs = processor.apply_chat_template(
50
  [
51
- {"role":"system","content":[{"type":"text","text":"You are a medical AI assistant."}]},
52
- {"role":"user", "content":[{"type":"text","text":text}]}
53
  ],
54
  add_generation_prompt=True,
55
  tokenize=True,
56
  return_tensors="pt",
57
  return_dict=True
58
  ).to(device)
 
59
  out = model.generate(
60
  **inputs,
61
- max_new_tokens=400,
62
  do_sample=True,
63
- top_p=0.95,
64
- temperature=0.1,
 
65
  pad_token_id=processor.tokenizer.eos_token_id,
66
  use_cache=False
67
  )
@@ -70,82 +83,203 @@ def generate_and_export():
70
  out[:, prompt_len:], skip_special_tokens=True
71
  )[0].strip()
72
 
73
- # 3) Generate 20 doc notes + ground truths
74
- docs, gts = [], []
75
- for i in range(1, 21):
76
- doc = to_soap("Generate a realistic, concise doctor's progress note for a single patient encounter.")
77
- docs.append(doc)
78
- gts.append(to_soap(doc))
79
- if i % 5 == 0:
80
- torch.cuda.empty_cache()
81
 
82
- # 4) Split into 15 train / 5 test
83
- df = pd.DataFrame({"doc_note": docs, "ground_truth_soap": gts})
84
- train_df, test_df = train_test_split(df, test_size=5, random_state=42)
85
 
86
- os.makedirs("outputs", exist_ok=True)
87
 
88
- # 5) Inference on train split → outputs/inference.tsv
89
- train_preds = [to_soap(d) for d in train_df["doc_note"]]
90
- inf = train_df.reset_index(drop=True).copy()
91
- inf["id"] = inf.index + 1
92
- inf["predicted_soap"] = train_preds
93
- inf[["id","ground_truth_soap","predicted_soap"]].to_csv(
94
- "outputs/inference.tsv", sep="\t", index=False
95
- )
96
 
97
- # 6) Inference on test split → outputs/eval.csv
98
- test_preds = [to_soap(d) for d in test_df["doc_note"]]
99
- pd.DataFrame({
100
- "id": range(1, len(test_preds) + 1),
101
- "predicted_soap": test_preds
102
- }).to_csv("outputs/eval.csv", index=False)
103
 
104
- # 7) Return status + file paths for download
105
- return (
106
- "✅ Done with 20 notes (15 train / 5 test)!",
107
- "outputs/inference.tsv",
108
- "outputs/eval.csv"
109
- )
110
- # Add this to your generate_and_export function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- # ... existing code ...
 
 
113
 
114
- # Better memory management
115
  for i in range(1, 21):
116
- doc = to_soap("Generate a realistic, concise doctor's progress note...")
117
- docs.append(doc)
118
- gts.append(to_soap(doc))
 
 
 
 
 
 
119
 
120
- # More aggressive cleanup every 3 iterations
121
  if i % 3 == 0:
122
  torch.cuda.empty_cache()
123
  gc.collect()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
- # Clean up model after use
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  del model
127
  torch.cuda.empty_cache()
128
  gc.collect()
129
-
130
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  except Exception as e:
133
  traceback.print_exc()
134
- return (f"❌ Error: {e}", None, None)
135
 
136
  # ─── Gradio UI ─────────────────────────────────────────────────────────────────
137
- with gr.Blocks() as demo:
138
- gr.Markdown("# Gemma‑3n SOAP Generator 🩺")
139
- btn = gr.Button("Generate & Export 20 Notes")
140
- status = gr.Textbox(interactive=False, label="Status")
141
- inf_file = gr.File(label="Download inference.tsv")
142
- eval_file= gr.File(label="Download eval.csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  btn.click(
145
  fn=generate_and_export,
146
  inputs=None,
147
- outputs=[status, inf_file, eval_file]
148
  )
149
 
 
 
 
 
 
 
 
150
  if __name__ == "__main__":
151
- demo.launch()
 
5
  import pandas as pd
6
  import torch
7
  import gradio as gr
 
8
  from transformers import (
9
  logging,
10
  AutoProcessor,
 
12
  AutoModelForImageTextToText
13
  )
14
  from sklearn.model_selection import train_test_split
15
+ import gc
16
 
17
  # ─── Silence irrelevant warnings ───────────────────────────────────────────────
18
  logging.set_verbosity_error()
 
35
  def generate_and_export():
36
  try:
37
  # 1) Lazy‑load the full FP16 model
38
+ print("Loading model...")
39
  model = AutoModelForImageTextToText.from_pretrained(
40
  MODEL_ID,
41
  trust_remote_code=True,
 
44
  device_map="auto"
45
  )
46
  device = next(model.parameters()).device
47
+ print(f"Model loaded on device: {device}")
48
 
49
+ # 2) Text→Doctor Note helper
50
+ def generate_doctor_note() -> str:
51
+ prompt = """Generate a realistic, concise doctor's progress note for a single patient encounter.
52
+ Include patient symptoms, physical examination findings, and clinical observations.
53
+ Keep it brief and medical in nature.
54
+
55
+ Example format:
56
+ Patient presents with [symptoms]. Physical exam reveals [findings]. [Additional observations].
57
+
58
+ Doctor's Note:"""
59
+
60
  inputs = processor.apply_chat_template(
61
  [
62
+ {"role": "system", "content": [{"type": "text", "text": "You are a medical assistant generating realistic patient encounter notes."}]},
63
+ {"role": "user", "content": [{"type": "text", "text": prompt}]}
64
  ],
65
  add_generation_prompt=True,
66
  tokenize=True,
67
  return_tensors="pt",
68
  return_dict=True
69
  ).to(device)
70
+
71
  out = model.generate(
72
  **inputs,
73
+ max_new_tokens=150,
74
  do_sample=True,
75
+ top_p=0.9,
76
+ temperature=0.8,
77
+ repetition_penalty=1.2,
78
  pad_token_id=processor.tokenizer.eos_token_id,
79
  use_cache=False
80
  )
 
83
  out[:, prompt_len:], skip_special_tokens=True
84
  )[0].strip()
85
 
86
+ # 3) Doctor Note→SOAP helper
87
+ def convert_to_soap(doctor_note: str) -> str:
88
+ prompt = f"""Convert the following medical note into proper SOAP format.
 
 
 
 
 
89
 
90
+ Medical Note: {doctor_note}
 
 
91
 
92
+ Please structure your response exactly as follows:
93
 
94
+ SUBJECTIVE:
95
+ [Patient's reported symptoms, complaints, and history]
 
 
 
 
 
 
96
 
97
+ OBJECTIVE:
98
+ [Physical exam findings, vital signs, observable data]
 
 
 
 
99
 
100
+ ASSESSMENT:
101
+ [Clinical diagnosis, differential diagnosis, or impression]
102
+
103
+ PLAN:
104
+ [Treatment plan, medications, follow-up instructions]
105
+
106
+ SOAP Note:"""
107
+
108
+ inputs = processor.apply_chat_template(
109
+ [
110
+ {"role": "system", "content": [{"type": "text", "text": "You are a medical documentation assistant. Convert medical notes into structured SOAP format. Be concise and clinical."}]},
111
+ {"role": "user", "content": [{"type": "text", "text": prompt}]}
112
+ ],
113
+ add_generation_prompt=True,
114
+ tokenize=True,
115
+ return_tensors="pt",
116
+ return_dict=True
117
+ ).to(device)
118
+
119
+ out = model.generate(
120
+ **inputs,
121
+ max_new_tokens=250,
122
+ do_sample=True,
123
+ top_p=0.9,
124
+ temperature=0.7,
125
+ repetition_penalty=1.3,
126
+ pad_token_id=processor.tokenizer.eos_token_id,
127
+ use_cache=False
128
+ )
129
+ prompt_len = inputs["input_ids"].shape[-1]
130
+ return processor.batch_decode(
131
+ out[:, prompt_len:], skip_special_tokens=True
132
+ )[0].strip()
133
 
134
+ # 4) Generate 20 doctor notes + convert to SOAP
135
+ print("Generating doctor notes and SOAP conversions...")
136
+ docs, soaps = [], []
137
 
 
138
  for i in range(1, 21):
139
+ print(f"Generating note {i}/20...")
140
+
141
+ # Generate doctor note
142
+ doctor_note = generate_doctor_note()
143
+ docs.append(doctor_note)
144
+
145
+ # Convert to SOAP
146
+ soap_note = convert_to_soap(doctor_note)
147
+ soaps.append(soap_note)
148
 
149
+ # Memory cleanup every 3 iterations
150
  if i % 3 == 0:
151
  torch.cuda.empty_cache()
152
  gc.collect()
153
+ print(f"Memory cleaned after note {i}")
154
+
155
+ print("All notes generated successfully!")
156
+
157
+ # 5) Split into 15 train / 5 test
158
+ df = pd.DataFrame({"doctor_note": docs, "soap_note": soaps})
159
+ train_df, test_df = train_test_split(df, test_size=5, random_state=42)
160
+
161
+ os.makedirs("outputs", exist_ok=True)
162
+
163
+ # 6) Generate predictions on train split → outputs/inference.tsv
164
+ print("Generating predictions for training set...")
165
+ train_preds = []
166
+ for idx, doctor_note in enumerate(train_df["doctor_note"]):
167
+ print(f"Predicting train {idx+1}/{len(train_df)}...")
168
+ pred_soap = convert_to_soap(doctor_note)
169
+ train_preds.append(pred_soap)
170
+
171
+ if (idx + 1) % 3 == 0:
172
+ torch.cuda.empty_cache()
173
+ gc.collect()
174
+
175
+ inf_df = train_df.reset_index(drop=True).copy()
176
+ inf_df["id"] = inf_df.index + 1
177
+ inf_df["ground_truth_soap"] = inf_df["soap_note"]
178
+ inf_df["predicted_soap"] = train_preds
179
 
180
+ # Save inference results
181
+ inf_df[["id", "ground_truth_soap", "predicted_soap"]].to_csv(
182
+ "outputs/inference.tsv", sep="\t", index=False
183
+ )
184
+ print("Inference results saved!")
185
+
186
+ # 7) Generate predictions on test split → outputs/eval.csv
187
+ print("Generating predictions for test set...")
188
+ test_preds = []
189
+ for idx, doctor_note in enumerate(test_df["doctor_note"]):
190
+ print(f"Predicting test {idx+1}/{len(test_df)}...")
191
+ pred_soap = convert_to_soap(doctor_note)
192
+ test_preds.append(pred_soap)
193
+
194
+ torch.cuda.empty_cache()
195
+ gc.collect()
196
+
197
+ eval_df = pd.DataFrame({
198
+ "id": range(1, len(test_preds) + 1),
199
+ "predicted_soap": test_preds
200
+ })
201
+ eval_df.to_csv("outputs/eval.csv", index=False)
202
+ print("Evaluation results saved!")
203
+
204
+ # 8) Save complete dataset for reference
205
+ complete_df = pd.DataFrame({
206
+ "id": range(1, len(docs) + 1),
207
+ "doctor_note": docs,
208
+ "soap_note": soaps
209
+ })
210
+ complete_df.to_csv("outputs/complete_dataset.csv", index=False)
211
+ print("Complete dataset saved!")
212
+
213
+ # 9) Cleanup model
214
  del model
215
  torch.cuda.empty_cache()
216
  gc.collect()
217
+ print("Model cleaned up!")
218
 
219
+ # 10) Return status + file paths for download
220
+ return (
221
+ f"✅ Successfully generated 20 notes!\n"
222
+ f"📊 Training set: {len(train_df)} notes\n"
223
+ f"🧪 Test set: {len(test_df)} notes\n"
224
+ f"💾 Files ready for download",
225
+ "outputs/inference.tsv",
226
+ "outputs/eval.csv",
227
+ "outputs/complete_dataset.csv"
228
+ )
229
 
230
  except Exception as e:
231
  traceback.print_exc()
232
+ return (f"❌ Error: {e}", None, None, None)
233
 
234
  # ─── Gradio UI ─────────────────────────────────────────────────────────────────
235
+ with gr.Blocks(title="SOAP Generator") as demo:
236
+ gr.Markdown("""
237
+ # 🩺 Medical SOAP Note Generator
238
+
239
+ This app generates realistic doctor's notes and converts them to SOAP format:
240
+ - **S**ubjective: Patient's reported symptoms
241
+ - **O**bjective: Observable findings and exam results
242
+ - **A**ssessment: Clinical diagnosis/impression
243
+ - **P**lan: Treatment plan and follow-up
244
+
245
+ **Process:**
246
+ 1. Generate 20 realistic doctor's progress notes
247
+ 2. Convert each note to structured SOAP format
248
+ 3. Split into 15 training + 5 test samples
249
+ 4. Generate predictions and export files
250
+ """)
251
+
252
+ with gr.Row():
253
+ btn = gr.Button("🚀 Generate & Export SOAP Notes", variant="primary", size="lg")
254
+
255
+ with gr.Row():
256
+ status = gr.Textbox(
257
+ label="📋 Generation Status",
258
+ interactive=False,
259
+ lines=5,
260
+ placeholder="Click the button above to start generation..."
261
+ )
262
+
263
+ with gr.Row():
264
+ with gr.Column():
265
+ inf_file = gr.File(label="📊 Download inference.tsv (Training Predictions)")
266
+ with gr.Column():
267
+ eval_file = gr.File(label="🧪 Download eval.csv (Test Predictions)")
268
+ with gr.Column():
269
+ complete_file = gr.File(label="💾 Download complete_dataset.csv (All Data)")
270
 
271
  btn.click(
272
  fn=generate_and_export,
273
  inputs=None,
274
+ outputs=[status, inf_file, eval_file, complete_file]
275
  )
276
 
277
+ gr.Markdown("""
278
+ ### 📁 Output Files:
279
+ - **inference.tsv**: Training set with ground truth and predicted SOAP notes
280
+ - **eval.csv**: Test set predictions only
281
+ - **complete_dataset.csv**: All 20 generated doctor notes and SOAP conversions
282
+ """)
283
+
284
  if __name__ == "__main__":
285
+ demo.launch()