Bonosa2 committed on
Commit
761fe09
·
verified ·
1 Parent(s): 15efba0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import pandas as pd
4
+ import easyocr
5
+ import gradio as gr
6
+ from transformers import (
7
+ AutoConfig,
8
+ AutoProcessor,
9
+ AutoTokenizer,
10
+ AutoModelForImageTextToText
11
+ )
12
+
13
MODEL_ID = "google/gemma-3n-e2b-it"
HF_TOKEN = os.environ.get("HF_TOKEN")  # set via Space secrets; None when the variable is not set

# 1) Load the model and OCR reader
# `token=` is the current keyword for Hub authentication; the older
# `use_auth_token=` spelling is deprecated in transformers.
config = AutoConfig.from_pretrained(MODEL_ID, trust_remote_code=True, token=HF_TOKEN)
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True, token=HF_TOKEN)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True, token=HF_TOKEN)

# Imported here so this setup section stays self-contained: passing a bare
# `load_in_8bit=True` to `from_pretrained` is deprecated in favour of an
# explicit quantization config object.
from transformers import BitsAndBytesConfig

model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    config=config,
    trust_remote_code=True,
    token=HF_TOKEN,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)
# With device_map="auto" the placement is decided at load time, so read the
# device back from the first parameter; inputs are moved there before generate().
device = next(model.parameters()).device

# English-only OCR reader; uses the GPU only when CUDA is available.
ocr_reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available(), verbose=False)
31
+
32
def generate_soap_note(text: str) -> str:
    """Turn free-text clinical notes into a SOAP-formatted note.

    The notes are embedded in a fixed instruction prompt, sent through the
    module-level ``processor`` chat template (system turn + user turn), and
    completed by the module-level ``model`` with sampling enabled.

    Args:
        text: Raw notes to be inserted verbatim into the prompt.

    Returns:
        The decoded completion only (prompt tokens are sliced off), with
        special tokens skipped and surrounding whitespace stripped.
    """
    prompt = f"""You are a medical AI assistant. Convert these notes into a SOAP note:

{text}

Format as:
S - SUBJECTIVE:
O - OBJECTIVE:
A - ASSESSMENT:
P - PLAN:

Produce the complete SOAP."""

    system_turn = {
        "role": "system",
        "content": [{"type": "text", "text": "Expert medical AI assistant."}],
    }
    user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": prompt}],
    }

    encoded = processor.apply_chat_template(
        [system_turn, user_turn],
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )
    encoded = encoded.to(device)

    # Remember where the prompt ends so only newly generated tokens are decoded.
    prompt_token_count = encoded["input_ids"].shape[-1]

    sampling_options = {
        "max_new_tokens": 400,
        "do_sample": True,
        "top_p": 0.95,
        "temperature": 0.1,
        "pad_token_id": processor.tokenizer.eos_token_id,
        "disable_compile": True,
    }
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling_options)

    completion_ids = generated[:, prompt_token_count:]
    decoded = processor.batch_decode(completion_ids, skip_special_tokens=True)
    return decoded[0].strip()
66
+
67
# 2) On startup: generate 100 synthetic note pairs and save TSVs
# Each pair is (a generated source note, the SOAP note generated from it).
# NOTE(review): the seed request below is passed to generate_soap_note, so it
# goes through whatever prompt that function applies — confirm this is the
# intended way to produce the source notes.
docs, soaps = [], []
seed_request = "Generate a realistic, concise doctor's progress note for a single patient encounter."
for pair_number in range(1, 101):
    source_note = generate_soap_note(seed_request)
    docs.append(source_note)
    soaps.append(generate_soap_note(source_note))
    if pair_number % 10:
        continue
    # Progress line every tenth pair.
    print(f"Generated {pair_number}/100")

# One single-column, tab-separated file per list, written without the index.
for tsv_path, column, values in (
    ("doc_notes.tsv", "doc_note", docs),
    ("ground_truth_soap.tsv", "soap_note", soaps),
):
    pd.DataFrame({column: values}).to_csv(tsv_path, sep="\t", index=False)
print("✅ Saved doc_notes.tsv & ground_truth_soap.tsv")
79
+
80
+ # 3) Blank Gradio UI placeholder
81
def noop():
    """Return the fixed status message shown when the button is clicked."""
    status_message = "Data generated — check TSV files in the repo."
    return status_message
83
+
84
# Minimal status page: a heading, a button, and a read-only status box.
with gr.Blocks() as demo:
    gr.Markdown("# SOAP Generator Space")
    status_button = gr.Button("Generate Completed (see logs)")
    status_box = gr.Textbox(
        label="Status",
        interactive=False,
        lines=2,
        placeholder="Ready",
        elem_id="output",
    )
    # Event outputs must be component objects. The original passed the string
    # "output", but elem_id is only the HTML id of the element and cannot be
    # used to reference a component, so the listener is wired to the Textbox
    # itself once it exists.
    status_button.click(noop, inputs=[], outputs=status_box)

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)