Bonosa2 committed on
Commit
a255cfc
·
verified ·
1 Parent(s): 741135c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py CHANGED
@@ -77,6 +77,56 @@ pd.DataFrame({"doc_note": docs}).to_csv("doc_notes.tsv", sep="\t", index=False)
77
  pd.DataFrame({"soap_note": soaps}).to_csv("ground_truth_soap.tsv", sep="\t", index=False)
78
  print("✅ Saved doc_notes.tsv & ground_truth_soap.tsv")
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  # 3) Blank Gradio UI placeholder
81
  def noop():
82
  return "Data generated — check TSV files in the repo."
 
77
  pd.DataFrame({"soap_note": soaps}).to_csv("ground_truth_soap.tsv", sep="\t", index=False)
78
  print("✅ Saved doc_notes.tsv & ground_truth_soap.tsv")
79
 
80
+ # Colab/Kaggle/Space Cell: Split, infer on train (70), infer on test (30), and save files
81
+
82
+ import os
83
+ import pandas as pd
84
+ from sklearn.model_selection import train_test_split
85
+
86
+ # Make outputs directory
87
+ os.makedirs("outputs", exist_ok=True)
88
+
89
+ # 1) Load generated notes and ground truths
90
+ docs = pd.read_csv("doc_notes.tsv", sep="\t") # has column 'doc_note'
91
+ gts = pd.read_csv("ground_truth_soap.tsv", sep="\t") # has column 'soap_note'
92
+
93
+ full = pd.DataFrame({
94
+ "doc_note": docs["doc_note"],
95
+ "ground_truth_soap": gts["soap_note"]
96
+ })
97
+
98
+ # 2) Split 70% train / 30% test
99
+ train_df, test_df = train_test_split(full, test_size=0.3, random_state=42)
100
+
101
+ # 3) Run inference on the 70‑row train split and save inference.tsv
102
+ train_preds = []
103
+ for idx, row in train_df.reset_index(drop=True).iterrows():
104
+ pred = generate_soap_note(row["doc_note"])
105
+ train_preds.append(pred)
106
+
107
+ inference_df = train_df.reset_index(drop=True).copy()
108
+ inference_df["id"] = inference_df.index + 1
109
+ inference_df["predicted_soap"] = train_preds
110
+ inference_df = inference_df[["id","ground_truth_soap","predicted_soap"]]
111
+ inference_df.to_csv("outputs/inference.tsv", sep="\t", index=False)
112
+
113
+ # 4) Run inference on the 30‑row test split and save eval.csv
114
+ test_preds = []
115
+ for idx, row in test_df.reset_index(drop=True).iterrows():
116
+ pred = generate_soap_note(row["doc_note"])
117
+ test_preds.append(pred)
118
+
119
+ eval_df = pd.DataFrame({
120
+ "id": range(1, len(test_preds) + 1),
121
+ "predicted_soap": test_preds
122
+ })
123
+ eval_df.to_csv("outputs/eval.csv", index=False)
124
+
125
+ print("✅ Saved:")
126
+ print(" outputs/inference.tsv (70 rows: id, ground_truth_soap, predicted_soap)")
127
+ print(" outputs/eval.csv (30 rows: id, predicted_soap)")
128
+
129
+
130
  # 3) Blank Gradio UI placeholder
131
  def noop():
132
  return "Data generated — check TSV files in the repo."