VyLala committed on
Commit
92286a1
·
verified ·
1 Parent(s): 3d02ea3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -145
app.py CHANGED
@@ -1,145 +1,146 @@
1
- # ✅ Optimized mtDNA MVP UI with Faster Pipeline & Required Feedback
2
-
3
- import gradio as gr
4
- from collections import Counter
5
- import csv
6
- import os
7
- from functools import lru_cache
8
- from mtdna_classifier import classify_sample_location
9
- import subprocess
10
-
11
- import shutil
12
-
13
- if not shutil.which("esummary"):
14
- subprocess.run(
15
- 'yes | sh -c "$(wget -q https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh -O -)"',
16
- shell=True
17
- )
18
- os.environ["PATH"] += f":{os.environ['HOME']}/edirect"
19
- print("EDirect esummary found at:", shutil.which("esummary"))
20
-
21
-
22
- @lru_cache(maxsize=128)
23
- def classify_sample_location_cached(accession):
24
- return classify_sample_location(accession)
25
-
26
- # Count and suggest final location
27
- def compute_final_suggested_location(rows):
28
- candidates = [
29
- row.get("Predicted Location", "").strip()
30
- for row in rows
31
- if row.get("Predicted Location", "").strip().lower() not in ["", "sample id not found"]
32
- ] + [
33
- row.get("Inferred Region", "").strip()
34
- for row in rows
35
- if row.get("Inferred Region", "").strip().lower() not in ["","unknown"]
36
- ]
37
-
38
- if not candidates:
39
- return Counter(), ("Unknown", 0)
40
-
41
- counts = Counter(candidates)
42
- top_location, count = counts.most_common(1)[0]
43
- return counts, (top_location, count)
44
-
45
- # Store feedback (with required fields)
46
- def store_feedback_to_drive(accession, answer1, answer2, contact=""):
47
- if not answer1.strip() or not answer2.strip():
48
- return "⚠️ Please answer both questions before submitting."
49
-
50
- feedback_file = "data/user_fb/feedback_mtdna.csv"
51
- header = ["accession", "helpful", "improvement", "contact"]
52
- row = [accession, answer1, answer2, contact]
53
- file_exists = os.path.isfile(feedback_file)
54
- with open(feedback_file, "a", newline="") as f:
55
- writer = csv.writer(f)
56
- if not file_exists:
57
- writer.writerow(header)
58
- writer.writerow(row)
59
- return "βœ… Feedback submitted. Thank you!"
60
-
61
- def summarize_results(accession):
62
- try:
63
- output = classify_sample_location_cached(accession)
64
- except Exception as e:
65
- return [], f"❌ Error: {e}"
66
-
67
- if accession not in output:
68
- return [], "❌ Accession not found in results."
69
-
70
- isolate = next((k for k in output if k != accession), None)
71
- row_score = []
72
- rows = []
73
-
74
- for key in [accession, isolate]:
75
- if key not in output:
76
- continue
77
- sample_id_label = f"{key} ({'accession number' if key == accession else 'isolate of accession'})"
78
- for section, techniques in output[key].items():
79
- for technique, content in techniques.items():
80
- source = content.get("source", "")
81
- predicted = content.get("predicted_location", "")
82
- haplogroup = content.get("haplogroup", "")
83
- inferred = content.get("inferred_location", "")
84
- context = content.get("context_snippet", "")[:300] if "context_snippet" in content else ""
85
-
86
- row = {
87
- "Sample ID": sample_id_label,
88
- "Technique": technique,
89
- "Source": f"The region of haplogroup is inferred\nby using this source: {source}" if technique == "haplogroup" else source,
90
- "Predicted Location": "" if technique == "haplogroup" else predicted,
91
- "Haplogroup": haplogroup if technique == "haplogroup" else "",
92
- "Inferred Region": inferred if technique == "haplogroup" else "",
93
- "Context Snippet": context
94
- }
95
-
96
- row_score.append(row)
97
- rows.append(list(row.values()))
98
-
99
- location_counts, (final_location, count) = compute_final_suggested_location(row_score)
100
- summary_lines = [f"### 🧭 Location Frequency Summary", "After counting all predicted and inferred locations:\n"]
101
- summary_lines += [f"- **{loc}**: {cnt} times" for loc, cnt in location_counts.items()]
102
- summary_lines.append(f"\n**Final Suggested Location:** πŸ—ΊοΈ **{final_location}** (mentioned {count} times)")
103
- summary = "\n".join(summary_lines)
104
-
105
- return rows, summary
106
-
107
- # Gradio UI
108
- with gr.Blocks() as interface:
109
- gr.Markdown("# 🧬 mtDNA Location Classifier (MVP)")
110
- gr.Markdown("Enter an accession number to infer geographic origin. You'll see predictions, confidence scores, and can submit feedback.")
111
-
112
- with gr.Row():
113
- accession = gr.Textbox(label="Enter Accession Number (e.g., KU131308)")
114
- run_button = gr.Button("πŸ” Submit and Classify")
115
- reset_button = gr.Button("πŸ”„ Reset")
116
-
117
- status = gr.Markdown(visible=False)
118
- headers = ["Sample ID", "Technique", "Source", "Predicted Location", "Haplogroup", "Inferred Region", "Context Snippet"]
119
- output_table = gr.Dataframe(headers=headers, interactive=False)
120
- output_summary = gr.Markdown()
121
-
122
- gr.Markdown("---")
123
- gr.Markdown("### πŸ’¬ Feedback (required)")
124
- q1 = gr.Textbox(label="1️⃣ Was the inferred location accurate or helpful? Please explain.")
125
- q2 = gr.Textbox(label="2️⃣ What would improve your experience with this tool?")
126
- contact = gr.Textbox(label="πŸ“§ Your email or institution (optional)")
127
- submit_feedback = gr.Button("βœ… Submit Feedback")
128
- feedback_status = gr.Markdown()
129
-
130
- def classify_with_loading(accession):
131
- return gr.update(value="⏳ Please wait... processing...", visible=True)
132
-
133
- def classify_main(accession):
134
- table, summary = summarize_results(accession)
135
- return table, summary, gr.update(visible=False)
136
-
137
- def reset_fields():
138
- return "", "", "", "", "", [], "", gr.update(visible=False)
139
-
140
- run_button.click(fn=classify_with_loading, inputs=accession, outputs=status)
141
- run_button.click(fn=classify_main, inputs=accession, outputs=[output_table, output_summary, status])
142
- submit_feedback.click(fn=store_feedback_to_drive, inputs=[accession, q1, q2, contact], outputs=feedback_status)
143
- reset_button.click(fn=reset_fields, inputs=[], outputs=[accession, q1, q2, contact, feedback_status, output_table, output_summary, status])
144
-
145
- interface.launch(share=True)
 
 
1
+ # ✅ Optimized mtDNA MVP UI with Faster Pipeline & Required Feedback
2
+
3
+ import gradio as gr
4
+ from collections import Counter
5
+ import csv
6
+ import os
7
+ from functools import lru_cache
8
+ from mtdna_classifier import classify_sample_location
9
+ import subprocess
10
+
11
+ import shutil
12
+
13
# Bootstrap NCBI EDirect: when the `esummary` command cannot be found on PATH,
# run the official installer script and then make the install directory
# visible to this process.
if not shutil.which("esummary"):
    # The installer asks interactive questions; `yes` answers them all.
    _edirect_install = subprocess.run(
        'yes | sh -c "$(wget -q https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh -O -)"',
        shell=True,
    )
    if _edirect_install.returncode != 0:
        # Do not abort start-up; just make the failure visible in the log.
        print("EDirect installer exited with status", _edirect_install.returncode)

    # NOTE(review): assumes the installer unpacks into ~/edirect — confirm.
    # expanduser("~") also works when the HOME variable is unset, and
    # os.pathsep keeps PATH well-formed even if it was empty or missing.
    _edirect_dir = os.path.join(os.path.expanduser("~"), "edirect")
    os.environ["PATH"] = os.pathsep.join(
        part for part in (os.environ.get("PATH", ""), _edirect_dir) if part
    )
    print("EDirect esummary found at:", shutil.which("esummary"))
20
+
21
+
22
@lru_cache(maxsize=128)
def classify_sample_location_cached(accession):
    """Return ``classify_sample_location(accession)``, memoised per accession.

    Results for up to 128 distinct ``accession`` values are retained, so a
    repeated lookup of the same accession does not re-run the classifier.
    """
    result = classify_sample_location(accession)
    return result
25
+
26
+ # Count and suggest final location
27
def compute_final_suggested_location(rows):
    """Tally the locations named in result rows and pick the most frequent.

    Args:
        rows: Iterable of dicts. Only the "Predicted Location" and
            "Inferred Region" keys are read; a missing key or a ``None``
            value is treated as empty.

    Returns:
        A tuple ``(counts, (top_location, count))`` where ``counts`` is a
        ``Counter`` of every usable location string (whitespace-stripped)
        and ``top_location`` is the most common one. When no usable location
        exists the result is ``(Counter(), ("Unknown", 0))``.
    """
    # Each column has its own lower-cased placeholder values that mean
    # "no location available" and must not be counted.
    columns = (
        ("Predicted Location", ("", "sample id not found")),
        ("Inferred Region", ("", "unknown")),
    )

    # Materialise once: the rows are walked once per column, which would
    # silently exhaust a generator after the first column.
    rows = list(rows)

    # Column-major order (all predicted locations, then all inferred regions)
    # fixes the Counter's insertion order and therefore how ties are broken.
    candidates = []
    for column, placeholders in columns:
        for row in rows:
            value = str(row.get(column) or "").strip()
            if value.lower() not in placeholders:
                candidates.append(value)

    if not candidates:
        return Counter(), ("Unknown", 0)

    counts = Counter(candidates)
    top_location, count = counts.most_common(1)[0]
    return counts, (top_location, count)
44
+
45
+ # Store feedback (with required fields)
46
def store_feedback_to_drive(accession, answer1, answer2, contact=""):
    """Append one feedback record to the local feedback CSV.

    Both answers are required: if either is empty or whitespace-only nothing
    is written and a warning message is returned instead.

    Args:
        accession: Accession the feedback refers to.
        answer1: Answer to the first question (stored in the "helpful" column).
        answer2: Answer to the second question (stored in "improvement").
        contact: Optional contact details.

    Returns:
        A status message for display: a warning when an answer is missing,
        otherwise a confirmation.
    """
    # Treat None like an empty answer instead of failing on .strip().
    answer1 = answer1 or ""
    answer2 = answer2 or ""
    if not answer1.strip() or not answer2.strip():
        return "⚠️ Please answer both questions before submitting."

    feedback_file = "data/user_fb/feedback_mtdna.csv"
    # The directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(os.path.dirname(feedback_file), exist_ok=True)
    # A missing or zero-length file still needs its header row.
    needs_header = (
        not os.path.isfile(feedback_file) or os.path.getsize(feedback_file) == 0
    )

    # Explicit UTF-8 so non-ASCII feedback does not depend on the platform
    # default encoding; newline="" is what the csv module requires.
    with open(feedback_file, "a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(["accession", "helpful", "improvement", "contact"])
        writer.writerow([accession, answer1, answer2, contact or ""])
    return "✅ Feedback submitted. Thank you!"
60
+
61
def summarize_results(accession):
    """Classify an accession and format the result for the UI.

    Args:
        accession: Accession number entered by the user.

    Returns:
        A tuple ``(rows, summary)``. ``rows`` is a list of 7-item lists in the
        column order Sample ID, Technique, Source, Predicted Location,
        Haplogroup, Inferred Region, Context Snippet. ``summary`` is a
        Markdown string with per-location counts and the final suggestion.
        On failure ``rows`` is empty and ``summary`` holds an error message.
    """
    try:
        output = classify_sample_location_cached(accession)
        # Raw classifier output, printed for inspection in the server log.
        print(output)
    except Exception as e:
        return [], f"❌ Error: {e}"

    # A non-mapping result (e.g. None) is reported like a missing accession
    # instead of raising outside the try block.
    if not isinstance(output, dict) or accession not in output:
        return [], "❌ Accession not found in results."

    # The first key other than the accession is treated as its isolate.
    isolate = next((k for k in output if k != accession), None)
    row_score = []  # dict rows, consumed by the location tally
    rows = []       # the same rows as plain lists, for the table

    for key in (accession, isolate):
        if key not in output:
            continue
        role = "accession number" if key == accession else "isolate of accession"
        sample_id_label = f"{key} ({role})"
        # output[key] maps section -> {technique -> content}; the section
        # name itself is not displayed.
        for techniques in output[key].values():
            for technique, content in techniques.items():
                is_haplogroup = technique == "haplogroup"
                source = content.get("source", "")
                # Missing or None snippets become "", longer ones are cut
                # to 300 characters.
                context = (content.get("context_snippet") or "")[:300]

                # Haplogroup rows fill Haplogroup / Inferred Region; every
                # other technique fills Predicted Location instead.
                row = {
                    "Sample ID": sample_id_label,
                    "Technique": technique,
                    "Source": (
                        f"The region of haplogroup is inferred\nby using this source: {source}"
                        if is_haplogroup
                        else source
                    ),
                    "Predicted Location": "" if is_haplogroup else content.get("predicted_location", ""),
                    "Haplogroup": content.get("haplogroup", "") if is_haplogroup else "",
                    "Inferred Region": content.get("inferred_location", "") if is_haplogroup else "",
                    "Context Snippet": context,
                }

                row_score.append(row)
                rows.append(list(row.values()))

    location_counts, (final_location, count) = compute_final_suggested_location(row_score)
    summary_lines = [
        "### 🧭 Location Frequency Summary",
        "After counting all predicted and inferred locations:\n",
    ]
    summary_lines += [f"- **{loc}**: {cnt} times" for loc, cnt in location_counts.items()]
    summary_lines.append(
        f"\n**Final Suggested Location:** 🗺️ **{final_location}** (mentioned {count} times)"
    )
    summary = "\n".join(summary_lines)

    return rows, summary
107
# Debug smoke test: classify a known accession and print the result. Guarded
# so that it still runs when this file is executed as a script, but merely
# importing the module does not trigger a full classification.
if __name__ == "__main__":
    print(summarize_results("KU131308"))
108
+ # Gradio UI
109
# Page layout: accession input with run/reset buttons, a hidden status line,
# the results table and summary, then the feedback form.
# NOTE(review): emoji in the labels below were restored from mis-encoded
# text — confirm the icons against the deployed app.
with gr.Blocks() as interface:
    gr.Markdown("# 🧬 mtDNA Location Classifier (MVP)")
    gr.Markdown("Enter an accession number to infer geographic origin. You'll see predictions, confidence scores, and can submit feedback.")

    with gr.Row():
        accession = gr.Textbox(label="Enter Accession Number (e.g., KU131308)")
        run_button = gr.Button("🔍 Submit and Classify")
        reset_button = gr.Button("🔄 Reset")

    # Hidden until a classification starts; used for the "please wait" note.
    status = gr.Markdown(visible=False)
    headers = ["Sample ID", "Technique", "Source", "Predicted Location", "Haplogroup", "Inferred Region", "Context Snippet"]
    output_table = gr.Dataframe(headers=headers, interactive=False)
    output_summary = gr.Markdown()

    gr.Markdown("---")
    gr.Markdown("### 💬 Feedback (required)")
    q1 = gr.Textbox(label="1️⃣ Was the inferred location accurate or helpful? Please explain.")
    q2 = gr.Textbox(label="2️⃣ What would improve your experience with this tool?")
    contact = gr.Textbox(label="📧 Your email or institution (optional)")
    submit_feedback = gr.Button("✅ Submit Feedback")
    feedback_status = gr.Markdown()

    def classify_with_loading(accession):
        """Show the status line with a "please wait" message."""
        return gr.update(value="⏳ Please wait... processing...", visible=True)

    def classify_main(accession):
        """Run the classification and hide the status line when it is done."""
        table, summary = summarize_results(accession)
        return table, summary, gr.update(visible=False)

    def reset_fields():
        """Clear every input and output and hide the status line.

        The values are returned in the order of the `outputs` list wired to
        the reset button below.
        """
        return "", "", "", "", "", [], "", gr.update(visible=False)

    # Two listeners on the same click: one shows the wait message, the other
    # performs the classification and hides the message again.
    run_button.click(fn=classify_with_loading, inputs=accession, outputs=status)
    run_button.click(fn=classify_main, inputs=accession, outputs=[output_table, output_summary, status])
    submit_feedback.click(fn=store_feedback_to_drive, inputs=[accession, q1, q2, contact], outputs=feedback_status)
    reset_button.click(fn=reset_fields, inputs=[], outputs=[accession, q1, q2, contact, feedback_status, output_table, output_summary, status])

interface.launch(share=True)