Spaces:

VyLala
/

mtDNALocation

Running

App Files Files Community

VyLala committed on Apr 13

Commit

92286a1

verified ·

1 Parent(s): 3d02ea3

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -145

app.py CHANGED Viewed

@@ -1,145 +1,146 @@
-# ✅ Optimized mtDNA MVP UI with Faster Pipeline & Required Feedback
-import gradio as gr
-from collections import Counter
-import csv
-import os
-from functools import lru_cache
-from mtdna_classifier import classify_sample_location
-import subprocess
-import shutil
-if not shutil.which("esummary"):
-    subprocess.run(
-        'yes | sh -c "$(wget -q https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh -O -)"',
-        shell=True
-    )
-    os.environ["PATH"] += f":{os.environ['HOME']}/edirect"
-print("EDirect esummary found at:", shutil.which("esummary"))
-@lru_cache(maxsize=128)
-def classify_sample_location_cached(accession):
-    return classify_sample_location(accession)
-# Count and suggest final location
-def compute_final_suggested_location(rows):
-    candidates = [
-        row.get("Predicted Location", "").strip()
-        for row in rows
-        if row.get("Predicted Location", "").strip().lower() not in ["", "sample id not found"]
-    ] + [
-        row.get("Inferred Region", "").strip()
-        for row in rows
-        if row.get("Inferred Region", "").strip().lower() not in  ["","unknown"]
-    ]
-    if not candidates:
-        return Counter(), ("Unknown", 0)
-    counts = Counter(candidates)
-    top_location, count = counts.most_common(1)[0]
-    return counts, (top_location, count)
-# Store feedback (with required fields)
-def store_feedback_to_drive(accession, answer1, answer2, contact=""):
-    if not answer1.strip() or not answer2.strip():
-        return "⚠️ Please answer both questions before submitting."
-    feedback_file = "data/user_fb/feedback_mtdna.csv"
-    header = ["accession", "helpful", "improvement", "contact"]
-    row = [accession, answer1, answer2, contact]
-    file_exists = os.path.isfile(feedback_file)
-    with open(feedback_file, "a", newline="") as f:
-        writer = csv.writer(f)
-        if not file_exists:
-            writer.writerow(header)
-        writer.writerow(row)
-    return "✅ Feedback submitted. Thank you!"
-def summarize_results(accession):
-    try:
-        output = classify_sample_location_cached(accession)
-    except Exception as e:
-        return [], f"❌ Error: {e}"
-    if accession not in output:
-        return [], "❌ Accession not found in results."
-    isolate = next((k for k in output if k != accession), None)
-    row_score = []
-    rows = []
-    for key in [accession, isolate]:
-        if key not in output:
-            continue
-        sample_id_label = f"{key} ({'accession number' if key == accession else 'isolate of accession'})"
-        for section, techniques in output[key].items():
-            for technique, content in techniques.items():
-                source = content.get("source", "")
-                predicted = content.get("predicted_location", "")
-                haplogroup = content.get("haplogroup", "")
-                inferred = content.get("inferred_location", "")
-                context = content.get("context_snippet", "")[:300] if "context_snippet" in content else ""
-                row = {
-                    "Sample ID": sample_id_label,
-                    "Technique": technique,
-                    "Source": f"The region of haplogroup is inferred\nby using this source: {source}" if technique == "haplogroup" else source,
-                    "Predicted Location": "" if technique == "haplogroup" else predicted,
-                    "Haplogroup": haplogroup if technique == "haplogroup" else "",
-                    "Inferred Region": inferred if technique == "haplogroup" else "",
-                    "Context Snippet": context
-                }
-                row_score.append(row)
-                rows.append(list(row.values()))
-    location_counts, (final_location, count) = compute_final_suggested_location(row_score)
-    summary_lines = [f"### 🧭 Location Frequency Summary", "After counting all predicted and inferred locations:\n"]
-    summary_lines += [f"- **{loc}**: {cnt} times" for loc, cnt in location_counts.items()]
-    summary_lines.append(f"\n**Final Suggested Location:** 🗺️ **{final_location}** (mentioned {count} times)")
-    summary = "\n".join(summary_lines)
-    return rows, summary
-# Gradio UI
-with gr.Blocks() as interface:
-    gr.Markdown("# 🧬 mtDNA Location Classifier (MVP)")
-    gr.Markdown("Enter an accession number to infer geographic origin. You'll see predictions, confidence scores, and can submit feedback.")
-    with gr.Row():
-        accession = gr.Textbox(label="Enter Accession Number (e.g., KU131308)")
-        run_button = gr.Button("🔍 Submit and Classify")
-        reset_button = gr.Button("🔄 Reset")
-    status = gr.Markdown(visible=False)
-    headers = ["Sample ID", "Technique", "Source", "Predicted Location", "Haplogroup", "Inferred Region", "Context Snippet"]
-    output_table = gr.Dataframe(headers=headers, interactive=False)
-    output_summary = gr.Markdown()
-    gr.Markdown("---")
-    gr.Markdown("### 💬 Feedback (required)")
-    q1 = gr.Textbox(label="1️⃣ Was the inferred location accurate or helpful? Please explain.")
-    q2 = gr.Textbox(label="2️⃣ What would improve your experience with this tool?")
-    contact = gr.Textbox(label="📧 Your email or institution (optional)")
-    submit_feedback = gr.Button("✅ Submit Feedback")
-    feedback_status = gr.Markdown()
-    def classify_with_loading(accession):
-        return gr.update(value="⏳ Please wait... processing...", visible=True)
-    def classify_main(accession):
-        table, summary = summarize_results(accession)
-        return table, summary, gr.update(visible=False)
-    def reset_fields():
-        return "", "", "", "", "", [], "", gr.update(visible=False)
-    run_button.click(fn=classify_with_loading, inputs=accession, outputs=status)
-    run_button.click(fn=classify_main, inputs=accession, outputs=[output_table, output_summary, status])
-    submit_feedback.click(fn=store_feedback_to_drive, inputs=[accession, q1, q2, contact], outputs=feedback_status)
-    reset_button.click(fn=reset_fields, inputs=[], outputs=[accession, q1, q2, contact, feedback_status, output_table, output_summary, status])
-interface.launch(share=True)

+# ✅ Optimized mtDNA MVP UI with Faster Pipeline & Required Feedback
+import gradio as gr
+from collections import Counter
+import csv
+import os
+from functools import lru_cache
+from mtdna_classifier import classify_sample_location
+import subprocess
+import shutil
+# Bootstrap NCBI EDirect: when the `esummary` CLI is not on PATH, run the
+# NCBI-hosted install-edirect.sh and add ~/edirect to this process's PATH
+# (presumably where the installer places the tools -- confirm).
+if not shutil.which("esummary"):
+    # NOTE(review): this pipes a script fetched over the network straight into
+    # `sh`. The command is a fixed literal (no user input), but consider
+    # vendoring or pinning the installer instead of executing it at startup.
+    install_result = subprocess.run(
+        'yes | sh -c "$(wget -q https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh -O -)"',
+        shell=True
+    )
+    # Surface installer failures instead of silently ignoring the exit status.
+    if install_result.returncode != 0:
+        print("EDirect installer exited with status:", install_result.returncode)
+    # expanduser("~") still resolves when HOME is unset, os.pathsep keeps the
+    # separator platform-correct, and .get() tolerates a missing PATH.
+    edirect_dir = os.path.join(os.path.expanduser("~"), "edirect")
+    os.environ["PATH"] = os.pathsep.join(
+        part for part in (os.environ.get("PATH", ""), edirect_dir) if part
+    )
+print("EDirect esummary found at:", shutil.which("esummary"))
+@lru_cache(maxsize=128)
+def classify_sample_location_cached(accession):
+    """Return `classify_sample_location(accession)`, memoized per accession.
+
+    Up to 128 distinct accessions are kept by the LRU cache, so a repeated
+    lookup of the same accession does not re-run the classifier.
+    """
+    classification = classify_sample_location(accession)
+    return classification
+# Count and suggest final location
+def compute_final_suggested_location(rows):
+    """Tally candidate locations across result rows and pick the most frequent.
+
+    Args:
+        rows: iterable of dicts that may carry "Predicted Location" and
+            "Inferred Region" entries.
+
+    Returns:
+        A pair `(counts, (top_location, count))`. `counts` is a Counter of
+        every usable candidate. When no usable candidate exists the result is
+        `(Counter(), ("Unknown", 0))`.
+
+    Values are whitespace-stripped before counting. Blank values, the
+    placeholder "sample id not found" (predicted column) and "unknown"
+    (inferred column) are skipped, compared case-insensitively. Missing,
+    None, or non-string values are ignored instead of raising.
+    """
+    rows = list(rows)
+    def _usable(row, column, placeholders):
+        # Normalize one cell; return "" for anything that must not be counted.
+        value = row.get(column)
+        text = value.strip() if isinstance(value, str) else ""
+        return "" if text.lower() in placeholders else text
+    # All predicted locations come first, then all inferred regions: this keeps
+    # the Counter's insertion order (and tie-breaking in most_common) stable.
+    predicted = [_usable(row, "Predicted Location", ("sample id not found",)) for row in rows]
+    inferred = [_usable(row, "Inferred Region", ("unknown",)) for row in rows]
+    candidates = [text for text in predicted + inferred if text]
+    if not candidates:
+        return Counter(), ("Unknown", 0)
+    counts = Counter(candidates)
+    top_location, count = counts.most_common(1)[0]
+    return counts, (top_location, count)
+# Store feedback (with required fields)
+def store_feedback_to_drive(accession, answer1, answer2, contact=""):
+    """Append one feedback record to the local CSV at data/user_fb/feedback_mtdna.csv.
+
+    Args:
+        accession: accession number the feedback refers to.
+        answer1: answer stored in the "helpful" column (required).
+        answer2: answer stored in the "improvement" column (required).
+        contact: optional contact text.
+
+    Returns:
+        A status message: a warning when either required answer is blank,
+        otherwise a confirmation after the row has been written.
+    """
+    # `or ""` treats None like an empty answer instead of raising on .strip().
+    if not (answer1 or "").strip() or not (answer2 or "").strip():
+        return "⚠️ Please answer both questions before submitting."
+    feedback_file = "data/user_fb/feedback_mtdna.csv"
+    header = ["accession", "helpful", "improvement", "contact"]
+    row = [accession, answer1, answer2, contact]
+    # The folder may not exist on a fresh deployment; open(..., "a") would fail.
+    os.makedirs(os.path.dirname(feedback_file), exist_ok=True)
+    # A missing or zero-byte file both need the header row.
+    needs_header = not os.path.isfile(feedback_file) or os.path.getsize(feedback_file) == 0
+    # Explicit UTF-8 so emoji/non-ASCII feedback does not depend on the locale.
+    with open(feedback_file, "a", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f)
+        if needs_header:
+            writer.writerow(header)
+        writer.writerow(row)
+    return "✅ Feedback submitted. Thank you!"
+def summarize_results(accession):
+    """Classify an accession and build the table rows plus a Markdown summary.
+
+    The classifier result is read as a mapping keyed by the accession (and
+    optionally one other key, labelled as the isolate), where each value maps
+    section -> technique -> dict of result fields.
+
+    Args:
+        accession: accession number typed by the user; surrounding whitespace
+            is ignored and None is treated as empty.
+
+    Returns:
+        `(rows, summary)`: `rows` is a list of 7-item lists ordered as Sample
+        ID, Technique, Source, Predicted Location, Haplogroup, Inferred
+        Region, Context Snippet; `summary` is Markdown text. On any failure
+        `rows` is empty and `summary` holds the message to display.
+    """
+    accession = (accession or "").strip()
+    if not accession:
+        return [], "⚠️ Please enter an accession number."
+    try:
+        output = classify_sample_location_cached(accession)
+    except Exception as e:
+        # UI boundary: report the failure to the user instead of crashing.
+        return [], f"❌ Error: {e}"
+    print(output)
+    # A None or otherwise non-dict result is treated the same as "not found".
+    if not isinstance(output, dict) or accession not in output:
+        return [], "❌ Accession not found in results."
+    # The first key other than the accession is treated as its isolate.
+    isolate = next((k for k in output if k != accession), None)
+    row_score = []
+    for key in (accession, isolate):
+        if key not in output:
+            continue
+        sample_id_label = f"{key} ({'accession number' if key == accession else 'isolate of accession'})"
+        for techniques in output[key].values():
+            for technique, content in techniques.items():
+                is_haplogroup = technique == "haplogroup"
+                # `or ""` maps None to "" so slicing and later counting cannot fail.
+                source = content.get("source") or ""
+                predicted = content.get("predicted_location") or ""
+                haplogroup = content.get("haplogroup") or ""
+                inferred = content.get("inferred_location") or ""
+                # Snippets are truncated to 300 characters for display.
+                context = (content.get("context_snippet") or "")[:300]
+                row_score.append({
+                    "Sample ID": sample_id_label,
+                    "Technique": technique,
+                    "Source": f"The region of haplogroup is inferred\nby using this source: {source}" if is_haplogroup else source,
+                    "Predicted Location": "" if is_haplogroup else predicted,
+                    "Haplogroup": haplogroup if is_haplogroup else "",
+                    "Inferred Region": inferred if is_haplogroup else "",
+                    "Context Snippet": context
+                })
+    # Dict insertion order fixes the column order of every table row.
+    rows = [list(row.values()) for row in row_score]
+    location_counts, (final_location, count) = compute_final_suggested_location(row_score)
+    summary_lines = [f"### 🧭 Location Frequency Summary", "After counting all predicted and inferred locations:\n"]
+    summary_lines += [f"- **{loc}**: {cnt} times" for loc, cnt in location_counts.items()]
+    summary_lines.append(f"\n**Final Suggested Location:** 🗺️ **{final_location}** (mentioned {count} times)")
+    summary = "\n".join(summary_lines)
+    return rows, summary
+# NOTE(review): debug smoke test -- this runs summarize_results for a
+# hard-coded accession at import time, before the UI is built. Remove or gate
+# it once debugging is finished.
+print(summarize_results("KU131308"))
+# Gradio UI
+with gr.Blocks() as interface:
+    gr.Markdown("# 🧬 mtDNA Location Classifier (MVP)")
+    gr.Markdown("Enter an accession number to infer geographic origin. You'll see predictions, confidence scores, and can submit feedback.")
+    with gr.Row():
+        accession = gr.Textbox(label="Enter Accession Number (e.g., KU131308)")
+        run_button = gr.Button("🔍 Submit and Classify")
+        reset_button = gr.Button("🔄 Reset")
+    # Hidden banner that is only shown while a classification is running.
+    status = gr.Markdown(visible=False)
+    headers = ["Sample ID", "Technique", "Source", "Predicted Location", "Haplogroup", "Inferred Region", "Context Snippet"]
+    output_table = gr.Dataframe(headers=headers, interactive=False)
+    output_summary = gr.Markdown()
+    gr.Markdown("---")
+    gr.Markdown("### 💬 Feedback (required)")
+    q1 = gr.Textbox(label="1️⃣ Was the inferred location accurate or helpful? Please explain.")
+    q2 = gr.Textbox(label="2️⃣ What would improve your experience with this tool?")
+    contact = gr.Textbox(label="📧 Your email or institution (optional)")
+    submit_feedback = gr.Button("✅ Submit Feedback")
+    feedback_status = gr.Markdown()
+    def classify_with_loading(accession):
+        """Show the 'please wait' banner in the status component."""
+        return gr.update(value="⏳ Please wait... processing...", visible=True)
+    def classify_main(accession):
+        """Run the classification and hide the status banner again."""
+        table, summary = summarize_results(accession)
+        return table, summary, gr.update(visible=False)
+    def reset_fields():
+        """Clear every input/output component and hide the status banner."""
+        # One value per component, in the order of the reset_button outputs list.
+        return "", "", "", "", "", [], "", gr.update(visible=False)
+    # Chain the two steps. Registered as two independent click handlers they
+    # both write to `status` and can race: a fast (cached) classification may
+    # hide the banner before the "please wait" update arrives, leaving it stuck.
+    run_button.click(
+        fn=classify_with_loading, inputs=accession, outputs=status
+    ).then(
+        fn=classify_main, inputs=accession, outputs=[output_table, output_summary, status]
+    )
+    submit_feedback.click(fn=store_feedback_to_drive, inputs=[accession, q1, q2, contact], outputs=feedback_status)
+    reset_button.click(fn=reset_fields, inputs=[], outputs=[accession, q1, q2, contact, feedback_status, output_table, output_summary, status])
+interface.launch(share=True)