"""Streamlit front end for MedGenesis AI.

The script collects a biomedical question, runs ``orchestrate_search`` with
the selected LLM engine, stores the result in ``st.session_state.res`` and
renders it across eight tabs (results, knowledge graph, clusters, variants,
trials, graph metrics, visuals and protocol drafting).
"""
import asyncio
import re
from pathlib import Path

import streamlit as st
import pandas as pd
import plotly.express as px
from fpdf import FPDF
from streamlit_agraph import agraph

from mcp.orchestrator import orchestrate_search, answer_ai_question
from mcp.knowledge_graph import build_agraph
from mcp.graph_metrics import build_nx, get_top_hubs, get_density
from mcp.protocols import draft_protocol

# Colour applied to graph nodes whose label matches the highlight box.
_HIGHLIGHT_COLOR = "#f1c40f"

# Typographic characters that have no Latin-1 code point, mapped to ASCII
# look-alikes so they survive in the PDF instead of turning into "?".
_PDF_ASCII_FALLBACKS = str.maketrans({
    "\u2013": "-",    # en dash
    "\u2014": "-",    # em dash
    "\u2018": "'",    # left single quote
    "\u2019": "'",    # right single quote
    "\u201c": '"',    # left double quote
    "\u201d": '"',    # right double quote
    "\u2026": "...",  # horizontal ellipsis
})


def _pdf_text(value):
    """Return ``value`` as text that FPDF's core Helvetica font can encode.

    The core (non-embedded) FPDF fonts only cover Latin-1, and passing any
    other character makes FPDF raise.  Common typographic punctuation is
    mapped to ASCII; every remaining non-Latin-1 character becomes ``?``.
    """
    text = str(value).translate(_PDF_ASCII_FALLBACKS)
    return text.encode("latin-1", errors="replace").decode("latin-1")


def _make_pdf(papers):
    """Render ``papers`` as a PDF document and return it as ``bytes``.

    Args:
        papers: iterable of dict-like objects; the keys ``title``,
            ``authors``, ``summary`` and ``link`` are read with ``.get`` and
            default to an empty string when absent.

    Returns:
        The PDF file content as ``bytes``.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Helvetica", size=12)
    pdf.cell(0, 10, _pdf_text("MedGenesis AI – Results"), ln=True, align="C")
    pdf.ln(5)

    for i, p in enumerate(papers, 1):
        pdf.set_font("Helvetica", "B", 11)
        # Start every multi_cell at the left margin: fpdf2 leaves the cursor
        # at the right edge after multi_cell, which leaves no room for the
        # next zero-width ("to the right margin") cell.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 7, _pdf_text(f"{i}. {p.get('title','')}"))

        pdf.set_font("Helvetica", size=9)
        # One field per line; multi_cell starts a new line at each "\n".
        body = "\n".join(
            str(p.get(key, "")) for key in ("authors", "summary", "link")
        )
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 6, _pdf_text(body))
        pdf.ln(3)

    raw = pdf.output(dest="S")
    # Classic PyFPDF returns a latin-1 str here; fpdf2 returns a bytearray.
    if isinstance(raw, str):
        return raw.encode("latin-1", errors="replace")
    return bytes(raw)


# Streamlit configuration
st.set_page_config(page_title="MedGenesis AI", layout="wide")

# Initialize session state so the last search result survives reruns.
if "res" not in st.session_state:
    st.session_state.res = None

# Header UI
st.title("🧬 MedGenesis AI")
llm = st.radio("LLM engine", ["openai", "gemini"], horizontal=True)
query = st.text_input("Enter biomedical question")

# Trigger search (ignored while the question box is blank).
if st.button("Run Search 🚀") and query.strip():
    with st.spinner("Gathering data…"):
        st.session_state.res = asyncio.run(orchestrate_search(query, llm))

# Retrieve results
res = st.session_state.res

if not res:
    # No results yet: prompt the user.
    st.info("Enter a question and press **Run Search 🚀** to begin.")
else:
    # Create tabs
    tabs = st.tabs([
        "Results", "Graph", "Clusters", "Variants",
        "Trials", "Metrics", "Visuals", "Protocols",
    ])
    (title_tab, graph_tab, clust_tab, var_tab,
     trial_tab, met_tab, vis_tab, proto_tab) = tabs

    # Results Tab: paper list, CSV/PDF downloads and the AI summary.
    with title_tab:
        for i, p in enumerate(res["papers"], 1):
            st.markdown(f"**{i}. [{p['title']}]({p['link']})**")
            st.write(p["summary"])
        c1, c2 = st.columns(2)
        c1.download_button(
            "CSV",
            pd.DataFrame(res["papers"]).to_csv(index=False),
            "papers.csv",
            "text/csv",
        )
        c2.download_button(
            "PDF",
            _make_pdf(res["papers"]),
            "papers.pdf",
            "application/pdf",
        )
        st.subheader("AI summary")
        st.info(res["ai_summary"])

    # Graph Tab: knowledge graph with optional label highlighting.
    # NOTE: `nodes` and `edges` built here are reused by the Metrics tab.
    with graph_tab:
        nodes, edges, cfg = build_agraph(
            res["papers"],
            res["umls"],
            res.get("drug_safety", []),
            res.get("umls_relations", []),
        )
        hl = st.text_input("Highlight node:", key="hl")
        if hl:
            # Literal, case-insensitive substring match on the node label.
            pat = re.compile(re.escape(hl), re.I)
            for n in nodes:
                if pat.search(n.label):
                    n.color = _HIGHLIGHT_COLOR
        agraph(nodes, edges, cfg)

    # Clusters Tab: paper titles grouped by cluster id.
    with clust_tab:
        clusters = res.get("clusters", [])
        if clusters:
            df = pd.DataFrame({
                "title": [p['title'] for p in res['papers']],
                "cluster": clusters,
            })
            st.write("### Paper Clusters")
            for c in sorted(set(clusters)):
                st.write(f"**Cluster {c}**")
                for t in df[df['cluster'] == c]['title']:
                    st.write(f"- {t}")
        else:
            st.info("No clusters to show.")

    # Variants Tab
    with var_tab:
        variants = res.get("variants", [])
        if variants:
            st.json(variants)
        else:
            st.warning("No variants found. Try a well-known gene like 'TP53'.")

    # Trials Tab
    with trial_tab:
        trials = res.get("clinical_trials", [])
        if trials:
            st.json(trials)
        else:
            st.warning("No trials found. Try a disease name or specific drug.")

    # Metrics Tab: density and top hubs of the graph built in the Graph tab.
    with met_tab:
        G = build_nx(
            [n.__dict__ for n in nodes],
            [e.__dict__ for e in edges],
        )
        st.metric("Density", f"{get_density(G):.3f}")
        st.markdown("**Top hubs**")
        for nid, sc in get_top_hubs(G):
            # Fall back to the raw node id when no node carries that id.
            label = next((n.label for n in nodes if n.id == nid), nid)
            st.write(f"- {label}: {sc:.3f}")

    # Visuals Tab: histogram of publication years.
    with vis_tab:
        # The first four characters of "published" are used as the year.
        years = [
            p.get("published", "")[:4]
            for p in res["papers"]
            if p.get("published")
        ]
        if years:
            st.plotly_chart(
                px.histogram(
                    pd.DataFrame({'year': years}),
                    x='year',
                    nbins=10,
                    title="Publication Year",
                )
            )

    # Protocols Tab: draft an experimental protocol from a hypothesis.
    with proto_tab:
        hyp = st.text_input("Enter hypothesis for protocol:", key="proto_q")
        if st.button("Draft Protocol") and hyp.strip():
            with st.spinner("Generating protocol…"):
                doc = asyncio.run(
                    draft_protocol(hyp, context=res["ai_summary"], llm=llm)
                )
            st.subheader("Experimental Protocol")
            st.write(doc)