mgbam committed on
Commit bc40121 · verified · 1 Parent(s): e5ff04a

Update mcp/orchestrator.py

Files changed (1)
  1. mcp/orchestrator.py +91 -115
mcp/orchestrator.py CHANGED
@@ -1,138 +1,114 @@
  """
- MedGenesis – multi-API orchestrator
- ──────────────────────────────────
- • Supports OpenAI or Gemini (pass llm="openai" | "gemini")
- • Falls back between redundant data sources whenever possible
- • All network I/O is async & individually time-bounded
  """

- from __future__ import annotations
- import asyncio, textwrap
- from typing import Any, Dict, List, Tuple

- # ── 1. Literature helpers ────────────────────────────────────────────
  from mcp.arxiv import fetch_arxiv
  from mcp.pubmed import fetch_pubmed

- # ── 2. Gene / disease / expression helpers ───────────────────────────
- from mcp.gene_hub import resolve_gene # smart dispatcher
- from mcp.mygene import fetch_gene_info
- from mcp.ensembl import fetch_ensembl
- from mcp.opentargets import fetch_ot # tractability, constraint
- from mcp.cbio import fetch_cbio
-
- # ── 3. Safety, trials, concepts ──────────────────────────────────────
- from mcp.openfda import fetch_drug_safety
- from mcp.clinicaltrials import search_trials
  from mcp.umls import lookup_umls
  from mcp.disgenet import disease_to_genes

- # ── 4. Chem & drug metadata ──────────────────────────────────────────
- from mcp.drugcentral_ext import fetch_drugcentral
- from mcp.pubchem_ext import fetch_compound
-
- # ── 5. LLM utils (OpenAI & Gemini) ───────────────────────────────────
  from mcp.openai_utils import ai_summarize, ai_qa
  from mcp.gemini import gemini_summarize, gemini_qa

- ###############################################################################
- # Internal routing helpers
- ###############################################################################
- _DEFAULT_LLM = "openai"
-
- def _llm_router(choice: str) -> Tuple:
-     """
-     Return (summary_fn, qa_fn, tag) for the requested engine.
-     """
-     if str(choice).lower() == "gemini":
-         return gemini_summarize, gemini_qa, "gemini"
-     return ai_summarize, ai_qa, "openai"
-
- ###############################################################################
- # High-level enrichment helpers
- ###############################################################################
- async def _keyword_enrichment(keywords: List[str]) -> Dict[str, Any]:
-     """
-     Fan-out to UMLS, Drug Safety, and probes gene/Disease APIs in parallel.
-     """
-     umls_tasks = [lookup_umls(k) for k in keywords]
-     fda_tasks = [fetch_drug_safety(k) for k in keywords]
-     gene_tasks = [resolve_gene(k) for k in keywords]
-
-     # gather protects against individual failures
-     umls, fda, genes = await asyncio.gather(
-         asyncio.gather(*umls_tasks, return_exceptions=True),
-         asyncio.gather(*fda_tasks, return_exceptions=True),
-         asyncio.gather(*gene_tasks, return_exceptions=True),
      )
-     # flatten & sanitise
-     return {
-         "umls" : [u for u in umls if not isinstance(u, Exception)],
-         "fda" : [d for d in fda if not isinstance(d, Exception)],
-         "genes": [g for g in genes if not isinstance(g, Exception)],
-     }

- ###############################################################################
- # Public orchestration entry-points
- ###############################################################################
- async def orchestrate_search(query: str, *, llm: str=_DEFAULT_LLM,
-                              max_papers: int = 25,
-                              max_trials: int = 20) -> Dict[str, Any]:
-     """
-     Full pipeline:
-       1. Fetch literature (arXiv + PubMed)
-       2. Derive keywords (simple TF filtering)
-       3. Multi-API enrich (UMLS, safety, gene, trials, chem)
-       4. Summarise with LLM
-     """
-
-     # ── 1 literature (parallel) ───────────────────────────────────────
-     arxiv_task = asyncio.create_task(fetch_arxiv(query, max_results=max_papers//2))
-     pubmed_task = asyncio.create_task(fetch_pubmed(query, max_results=max_papers//2))
-     papers = sum(await asyncio.gather(arxiv_task, pubmed_task, return_exceptions=False), [])
-
-     # ── 2 keywords (top-8 by naive word-freq) ─────────────────────────
-     joined = " ".join(p["summary"] for p in papers)
-     tokens = [w for w in joined.split() if len(w) > 4]
-     freq = {}
-     for t in tokens: freq[t] = freq.get(t, 0) + 1
-     keywords = sorted(freq, key=freq.get, reverse=True)[:8]
-
-     # ── 3 enrichment ──────────────────────────────────────────────────
-     enrich_task = asyncio.create_task(_keyword_enrichment(keywords))
-     trials_task = asyncio.create_task(search_trials(query, max_studies=max_trials))
-     gene_dis_gen = asyncio.create_task(disease_to_genes(query)) # coarse disease string
-
-     enrich, trials, gene_dis = await asyncio.gather(enrich_task, trials_task, gene_dis_gen)
-
-     # ── 4 LLM summary & return ────────────────────────────────────────
-     summarise_fn, _, engine_tag = _llm_router(llm)
      try:
-         ai_summary = await summarise_fn(joined[:15000])
      except Exception:
-         ai_summary = "LLM unavailable or quota exceeded."

      return {
          "papers" : papers,
-         "keywords" : keywords,
-         "umls" : enrich["umls"],
-         "drug_safety" : enrich["fda"],
-         "genes" : enrich["genes"],
-         "gene_disease" : gene_dis,
          "clinical_trials" : trials,
-         "ai_summary" : ai_summary,
-         "llm_used" : engine_tag,
      }

-
- async def answer_ai_question(question: str, *, context: str,
-                              llm: str=_DEFAULT_LLM) -> Dict[str, str]:
-     """
-     Follow-up Q-A on demand.
-     """
-     _, qa_fn, _ = _llm_router(llm)
-     try:
-         answer = await qa_fn(question, context)
-     except Exception:
-         answer = "LLM unavailable or quota exceeded."
-     return {"answer": answer}
  """
+ MedGenesis – dual-LLM orchestrator (OpenAI + Gemini)
+ ----------------------------------------------------
+ Returns a single dict the UI expects. New keys:
+
+ • variants – mutation summaries from cBioPortal
+ • variant_count – quick count for empty-tab logic
  """

+ import asyncio
+ from typing import Dict, Any, List

+ # literature + NLP
  from mcp.arxiv import fetch_arxiv
  from mcp.pubmed import fetch_pubmed
+ from mcp.nlp import extract_keywords

+ # enrichment
  from mcp.umls import lookup_umls
+ from mcp.openfda import fetch_drug_safety
+ from mcp.ncbi import search_gene, get_mesh_definition
  from mcp.disgenet import disease_to_genes
+ from mcp.clinicaltrials import search_trials
+ from mcp.mygene import fetch_gene_info
+ from mcp.ensembl import fetch_ensembl
+ from mcp.opentargets import fetch_ot
+ from mcp.cbio import fetch_cbio  # NEW

+ # LLMs
  from mcp.openai_utils import ai_summarize, ai_qa
  from mcp.gemini import gemini_summarize, gemini_qa

+ _DEF = "openai"
+
+ def _llm_router(llm: str):
+     llm = (llm or _DEF).lower()
+     if llm == "gemini":
+         return ("gemini", gemini_summarize, gemini_qa)
+     return ("openai", ai_summarize, ai_qa)
+
+ # ---------------- gene meta helper ----------------
+ async def _resolve_gene(sym: str) -> Dict[str, Any]:
+     for fn in (fetch_gene_info, fetch_ensembl, fetch_ot):
+         try:
+             data = await fn(sym)
+             if data:
+                 return data
+         except Exception:
+             continue
+     return {}
+
+ # ---------------- orchestrator --------------------
+ async def orchestrate_search(query: str, *, llm: str = _DEF) -> Dict[str, Any]:
+     # 1 literature ---------------------------------------------------
+     arxiv_f = asyncio.create_task(fetch_arxiv(query))
+     pubmed_f = asyncio.create_task(fetch_pubmed(query))
+     papers = sum(await asyncio.gather(arxiv_f, pubmed_f), [])
+
+     # 2 keywords -----------------------------------------------------
+     blob = " ".join(p["summary"] for p in papers)
+     keys = extract_keywords(blob)[:8] if blob else []
+
+     # 3 parallel enrichment -----------------------------------------
+     umls_f = [lookup_umls(k) for k in keys]
+     fda_f = [fetch_drug_safety(k) for k in keys]
+     ncbi_f = [search_gene(k) for k in keys]
+     mesh_f = [get_mesh_definition(k) for k in keys]
+     gene_meta = [_resolve_gene(k) for k in keys[:3]]  # cheap
+     trials_f = asyncio.create_task(search_trials(query, max_studies=20))
+
+     # primary await
+     (
+         umls, fda, ncbi, meshes, gmeta, trials
+     ) = await asyncio.gather(
+         asyncio.gather(*umls_f, return_exceptions=True),
+         asyncio.gather(*fda_f, return_exceptions=True),
+         asyncio.gather(*ncbi_f, return_exceptions=True),
+         asyncio.gather(*mesh_f, return_exceptions=True),
+         asyncio.gather(*gene_meta, return_exceptions=True),
+         trials_f,
      )

+     # 4 variants (fire & forget; don't fail whole run) --------------
+     var_jobs = [fetch_cbio(g.get("symbol") or k)
+                 for g, k in zip(gmeta, keys[:len(gmeta)])]
      try:
+         variants = sum(await asyncio.gather(*var_jobs), [])
      except Exception:
+         variants = []
+
+     # 5 LLM summary -------------------------------------------------
+     _, summarise, _ = _llm_router(llm)
+     summary = await summarise(blob) if blob else "No abstracts found."

      return {
          "papers" : papers,
+         "umls" : umls,
+         "drug_safety" : fda,
+         "genes" : sum(ncbi, []),
+         "mesh_defs" : meshes,
+         "gene_meta" : gmeta,
+         "gene_disease" : await disease_to_genes(query) or [],
          "clinical_trials" : trials,
+         "variants" : variants,
+         "variant_count" : len(variants),
+         "ai_summary" : summary,
+         "llm_used" : llm.lower(),
      }

+ # ---------------- follow-up QA --------------------
+ async def answer_ai_question(question: str, *, context: str, llm: str = _DEF) -> Dict[str, str]:
+     _, _, qa_fn = _llm_router(llm)
+     ans = await qa_fn(question, context)
+     return {"answer": ans}