File size: 3,375 Bytes
3d539ca 9965499 3d539ca 9965499 3d539ca 9965499 3637999 9965499 3637999 9965499 3637999 9965499 3d539ca 9965499 3d539ca 9965499 3d539ca 9965499 3d539ca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# mcp/orchestrator.py
"""
Orchestrates retrieval, enrichment, and AI synthesis for a user query.
"""
import asyncio
from typing import Dict, Any, List
from mcp.arxiv import fetch_arxiv
from mcp.pubmed import fetch_pubmed
from mcp.nlp import extract_keywords
from mcp.umls import lookup_umls
from mcp.openfda import fetch_drug_safety
from mcp.ncbi import search_gene, get_mesh_definition
from mcp.disgenet import disease_to_genes
from mcp.clinicaltrials import search_trials
from mcp.openai_utils import ai_summarize, ai_qa
# ---------------------------------------------------------------------
async def _gene_and_mesh_enrichment(keywords: List[str]) -> Dict[str, Any]:
    """Gather gene, MeSH and DisGeNET lookups for every keyword concurrently.

    Each keyword contributes three awaitables, always in the order
    ``search_gene``, ``get_mesh_definition``, ``disease_to_genes``.  All of
    them are awaited together; a lookup that raises an ``Exception`` is
    dropped silently (best effort), so the output may hold fewer entries than
    there are keywords.

    Args:
        keywords: Terms to look up.  An empty list yields three empty lists.

    Returns:
        A dict with three keys:
          * ``"genes"``    - all successful ``search_gene`` results, flattened.
          * ``"meshes"``   - one entry per successful ``get_mesh_definition``.
          * ``"disgenet"`` - all successful ``disease_to_genes`` results,
            flattened.
    """
    pending = [
        lookup(term)
        for term in keywords
        for lookup in (search_gene, get_mesh_definition, disease_to_genes)
    ]
    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    def successful(offset: int) -> list:
        # Results are interleaved gene/mesh/disgenet, so every third slot
        # starting at ``offset`` belongs to a single source.
        return [
            outcome
            for outcome in outcomes[offset::3]
            if not isinstance(outcome, Exception)
        ]

    return {
        "genes": [gene for batch in successful(0) for gene in batch],
        "meshes": successful(1),
        "disgenet": [assoc for batch in successful(2) for assoc in batch],
    }
# ---------------------------------------------------------------------
async def orchestrate_search(query: str) -> Dict[str, Any]:
    """Run retrieval, enrichment and AI summarisation for *query*.

    Pipeline:
      1. ``fetch_arxiv`` and ``fetch_pubmed`` are awaited concurrently and
         their results concatenated (arXiv first).
      2. The ``"summary"`` field of every paper is joined into one text, and
         at most the first eight items returned by ``extract_keywords`` for
         that text are kept.
      3. Awaited concurrently: a UMLS lookup and a drug-safety lookup per
         keyword, the gene/MeSH/DisGeNET enrichment, the clinical-trial
         search (``max_studies=10``) and the AI summary of the joined text.
         The summary depends only on the paper text, so it runs alongside
         the enrichment calls instead of after them.

    Args:
        query: The user's search string, passed unchanged to the literature
            and clinical-trial searches.

    Returns:
        A dict with the keys ``papers``, ``umls``, ``drug_safety``,
        ``ai_summary``, ``suggested_reading`` (the ``"link"`` of the first
        three papers), ``genes``, ``mesh_definitions``, ``gene_disease`` and
        ``clinical_trials``.

    Raises:
        KeyError: If a paper lacks ``"summary"`` (or one of the first three
            lacks ``"link"``).
        Exception: Anything raised by the retrieval, UMLS, drug-safety,
            trial-search or summary calls propagates; only the per-keyword
            lookups inside the gene/MeSH enrichment are best effort.
    """
    # -------- literature retrieval in parallel --------
    arxiv_results, pubmed_results = await asyncio.gather(
        fetch_arxiv(query),
        fetch_pubmed(query),
    )
    # NOTE(review): assumes both fetchers return lists of dicts - confirm.
    papers = arxiv_results + pubmed_results

    # -------- keyword extraction --------
    paper_text = " ".join(p["summary"] for p in papers)
    keywords = extract_keywords(paper_text)[:8]

    # -------- enrichment + AI summary in parallel --------
    # gather() schedules bare coroutines itself, so no explicit create_task
    # is needed; the inner gathers keep per-keyword results in keyword order.
    umls, fda, enrich, trials, summary = await asyncio.gather(
        asyncio.gather(*(lookup_umls(k) for k in keywords)),
        asyncio.gather(*(fetch_drug_safety(k) for k in keywords)),
        _gene_and_mesh_enrichment(keywords),
        search_trials(query, max_studies=10),
        ai_summarize(paper_text),
    )

    links = [p["link"] for p in papers[:3]]

    return {
        "papers": papers,
        "umls": umls,
        "drug_safety": fda,
        "ai_summary": summary,
        "suggested_reading": links,
        # new fields
        "genes": enrich["genes"],
        "mesh_definitions": enrich["meshes"],
        "gene_disease": enrich["disgenet"],
        "clinical_trials": trials,
    }
# ---------------------------------------------------------------------
async def answer_ai_question(question: str, context: str = "") -> Dict[str, str]:
    """Answer a free-form question through ``ai_qa``.

    Args:
        question: The question text forwarded to ``ai_qa``.
        context: Optional supporting text forwarded as the second argument;
            defaults to an empty string.

    Returns:
        A dict with the single key ``"answer"`` holding whatever ``ai_qa``
        returned.
    """
    return {"answer": await ai_qa(question, context)}
|