File size: 3,375 Bytes
3d539ca
9965499
 
 
3d539ca
9965499
 
 
 
 
 
 
 
 
 
 
3d539ca
 
9965499
 
 
 
 
 
 
 
 
3637999
9965499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3637999
 
9965499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3637999
9965499
 
3d539ca
9965499
 
 
 
3d539ca
9965499
 
 
 
 
3d539ca
 
9965499
 
 
3d539ca
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# mcp/orchestrator.py
"""
Orchestrates retrieval, enrichment, and AI synthesis for a user query.
"""

import asyncio
import logging
from typing import Dict, Any, List

from mcp.arxiv        import fetch_arxiv
from mcp.pubmed       import fetch_pubmed
from mcp.nlp          import extract_keywords
from mcp.umls         import lookup_umls
from mcp.openfda      import fetch_drug_safety
from mcp.ncbi         import search_gene, get_mesh_definition
from mcp.disgenet     import disease_to_genes
from mcp.clinicaltrials import search_trials
from mcp.openai_utils import ai_summarize, ai_qa

# ---------------------------------------------------------------------
async def _gene_and_mesh_enrichment(keywords: List[str]) -> Dict[str, Any]:
    """Look up gene, MeSH and gene-disease data for every keyword concurrently.

    Three lookups are issued per keyword -- ``search_gene``,
    ``get_mesh_definition`` and ``disease_to_genes`` -- and all of them are
    awaited together. Enrichment is best-effort: a lookup that fails is
    logged and skipped, so one bad keyword does not discard the rest.

    Args:
        keywords: Terms to enrich. An empty list yields empty result lists
            without issuing any lookup.

    Returns:
        A dict with exactly three keys:
        ``"genes"`` -- items of every successful ``search_gene`` result,
        concatenated in keyword order;
        ``"meshes"`` -- one entry per successful ``get_mesh_definition`` call;
        ``"disgenet"`` -- items of every successful ``disease_to_genes``
        result, concatenated in keyword order.
    """
    terms = list(keywords)
    genes: List[Any] = []
    meshes: List[Any] = []
    disgen: List[Any] = []
    if not terms:
        return {"genes": genes, "meshes": meshes, "disgenet": disgen}

    # Three coroutines per keyword, always in the same order, so a result's
    # position identifies both its keyword and the lookup that produced it.
    lookups = []
    for kw in terms:
        lookups += [search_gene(kw), get_mesh_definition(kw), disease_to_genes(kw)]
    results = await asyncio.gather(*lookups, return_exceptions=True)

    log = logging.getLogger(__name__)
    lookup_names = ("search_gene", "get_mesh_definition", "disease_to_genes")
    for position, res in enumerate(results):
        kw_index, slot = divmod(position, 3)
        # BaseException, not Exception: gather reports a cancelled child as
        # CancelledError, which is not an Exception subclass on Python 3.8+.
        if isinstance(res, BaseException):
            log.warning(
                "%s failed for keyword %r: %r",
                lookup_names[slot], terms[kw_index], res,
            )
            continue
        if slot == 1:
            meshes.append(res)
        elif res:
            # Skip None/empty results so extend() cannot raise on them.
            (genes if slot == 0 else disgen).extend(res)
    return {"genes": genes, "meshes": meshes, "disgenet": disgen}

# ---------------------------------------------------------------------
async def _gather_successful(label: str, coros: List[Any]) -> List[Any]:
    """Await *coros* concurrently and return only the results that succeeded.

    Args:
        label: Short name of the upstream service, used in the warning that
            is logged for each failed awaitable.
        coros: Awaitables to run together. May be empty.

    Returns:
        The successful results in their original relative order; failed or
        cancelled awaitables are logged and left out.
    """
    if not coros:
        return []
    outcomes = await asyncio.gather(*coros, return_exceptions=True)
    log = logging.getLogger(__name__)
    succeeded = []
    for outcome in outcomes:
        # A cancelled child is reported as CancelledError (a BaseException).
        if isinstance(outcome, BaseException):
            log.warning("%s lookup failed: %r", label, outcome)
        else:
            succeeded.append(outcome)
    return succeeded


# ---------------------------------------------------------------------
async def orchestrate_search(query: str) -> Dict[str, Any]:
    """Main entry: retrieve papers for *query*, enrich them and summarise.

    Steps:
      1. Fetch papers from arXiv and PubMed concurrently. If only one source
         fails, the other source's papers are still used.
      2. Join the paper summaries and extract at most 8 keywords from them.
      3. Concurrently run the per-keyword UMLS and drug-safety lookups, the
         gene/MeSH/DisGeNET enrichment, the clinical-trials search and the
         AI summary. A failing per-keyword UMLS or drug-safety lookup is
         logged and dropped instead of aborting the whole search.

    Args:
        query: The user's search string, passed to the literature and
            clinical-trials searches.

    Returns:
        A dict with the keys ``papers``, ``umls``, ``drug_safety``,
        ``ai_summary``, ``suggested_reading`` (links of the first three
        papers), ``genes``, ``mesh_definitions``, ``gene_disease`` and
        ``clinical_trials``.

    Raises:
        Exception: the first retrieval error when both literature sources
            fail. Errors from keyword extraction, the clinical-trials
            search and the AI summary propagate unchanged.
    """
    log = logging.getLogger(__name__)

    # -------- literature retrieval in parallel --------
    batches = await asyncio.gather(
        fetch_arxiv(query), fetch_pubmed(query), return_exceptions=True
    )
    failures = [b for b in batches if isinstance(b, BaseException)]
    if len(failures) == len(batches):
        # Nothing was retrieved at all: surface the error to the caller.
        raise failures[0]
    papers: List[Any] = []
    for source, batch in zip(("arXiv", "PubMed"), batches):
        if isinstance(batch, BaseException):
            log.warning("%s retrieval failed: %r", source, batch)
        elif batch:
            papers.extend(batch)

    # -------- keyword extraction --------
    # Tolerate records without a summary (assumes dict-like paper records).
    paper_text = " ".join(p.get("summary") or "" for p in papers)
    keywords = extract_keywords(paper_text)[:8]

    # -------- enrichment and AI summary in parallel --------
    # The summary depends only on paper_text, so it runs alongside enrichment.
    umls, fda, enrich, trials, summary = await asyncio.gather(
        _gather_successful("UMLS", [lookup_umls(k) for k in keywords]),
        _gather_successful("openFDA", [fetch_drug_safety(k) for k in keywords]),
        _gene_and_mesh_enrichment(keywords),
        search_trials(query, max_studies=10),
        ai_summarize(paper_text),
    )

    first_links = (p.get("link") for p in papers[:3])
    links = [link for link in first_links if link]

    return {
        "papers"          : papers,
        "umls"            : umls,
        "drug_safety"     : fda,
        "ai_summary"      : summary,
        "suggested_reading": links,
        # new fields
        "genes"           : enrich["genes"],
        "mesh_definitions": enrich["meshes"],
        "gene_disease"    : enrich["disgenet"],
        "clinical_trials" : trials,
    }

# ---------------------------------------------------------------------
async def answer_ai_question(question: str, context: str = "") -> Dict[str, str]:
    """Answer a free-form question by delegating to ``ai_qa``.

    Args:
        question: The question text, forwarded unchanged.
        context: Optional supporting text, forwarded unchanged
            (defaults to an empty string).

    Returns:
        A single-key dict ``{"answer": <value returned by ai_qa>}``.
    """
    return {"answer": await ai_qa(question, context)}