mgbam committed on
Commit
6dfdfa2
·
verified ·
1 Parent(s): e9291d3

Create embeddings.py

Browse files
Files changed (1) hide show
  1. mcp/embeddings.py +46 -0
mcp/embeddings.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── mcp/embeddings.py ───────────────────────────────────────────────────
2
+ import os, asyncio
3
+ from huggingface_hub import InferenceClient
4
+ from sklearn.cluster import KMeans
5
+
6
# Hugging Face access token, read from the HF_TOKEN environment variable
# (None when the variable is not set).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Model identifier that embed_texts passes to the inference client.
EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"

# Shared inference client, built once at import time with the token above.
client = InferenceClient(token=HF_TOKEN)
10
+
11
async def embed_texts(texts: list[str]) -> list[list[float]]:
    """
    Compute embeddings for a list of texts via HF Inference API.

    Each text is sent as its own feature-extraction request for
    ``EMBED_MODEL`` through the module-level ``client``. The blocking
    client calls are dispatched to worker threads so they can overlap
    instead of stalling the event loop.

    Args:
        texts: Strings to embed. An empty list yields an empty result.

    Returns:
        One embedding per input text, in the same order as ``texts``.

    Raises:
        Any exception raised by the client for a failed request
        propagates to the caller.
    """
    def _embed(text: str) -> list[float]:
        # InferenceClient has no ``embed`` method; embeddings are served
        # by ``feature_extraction``.
        vector = client.feature_extraction(text, model=EMBED_MODEL)
        # The client is documented to return a numpy array; convert it to
        # plain Python lists so the result matches the annotated type.
        # NOTE(review): the per-text shape depends on the model's pipeline -
        # confirm it is a flat vector for EMBED_MODEL.
        return vector.tolist() if hasattr(vector, "tolist") else list(vector)

    # run in threadpool
    pending = [asyncio.to_thread(_embed, text) for text in texts]
    return await asyncio.gather(*pending)
20
+
21
async def cluster_embeddings(embs: list[list[float]], n_clusters: int = 5) -> list[int]:
    """
    Cluster embeddings into n_clusters, return list of cluster labels.

    Args:
        embs: One embedding vector per item to cluster.
        n_clusters: Requested number of clusters. When fewer embeddings
            than this are supplied, the number of clusters is reduced to
            the number of embeddings, because KMeans rejects
            ``n_clusters`` greater than the sample count.

    Returns:
        A cluster label for each embedding, in input order. An empty
        input returns an empty list.

    Raises:
        ValueError: Raised by KMeans for invalid parameters
            (for example ``n_clusters`` below 1).
    """
    sample_count = len(embs)
    if sample_count == 0:
        # Nothing to cluster; KMeans would raise on an empty matrix.
        return []

    # With the default of 5 clusters, short inputs would otherwise fail.
    effective_clusters = min(n_clusters, sample_count)
    kmeans = KMeans(n_clusters=effective_clusters, random_state=0)

    # fit_predict is a blocking call; run it off the event loop thread.
    labels = await asyncio.to_thread(kmeans.fit_predict, embs)
    return labels.tolist()
27
+
28
+
29
+ # ── mcp/protocols.py ───────────────────────────────────────────────────
30
+ import asyncio
31
+ from mcp.openai_utils import ai_qa
32
+ from mcp.gemini import gemini_qa
33
+
34
async def draft_protocol(question: str, context: str, llm: str = "openai") -> str:
    """
    Ask an LLM helper to draft an experimental protocol for a question.

    Args:
        question: Hypothesis or question the protocol should test.
        context: Background text inserted into the prompt after the question.
        llm: Backend selector. ``"gemini"`` (case-insensitive) uses
            ``gemini_qa``; any other value uses ``ai_qa``.

    Returns:
        The awaited result of the selected helper for the assembled prompt.
    """
    # Only "gemini" switches the backend; everything else falls back to ai_qa.
    backend = gemini_qa if llm.lower() == "gemini" else ai_qa

    instruction = "You are a senior researcher. Draft a step-by-step experimental protocol to test: "
    details = f"{question}\nContext:\n{context}\nInclude materials, methods, controls, expected outcomes."
    return await backend(instruction + details)