Spaces:
Running
Running
Rohit Rajpoot
committed on
Commit
·
6b3fcc5
1
Parent(s):
7536c5d
Add bayes_chat.py so assist.bayes_chat can be imported
Browse files- assist/bayes_chat.py +27 -0
assist/bayes_chat.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import torch
|
3 |
+
import numpy as np
|
4 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
+
|
6 |
+
# Load the Bayesian embeddings & vocab at import time.
# Both paths are relative, so they resolve against the process's working
# directory, not against this module's location.
#
# map_location="cpu": .numpy() only works on CPU tensors, so a checkpoint
# that was saved from a GPU is remapped while loading instead of failing.
WEIGHTS = torch.load("tensor_bayes.pt", map_location="cpu").detach().numpy()  # shape: (V, V)
# Explicit UTF-8: without it open() decodes with the platform locale, which
# can corrupt or reject non-ASCII vocabulary tokens.
with open("vocab_bayes.json", "r", encoding="utf-8") as f:
    TOKEN2IDX = json.load(f)
# Reverse lookup (row index -> token) used to turn an argmax back into a word.
IDX2TOKEN = {int(idx): tok for tok, idx in TOKEN2IDX.items()}
|
11 |
+
|
12 |
+
def bayes_chat(question: str) -> str:
    """
    Answer a question with the vocab token nearest to its mean embedding.

    The question is lower-cased and split on whitespace. Each word is looked
    up in ``TOKEN2IDX``; a word that is not found verbatim is retried with
    leading/trailing punctuation stripped, so "bayes?" can still match
    "bayes". The ``WEIGHTS`` rows of the matched words are averaged and
    compared by cosine similarity against every row of ``WEIGHTS``.

    The question's own words are excluded from the candidates whenever any
    other candidate exists. Without that, a one-word question would always
    get its own word back, because a non-zero row has cosine similarity 1.0
    with itself.

    Args:
        question: Free-text user input.

    Returns:
        A Markdown-formatted string naming the nearest token, or a fallback
        message when no word of the question is in the vocabulary.
    """
    # Local import keeps this block independent of the file's import header.
    import string

    idxs = []
    for raw in question.lower().split():
        # Try the exact token first so everything that matched before the
        # punctuation fallback was added still resolves to the same entry.
        for candidate in (raw, raw.strip(string.punctuation)):
            if candidate in TOKEN2IDX:
                idxs.append(TOKEN2IDX[candidate])
                break
    if not idxs:
        return "🤔 I don’t recognize any of those words."
    # average the rows corresponding to each token
    qv = np.mean(WEIGHTS[idxs], axis=0, keepdims=True)
    # compute similarities against every token’s vector
    sims = cosine_similarity(qv, WEIGHTS)[0]
    # Rule out the question's own tokens so the answer is a genuine neighbor.
    masked = np.array(sims, dtype=float)
    masked[idxs] = -np.inf
    if np.isfinite(masked).any():
        sims = masked
    # else: the question covers every row, so fall back to the plain argmax.
    best_idx = int(np.argmax(sims))
    best_tok = IDX2TOKEN.get(best_idx, "<unknown>")
    return f"🔬 Bayesian neighbor: **{best_tok}**"
|