Rohit Rajpoot committed on
Commit
6b3fcc5
·
1 Parent(s): 7536c5d

Add bayes_chat.py so assist.bayes_chat can be imported

Browse files
Files changed (1) hide show
  1. assist/bayes_chat.py +27 -0
assist/bayes_chat.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import torch
3
+ import numpy as np
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+
6
# Load the Bayesian embeddings & vocab once, at import time.
# Both paths are relative, so they resolve against the process's current
# working directory, not against this module's location.
# map_location="cpu" lets a tensor that was saved on a GPU load on a
# CPU-only host; the .numpy() conversion needs CPU memory in any case.
WEIGHTS = torch.load("tensor_bayes.pt", map_location="cpu").detach().numpy()  # shape: (V, V)
# JSON text is UTF-8; state it explicitly instead of relying on the
# platform's locale-dependent default encoding.
with open("vocab_bayes.json", "r", encoding="utf-8") as f:
    TOKEN2IDX = json.load(f)
# Reverse lookup (index -> token); indices are coerced to int for use as keys.
IDX2TOKEN = {int(idx): tok for tok, idx in TOKEN2IDX.items()}
11
+
12
def bayes_chat(question: str) -> str:
    """
    Answer a question with its nearest vocabulary token.

    The question is lower-cased and split on whitespace; words that are not
    keys of TOKEN2IDX are ignored.  The WEIGHTS rows of the remaining words
    are averaged into a single query vector, which is scored by cosine
    similarity against every row of WEIGHTS.  The token that IDX2TOKEN maps
    to the best-scoring row is returned inside a formatted reply.  If no
    word of the question is known, a fallback message is returned instead.
    """
    known_rows = []
    for word in question.lower().split():
        if word in TOKEN2IDX:
            known_rows.append(TOKEN2IDX[word])

    if len(known_rows) == 0:
        return "🤔 I don’t recognize any of those words."

    # Mean of the selected rows; keepdims keeps the result 2-D, which is the
    # input form cosine_similarity works on.
    query_vec = WEIGHTS[known_rows].mean(axis=0, keepdims=True)

    # One similarity score per row of WEIGHTS; take the highest-scoring row.
    scores = cosine_similarity(query_vec, WEIGHTS)[0]
    winner = int(scores.argmax())

    best_tok = IDX2TOKEN.get(winner, "<unknown>")
    return f"🔬 Bayesian neighbor: **{best_tok}**"