Rohit Rajpoot committed on
Commit
fb2b4e2
·
1 Parent(s): aa99e83

Detach tensor before .numpy()

Browse files
Files changed (1) hide show
  1. assist/chat.py +29 -5
assist/chat.py CHANGED
@@ -1,9 +1,33 @@
1
- # assist/chat.py
 
 
 
 
 
 
 
 
 
 
2
 
3
  def chat(question: str) -> str:
4
  """
5
- Chat plugin stub: echoes back what you asked.
 
 
 
 
6
  """
7
- if not question.strip():
8
- return "Please enter a question above."
9
- return f"Chat plugin stub received: “{question}”"
 
 
 
 
 
 
 
 
 
 
 
1
import json

import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity

# Load the embedding matrix once at import time.
# map_location="cpu" lets the checkpoint load on machines without the GPU
# it may have been saved from; .detach() severs the autograd graph so
# .numpy() is legal even if the tensor requires grad.
WEIGHTS = torch.load("tensor.pt", map_location="cpu").detach().numpy()  # assumed shape: (V, D) — TODO confirm

# token (str) -> row index. Pin UTF-8 so parsing doesn't depend on the
# platform-default encoding.
with open("vocab.json", "r", encoding="utf-8") as f:
    TOKEN2IDX = json.load(f)

# Reverse map: index (int) -> token (str). int(i) normalizes indices that
# JSON may have stored as strings.
IDX2TOKEN = {int(i): w for w, i in TOKEN2IDX.items()}
 
13
def chat(question: str) -> str:
    """
    Embedding-based Q&A stub.

    Tokenizes *question* by whitespace, averages the embeddings of the
    recognized tokens, and names the vocabulary token whose embedding is
    nearest (by cosine similarity) to that average.

    Parameters
    ----------
    question : str
        Free-form user text; matching against the vocab is case-insensitive.

    Returns
    -------
    str
        A Markdown-formatted answer naming the nearest vocab token, or a
        fallback message when none of the words are in the vocabulary.
    """
    # Simple whitespace tokenizer; lower-case to match the vocab keys.
    tokens = question.lower().split()

    # Map tokens to row indices, silently dropping unknown words.
    # int(...) mirrors IDX2TOKEN's normalization: the JSON vocab may store
    # indices as strings, and numpy fancy indexing requires integers.
    idxs = [int(TOKEN2IDX[t]) for t in tokens if t in TOKEN2IDX]
    if not idxs:
        return "🤔 I don't recognize any of those words."

    # Average embedding of the recognized tokens; keepdims preserves the
    # (1, D) shape that cosine_similarity expects for a single query.
    q_embed = np.mean(WEIGHTS[idxs], axis=0, keepdims=True)

    # Cosine similarity against every vocab embedding -> (V,) scores.
    sims = cosine_similarity(q_embed, WEIGHTS)[0]
    best = int(np.argmax(sims))
    best_word = IDX2TOKEN.get(best, "<unknown>")
    return f"🗣️ Nearest concept: **{best_word}**"