Rohit Rajpoot committed · fb2b4e2
1 Parent(s): aa99e83

Detach tensor before .numpy()
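For context on why this change is needed (a minimal, illustrative sketch; the variable name and shape below are made up, not taken from the repo): PyTorch refuses to convert a tensor that still tracks gradients, so .detach() has to come before .numpy().

    import torch

    # Illustrative weight matrix that still requires grad, as a saved
    # nn.Embedding weight typically would after torch.load.
    weights = torch.randn(100, 16, requires_grad=True)

    # weights.numpy()               # RuntimeError: Can't call numpy() on Tensor that requires grad
    arr = weights.detach().numpy()  # detach() drops the autograd graph, then conversion works
    print(arr.shape)                # (100, 16)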
Files changed: assist/chat.py (+29 -5)

assist/chat.py CHANGED
@@ -1,9 +1,33 @@
-
+import json
+import torch
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+
+# Load once at import time
+WEIGHTS = torch.load("tensor.pt").detach().numpy()  # shape: (V, D)
+with open("vocab.json", "r") as f:
+    TOKEN2IDX = json.load(f)
+# Build reverse map: idx (as int) → token (str)
+IDX2TOKEN = {int(i): w for w, i in TOKEN2IDX.items()}
 
 def chat(question: str) -> str:
     """
-
+    Embedding Q&A stub:
+    - Tokenize by whitespace
+    - Lookup embeddings
+    - Average them
+    - Find nearest token in vocab
     """
-
-
-
+    # Simple whitespace tokenizer; you can improve this later
+    tokens = question.lower().split()
+    # Map to indices, drop unknowns
+    idxs = [TOKEN2IDX[t] for t in tokens if t in TOKEN2IDX]
+    if not idxs:
+        return "🤔 I don't recognize any of those words."
+    # Average embedding vector
+    q_embed = np.mean(WEIGHTS[idxs], axis=0, keepdims=True)
+    # Cosine similarity against all vocab embeddings
+    sims = cosine_similarity(q_embed, WEIGHTS)[0]
+    best = int(np.argmax(sims))
+    best_word = IDX2TOKEN.get(best, "<unknown>")
+    return f"🗣️ Nearest concept: **{best_word}**"
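A quick smoke test for the updated function (a sketch, not part of the commit: it assumes tensor.pt and vocab.json sit in the working directory and that vocab.json maps tokens to integer row indices of the saved tensor):

    # run from the repo root; illustrative only
    from assist.chat import chat

    print(chat("hello world"))   # e.g. "🗣️ Nearest concept: **hello**"
    print(chat("qwertyuiop"))    # likely "🤔 I don't recognize any of those words."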