import json

import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity

# Load the Bayesian embeddings & vocab at import time
WEIGHTS = torch.load("tensor_bayes.pt").detach().numpy()  # shape: (V, V)
with open("vocab_bayes.json", "r") as f:
    TOKEN2IDX = json.load(f)
IDX2TOKEN = {int(idx): tok for tok, idx in TOKEN2IDX.items()}


def bayes_chat(question: str) -> str:
    """
    Given a user question, tokenize → average Bayesian embeddings →
    find the nearest token in the vocab → return that as the "answer."
    """
    tokens = question.lower().split()
    idxs = [TOKEN2IDX[t] for t in tokens if t in TOKEN2IDX]
    if not idxs:
        return "🤔 I don’t recognize any of those words."

    # average the rows corresponding to each recognized token
    qv = np.mean(WEIGHTS[idxs], axis=0, keepdims=True)

    # compute similarities against every token’s vector
    sims = cosine_similarity(qv, WEIGHTS)[0]
    best_idx = int(np.argmax(sims))
    best_tok = IDX2TOKEN.get(best_idx, "")
    return f"🔬 Bayesian neighbor: **{best_tok}**"
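

# Minimal usage sketch: assumes "tensor_bayes.pt" and "vocab_bayes.json" are
# present next to this script; the example questions below are hypothetical
# and only illustrate how bayes_chat() is meant to be called.
if __name__ == "__main__":
    for question in ["what is a prior?", "explain the posterior update"]:
        print(question, "->", bayes_chat(question))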