import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch

# Read the whole knowledge base into memory as a single UTF-8 string.
with open("knowledge.txt", "r", encoding="utf-8") as file:
    knowledge = file.read()

print(knowledge)

# One chunk per line of the file: trim every line, then drop the ones that
# are empty after trimming (filter(None, ...) discards empty strings).
cleaned_chunks = list(
    filter(None, map(str.strip, knowledge.strip().split("\n")))
)
print(cleaned_chunks)

# Embed all chunks once at import time; get_top_chunks() scores queries
# against this tensor.
model = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)
print(chunk_embeddings)

# NOTE(review): not read anywhere in the visible code; kept in case other
# code depends on the name.
cleaned_text = ""

def get_top_chunks(query, top_k=5):
    """Return the knowledge chunks that score highest against ``query``.

    The query is encoded with the module-level ``model``, scaled to unit
    length, and scored against every row of ``chunk_embeddings`` with a dot
    product. The scores are true cosine similarities only if the rows of
    ``chunk_embeddings`` are themselves unit-length; they are used here as
    stored.

    Args:
        query: Text to look up in the knowledge base.
        top_k: Maximum number of chunks to return. Defaults to 5.

    Returns:
        A list of at most ``top_k`` strings taken from ``cleaned_chunks``,
        ordered from highest to lowest score. The list is shorter than
        ``top_k`` when the knowledge base has fewer chunks, and empty when
        there are no chunks or ``top_k`` is not positive.
    """
    # torch.topk raises when k exceeds the number of scores, so never ask
    # for more chunks than exist.
    k = min(top_k, len(cleaned_chunks))
    if k <= 0:
        return []

    query_embedding = model.encode(query, convert_to_tensor=True)
    query_embedding_normalized = query_embedding / query_embedding.norm()

    # (num_chunks, dim) @ (dim,) -> one score per chunk.
    similarities = torch.matmul(chunk_embeddings, query_embedding_normalized)
    print(similarities)
    top_indices = torch.topk(similarities, k=k).indices.tolist()
    print(top_indices)

    return [cleaned_chunks[i] for i in top_indices]

# Startup smoke test: run one sample query through the retriever and print
# the chunks it returns. This executes every time the module is loaded.
top_results = get_top_chunks("What are some good wizard characters?")
print(top_results)


# Inference client created for the "HuggingFaceH4/zephyr-7b-beta" model;
# respond() uses it to request streamed chat completions.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history):
    """Stream a chat reply to ``message`` grounded in retrieved knowledge.

    The chunks returned by ``get_top_chunks(message)`` are joined with
    newlines and embedded in the system prompt. Any prior ``history`` entries
    are then forwarded as-is, followed by the new user message, and the
    completion is requested from the module-level ``client`` in streaming
    mode.

    Args:
        message: The user's latest message text.
        history: Earlier conversation turns, appended to the request
            unchanged; may be empty or ``None``. Presumably a list of
            ``{"role": ..., "content": ...}`` dicts as supplied by the
            chat UI -- confirm against the caller.

    Yields:
        The reply text accumulated so far, once for every streamed chunk
        that carries content.
    """
    top_chunks = get_top_chunks(message)
    context = "\n".join(top_chunks)

    messages = [
        {
            "role": "system",
            "content": (
                "You are a chatbot that helps users create characters for role-playing games. "
                "Use the following knowledge to inform your answers:\n\n" + context
            ),
        }
    ]

    if history:
        messages.extend(history)

    messages.append({"role": "user", "content": message})

    stream = client.chat_completion(
        messages,
        max_tokens=500,
        temperature=1.2,
        stream=True,
    )

    response = ""
    # The loop variable is deliberately not called `message`, so the
    # function parameter is never rebound while streaming.
    for chunk in stream:
        # A streamed chunk may carry no choices (e.g. a trailing
        # bookkeeping chunk); indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token is not None:
            response += token
            yield response

# Build the chat UI around respond(). respond() forwards `history` entries
# straight into the model request, so it relies on the history format
# selected by type="messages".
chatbot = gr.ChatInterface(respond, type="messages")

# Start the Gradio app. This runs at module load; there is no __main__ guard.
chatbot.launch()