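"""Retrieval-augmented chatbot for building role-playing game characters.

Embeds each line of knowledge.txt with a SentenceTransformer, retrieves the
chunks most similar to the user's message, and passes them as context to a
hosted Zephyr-7B model behind a streaming Gradio chat interface.
"""
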
import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch

# Load the raw knowledge base (one chunk of text per line).
with open("knowledge.txt", "r", encoding="utf-8") as file:
    knowledge = file.read()

print(knowledge)  # Debug: confirm the file loaded.

# Split on newlines and drop blanks so each chunk is one non-empty line.
cleaned_chunks = [chunk.strip() for chunk in knowledge.strip().split("\n") if chunk.strip()]
print(cleaned_chunks)

# Embedding model: maps text to 384-dimensional vectors.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Pre-compute chunk embeddings once at startup. Normalizing here means a
# plain dot product with a normalized query is cosine similarity.
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True, normalize_embeddings=True)
print(chunk_embeddings)


def get_top_chunks(query, k=5):
    """Return the k knowledge chunks most similar to the query."""
    query_embedding = model.encode(query, convert_to_tensor=True)
    query_embedding_normalized = query_embedding / query_embedding.norm()

    # Both sides are unit vectors, so this dot product is cosine similarity.
    similarities = torch.matmul(chunk_embeddings, query_embedding_normalized)
    print(similarities)

    # Never request more chunks than exist.
    top_indices = torch.topk(similarities, k=min(k, len(cleaned_chunks))).indices.tolist()
    print(top_indices)

    return [cleaned_chunks[i] for i in top_indices]

# Quick smoke test of retrieval before wiring up the chat UI.
top_results = get_top_chunks("What are some good wizard characters?")
print(top_results)
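# Optional equivalent: sentence-transformers bundles a cosine-similarity
# helper, so the manual normalization above could also be written as:
#
#     from sentence_transformers import util
#     similarities = util.cos_sim(query_embedding, chunk_embeddings)[0]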

# Hosted chat model on the Hugging Face Inference API.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history):
    """Stream a model reply grounded in retrieved knowledge chunks."""
    response = ""

    # Retrieve the most relevant chunks and fold them into the system prompt.
    top_chunks = get_top_chunks(message)
    context = "\n".join(top_chunks)

    messages = [
        {
            "role": "system",
            "content": (
                "You are a chatbot that helps users create characters for role-playing games. "
                "Use the following knowledge to inform your answers:\n\n" + context
            )
        }
    ]

    # Append prior turns (already in messages format), then the new user turn.
    if history:
        messages.extend(history)

    messages.append({"role": "user", "content": message})

    # Request a streamed completion; the fairly high temperature favors
    # more varied character suggestions.
    stream = client.chat_completion(
        messages,
        max_tokens=500,
        temperature=1.2,
        stream=True
    )

    # Accumulate streamed tokens, yielding the partial reply so the UI
    # updates incrementally. The loop variable must not shadow `message`.
    for event in stream:
        token = event.choices[0].delta.content
        if token is not None:
            response += token
            yield response

# type="messages" makes Gradio pass history as a list of
# {"role": ..., "content": ...} dicts, matching the API format above.
chatbot = gr.ChatInterface(respond, type="messages")

chatbot.launch()
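
# Note: launch() serves the app locally; launch(share=True) would also
# create a temporary public Gradio link for testing from another device.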