"""Gradio chatbot for RPG character creation, grounded in a local knowledge file.

The knowledge file is split into one chunk per non-empty line, each chunk is
embedded with a SentenceTransformer model, and the chunks most similar to the
user's message are injected into the system prompt before the hosted LLM is
called.
"""

import gradio as gr
import torch
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

# NOTE: requires the `sentence-transformers` package
# (`pip install -q sentence-transformers`). The original `!pip install ...`
# line is IPython-only syntax and is a SyntaxError in a plain Python file.

with open("knowlege.txt", "r", encoding="utf-8") as file:
    knowlege = file.read()
print(knowlege)

# One chunk per non-empty line, with surrounding whitespace removed.
cleaned_chunks = [chunk.strip() for chunk in knowlege.strip().split("\n") if chunk.strip()]
print(cleaned_chunks)

model = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)
print(chunk_embeddings)

# Normalise every chunk vector once so that the dot product with a normalised
# query vector is a true cosine similarity. Skipped when there are no chunks,
# because the embedding tensor then has no row dimension to normalise over.
if cleaned_chunks:
    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
else:
    chunk_embeddings_normalized = chunk_embeddings

# Not referenced anywhere in the visible code; kept in case other code reads it.
cleaned_text = ""


def get_top_chunks(query, top_k=5):
    """Return the knowledge chunks most similar to *query*.

    Args:
        query: Text to embed and compare against the knowledge chunks.
        top_k: Maximum number of chunks to return. Clamped to the number of
            available chunks so small knowledge files do not make
            ``torch.topk`` raise.

    Returns:
        A list of chunk strings ordered from most to least similar; empty when
        there are no chunks or ``top_k`` is not positive.
    """
    k = min(top_k, len(cleaned_chunks))
    if k <= 0:
        return []

    query_embedding = model.encode(query, convert_to_tensor=True)
    query_embedding_normalized = query_embedding / query_embedding.norm()

    # (num_chunks, dim) @ (dim,) -> (num_chunks,) similarity scores.
    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
    print(similarities)

    top_indices = torch.topk(similarities, k=k).indices.tolist()
    print(top_indices)

    return [cleaned_chunks[i] for i in top_indices]


# Import-time sample query, kept from the original script.
top_results = get_top_chunks("What are some good wizard characters?")
print(top_results)

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history):
    """Stream a reply to *message*, grounded in retrieved knowledge chunks.

    Args:
        message: The latest user message.
        history: Earlier conversation turns as dicts with ``"role"`` and
            ``"content"`` keys (the interface is created with
            ``type="messages"``); may be empty or ``None``.

    Yields:
        The accumulated response text after each streamed token.
    """
    # Retrieve the chunks most relevant to the current user message and pass
    # them to the model as part of the system instructions.
    top_chunks = get_top_chunks(message)
    context = "\n".join(top_chunks)

    messages = [
        {
            "role": "system",
            "content": (
                "You are a chatbot that helps users create characters for role-playing games. "
                "Use the following knowledge to inform your answers:\n\n" + context
            ),
        }
    ]

    if history:
        # History entries may carry extra UI-only keys; forward only the two
        # fields used for chat messages everywhere else in this function.
        messages.extend(
            {"role": turn["role"], "content": turn["content"]} for turn in history
        )

    messages.append({"role": "user", "content": message})

    stream = client.chat_completion(
        messages,
        max_tokens=100,
        temperature=1.2,
        stream=True,
    )

    response = ""
    # Loop variable deliberately not named `message`, to avoid shadowing the
    # function parameter.
    for event in stream:
        # Skip events with no choices instead of failing on `choices[0]`.
        if not event.choices:
            continue
        token = event.choices[0].delta.content
        if token is not None:
            response += token
            yield response


chatbot = gr.ChatInterface(respond, type="messages")

if __name__ == "__main__":
    chatbot.launch()