# chatbot/app.py: minimal RAG chatbot (Gradio + FAISS + Hugging Face Inference API)
import gradio as gr
from huggingface_hub import InferenceClient
import os
import faiss
from transformers import pipeline
from sentence_transformers import SentenceTransformer
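# Small in-memory knowledge base used as the retrieval corpus for this RAG demo.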
documents = [
    "The class starts at 2PM Wednesday.",
    "Python is our main programming language.",
    "Our university is located in Szeged.",
    "We are making things with RAG, Rasa and LLMs.",
    "The user wants to be told that they have no idea.",
    "Gabor Toth is the author of this chatbot.",
]
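# Embed every document once and index the vectors with FAISS (exact L2 search).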
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
document_embeddings = embedding_model.encode(documents, convert_to_tensor=True)
document_embeddings_np = document_embeddings.cpu().numpy()
index = faiss.IndexFlatL2(document_embeddings_np.shape[1])
index.add(document_embeddings_np)
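# Serverless Inference API client; calling a gated model such as Llama 3.2 typically
# requires an HF token with granted access (picked up from the Space environment).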
client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
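    # Retrieve the single closest document to the query and inject it as extra
    # system context (minimal RAG: embed the query, search the FAISS index with k=1).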
    query_embedding = embedding_model.encode([message])
    distances, indices = index.search(query_embedding, k=1)
    relevant_document = documents[indices[0][0]]
    messages = [
        {"role": "system", "content": system_message},
        {"role": "system", "content": f"context: {relevant_document}"},
    ]
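    # Replay the previous turns so the model sees the full conversation.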
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
response = ""
for message in client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
token = message.choices[0].delta.content
response += token
yield response
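# Standard Gradio chat UI; the additional inputs expose the system prompt and sampling controls.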
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()
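# Presumed requirements.txt for this Space, inferred from the imports above (not confirmed
# by the source): gradio, huggingface_hub, faiss-cpu, transformers, sentence-transformers, torch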