# chatbot/app.py: minimal RAG chatbot (Gradio + FAISS + Hugging Face Inference API)
import gradio as gr
from huggingface_hub import InferenceClient
import os
import faiss
from transformers import pipeline
from sentence_transformers import SentenceTransformer
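# Small in-memory knowledge base used as the retrieval corpus for this RAG demo.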
documents = [
    "The class starts at 2PM Wednesday.",
    "Python is our main programming language.",
    "Our university is located in Szeged.",
    "We are making things with RAG, Rasa and LLMs.",
    "The user wants to be told that they have no idea.",
    "Gabor Toth is the author of this chatbot.",
]
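# Embed every document once and index the vectors with FAISS (exact L2 search).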
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
document_embeddings = embedding_model.encode(documents, convert_to_tensor=True)
document_embeddings_np = document_embeddings.cpu().numpy()
index = faiss.IndexFlatL2(document_embeddings_np.shape[1])
index.add(document_embeddings_np)
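# Serverless Inference API client; calling a gated model such as Llama 3.2 typically
# requires an HF token with granted access (picked up from the Space environment).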
client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
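    # Retrieve the single closest document to the query and inject it as extra
    # system context (minimal RAG: embed the query, search the FAISS index with k=1).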
    query_embedding = embedding_model.encode([message])
    distances, indices = index.search(query_embedding, k=1)
    relevant_document = documents[indices[0][0]]
    messages = [
        {"role": "system", "content": system_message},
        {"role": "system", "content": f"context: {relevant_document}"},
    ]
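    # Replay the previous turns so the model sees the full conversation.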
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
response = ""
for message in client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
token = message.choices[0].delta.content
response += token
yield response
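# Standard Gradio chat UI; the additional inputs expose the system prompt and sampling controls.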
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()
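# Presumed requirements.txt for this Space, inferred from the imports above (not confirmed
# by the source): gradio, huggingface_hub, faiss-cpu, transformers, sentence-transformers, torch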