import gradio as gr
import os
from huggingface_hub import InferenceClient
import cohere
# Models, API keys and initialization of API clients
COHERE_MODEL = "command-r-plus"
HF_MODEL = "meta-llama/Llama-3.2-3B-Instruct"
HF_API_KEY = os.getenv("HF_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
client_hf = InferenceClient(model=HF_MODEL, token=HF_API_KEY)
client_cohere = cohere.Client(COHERE_API_KEY)
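# Optional fail-fast check (a defensive addition, not required by either SDK):
# surface a clear error now if a key is unset, rather than an opaque
# authentication failure on the first request.
for _name, _value in (("HF_API_KEY", HF_API_KEY), ("COHERE_API_KEY", COHERE_API_KEY)):
    if not _value:
        raise RuntimeError(f"Environment variable {_name} is not set.")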
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    use_cohere: bool,
):
"""Handles chatbot responses based on user input and chat history.
This function integrates with either the Cohere API or Hugging Face API to generate AI-based responses.
Args:
message (str): The latest user message.
history (list[tuple[str, str]]): A list of previous exchanges where:
- Each tuple contains (user_message, assistant_response).
- Example: [("Hello", "Hi there!"), ("How are you?", "I'm good!")]
system_message (str): A system-level instruction for the chatbot (e.g., personality, style).
max_tokens (int): Maximum number of new tokens the model can generate.
temperature (float): Controls randomness (higher = more varied responses).
top_p (float): Probability threshold for token selection (higher = more diverse responses).
use_cohere (bool): If True, uses Cohere API; otherwise, uses Hugging Face API.
Yields:
str: The chatbot's response (streamed for Hugging Face, full response for Cohere).
"""
    # Build the message history for context (consumed by the Hugging Face path)
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})  # Current user message
response = ""
if use_cohere:
# Using Cohere API (no streaming support)
cohere_response = client_cohere.chat(
message=message,
model=COHERE_MODEL,
temperature=temperature,
max_tokens=max_tokens
)
response = cohere_response.text
yield response # Yield full response immediately
    else:
        # Hugging Face path: streams tokens as they are generated.
        # Note the loop variable is named `chunk`, not `message`, to avoid
        # shadowing the user's message.
        for chunk in client_hf.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content  # Newly generated token
            if token:  # Delta content can be None/empty on some chunks
                response += token
                yield response  # Yield the accumulated response incrementally
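# Illustrative usage (a sketch, not exercised by the Space itself): respond()
# is a generator, so each yielded value is the response accumulated so far.
#
#   for partial in respond(
#       message="Hello!",
#       history=[],
#       system_message="You are a friendly Chatbot.",
#       max_tokens=64,
#       temperature=0.7,
#       top_p=0.95,
#       use_cohere=False,
#   ):
#       print(partial)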
# Gradio UI with user-configurable inputs
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System prompt"),  # System instruction
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),  # Token limit
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),  # Randomness control
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),  # Probability mass
        gr.Checkbox(label="Use the Cohere model (command-r-plus) instead of Hugging Face"),  # API selection toggle
    ],
)
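# Note: gr.ChatInterface passes the additional_inputs positionally after
# (message, history), so the widget order above must match respond()'s
# remaining parameters: system_message, max_tokens, temperature, top_p,
# use_cohere.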
# Start Gradio interface
if __name__ == "__main__":
    demo.launch()