import os

import gradio as gr
from huggingface_hub import InferenceClient, login

# Fetch the token from the environment (Space secrets are exposed
# as environment variables; this Space keeps its HF token under "gemma3")
hf_token = os.getenv("gemma3")
if not hf_token:
    raise RuntimeError("HF token not found: set the 'gemma3' secret for this Space")
login(hf_token)

# Initialize the inference client with the fine-tuned model
client = InferenceClient("hackergeek98/gemma-finetuned")
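
# Optional sanity check, assuming the model is deployed on the serverless
# Inference API (left commented out to avoid a network call at startup):
# print(client.text_generation("Hello", max_new_tokens=10))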


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    # Build a plain-text prompt from the system message and the
    # accumulated conversation history
    prompt = f"{system_message}\n"
    for user_msg, assistant_msg in history:
        if user_msg:
            prompt += f"User: {user_msg}\n"
        if assistant_msg:
            prompt += f"Assistant: {assistant_msg}\n"
    prompt += f"User: {message}\nAssistant: "

    # Call the text-generation API; the parameter name is max_new_tokens,
    # and the client is already bound to the model, so no model= is needed
    response = client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    # With default arguments, text_generation returns the generated text as a
    # plain string, not a {"generated_text": ...} dict
    return response
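

# Example of the prompt respond() builds: with history [("Hi", "Hello!")] and
# message "How are you?", the model receives:
#
#   You are a friendly Chatbot.
#   User: Hi
#   Assistant: Hello!
#   User: How are you?
#   Assistant: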

# Set up the Gradio chat interface; each additional input is passed to
# respond() after message and history, in the order listed here
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()
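
# Note: demo.launch() is sufficient on Hugging Face Spaces; for local testing,
# Gradio can also expose a temporary public URL via demo.launch(share=True).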