import gradio as gr
from huggingface_hub import InferenceClient
import spaces
import torch
import os
import platform
model = ""  # placeholder; the actual model id is resolved per request in choose_model()
duration = 1  # seconds requested for the @spaces.GPU allocation below
print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
print(f"Python version: {platform.python_version()}")
print(f"Pytorch version: {torch.__version__}")
print(f"Gradio version: {gr. __version__}")
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
def choose_model(model_name):
    # Map the dropdown label to the corresponding Hugging Face model id.
    if model_name == "DeepSeek-R1-Distill-Qwen-1.5B":
        model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
    elif model_name == "DeepSeek-R1-Distill-Qwen-32B":
        model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
    elif model_name == "Llama3-8b-Instruct":
        model = "meta-llama/Meta-Llama-3-8B-Instruct"
    elif model_name == "Llama3.1-8b-Instruct":
        model = "meta-llama/Llama-3.1-8B-Instruct"
    elif model_name == "Llama2-13b-chat":
        model = "meta-llama/Llama-2-13b-chat-hf"
    elif model_name == "Gemma-2-2b":
        model = "google/gemma-2-2b-it"
    elif model_name == "Gemma-7b":
        model = "google/gemma-7b"
    elif model_name == "Mixtral-8x7B-Instruct":
        model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    elif model_name == "Microsoft-phi-2":
        model = "microsoft/phi-2"
    else:  # default to Zephyr if no model is chosen
        model = "HuggingFaceH4/zephyr-7b-beta"
    return model
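# For example, choose_model("Gemma-2-2b") returns "google/gemma-2-2b-it",
# and any unrecognised label falls back to the Zephyr checkpoint.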
@spaces.GPU(duration=duration)
def respond(message, history: list[tuple[str, str]], model, system_message, max_tokens, temperature, top_p):
    print(model)
    model_name = choose_model(model)
    client = InferenceClient(model_name, token=os.getenv('deepseekv2'))
    # Rebuild the conversation as a list of chat messages, starting with the system prompt.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
    response = ""
    # Stream the completion and yield the growing response so the UI updates live.
    for chunk in client.chat_completion(messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p):
        token = chunk.choices[0].delta.content
        if token:  # some streamed chunks carry no content
            response += token
        yield response
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(["DeepSeek-R1-Distill-Qwen-1.5B", "DeepSeek-R1-Distill-Qwen-32B", "Gemma-2-2b", "Gemma-7b", "Llama2-13b-chat", "Llama3-8b-Instruct", "Llama3.1-8b-Instruct", "Microsoft-phi-2", "Mixtral-8x7B-Instruct", "Zephyr-7b-beta"], label="Select Model"),
        gr.Textbox(value="You are a friendly and helpful Chatbot, be concise and straight to the point, avoid excessive reasoning.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
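# Hedged sketch of further customization (per the Gradio docs linked above); the keyword
# arguments and values below are illustrative additions, not part of the original Space:
#
# demo = gr.ChatInterface(
#     respond,
#     additional_inputs=[...],  # same inputs as above
#     title="Multi-model chat",  # placeholder title
#     description="Pick a model from the dropdown and chat.",  # placeholder description
# )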
if __name__ == "__main__":
    demo.launch()