import gradio as gr
from huggingface_hub import InferenceClient
import spaces  # spaces 0.32.0, provides the ZeroGPU @spaces.GPU decorator
import torch
import os
import platform
duration = 24  # seconds of GPU time requested per @spaces.GPU call

# Log the runtime environment (guarded: CUDA is not visible at startup on ZeroGPU Spaces)
print(f"Is CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
print(f"CUDA version: {torch.version.cuda}")
print(f"Python version: {platform.python_version()}")
print(f"Pytorch version: {torch.__version__}")
print(f"Gradio version: {gr.__version__}")
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
Packages that work::::::::::
Is CUDA available: True
CUDA device: NVIDIA A100-SXM4-80GB MIG 3g.40gb
CUDA version: 12.1
Python version: 3.10.13
Pytorch version: 2.4.0+cu121
Gradio version: 5.0.1
"""
def choose_model(model_name):
    """Map a UI display name to its Hugging Face model id."""
    model_map = {
        "DeepSeek-R1-Distill-Qwen-1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "DeepSeek-R1-Distill-Qwen-32B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "Llama3-8b-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
        "Llama3.1-8b-Instruct": "meta-llama/Llama-3.1-8B-Instruct",
        "Llama2-13b-chat": "meta-llama/Llama-2-13b-chat-hf",
        "Gemma-2-2b": "google/gemma-2-2b-it",
        "Gemma-7b": "google/gemma-7b",
        "Mixtral-8x7B-Instruct": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "Microsoft-phi-2": "microsoft/phi-2",
    }
    # Default to zephyr if no (or an unknown) model is chosen
    return model_map.get(model_name, "HuggingFaceH4/zephyr-7b-beta")
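# Quick sanity check of the mapping (a sketch of expected results; unknown names fall back to Zephyr):
#   choose_model("Gemma-7b")     -> "google/gemma-7b"
#   choose_model("not-a-model")  -> "HuggingFaceH4/zephyr-7b-beta"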
@spaces.GPU(duration=duration)
def respond(message, history: list[tuple[str, str]], model, system_message, max_tokens, temperature, top_p):
    print(model)
    model_name = choose_model(model)
    # The access token is read from the Space secret named 'deepseekv2'
    client = InferenceClient(model_name, token=os.getenv('deepseekv2'))

    # Build an OpenAI-style message list from the system prompt and the chat history
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Stream the completion, yielding the accumulated text so the UI updates live
    response = ""
    for chunk in client.chat_completion(messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p):
        token = chunk.choices[0].delta.content
        if token:  # the final chunk's delta can be empty
            response += token
            yield response
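# Minimal smoke test for respond() (a sketch; assumes the 'deepseekv2' secret is set and,
# on ZeroGPU, that a GPU can be allocated when the decorated function runs):
#   partial = ""
#   for partial in respond("Hello!", [], "Gemma-2-2b", "You are helpful.", 64, 0.7, 0.95):
#       pass
#   print(partial)  # the final accumulated reply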
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            ["DeepSeek-R1-Distill-Qwen-1.5B", "DeepSeek-R1-Distill-Qwen-32B", "Gemma-2-2b", "Gemma-7b",
             "Llama2-13b-chat", "Llama3-8b-Instruct", "Llama3.1-8b-Instruct", "Microsoft-phi-2",
             "Mixtral-8x7B-Instruct", "Zephyr-7b-beta"],
            label="Select Model",
        ),
        gr.Textbox(value="You are a friendly and helpful Chatbot, be concise and straight to the point, avoid excessive reasoning.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
if __name__ == "__main__":
    demo.launch()
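# To run outside the Space (a sketch, assuming the requirements.txt pins above):
#   pip install -r requirements.txt
#   export deepseekv2=<your Hugging Face access token>  # name matches the secret read by InferenceClient
#   python app.py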