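"""Gradio chat Space: streams chat completions for a user-selectable model
through huggingface_hub.InferenceClient (Hugging Face Inference Providers)."""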
import gradio as gr
from huggingface_hub import InferenceClient
import spaces  # spaces==0.32.0; provides the @spaces.GPU decorator for ZeroGPU
import torch
import os
import platform
duration = 5  # GPU allocation per call, in seconds (ZeroGPU)
token = os.getenv('deepseekv2')  # HF access token, stored as a Space secret
provider = 'sambanova'  # alternatives tried: 'together', 'fal-ai', 'replicate', None
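# The token is read from the environment; for local testing one might export
# the same variable first (hypothetical value shown):
#   export deepseekv2=hf_xxx...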
print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
print(f"CUDA version: {torch.version.cuda}")
print(f"Python version: {platform.python_version()}")
print(f"Pytorch version: {torch.__version__}")
print(f"Gradio version: {gr. __version__}")
# print(f"HFhub version: {huggingface_hub.__version__}")
"""
Packages ::::::::::
Is CUDA available: True
CUDA device: NVIDIA A100-SXM4-80GB MIG 3g.40gb
CUDA version: 12.1
Python version: 3.10.13
Pytorch version: 2.4.0+cu121
Gradio version: 5.0.1
"""
# Map UI display names to Hugging Face model IDs.
MODEL_IDS = {
    "DeepSeek-R1-Distill-Qwen-1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "DeepSeek-R1-Distill-Qwen-32B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    "Llama3-8b-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
    "Llama3.1-8b-Instruct": "meta-llama/Llama-3.1-8B-Instruct",
    "Llama2-13b-chat": "meta-llama/Llama-2-13b-chat-hf",
    "Gemma-2-2b": "google/gemma-2-2b-it",
    "Gemma-7b": "google/gemma-7b",
    "Mixtral-8x7B-Instruct": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Microsoft-phi-2": "microsoft/phi-2",
}

def choose_model(model_name):
    # Default to zephyr if no (or an unknown) model is chosen.
    return MODEL_IDS.get(model_name, "HuggingFaceH4/zephyr-7b-beta")
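# For illustration, the mapping behaves like this (hypothetical REPL session):
#   choose_model("Gemma-7b")       -> "google/gemma-7b"
#   choose_model("Zephyr-7b-beta") -> "HuggingFaceH4/zephyr-7b-beta" (fallback)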
@spaces.GPU(duration=duration)
def respond(message, history: list[tuple[str, str]], model, system_message, max_tokens, temperature, top_p):
    print(model)
    model_name = choose_model(model)
    client = InferenceClient(model_name, provider=provider, token=token)
    # Rebuild the conversation in the OpenAI-style messages format.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    # Stream the completion, yielding the accumulated text so the UI updates live.
    response = ""
    for chunk in client.chat_completion(messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p):
        delta = chunk.choices[0].delta.content
        if delta:  # final chunks may carry no content
            response += delta
            yield response
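# A minimal sketch of consuming the streaming generator outside the UI
# (assumes the 'deepseekv2' secret is set in the environment):
#
#   for partial in respond("Hello!", [], "Gemma-2-2b",
#                          "You are a helpful assistant.", 128, 0.7, 0.95):
#       print(partial)  # prints the growing response after each streamed chunk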
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            ["DeepSeek-R1-Distill-Qwen-1.5B", "DeepSeek-R1-Distill-Qwen-32B",
             "Gemma-2-2b", "Gemma-7b", "Llama2-13b-chat", "Llama3-8b-Instruct",
             "Llama3.1-8b-Instruct", "Microsoft-phi-2", "Mixtral-8x7B-Instruct",
             "Zephyr-7b-beta"],
            label="Select Model",
        ),
        gr.Textbox(value="You are a friendly and helpful Chatbot, be concise and straight to the point, avoid excessive reasoning.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
if __name__ == "__main__":
    demo.launch()