# app.py: Gradio chat Space running on Hugging Face ZeroGPU
import gradio as gr
from huggingface_hub import InferenceClient
import spaces  # spaces==0.32.0
import torch
import os
import platform
import requests
from PIL import Image
model = ""
duration = None
token = os.getenv('deepseekv2')
provider = None #'fal-ai' #None #replicate # sambanova
mode = "text-to-text"
print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
print(f"CUDA version: {torch.version.cuda}")
print(f"Python version: {platform.python_version()}")
print(f"Pytorch version: {torch.__version__}")
print(f"Gradio version: {gr. __version__}")
# print(f"HFhub version: {huggingface_hub.__version__}")
"""
Observed runtime environment:
Is CUDA available: True
CUDA device: NVIDIA A100-SXM4-80GB MIG 3g.40gb
CUDA version: 12.1
Python version: 3.10.13
Pytorch version: 2.4.0+cu121
Gradio version: 5.0.1
"""
# Map the dropdown labels to Hugging Face model ids.
MODEL_IDS = {
    "DeepSeek-R1-Distill-Qwen-1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "DeepSeek-R1-Distill-Qwen-32B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    "Llama3-8b-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
    "Llama3.1-8b-Instruct": "meta-llama/Llama-3.1-8B-Instruct",
    "Llama2-13b-chat": "meta-llama/Llama-2-13b-chat-hf",
    "Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "Gemma-2-2b": "google/gemma-2-2b-it",
    "Gemma-7b": "google/gemma-7b",
    "Mixtral-8x7B-Instruct": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Microsoft-phi-2": "microsoft/phi-2",
    "Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct",
}

def choose_model(model_name):
    global mode
    # Only Llama-3.2 Vision takes image input; everything else is plain text.
    mode = "image-to-text" if model_name == "Llama-3.2-11B-Vision-Instruct" else "text-to-text"
    # Default to Zephyr when no (or an unknown) model is chosen.
    return MODEL_IDS.get(model_name, "HuggingFaceH4/zephyr-7b-beta")
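# Quick illustration of the mapping (hypothetical REPL session):
# >>> choose_model("Gemma-7b")
# 'google/gemma-7b'
# >>> choose_model("not-in-the-list")   # unknown names fall back to Zephyr
# 'HuggingFaceH4/zephyr-7b-beta'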
@spaces.GPU(duration=duration)
def respond(message, history: list[tuple[str, str]], model, system_message, max_tokens, temperature, top_p):
    # With multimodal=True, Gradio passes the user turn as {"text": ..., "files": [...]}.
    if isinstance(message, dict):
        message = message.get("text", "")
    model_name = choose_model(model)
    print(model_name)
    if mode == "text-to-text":
        client = InferenceClient(model_name, provider=provider, token=token)
        messages = [{"role": "system", "content": system_message}]
        for user_msg, assistant_msg in history:
            # Skip file attachments in the history; only forward plain text.
            if isinstance(user_msg, str) and user_msg:
                messages.append({"role": "user", "content": user_msg})
            if isinstance(assistant_msg, str) and assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})
        response = ""
        # Stream partial completions back to the UI as tokens arrive.
        for chunk in client.chat_completion(messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p):
            delta = chunk.choices[0].delta.content
            if delta:
                response += delta
                yield response
    else:
        # The image-to-text path is not wired up yet.
        yield "Image-to-text mode is not implemented yet; please choose a text model."
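# Minimal smoke test for `respond` outside the UI, assuming the `deepseekv2`
# secret holds a valid HF token (hypothetical example values; not run on the Space):
# last = ""
# for partial in respond("Hello!", [], "Gemma-2-2b",
#                        "You are a helpful assistant.", 64, 0.7, 0.95):
#     last = partial
# print(last)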
demo = gr.ChatInterface(
respond,
    multimodal=True,
    stop_btn="Stop generation",
title="Ask me anything",
description="Hi there! I am your friendly AI chatbot. Choose from different language models under the Additional Inputs tab below.",
examples=[["Explain quantum computing"], ["Explain forex trading"], ["What is the capital of China?"], ["Make a poem about nature"]],
additional_inputs=[
gr.Dropdown(["DeepSeek-R1-Distill-Qwen-1.5B", "DeepSeek-R1-Distill-Qwen-32B", "Gemma-2-2b", "Gemma-7b", "Llama2-13b-chat", "Llama3-8b-Instruct", "Llama3.1-8b-Instruct", "Llama-3.2-11B-Vision-Instruct", "Microsoft-phi-2", "Mixtral-8x7B-Instruct", "Qwen2.5-Coder-32B-Instruct", "Zephyr-7b-beta"], label="Select Model"),
gr.Textbox(value="You are a friendly and helpful Chatbot, be concise and straight to the point, avoid excessive reasoning.", label="System message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
],
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg",
image = Image.open(requests.get(url, stream=True).raw)
)
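# Note: Gradio passes the `additional_inputs` above to `respond` positionally,
# in order, after (message, history): model, system_message, max_tokens,
# temperature, top_p.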
if __name__ == "__main__":
demo.launch(share=True)