import gradio as gr
from huggingface_hub import InferenceClient
import spaces  # version 0.32.0
import torch
import os
import platform
import requests
from PIL import Image


model = ""
duration = None
token = os.getenv('deepseekv2')
provider = None #'fal-ai' #None #replicate # sambanova
mode = "text-to-text"

print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
print(f"CUDA version: {torch.version.cuda}")
print(f"Python version: {platform.python_version()}")
print(f"Pytorch version: {torch.__version__}")
print(f"Gradio version: {gr. __version__}")
# print(f"HFhub version: {huggingface_hub.__version__}")


"""
Packages ::::::::::
Is CUDA available: True
CUDA device: NVIDIA A100-SXM4-80GB MIG 3g.40gb
CUDA version: 12.1
Python version: 3.10.13
Pytorch version: 2.4.0+cu121
Gradio version: 5.0.1
"""


def choose_model(model_name):
    """Map the dropdown label to a Hugging Face model id and update the inference mode."""
    global mode  # `mode` is read by respond(), so it has to be updated at module level

    model_map = {
        "DeepSeek-R1-Distill-Qwen-1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "DeepSeek-R1-Distill-Qwen-32B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "Llama3-8b-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
        "Llama3.1-8b-Instruct": "meta-llama/Llama-3.1-8B-Instruct",
        "Llama2-13b-chat": "meta-llama/Llama-2-13b-chat-hf",
        "Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct",
        "Gemma-2-2b": "google/gemma-2-2b-it",
        "Gemma-7b": "google/gemma-7b",
        "Mixtral-8x7B-Instruct": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "Microsoft-phi-2": "microsoft/phi-2",
        "Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct",
    }

    # Default to Zephyr if no (or an unknown) model is chosen.
    model = model_map.get(model_name, "HuggingFaceH4/zephyr-7b-beta")

    # Only the Llama 3.2 Vision model is image-to-text; everything else is text-to-text.
    mode = "image-to-text" if model_name == "Llama-3.2-11B-Vision-Instruct" else "text-to-text"

    return model
    
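# Quick illustration of choose_model (kept as comments so startup behaviour is unchanged):
#   choose_model("Gemma-7b")                      -> "google/gemma-7b", mode stays "text-to-text"
#   choose_model("Llama-3.2-11B-Vision-Instruct") -> returns the vision model id, sets mode to "image-to-text"
#   choose_model("some unknown label")            -> falls back to "HuggingFaceH4/zephyr-7b-beta"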

@spaces.GPU(duration=duration)
def respond(message, history: list[tuple[str, str]], model, system_message, max_tokens, temperature, top_p):

    # With multimodal=True, Gradio passes the message as a dict ({"text": ..., "files": [...]}).
    if isinstance(message, dict):
        message = message.get("text", "")

    print(model)
    model_name = choose_model(model)  # also updates the global `mode`

    if mode == "text-to-text":
        client = InferenceClient(model_name, provider=provider, token=token)

        messages = [{"role": "system", "content": system_message}]

        for val in history:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})

        messages.append({"role": "user", "content": message})

        response = ""

        # Stream the completion back to the chat UI token by token.
        for chunk in client.chat_completion(messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p):
            delta = chunk.choices[0].delta.content
            if delta:
                response += delta
                yield response

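# Note: the "image-to-text" path is not handled yet, so selecting the vision model currently yields
# nothing. A minimal sketch of what that branch might look like (an assumption: the provider serves
# the vision model behind the same OpenAI-style chat_completion API, and an image URL such as the
# example `url` defined below is available):
#
#     client = InferenceClient(model_name, provider=provider, token=token)
#     messages = [{"role": "user", "content": [
#         {"type": "text", "text": message},
#         {"type": "image_url", "image_url": {"url": url}},
#     ]}]
#     for chunk in client.chat_completion(messages, max_tokens=max_tokens, stream=True):
#         ...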



# Example image for the vision model (fetched here but not yet wired into the interface).
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
image = Image.open(requests.get(url, stream=True).raw)

demo = gr.ChatInterface(
    respond,
    multimodal=True,
    stop_btn="Stop generation",
    title="Ask me anything",
    description="Hi there! I am your friendly AI chatbot. Choose from different language models under the Additional Inputs tab below.",
    examples=[["Explain quantum computing"], ["Explain forex trading"], ["What is the capital of China?"], ["Make a poem about nature"]],
    additional_inputs=[
        gr.Dropdown(["DeepSeek-R1-Distill-Qwen-1.5B", "DeepSeek-R1-Distill-Qwen-32B", "Gemma-2-2b", "Gemma-7b", "Llama2-13b-chat", "Llama3-8b-Instruct", "Llama3.1-8b-Instruct", "Llama-3.2-11B-Vision-Instruct", "Microsoft-phi-2", "Mixtral-8x7B-Instruct", "Qwen2.5-Coder-32B-Instruct", "Zephyr-7b-beta"], label="Select Model"),
        gr.Textbox(value="You are a friendly and helpful Chatbot, be concise and straight to the point, avoid excessive reasoning.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)


if __name__ == "__main__":
    demo.launch(share=True)