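"""Gradio chat app: a streaming coding assistant for web development.

Streams chat completions from Qwen/Qwen2.5-Coder-7B-Instruct through the
Hugging Face InferenceClient; expects an HF_TOKEN loaded from a .env file.
"""
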
# Previous iteration (kept here for reference): the same app wired to
# mistralai/Mistral-7B-Instruct-v0.3 with max_tokens=1024, since superseded
# by the Qwen2.5-Coder setup below.

import os
import gradio as gr
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
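# Assumes a .env file next to this script containing a line such as:
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxx
# (a Hugging Face access token with permission to call the Inference API).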

# Initialize Hugging Face Inference Client
client = InferenceClient(
    model="Qwen/Qwen2.5-Coder-7B-Instruct",
    token=HF_TOKEN
)
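# The model is pinned at client construction, so the per-call model= argument
# in the streaming call below is redundant but harmless; it is kept explicit.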

# System prompt for coding assistant
system_message = (
    "You are a helpful and experienced coding assistant specialized in web development. "
    "Help the user by generating complete and functional code for building websites. "
    "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
    "based on their requirements."
)

# Streaming chatbot logic
def respond(message, history):
    # Prepare messages with system prompt. With type="messages", Gradio passes
    # history as a list of {"role": ..., "content": ...} dicts, not pairs.
    messages = [{"role": "system", "content": system_message}]
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": message})
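    # messages now holds the full OpenAI-style conversation, e.g.:
    #   [{"role": "system", ...}, ...prior turns..., {"role": "user", "content": message}]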

    # Stream response from the model
    response = ""
    for chunk in client.chat.completions.create(
        model="Qwen/Qwen2.5-Coder-7B-Instruct",
        messages=messages,
        max_tokens=2048,
        temperature=0.7,
        top_p=0.95,
        stream=True,
    ):
        # Skip chunks that carry no choices (these can appear mid-stream)
        if not chunk.choices:
            continue

        # Extract the streamed token; delta.content may be None on some chunks
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response
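
# Manual smoke test (hypothetical, run outside Gradio): drain the generator
# and print the final accumulated response.
#
#   final = ""
#   for partial in respond("Build a minimal HTML landing page", []):
#       final = partial
#   print(final)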

# Create Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")  # OpenAI-style message dicts
    gr.ChatInterface(fn=respond, chatbot=chatbot, type="messages")  # must match the Chatbot format
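    # ChatInterface streams automatically because respond() is a generator:
    # each yielded string replaces the in-progress assistant reply in the UI.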

# Launch app
if __name__ == "__main__":
    demo.launch()