import os
import gradio as gr
from huggingface_hub import InferenceClient
from dotenv import load_dotenv
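# Assumed dependencies: pip install gradio huggingface_hub python-dotenv
# (a recent huggingface_hub release is assumed, for the OpenAI-style client.chat.completions API)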
# Load environment variables
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
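# Assumes HF_TOKEN is supplied via the environment or a local .env file, e.g.:
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxx  (placeholder value)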
# Initialize Hugging Face Inference Client
client = InferenceClient(
model="Qwen/Qwen2.5-Coder-7B-Instruct",
token=HF_TOKEN
)
# System prompt for coding assistant
system_message = (
"You are a helpful and experienced coding assistant specialized in web development. "
"Help the user by generating complete and functional code for building websites. "
"You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
"based on their requirements."
)
# Streaming chatbot logic
def respond(message, history):
    # Prepend the system prompt to the conversation
    messages = [{"role": "system", "content": system_message}]
    # History arrives as a list of {"role": ..., "content": ...} dicts
    # because the Chatbot/ChatInterface below use type="messages"
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": message})
    # Stream response from the model
    response = ""
    for chunk in client.chat.completions.create(
        model="Qwen/Qwen2.5-Coder-7B-Instruct",
        messages=messages,
        max_tokens=2048,
        temperature=0.7,
        top_p=0.95,
        stream=True,
    ):
        # Safely handle empty choices
        if not chunk.choices:
            continue
        # Safely extract token content
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response
# Create Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")  # Use modern message format
    gr.ChatInterface(fn=respond, chatbot=chatbot, type="messages")  # Match format
# Launch app
if __name__ == "__main__":
    demo.launch()