import os

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables (expects HF_TOKEN in a local .env file)
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")

# Initialize the Hugging Face Inference Client
client = InferenceClient(
    model="Qwen/Qwen2.5-Coder-7B-Instruct",
    token=HF_TOKEN,
)

# System prompt for the coding assistant
system_message = (
    "You are a helpful and experienced coding assistant specialized in web development. "
    "Help the user by generating complete and functional code for building websites. "
    "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
    "based on their requirements."
)


# Streaming chatbot logic
def respond(message, history):
    # Prepend the system prompt, then replay the conversation so far.
    # With type="messages", Gradio passes history as a list of
    # {"role": ..., "content": ...} dicts, so each entry can be
    # forwarded to the model directly (no tuple unpacking needed).
    messages = [{"role": "system", "content": system_message}]
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": message})

    # Stream the response from the model, yielding the accumulated text
    # so Gradio renders tokens as they arrive.
    response = ""
    for chunk in client.chat.completions.create(
        model="Qwen/Qwen2.5-Coder-7B-Instruct",
        messages=messages,
        max_tokens=2048,
        temperature=0.7,
        top_p=0.95,
        stream=True,
    ):
        # Some stream chunks carry no choices (e.g. keep-alives); skip them.
        if not chunk.choices:
            continue
        # The final chunk's delta may have no content; default to "".
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response


# Create the Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")  # use the modern message format
    gr.ChatInterface(fn=respond, chatbot=chatbot, type="messages")  # match format

# Launch the app
if __name__ == "__main__":
    demo.launch()
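# Usage sketch (assumptions: the file is saved as app.py and the token has
# Inference API access; adjust the names to your setup):
#   pip install gradio huggingface_hub python-dotenv
#   echo "HF_TOKEN=<your token>" > .env
#   python app.py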