import gradio as gr
from huggingface_hub import InferenceClient


# Function to interact with the model using the Inference API
def chat_with_model(user_input, hf_api_key):
    # Initialize the InferenceClient with the provided API key
    client = InferenceClient(api_key=hf_api_key)

    # Define the messages for the chat (system message tailored for a code assistant)
    messages = [
        {"role": "system", "content": "You are a code assistant that helps with code generation, debugging, and explanations."},
        {"role": "user", "content": user_input}
    ]

    # Create a stream for chat completions using the API
    stream = client.chat.completions.create(
        model="Qwen/Qwen2.5-Coder-32B-Instruct",
        messages=messages,
        max_tokens=500,
        stream=True
    )

    # Collect the generated response from the model.
    # Some streamed chunks carry no text (delta.content can be None),
    # so guard before concatenating to avoid a TypeError.
    response = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta:
            response += delta
    return response


# Create the Gradio interface
iface = gr.Interface(
    fn=chat_with_model,
    inputs=[
        gr.Textbox(lines=5, placeholder="Ask me anything about coding..."),
        gr.Textbox(lines=1, placeholder="Enter your Hugging Face API key", type="password")  # API key input
    ],
    outputs="text",
    title="Code Assistant with Qwen2.5-Coder",
    description="A code assistant that helps you with code generation, debugging, and explanations using the Qwen2.5-Coder model via the Hugging Face Inference API."
)

# Launch the interface
iface.launch()
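
# Optional smoke test (a sketch, not part of the app): you could call the
# handler directly instead of going through the UI. This assumes your token
# is exported as HF_TOKEN; that variable name is an illustrative choice,
# nothing in the app requires it. Left commented out because iface.launch()
# above blocks until the server is stopped.
#
#   import os
#   print(chat_with_model("Write a Python function that reverses a string.",
#                         os.environ["HF_TOKEN"]))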