import gradio as gr
from huggingface_hub import InferenceClient


# Function to interact with the model using the Inference API
def chat_with_model(user_input, hf_api_key):
    # Initialize the InferenceClient with the provided API key
    client = InferenceClient(api_key=hf_api_key)

    # Define the messages for the chat (system message tailored for a code assistant)
    messages = [
        {"role": "system", "content": "You are a code assistant that helps with code generation, debugging, and explanations."},
        {"role": "user", "content": user_input}
    ]

    # Create a stream for chat completions using the API
    stream = client.chat.completions.create(
        model="Qwen/Qwen2.5-Coder-32B-Instruct",
        messages=messages,
        max_tokens=500,
        stream=True
    )

    # Collect the generated response from the model.
    # Some streamed chunks carry no text (delta.content can be None),
    # so guard before concatenating to avoid a TypeError.
    response = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta:
            response += delta
    return response


# Create the Gradio interface
iface = gr.Interface(
    fn=chat_with_model,
    inputs=[
        gr.Textbox(lines=5, placeholder="Ask me anything about coding..."),
        gr.Textbox(lines=1, placeholder="Enter your Hugging Face API key", type="password")  # API key input
    ],
    outputs="text",
    title="Code Assistant with Qwen2.5-Coder",
    description="A code assistant that helps you with code generation, debugging, and explanations using the Qwen2.5-Coder model via the Hugging Face Inference API."
)

# Launch the interface
iface.launch()
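
# Optional smoke test (a sketch, not part of the app): you could call the
# handler directly instead of going through the UI. This assumes your token
# is exported as HF_TOKEN; that variable name is an illustrative choice,
# nothing in the app requires it. Left commented out because iface.launch()
# above blocks until the server is stopped.
#
#   import os
#   print(chat_with_model("Write a Python function that reverses a string.",
#                         os.environ["HF_TOKEN"]))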