import gradio as gr
import os
import spaces
from transformers import GemmaTokenizer, AutoModelForCausalLM

# Set an environment variable
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Load the tokenizer and model
tokenizer = GemmaTokenizer.from_pretrained("google/codegemma-7b-it")
model = AutoModelForCausalLM.from_pretrained("google/codegemma-7b-it", device_map="auto")

@spaces.GPU(duration=120)
def codegemma(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
    """
    Generate a response using the CodeGemma model.

    Args:
        message (str): The input message.
        history (list): The conversation history used by ChatInterface.
        temperature (float): The temperature for generating the response.
        max_new_tokens (int): The maximum number of new tokens to generate.

    Returns:
        str: The generated response.
    """
    input_ids = tokenizer(message, return_tensors="pt").to("cuda:0")
    outputs = model.generate(
        **input_ids,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
    )
    response = tokenizer.decode(outputs[0])
    return response


placeholder = """
<div style="opacity: 0.65;">
    <img src="https://ysharma-dummy-chat-app.hf.space/file=/tmp/gradio/7dd7659cff2eab51f0f5336f378edfca01dd16fa/gemma_lockup_vertical_full-color_rgb.png" style="width:30%;">
    <br><b>CodeGemma-7B-IT Chatbot</b>
</div>
"""


# Gradio block
chatbot=gr.Chatbot(placeholder=placeholder,)
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("# CODEGEMMA-7b-IT")
    gr.ChatInterface(codegemma,
                     chatbot=chatbot,
                     fill_height=True,
                     additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
                     additional_inputs=[
                         gr.Slider(0, 1, 0.95, label="Temperature", render=False),
                         gr.Slider(128, 4096, 512, label="Max new tokens", render=False ),
                         ],
                     examples=[["Write a Python function to calculate the nth fibonacci number."]],
                     cache_examples=False,
                     )
    

if __name__ == "__main__":
    demo.launch(debug=False)