# LLM.C 1.5B Demo

import os
import subprocess
import sys

# Best-effort runtime install of minijinja. Invoking pip as
# `<this interpreter> -m pip` guarantees the package lands in the environment
# that is actually running this script, and passing an argument list avoids
# going through a shell. check=False keeps the original behaviour of carrying
# on even if the install fails.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "minijinja"],
    check=False,
)
import gradio as gr
from huggingface_hub import InferenceClient
import torch
import spaces

# Model repository queried by this demo, and the shared InferenceClient
# instance that generate_text() uses to talk to it.
MODEL_ID = "karpathy/gpt2_1558M_final2_hf"
client = InferenceClient(MODEL_ID)

def _chunk_text(piece):
    """Return the text carried by one streamed chunk, or "" if none is found."""
    if isinstance(piece, str):
        return piece
    if hasattr(piece, 'token'):
        return piece.token.text
    if hasattr(piece, 'generated_text'):
        return piece.generated_text
    return ""


@spaces.GPU
def generate_text(prompt, max_tokens, temperature, top_p):
    """Stream a completion for *prompt*, yielding the text accumulated so far.

    Args:
        prompt: Text passed to ``client.text_generation`` as the prompt.
        max_tokens: Forwarded as ``max_new_tokens``.
        temperature: Forwarded as the sampling temperature.
        top_p: Forwarded as the nucleus-sampling threshold.

    Yields:
        The cumulative generated text after each streamed chunk. If the
        stream finishes without producing any text, a single apology
        message is yielded instead.
    """
    stream = client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )

    accumulated = ""
    for piece in stream:
        # Chunks may be plain strings or objects exposing `token` /
        # `generated_text`; the helper normalises them to text.
        accumulated += _chunk_text(piece)
        yield accumulated

    if accumulated:
        return
    yield "I apologize, but I couldn't generate a response."

def clear_input():
    """Return an empty string, used to blank the prompt textbox."""
    blank = ""
    return blank

# Canned prompts loaded into the prompt box by the example buttons.
unicorn_example = (
    "In a shocking finding, scientist discovered a herd of unicorns living in "
    "a remote, previously unexplored valley, in the Andes Mountains. "
    "Even more surprising to the researchers was the fact that the unicorns "
    "spoke perfect English."
)
time_travel_example = (
    "Explain the grandfather paradox in time travel "
    "and propose a potential resolution."
)

# Build the demo UI: a prompt box, an output box, action buttons, example
# prompt buttons, and a collapsed panel of sampling settings. Components are
# laid out in the order they are created below.
with gr.Blocks() as demo:
    # Centered page title.
    gr.Markdown("<h1 style='text-align: center;'>LLM.C 1.5B Demo</h1>")
    
    # Text areas for the user's prompt and for the generated text.
    prompt = gr.Textbox(lines=3, label='Enter your prompt')
    output = gr.Textbox(lines=10, label='Generated text')
    
    # Primary actions, placed side by side.
    with gr.Row():
        clear_button = gr.Button("🧹 Clear input")
        submit = gr.Button("🚀 Generate")
    
    # One button per canned example prompt.
    gr.Markdown("### Example prompts")
    with gr.Row():
        example1 = gr.Button("🦄 Unicorn Discovery")
        example2 = gr.Button("⏳ Time Travel Paradox")

    # Generation parameters, hidden in an accordion that starts closed.
    with gr.Accordion("Advanced Settings", open=False):
        max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens")
        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)")

    # Set up event handlers
    # Generate: pass the prompt and the three slider values to generate_text
    # and send what it yields to the output box.
    submit.click(generate_text, inputs=[prompt, max_tokens, temperature, top_p], outputs=output)
    # Clear: overwrite the prompt box with the empty string from clear_input.
    clear_button.click(clear_input, inputs=[], outputs=prompt)
    # Example buttons: load the corresponding canned prompt into the prompt box.
    example1.click(lambda: unicorn_example, inputs=[], outputs=prompt)
    example2.click(lambda: time_travel_example, inputs=[], outputs=prompt)

    # Footer section describing the project.
    # TODO: the body text below is a placeholder; replace it with a real description.
    gr.Markdown(
        """
        ## About LLM.C
        some stuff about llmc
        """
    )

# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()