import os os.system('pip install minijinja') import gradio as gr from huggingface_hub import InferenceClient import torch import spaces # Initialize the client with your model client = InferenceClient("karpathy/gpt2_1558M_final2_hf") @spaces.GPU def generate_text(prompt, max_tokens, temperature, top_p): response = "" for chunk in client.text_generation( prompt, max_new_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p, ): if isinstance(chunk, str): response += chunk elif hasattr(chunk, 'token'): response += chunk.token.text elif hasattr(chunk, 'generated_text'): response += chunk.generated_text yield response if not response: yield "I apologize, but I couldn't generate a response." def clear_input(): return "" # Define example prompts unicorn_example = "In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English." time_travel_example = "Explain the grandfather paradox in time travel and propose a potential resolution." with gr.Blocks() as demo: gr.Markdown("