import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


def load_model():
    """
    Load the DeepSeek-R1 model and tokenizer.

    Note: the model's remote code relies on flash_attn, so PyTorch with
    CUDA support and the flash-attn package must be installed first.
    """
    try:
        model = AutoModelForCausalLM.from_pretrained(
            "deepseek-ai/DeepSeek-R1",
            trust_remote_code=True,
            torch_dtype="auto",  # use the dtype stored in the checkpoint
            device_map="auto",   # spread weights across available devices (requires accelerate)
        )
        tokenizer = AutoTokenizer.from_pretrained(
            "deepseek-ai/DeepSeek-R1",
            trust_remote_code=True,
        )
        # Wrap model and tokenizer in a text-generation pipeline
        return pipeline("text-generation", model=model, tokenizer=tokenizer)
    except Exception as e:
        return f"Model Loading Error: {e}"


model_pipeline = load_model()


def process_text(input_text):
    """
    Use the loaded DeepSeek-R1 pipeline to generate text.
    """
    # load_model() returns an error string if loading failed
    if isinstance(model_pipeline, str):
        return f"Error: {model_pipeline}"
    try:
        # max_new_tokens bounds only the generated continuation; the original
        # max_length=200 would also have counted the prompt tokens toward the limit.
        outputs = model_pipeline(
            input_text, max_new_tokens=200, num_return_sequences=1
        )
        return outputs[0]["generated_text"]
    except Exception as e:
        return f"Inference Error: {e}"


with gr.Blocks() as demo:
    gr.Markdown(
        "# DeepSeek-R1 Text Generator\n"
        "Enter a prompt and generate text using the DeepSeek-R1 model."
    )
    input_box = gr.Textbox(
        lines=5, label="Input Prompt", placeholder="Type your prompt here..."
    )
    generate_btn = gr.Button("Generate")
    output_box = gr.Textbox(
        lines=10, label="Generated Text", placeholder="Generated text appears here..."
    )
    generate_btn.click(fn=process_text, inputs=input_box, outputs=output_box)

if __name__ == "__main__":
    demo.launch()
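
# --- Optional smoke test without the UI (a sketch, not part of the original script) ---
# Assuming the model loaded successfully, process_text() can be exercised
# directly, e.g. from a Python REPL:
#
#     from app import process_text  # assumes this file is saved as app.py (hypothetical name)
#     print(process_text("Write a haiku about GPUs."))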