import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


def load_model():
    """
    Load the DeepSeek-R1 model and tokenizer.

    Note: the model's remote code relies on flash_attn, so PyTorch with
    CUDA support and the flash-attn package must be installed first.
    """
    try:
        model = AutoModelForCausalLM.from_pretrained(
            "deepseek-ai/DeepSeek-R1",
            trust_remote_code=True,
            torch_dtype="auto",  # use the dtype stored in the checkpoint
            device_map="auto",   # spread weights across available devices (requires accelerate)
        )
        tokenizer = AutoTokenizer.from_pretrained(
            "deepseek-ai/DeepSeek-R1",
            trust_remote_code=True,
        )
        # Wrap model and tokenizer in a text-generation pipeline
        return pipeline("text-generation", model=model, tokenizer=tokenizer)
    except Exception as e:
        return f"Model Loading Error: {e}"


model_pipeline = load_model()


def process_text(input_text):
    """
    Use the loaded DeepSeek-R1 pipeline to generate text.
    """
    # load_model() returns an error string if loading failed
    if isinstance(model_pipeline, str):
        return f"Error: {model_pipeline}"
    try:
        # max_new_tokens bounds only the generated continuation; the original
        # max_length=200 would also have counted the prompt tokens toward the limit.
        outputs = model_pipeline(
            input_text, max_new_tokens=200, num_return_sequences=1
        )
        return outputs[0]["generated_text"]
    except Exception as e:
        return f"Inference Error: {e}"


with gr.Blocks() as demo:
    gr.Markdown(
        "# DeepSeek-R1 Text Generator\n"
        "Enter a prompt and generate text using the DeepSeek-R1 model."
    )
    input_box = gr.Textbox(
        lines=5, label="Input Prompt", placeholder="Type your prompt here..."
    )
    generate_btn = gr.Button("Generate")
    output_box = gr.Textbox(
        lines=10, label="Generated Text", placeholder="Generated text appears here..."
    )
    generate_btn.click(fn=process_text, inputs=input_box, outputs=output_box)

if __name__ == "__main__":
    demo.launch()
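
# --- Optional smoke test without the UI (a sketch, not part of the original script) ---
# Assuming the model loaded successfully, process_text() can be exercised
# directly, e.g. from a Python REPL:
#
#     from app import process_text  # assumes this file is saved as app.py (hypothetical name)
#     print(process_text("Write a haiku about GPUs."))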