import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
import torch

# Initialize model and tokenizer
MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

# The checkpoint is already GPTQ-quantized, so no calibration dataset is needed;
# this config only overrides load-time options. ExLlama kernels are CUDA-only,
# so they are disabled for CPU execution (use_exllama replaces the older
# disable_exllama flag in recent transformers versions).
quantization_config = GPTQConfig(
    bits=4,                # 4-bit quantization (matches the checkpoint)
    use_exllama=False      # ExLlama kernels require a GPU
)

# Load model with CPU optimizations
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    torch_dtype=torch.float32,      # CPU-friendly precision
    low_cpu_mem_usage=True,
    offload_folder="offload",       # Disk offloading for large layers
    offload_state_dict=True         # Memory-efficient state loading
)

def generate_text(prompt, max_length=150, temperature=0.7):
    """Generate text with optimized inference settings."""
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_length),  # Cap generated tokens regardless of prompt length
            temperature=temperature,
            pad_token_id=tokenizer.eos_token_id,
            num_beams=1,               # Single beam for minimal memory
            do_sample=True,            # Enable sampling for creativity
            top_p=0.95,                # Nucleus sampling
            repetition_penalty=1.1     # Reduce repetition
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Gradio interface with enhanced UX
# (the "monospace" class referenced by elem_classes is defined in the css string)
with gr.Blocks(
    theme="soft",
    css=".gr-box {border-radius: 10px} .monospace textarea {font-family: monospace}"
) as demo:
    gr.Markdown("""
    # 🧠 DeepSeek Coder 1.3B Text Generator
    *Optimized for CPU execution on HuggingFace Free Tier*
    """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your programming/code-related question...",
                lines=5,
                max_lines=10,
                elem_classes=["monospace"]
            )
            with gr.Row():
                max_length = gr.Slider(50, 500, value=150, label="Max New Tokens", step=10)
                temperature = gr.Slider(0.1, 1.0, value=0.7, label="Creativity", step=0.05)
            submit = gr.Button("🚀 Generate", variant="primary")

        output = gr.Textbox(
            label="Generated Output",
            lines=12,
            max_lines=20,
            elem_classes=["monospace"]
        )

    submit.click(
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output
    )

    # Each example row must match the number of components in `inputs`,
    # so only the prompt box is wired here; the sliders keep their current values.
    gr.Examples(
        examples=[
            ["Write a Python function to calculate Fibonacci numbers"],
            ["Explain the difference between lists and tuples in Python"],
            ["Create a simple Flask API endpoint for user registration"]
        ],
        inputs=[prompt],
        cache_examples=False
    )

if __name__ == "__main__":
    demo.launch()
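
# --- Optional: drive the running app programmatically (illustrative sketch) ---
# Once `demo.launch()` is serving, the gradio_client package can call the
# endpoint registered by `submit.click`. This is an assumption, not part of
# the original app: the api_name below follows Gradio's default of naming the
# endpoint after the wired function, and may differ across Gradio versions.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860")          # local URL printed at launch
# result = client.predict(
#     "Write a Python function to calculate Fibonacci numbers",  # prompt
#     150,                                          # max new tokens
#     0.7,                                          # temperature
#     api_name="/generate_text"                     # assumed default endpoint name
# )
# print(result)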