# app.py — uploaded by druvx13 (commit ee2590c, "Update app.py", 3.16 kB).
# The lines above the imports were Hugging Face file-viewer page text, not program code.
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
import torch
# Initialize model and tokenizer
MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

# Configure GPTQ for CPU inference.
quantization_config = GPTQConfig(
    bits=4,  # 4-bit quantization
    # NOTE(review): `dataset` is a calibration setting; presumably unused when
    # loading an already-quantized checkpoint — confirm before removing.
    dataset="c4",
    model_seqlen=2048,  # Match model's maximum context length
    # The ExLlama kernels only work when the whole model sits on a GPU, so
    # they must be disabled explicitly for CPU inference.
    use_exllama=False,
)

# Load model with CPU optimizations
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="cpu",  # Pin every module to the CPU explicitly
    torch_dtype=torch.float32,  # CPU-friendly precision
    low_cpu_mem_usage=True,
    offload_folder="offload",  # Disk offloading for large layers
    offload_state_dict=True,  # Memory-efficient state loading
)
def generate_text(prompt, max_length=150, temperature=0.7):
    """Generate a sampled continuation of ``prompt`` with the module-level model.

    Args:
        prompt: Text to tokenize and feed to the model.
        max_length: Upper bound on the number of *newly generated* tokens.
            It is applied as ``max_new_tokens`` so that a prompt longer than
            this value no longer makes ``generate`` fail; coerced to ``int``
            because UI sliders may deliver floats.
        temperature: Sampling temperature, coerced to ``float``.

    Returns:
        The first generated sequence decoded to text with special tokens
        removed.
    """
    # Follow the model's device instead of hard-coding one.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_length),
            temperature=float(temperature),
            pad_token_id=tokenizer.eos_token_id,
            num_beams=1,  # Single-beam for minimal memory
            do_sample=True,  # Enable sampling for creativity
            top_p=0.95,  # Nucleus sampling
            repetition_penalty=1.1,  # Reduce repetition
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Gradio interface: a prompt box with sampling controls, a generate button,
# an output box, and clickable example prompts.
# NOTE(review): the `with` nesting below was reconstructed from a copy whose
# indentation had been stripped — confirm it matches the intended layout.
with gr.Blocks(theme="soft", css=".gr-box {border-radius: 10px}") as demo:
    gr.Markdown("""
    # 🧠 DeepSeek Coder 1.3B Text Generator
    *Optimized for CPU execution on HuggingFace Free Tier*
    """)
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your programming/code-related question...",
                lines=5,
                max_lines=10,
                elem_classes=["monospace"],
            )
            with gr.Row():
                max_length = gr.Slider(50, 500, value=150, label="Max Length", step=10)
                temperature = gr.Slider(0.1, 1.0, value=0.7, label="Creativity", step=0.05)
            # Label repaired: the emoji had been mis-decoded into mojibake.
            submit = gr.Button("🚀 Generate", variant="primary")
        output = gr.Textbox(
            label="Generated Output",
            lines=12,
            max_lines=20,
            elem_classes=["monospace"],
        )
    # Run generation with the current prompt and slider values on click.
    submit.click(
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output,
    )
    # Each example row provides a prompt only.
    gr.Examples(
        examples=[
            ["Write a Python function to calculate Fibonacci numbers"],
            ["Explain the difference between list and tuples in Python"],
            ["Create a simple Flask API endpoint for user registration"],
        ],
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output,
        cache_examples=False,
    )
# Launch the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()