import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
import torch
# Initialize model and tokenizer
MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
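# Defensive default (an assumption, not guaranteed by the model card): some causal-LM
# tokenizers ship without a dedicated pad token. Falling back to EOS is a common
# convention and matters if you later batch padded inputs.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token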
# Configure GPTQ for inference. The checkpoint is already quantized, so no
# calibration dataset is needed; this config only overrides runtime options.
quantization_config = GPTQConfig(
    bits=4,              # 4-bit quantization, matching the pre-quantized checkpoint
    use_exllama=False,   # exllama kernels require a GPU; disable them for CPU inference
    model_seqlen=2048    # match model's maximum context length
)
# Load model with CPU optimizations
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    torch_dtype=torch.float32,   # CPU-friendly precision
    low_cpu_mem_usage=True,
    offload_folder="offload",    # disk offloading for large layers
    offload_state_dict=True      # memory-efficient state loading
)
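# Rough, illustrative sanity check: report the total weight/buffer footprint so you
# can confirm the quantized model fits within the free-tier RAM budget.
weight_bytes = sum(t.numel() * t.element_size() for t in model.state_dict().values())
print(f"Approximate model footprint: {weight_bytes / 1024**2:.0f} MiB")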
def generate_text(prompt, max_length=150, temperature=0.7):
    """Generate text with optimized inference settings."""
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,            # count only newly generated tokens, not the prompt
            temperature=temperature,
            pad_token_id=tokenizer.eos_token_id,
            num_beams=1,                          # single beam for minimal memory
            do_sample=True,                       # enable sampling for creativity
            top_p=0.95,                           # nucleus sampling
            repetition_penalty=1.1                # reduce repetition
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
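# Quick smoke test (illustrative; uncomment to verify generation before launching the UI):
# print(generate_text("Write a Python function that reverses a string", max_length=80))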
# Gradio interface with enhanced UX
with gr.Blocks(theme="soft", css=".gr-box {border-radius: 10px}") as demo:
    gr.Markdown("""
    # 🧠 DeepSeek Coder 1.3B Text Generator
    *Optimized for CPU execution on HuggingFace Free Tier*
    """)
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your programming/code-related question...",
                lines=5,
                max_lines=10,
                elem_classes=["monospace"]
            )
            with gr.Row():
                max_length = gr.Slider(50, 500, value=150, label="Max Length", step=10)
                temperature = gr.Slider(0.1, 1.0, value=0.7, label="Creativity", step=0.05)
            submit = gr.Button("🚀 Generate", variant="primary")
        output = gr.Textbox(
            label="Generated Output",
            lines=12,
            max_lines=20,
            elem_classes=["monospace"]
        )
    submit.click(
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output
    )
    gr.Examples(
        examples=[
            # Each example row supplies all three inputs (prompt, max_length, temperature)
            ["Write a Python function to calculate Fibonacci numbers", 150, 0.7],
            ["Explain the difference between lists and tuples in Python", 150, 0.7],
            ["Create a simple Flask API endpoint for user registration", 150, 0.7]
        ],
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output,
        cache_examples=False
    )
if __name__ == "__main__":
    demo.launch()
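# Optional tweak: on a single shared CPU, Gradio's request queue serializes inference
# so concurrent generate() calls don't compete for memory, e.g.:
# demo.queue(max_size=8).launch()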