import os

import gradio as gr
import torch
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer, pipeline

# Model loading with memory optimization
MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"
cache_dir = "./model_cache"
os.makedirs(cache_dir, exist_ok=True)

# Load tokenizer and model with 4-bit GPTQ quantization
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, cache_dir=cache_dir)
model = AutoGPTQForCausalLM.from_quantized(
    MODEL_NAME,
    model_basename="model",
    use_safetensors=True,
    quantize_config=None,
    device_map="auto",
    low_cpu_mem_usage=True,
    cache_dir=cache_dir,
)

# Create generation pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
)

def generate_text(prompt, max_length=512, temperature=0.7):
    """Generate a code completion for an instruction prompt."""
    full_prompt = f"Instruct: {prompt}\nOutput:"
    with torch.inference_mode():
        response = generator(
            full_prompt,
            max_new_tokens=int(max_length),  # Gradio sliders return floats; generate expects an int
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )[0]["generated_text"]
    # Keep only the completion after the prompt
    return response.split("Output:")[-1].strip()
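
# A minimal smoke test, assuming the model loaded successfully; uncomment to
# verify generation before launching the UI:
#   print(generate_text("Write a Python function to reverse a string", max_length=64))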

# Gradio interface with enhanced UX
with gr.Blocks(theme="soft", css=".gradio-container {max-width: 800px; margin: auto;}") as demo:
    gr.Markdown("""
    # 🧠 DeepSeek Coder 1.3B Instruct (GPTQ)
    *Text-to-Code Generation App*

    Enter a programming instruction below and adjust the parameters for optimal output.
    """)

    with gr.Row():
        prompt = gr.Textbox(
            label="Enter your instruction",
            placeholder="Write a Python function to calculate Fibonacci numbers...",
            lines=4,
        )
    with gr.Row():
        max_length = gr.Slider(64, 2048, value=512, step=1, label="Max Output Length")
        temperature = gr.Slider(0.1, 1.5, value=0.7, label="Creativity (Temperature)")

    output = gr.Textbox(label="Generated Output", lines=10)
    submit = gr.Button("✨ Generate Code", variant="primary")
    submit.click(
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output,
    )

    gr.Markdown("""
    ### ℹ️ How it works
    - Uses a GPTQ-quantized model for memory-efficient inference
    - Output length is capped by the Max Output Length slider
    - Temperature controls randomness (0.1 = focused, 1.5 = creative)
    """)

demo.launch()
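
# A minimal sketch of querying the running app from another process with the
# gradio_client package. The "/predict" endpoint name is an assumption
# (Gradio's default for a single event handler); confirm with demo.view_api().
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict(
#       "Write a Python function to reverse a string",  # prompt
#       256,                                            # max_length
#       0.7,                                            # temperature
#       api_name="/predict",
#   )
#   print(result)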