import gradio as gr
from transformers import AutoTokenizer, pipeline
from auto_gptq import AutoGPTQForCausalLM
import torch
import os
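
# Model configuration: TheBloke's GPTQ build of DeepSeek Coder 1.3B Instruct from
# the Hugging Face Hub. Weights are cached locally so repeated launches skip the download.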
MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"
cache_dir = "./model_cache"
os.makedirs(cache_dir, exist_ok=True)
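
# Load the tokenizer and the pre-quantized weights. quantize_config=None tells
# auto_gptq to read the quantization settings shipped with the checkpoint, and
# device_map="auto" lets accelerate place the layers on the available GPU (or CPU).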
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, cache_dir=cache_dir)
model = AutoGPTQForCausalLM.from_quantized(
    MODEL_NAME,
    model_basename="model",
    use_safetensors=True,
    quantize_config=None,
    device_map="auto",
    low_cpu_mem_usage=True,
    cache_dir=cache_dir
)
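
# Wrap the quantized model and tokenizer in a Transformers text-generation pipeline.
# Per-request generation settings (max_new_tokens, temperature) are supplied at call
# time in generate_text below.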
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto"
)
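
# NOTE: the "Instruct:/Output:" template below is this app's own convention. The
# DeepSeek Coder Instruct model card describes a "### Instruction:" / "### Response:"
# style prompt, so switching to that format may improve results.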
def generate_text(prompt, max_length=512, temperature=0.7):
    """Generate a completion for the given instruction and return only the model's answer."""
    full_prompt = f"Instruct: {prompt}\nOutput:"
    with torch.inference_mode():
        response = generator(
            full_prompt,
            max_new_tokens=max_length,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )[0]["generated_text"]
    # The pipeline returns the prompt plus the completion; keep only the part
    # after the "Output:" marker.
    return response.split("Output:")[-1].strip()
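
# Build the Gradio interface: an instruction textbox, generation-parameter sliders,
# and an output box wired to generate_text.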
with gr.Blocks(theme="soft", css=".gradio-container {max-width: 800px; margin: auto;}") as demo:
    gr.Markdown("""
    # 🧠 DeepSeek Coder 1.3B Instruct (GPTQ)
    *Text-to-Code Generation App*

    Enter a programming instruction below and adjust parameters for optimal output.
    """)
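
    # Input widgets: a multi-line textbox for the instruction plus sliders for the
    # two generation parameters exposed to the user.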
    with gr.Row():
        prompt = gr.Textbox(
            label="Enter your instruction",
            placeholder="Write a Python function to calculate Fibonacci numbers...",
            lines=4
        )

    with gr.Row():
        max_length = gr.Slider(64, 2048, value=512, label="Max Output Length")
        temperature = gr.Slider(0.1, 1.5, value=0.7, label="Creativity (Temperature)")

    output = gr.Textbox(label="Generated Output", lines=10)
    submit = gr.Button("✨ Generate Code", variant="primary")
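
    # Wire the button to generate_text: the three components in `inputs` are passed
    # positionally as (prompt, max_length, temperature).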
    submit.click(
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output
    )

    gr.Markdown("""
    ### ℹ️ How it works
    - Uses a GPTQ-quantized model for efficient inference
    - Output length is capped by the Max Output Length slider
    - Temperature controls randomness (0.1 = strict, 1.5 = creative)
    """)

demo.launch()
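
# For a temporary public URL (e.g. when running in a hosted notebook), launch with
# demo.launch(share=True) instead.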