import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
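# The GPTQ build of DeepSeek Coder 1.3B stores weights at roughly 4-bit
# precision, small enough to fit in the RAM of a CPU-only Hugging Face Space.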
MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# The GPTQ quantization parameters are read from the quantize_config.json
# shipped in the model repo, so no explicit quantization_config is passed.
# (auto_gptq's BaseQuantizeConfig, which was passed here originally, is not
# a valid transformers quantization config and would raise at load time.)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="cpu",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)
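# Inference only: eval() disables dropout and other training-time behavior.
model.eval()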
def generate_text(prompt, max_length=100, temperature=0.7):
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,  # budget for generated tokens only,
                                        # matching the "Max Output Length" slider
            do_sample=True,  # sampling must be enabled for temperature to apply
            temperature=temperature,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Strip the echoed prompt and return only the newly generated tokens.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
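# Quick sanity check outside the UI (hypothetical prompt, safe to delete):
#   print(generate_text("Write a Python function that reverses a string", 64))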

with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# 🧠 DeepSeek Coder 1.3B Text Generator\nOptimized for CPU execution on HuggingFace Spaces")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your programming/code-related question...",
                lines=5,
            )
            max_length = gr.Slider(50, 500, value=150, label="Max Output Length")
            temperature = gr.Slider(0.1, 1.0, value=0.7, label="Creativity Level")
            submit = gr.Button("Generate Code", variant="primary")

    output = gr.Textbox(label="Generated Output", lines=10)
    submit.click(
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output,
    )
    # Each example row must supply a value for every component listed in
    # `inputs`, so the rows also carry defaults for max_length and temperature.
    gr.Examples(
        examples=[
            ["Write a Python function to calculate Fibonacci numbers", 150, 0.7],
            ["Explain the difference between lists and tuples in Python", 150, 0.7],
            ["Create a simple Flask API endpoint for user registration", 150, 0.7],
        ],
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output,
        cache_examples=False,
    )

if __name__ == "__main__":
    demo.launch()
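# If long CPU generations hit Gradio's request timeout on Spaces, enabling the
# built-in request queue is a common mitigation (a sketch, assuming a Gradio
# version that exposes Blocks.queue()):
#
#   demo.queue().launch()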