import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
import torch

MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
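
# The checkpoint is already quantized, so no calibration dataset is needed at
# load time; passing one (e.g. dataset="c4") would ask transformers to run a
# fresh quantization pass instead of just loading the weights.
# `use_exllama=False` keeps the CUDA-only exllama kernels disabled for CPU use.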
quantization_config = GPTQConfig(
    bits=4,
    use_exllama=False
)
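
# Low-memory loading path; the offload options give accelerate somewhere to
# spill weights to disk if RAM runs short while the model is materialized.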
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
    offload_folder="offload",
    offload_state_dict=True
)


def generate_text(prompt, max_length=150, temperature=0.7):
    """Generate text with optimized inference settings"""
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,  # cap new tokens only; max_length would count the prompt too
            temperature=temperature,
            pad_token_id=tokenizer.eos_token_id,
            num_beams=1,
            do_sample=True,
            top_p=0.95,
            repetition_penalty=1.1
        )

    # Decode only the newly generated tokens so the prompt is not echoed back.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
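
# Quick sanity check outside the UI (uncomment to try):
# print(generate_text("Write a hello-world script in Python", max_length=60))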


with gr.Blocks(theme="soft", css=".gr-box {border-radius: 10px}") as demo:
    gr.Markdown("""
    # 🧠 DeepSeek Coder 1.3B Text Generator
    *Optimized for CPU execution on HuggingFace Free Tier*
    """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your programming/code-related question...",
                lines=5,
                max_lines=10,
                elem_classes=["monospace"]
            )
            with gr.Row():
                max_length = gr.Slider(50, 500, value=150, label="Max Length", step=10)
                temperature = gr.Slider(0.1, 1.0, value=0.7, label="Creativity", step=0.05)
            submit = gr.Button("🚀 Generate", variant="primary")

    output = gr.Textbox(
        label="Generated Output",
        lines=12,
        max_lines=20,
        elem_classes=["monospace"]
    )

    submit.click(
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output
    )
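
    # Each example row must supply a value for every component in `inputs`:
    # the prompt text plus the two slider defaults.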
    gr.Examples(
        examples=[
            ["Write a Python function to calculate Fibonacci numbers", 150, 0.7],
            ["Explain the difference between lists and tuples in Python", 150, 0.7],
            ["Create a simple Flask API endpoint for user registration", 150, 0.7]
        ],
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output,
        cache_examples=False
    )


if __name__ == "__main__":
    demo.launch()
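
# Note: on a shared Space, `demo.queue().launch()` can help long generations
# avoid request timeouts.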