druvx13 committed
Commit 39dd6f6 · verified · Parent(s): 6a1f3f6

Update app.py

Files changed (1)
  1. app.py +46 -60
app.py CHANGED
@@ -1,69 +1,49 @@
 import gradio as gr
-from transformers import AutoTokenizer, pipeline
-from auto_gptq import AutoGPTQForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from auto_gptq import BaseQuantizeConfig
 import torch
-import os
 
-# Model loading with memory optimization
+# Initialize model and tokenizer
 MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"
-cache_dir = "./model_cache"
-os.makedirs(cache_dir, exist_ok=True)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
 
-# Load tokenizer and model with 4-bit quantization
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, cache_dir=cache_dir)
-model = AutoGPTQForCausalLM.from_quantized(
+model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    model_basename="model",
-    use_safetensors=True,
-    quantize_config=None,
-    device_map="auto",
-    low_cpu_mem_usage=True,
-    cache_dir=cache_dir
+    device_map="cpu",  # Optimized for CPU
+    quantization_config=BaseQuantizeConfig(),  # Required for GPTQ models
+    torch_dtype=torch.float32,  # Better CPU compatibility
+    low_cpu_mem_usage=True
 )
 
-# Create generation pipeline
-generator = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    device_map="auto"
-)
-
-def generate_text(prompt, max_length=512, temperature=0.7):
-    """Generate text with safety checks and context awareness"""
-    full_prompt = f"Instruct: {prompt}\nOutput:"
-    with torch.inference_mode():
-        response = generator(
-            full_prompt,
-            max_new_tokens=max_length,
+def generate_text(prompt, max_length=100, temperature=0.7):
+    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_length=max_length,
             temperature=temperature,
-            do_sample=True,
             pad_token_id=tokenizer.eos_token_id
-        )[0]["generated_text"]
-    # Remove prompt from output
-    return response.split("Output:")[-1].strip()
-
-# Gradio interface with enhanced UX
-with gr.Blocks(theme="soft", css=".gradio-container {max-width: 800px; margin: auto;}") as demo:
-    gr.Markdown("""
-    # 🧠 DeepSeek Coder 1.3B Instruct (GPTQ)
-    *Text-to-Code Generation App*
-    Enter a programming instruction below and adjust parameters for optimal output.
-    """)
-
-    with gr.Row():
-        prompt = gr.Textbox(
-            label="Enter your instruction",
-            placeholder="Write a Python function to calculate Fibonacci numbers...",
-            lines=4
         )
 
-    with gr.Row():
-        max_length = gr.Slider(64, 2048, value=512, label="Max Output Length")
-        temperature = gr.Slider(0.1, 1.5, value=0.7, label="Creativity (Temperature)")
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+# Gradio UI
+with gr.Blocks(theme="soft") as demo:
+    gr.Markdown("# 🧠 DeepSeek Coder 1.3B Text Generator\nOptimized for CPU execution on HuggingFace Spaces")
 
-    output = gr.Textbox(label="Generated Output", lines=10)
-    submit = gr.Button("✨ Generate Code", variant="primary")
+    with gr.Row():
+        with gr.Column():
+            prompt = gr.Textbox(
+                label="Input Prompt",
+                placeholder="Enter your programming/code-related question...",
+                lines=5
+            )
+            max_length = gr.Slider(50, 500, value=150, label="Max Output Length")
+            temperature = gr.Slider(0.1, 1.0, value=0.7, label="Creativity Level")
+            submit = gr.Button("Generate Code", variant="primary")
+
+        output = gr.Textbox(label="Generated Output", lines=10)
 
     submit.click(
         fn=generate_text,
@@ -71,11 +51,17 @@ with gr.Blocks(theme="soft", css=".gradio-container {max-width: 800px; margin: a
         outputs=output
     )
 
-    gr.Markdown("""
-    ### ℹ️ How it works
-    - Uses GPTQ-quantized model for efficient inference
-    - Automatically handles context window management
-    - Temperature controls randomness (0.1=strict, 1.5=creative)
-    """)
+    gr.Examples(
+        examples=[
+            ["Write a Python function to calculate Fibonacci numbers"],
+            ["Explain the difference between list and tuples in Python"],
+            ["Create a simple Flask API endpoint for user registration"]
+        ],
+        fn=generate_text,
+        inputs=[prompt, max_length, temperature],
+        outputs=output,
+        cache_examples=False  # Save memory
+    )
 
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
 
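Similarly, in the new generate_text, model.generate only honors temperature when sampling is enabled, and max_length counts the prompt tokens as well as the completion (the removed version's max_new_tokens capped only the completion). A sketch of the call with both adjusted, untested and keeping the commit's variable names:

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,  # cap the completion only, not prompt + completion
        do_sample=True,             # without this, temperature is ignored
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )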