Spaces:

druvx13
/

deepseek-coder-1.3b-instruct-GPTQ

Running

App Files Files Community

druvx13 committed 7 days ago

Commit

82b1c50

verified ·

1 Parent(s): 1247b4b

Create app.py

Browse files

Files changed (1) hide show

app.py +81 -0

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import gradio as gr
+from transformers import AutoTokenizer, pipeline
+from auto_gptq import AutoGPTQForCausalLM
+import torch
+import os
+# Model loading with memory optimization
+MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"
+cache_dir = "./model_cache"
+os.makedirs(cache_dir, exist_ok=True)
+# Load tokenizer and model with 4-bit quantization
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, cache_dir=cache_dir)
+model = AutoGPTQForCausalLM.from_quantized(
+    MODEL_NAME,
+    model_basename="model",
+    use_safetensors=True,
+    quantize_config=None,
+    device_map="auto",
+    low_cpu_mem_usage=True,
+    cache_dir=cache_dir
+)
+# Create generation pipeline
+generator = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device_map="auto"
+)
+def generate_text(prompt, max_length=512, temperature=0.7):
+    """Generate text with safety checks and context awareness"""
+    full_prompt = f"Instruct: {prompt}\nOutput:"
+    with torch.inference_mode():
+        response = generator(
+            full_prompt,
+            max_new_tokens=max_length,
+            temperature=temperature,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )[0]["generated_text"]
+    # Remove prompt from output
+    return response.split("Output:")[-1].strip()
+# Gradio interface with enhanced UX
+with gr.Blocks(theme="soft", css=".gradio-container {max-width: 800px; margin: auto;}") as demo:
+    gr.Markdown("""
+    # 🧠 DeepSeek Coder 1.3B Instruct (GPTQ)
+    *Text-to-Code Generation App*
+    Enter a programming instruction below and adjust parameters for optimal output.
+    """)
+    with gr.Row():
+        prompt = gr.Textbox(
+            label="Enter your instruction",
+            placeholder="Write a Python function to calculate Fibonacci numbers...",
+            lines=4
+        )
+    with gr.Row():
+        max_length = gr.Slider(64, 2048, value=512, label="Max Output Length")
+        temperature = gr.Slider(0.1, 1.5, value=0.7, label="Creativity (Temperature)")
+    output = gr.Textbox(label="Generated Output", lines=10)
+    submit = gr.Button("✨ Generate Code", variant="primary")
+    submit.click(
+        fn=generate_text,
+        inputs=[prompt, max_length, temperature],
+        outputs=output
+    )
+    gr.Markdown("""
+    ### ℹ️ How it works
+    - Uses GPTQ-quantized model for efficient inference
+    - Automatically handles context window management
+    - Temperature controls randomness (0.1=strict, 1.5=creative)
+    """)
+demo.launch()