druvx13 committed · verified
Commit 82b1c50 · 1 Parent(s): 1247b4b

Create app.py

Files changed (1)
app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
+ import gradio as gr
+ from transformers import AutoTokenizer, pipeline
+ from auto_gptq import AutoGPTQForCausalLM
+ import torch
+ import os
+
+ # Model loading with memory optimization
+ MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"
+ cache_dir = "./model_cache"
+ os.makedirs(cache_dir, exist_ok=True)
+
+ # Load tokenizer and the 4-bit GPTQ-quantized model
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, cache_dir=cache_dir)
+ model = AutoGPTQForCausalLM.from_quantized(
+     MODEL_NAME,
+     model_basename="model",
+     use_safetensors=True,
+     quantize_config=None,  # read quantize_config.json from the model repo
+     device_map="auto",
+     low_cpu_mem_usage=True,
+     cache_dir=cache_dir
+ )
+
+ # Create generation pipeline; device placement was already handled by
+ # device_map="auto" at load time, so it is not passed again here
+ generator = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer
+ )
+
+ def generate_text(prompt, max_length=512, temperature=0.7):
+     """Generate a completion and strip the prompt from the output."""
+     full_prompt = f"Instruct: {prompt}\nOutput:"
+     with torch.inference_mode():
+         response = generator(
+             full_prompt,
+             max_new_tokens=max_length,
+             temperature=temperature,
+             do_sample=True,
+             pad_token_id=tokenizer.eos_token_id
+         )[0]["generated_text"]
+     # Remove the prompt from the output
+     return response.split("Output:")[-1].strip()
+
+ # Gradio interface
+ with gr.Blocks(theme="soft", css=".gradio-container {max-width: 800px; margin: auto;}") as demo:
+     gr.Markdown("""
+     # 🧠 DeepSeek Coder 1.3B Instruct (GPTQ)
+     *Text-to-Code Generation App*
+     Enter a programming instruction below and adjust the parameters for optimal output.
+     """)
+
+     with gr.Row():
+         prompt = gr.Textbox(
+             label="Enter your instruction",
+             placeholder="Write a Python function to calculate Fibonacci numbers...",
+             lines=4
+         )
+
+     with gr.Row():
+         max_length = gr.Slider(64, 2048, value=512, label="Max Output Length")
+         temperature = gr.Slider(0.1, 1.5, value=0.7, label="Creativity (Temperature)")
+
+     output = gr.Textbox(label="Generated Output", lines=10)
+     submit = gr.Button("✨ Generate Code", variant="primary")
+
+     submit.click(
+         fn=generate_text,
+         inputs=[prompt, max_length, temperature],
+         outputs=output
+     )
+
+     gr.Markdown("""
+     ### ℹ️ How it works
+     - Uses a GPTQ-quantized model for memory-efficient inference
+     - Wraps your instruction in the model's Instruct/Output prompt format
+     - Temperature controls randomness (0.1 = strict, 1.5 = creative)
+     """)
+
+ demo.launch()
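
Once this app is running, the click handler can also be called programmatically. Below is a minimal sketch using gradio_client, assuming the default local URL and assuming Gradio exposes the endpoint under the handler's function name (/generate_text); both details are assumptions about the deployed instance, not part of this commit:

from gradio_client import Client

# Hypothetical local endpoint; a deployed Space would use its own URL.
client = Client("http://127.0.0.1:7860/")
result = client.predict(
    "Write a Python function to calculate Fibonacci numbers",  # prompt
    512,   # max output length (new tokens)
    0.7,   # temperature
    api_name="/generate_text",  # assumed: Gradio names endpoints after the fn
)
print(result)

The three positional arguments mirror the inputs=[prompt, max_length, temperature] wiring of the submit.click event above.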