druvx13 committed on
Commit ee2590c · verified · 1 Parent(s): 1400e75

Update app.py

Files changed (1)
  1. app.py +41 -17
app.py CHANGED
@@ -1,21 +1,30 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from auto_gptq import BaseQuantizeConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
 import torch
 
 # Initialize model and tokenizer
 MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
 
+# Configure GPTQ for inference
+quantization_config = GPTQConfig(
+    bits=4,             # 4-bit quantization
+    dataset="c4",       # Required dummy dataset for config
+    model_seqlen=2048   # Match model's maximum context length
+)
+
+# Load model with CPU optimizations
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    device_map="cpu",  # Optimized for CPU
-    quantization_config=BaseQuantizeConfig(),  # Required for GPTQ models
-    torch_dtype=torch.float32,  # Better CPU compatibility
-    low_cpu_mem_usage=True
+    quantization_config=quantization_config,
+    torch_dtype=torch.float32,   # CPU-friendly precision
+    low_cpu_mem_usage=True,
+    offload_folder="offload",    # Disk offloading for large layers
+    offload_state_dict=True      # Memory-efficient state loading
 )
 
-def generate_text(prompt, max_length=100, temperature=0.7):
+def generate_text(prompt, max_length=150, temperature=0.7):
+    """Generate text with optimized inference settings"""
     inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
 
     with torch.no_grad():
@@ -23,27 +32,42 @@ def generate_text(prompt, max_length=100, temperature=0.7):
             **inputs,
             max_length=max_length,
             temperature=temperature,
-            pad_token_id=tokenizer.eos_token_id
+            pad_token_id=tokenizer.eos_token_id,
+            num_beams=1,             # Single-beam for minimal memory
+            do_sample=True,          # Enable sampling for creativity
+            top_p=0.95,              # Nucleus sampling
+            repetition_penalty=1.1   # Reduce repetition
         )
 
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-# Gradio UI
-with gr.Blocks(theme="soft") as demo:
-    gr.Markdown("# 🧠 DeepSeek Coder 1.3B Text Generator\nOptimized for CPU execution on HuggingFace Spaces")
+# Gradio Interface with Enhanced UX
+with gr.Blocks(theme="soft", css=".gr-box {border-radius: 10px}") as demo:
+    gr.Markdown("""
+    # 🧠 DeepSeek Coder 1.3B Text Generator
+    *Optimized for CPU execution on HuggingFace Free Tier*
+    """)
 
     with gr.Row():
         with gr.Column():
             prompt = gr.Textbox(
                 label="Input Prompt",
                 placeholder="Enter your programming/code-related question...",
-                lines=5
+                lines=5,
+                max_lines=10,
+                elem_classes=["monospace"]
            )
-            max_length = gr.Slider(50, 500, value=150, label="Max Output Length")
-            temperature = gr.Slider(0.1, 1.0, value=0.7, label="Creativity Level")
-            submit = gr.Button("Generate Code", variant="primary")
+            with gr.Row():
+                max_length = gr.Slider(50, 500, value=150, label="Max Length", step=10)
+                temperature = gr.Slider(0.1, 1.0, value=0.7, label="Creativity", step=0.05)
+            submit = gr.Button("🚀 Generate", variant="primary")
 
-        output = gr.Textbox(label="Generated Output", lines=10)
+        output = gr.Textbox(
+            label="Generated Output",
+            lines=12,
+            max_lines=20,
+            elem_classes=["monospace"]
+        )
 
     submit.click(
         fn=generate_text,
@@ -60,7 +84,7 @@ with gr.Blocks(theme="soft") as demo:
         fn=generate_text,
        inputs=[prompt, max_length, temperature],
         outputs=output,
-        cache_examples=False  # Save memory
+        cache_examples=False
     )
 
 if __name__ == "__main__":
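
For reviewers who want to exercise the new loading path outside the Space, below is a minimal smoke test mirroring the updated app.py. It is a sketch, not part of this commit: it assumes a stack where transformers can load GPTQ checkpoints (optimum and auto-gptq installed), and since the exllama GPTQ kernels target GPUs, a CPU-only run is assumed to additionally need use_exllama=False (available on recent transformers versions of GPTQConfig).

# smoke_test.py -- hypothetical reviewer script, not part of this commit.
# Assumes: pip install torch transformers optimum auto-gptq
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig

MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"  # same checkpoint as app.py

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="cpu",
    # Assumption: exllama kernels are GPU-only, so disable them for a CPU-only
    # run; this still requires an auto-gptq build with CPU support.
    quantization_config=GPTQConfig(bits=4, use_exllama=False),
)

prompt = "Write a Python function that reverses a string."
inputs = tokenizer(prompt, return_tensors="pt")
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=128,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

One note on the new GPTQConfig block in the diff: for a checkpoint that is already quantized, transformers normally reads the quantization settings from the model's own config.json, so the dataset="c4" calibration argument appears to matter only when quantizing a model from scratch.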