druvx13 committed
Commit 39dd6f6 · verified · Parent(s): 6a1f3f6

Update app.py

Files changed (1)
  1. app.py +46 -60
app.py CHANGED
@@ -1,69 +1,49 @@
 import gradio as gr
-from transformers import AutoTokenizer, pipeline
-from auto_gptq import AutoGPTQForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from auto_gptq import BaseQuantizeConfig
 import torch
-import os
 
-# Model loading with memory optimization
+# Initialize model and tokenizer
 MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"
-cache_dir = "./model_cache"
-os.makedirs(cache_dir, exist_ok=True)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
 
-# Load tokenizer and model with 4-bit quantization
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, cache_dir=cache_dir)
-model = AutoGPTQForCausalLM.from_quantized(
+model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    model_basename="model",
-    use_safetensors=True,
-    quantize_config=None,
-    device_map="auto",
-    low_cpu_mem_usage=True,
-    cache_dir=cache_dir
+    device_map="cpu",  # Optimized for CPU
+    quantization_config=BaseQuantizeConfig(),  # Required for GPTQ models
+    torch_dtype=torch.float32,  # Better CPU compatibility
+    low_cpu_mem_usage=True
 )
 
-# Create generation pipeline
-generator = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    device_map="auto"
-)
-
-def generate_text(prompt, max_length=512, temperature=0.7):
-    """Generate text with safety checks and context awareness"""
-    full_prompt = f"Instruct: {prompt}\nOutput:"
-    with torch.inference_mode():
-        response = generator(
-            full_prompt,
-            max_new_tokens=max_length,
+def generate_text(prompt, max_length=100, temperature=0.7):
+    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_length=max_length,
             temperature=temperature,
-            do_sample=True,
             pad_token_id=tokenizer.eos_token_id
-        )[0]["generated_text"]
-    # Remove prompt from output
-    return response.split("Output:")[-1].strip()
-
-# Gradio interface with enhanced UX
-with gr.Blocks(theme="soft", css=".gradio-container {max-width: 800px; margin: auto;}") as demo:
-    gr.Markdown("""
-    # 🧠 DeepSeek Coder 1.3B Instruct (GPTQ)
-    *Text-to-Code Generation App*
-    Enter a programming instruction below and adjust parameters for optimal output.
-    """)
-
-    with gr.Row():
-        prompt = gr.Textbox(
-            label="Enter your instruction",
-            placeholder="Write a Python function to calculate Fibonacci numbers...",
-            lines=4
         )
 
-    with gr.Row():
-        max_length = gr.Slider(64, 2048, value=512, label="Max Output Length")
-        temperature = gr.Slider(0.1, 1.5, value=0.7, label="Creativity (Temperature)")
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+# Gradio UI
+with gr.Blocks(theme="soft") as demo:
+    gr.Markdown("# 🧠 DeepSeek Coder 1.3B Text Generator\nOptimized for CPU execution on HuggingFace Spaces")
 
-    output = gr.Textbox(label="Generated Output", lines=10)
-    submit = gr.Button("✨ Generate Code", variant="primary")
+    with gr.Row():
+        with gr.Column():
+            prompt = gr.Textbox(
+                label="Input Prompt",
+                placeholder="Enter your programming/code-related question...",
+                lines=5
+            )
+            max_length = gr.Slider(50, 500, value=150, label="Max Output Length")
+            temperature = gr.Slider(0.1, 1.0, value=0.7, label="Creativity Level")
+            submit = gr.Button("Generate Code", variant="primary")
+
+        output = gr.Textbox(label="Generated Output", lines=10)
 
     submit.click(
         fn=generate_text,
@@ -71,11 +51,17 @@ with gr.Blocks(theme="soft", css=".gradio-container {max-width: 800px; margin: a
         outputs=output
     )
 
-    gr.Markdown("""
-    ### ℹ️ How it works
-    - Uses GPTQ-quantized model for efficient inference
-    - Automatically handles context window management
-    - Temperature controls randomness (0.1=strict, 1.5=creative)
-    """)
+    gr.Examples(
+        examples=[
+            ["Write a Python function to calculate Fibonacci numbers"],
+            ["Explain the difference between list and tuples in Python"],
+            ["Create a simple Flask API endpoint for user registration"]
+        ],
+        fn=generate_text,
+        inputs=[prompt, max_length, temperature],
+        outputs=output,
+        cache_examples=False  # Save memory
+    )
 
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
 
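Similarly, in the new generate_text, model.generate only honors temperature when sampling is enabled, and max_length counts the prompt tokens as well as the completion (the removed version's max_new_tokens capped only the completion). A sketch of the call with both adjusted, untested and keeping the commit's variable names:

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,  # cap the completion only, not prompt + completion
        do_sample=True,             # without this, temperature is ignored
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )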