Backup-bdg committed on
Commit
c880c8c
·
verified ·
1 Parent(s): d260d90

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -1
app.py CHANGED
@@ -1,3 +1,76 @@
1
  import gradio as gr
 
 
 
2
 
3
- gr.load("models/bigcode/starcoder2-15b").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import spaces
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
 
6
# Model configuration.
CHECKPOINT = "bigcode/starcoder2-15b"  # Hugging Face model id used below
# NOTE(review): DEVICE is computed but never referenced in this file —
# device placement is handled by device_map="auto"; kept for compatibility.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
9
+
10
# Loads tokenizer + model on demand inside the ZeroGPU context and generates.
@spaces.GPU(duration=120)  # 120 s budget covers model loading + generation
def load_model_and_generate(prompt, max_length=256, temperature=0.2, top_p=0.95):
    """Generate a code completion for ``prompt`` with bigcode/starcoder2-15b.

    Parameters
    ----------
    prompt : str
        Code prompt to complete.
    max_length : int | float
        Total token budget (prompt + completion). Gradio sliders may deliver
        floats, so the value is cast to ``int`` before generation.
    temperature : float
        Sampling temperature.
    top_p : float
        Nucleus-sampling cutoff.

    Returns
    -------
    str
        The generated text, or an ``"Error: ..."`` message that the UI
        displays verbatim instead of crashing.
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)

        # bfloat16 + device_map="auto" keeps the 15B model within GPU memory.
        model = AutoModelForCausalLM.from_pretrained(
            CHECKPOINT,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )

        # Do NOT repeat device_map / torch_dtype here: the model object is
        # already instantiated and placed, and transformers rejects device
        # arguments for pre-loaded models.
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

        result = pipe(
            prompt,
            max_length=int(max_length),      # HF generate requires an int
            temperature=float(temperature),
            top_p=float(top_p),
            num_return_sequences=1,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            # StarCoder2 defines no pad token; reuse EOS to silence warnings.
            pad_token_id=tokenizer.eos_token_id,
            truncation=True,
        )

        return result[0]["generated_text"]
    except Exception as e:
        # Deliberate catch-all: surface any failure in the UI textbox.
        return f"Error: {str(e)}"
50
+
51
# Assemble the Gradio UI: two markdown headers, the input widgets, the
# output textbox, and a button wired to the generation function.
with gr.Blocks() as demo:
    gr.Markdown("# StarCoder2-15B Code Generation")
    gr.Markdown("Enter a code prompt (e.g., 'def print_hello_world():') to generate code using bigcode/starcoder2-15b.")

    # Inputs, in display order.
    prompt_box = gr.Textbox(label="Code Prompt", placeholder="Enter your code prompt here...")
    length_slider = gr.Slider(50, 512, value=256, label="Max Length", step=1)
    temp_slider = gr.Slider(0.1, 1.0, value=0.2, label="Temperature", step=0.1)
    topp_slider = gr.Slider(0.1, 1.0, value=0.95, label="Top P", step=0.05)

    # Output area.
    result_box = gr.Textbox(label="Generated Code")

    # Trigger: button click runs the generator with the slider values.
    generate_btn = gr.Button("Generate")
    generate_btn.click(
        fn=load_model_and_generate,
        inputs=[prompt_box, length_slider, temp_slider, topp_slider],
        outputs=result_box,
    )

# Start the web app.
demo.launch()