harpreetsahota committed on
Commit
0ab46f1
·
verified ·
1 Parent(s): e7f93ff

Update app.py

Files changed (1)
  1. app.py +67 -142
app.py CHANGED
@@ -1,44 +1,77 @@
- # Fork of the SantaCoder demo (https://huggingface.co/spaces/bigcode/santacoder-demo)
-
  import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
- from transformers import pipeline
- import os
- import torch
- from typing import Union, Tuple, List

- description = """# <p style="text-align: center; color: #292b47;"> 🏎️ <span style='color: #3264ff;'>DeciCoder:</span> A Fast Code Generation Model💨 </p>
- <span style='color: #292b47;'>Welcome to <a href="https://huggingface.co/Deci/DeciCoder-1b" style="color: #3264ff;">DeciCoder</a>!
- DeciCoder is a 1B parameter code generation model trained on The Stack dataset and released under an Apache 2.0 license. It's capable of writing code in Python,
- JavaScript, and Java. It's a code-completion model, not an instruction-tuned model; you should prompt the model with a function signature and docstring
- and let it complete the rest. The model can also do infilling, specify where you would like the model to complete code with the <span style='color: #3264ff;'>&lt;FILL_HERE&gt;</span>
- token.</span>"""

- token = os.environ["HUGGINGFACEHUB_API_TOKEN"]
- device="cuda" if torch.cuda.is_available() else "cpu"

- FIM_PREFIX = "<fim_prefix>"
- FIM_MIDDLE = "<fim_middle>"
- FIM_SUFFIX = "<fim_suffix>"
- FIM_PAD = "<fim_pad>"
- EOD = "<|endoftext|>"

- GENERATION_TITLE= "<p style='font-size: 24px; color: #292b47;'>💻 Your generated code:</p>"

- tokenizer_fim = AutoTokenizer.from_pretrained("Deci/DeciCoder-1b", use_auth_token=token, padding_side="left")

- tokenizer_fim.add_special_tokens({
-     "additional_special_tokens": [EOD, FIM_PREFIX, FIM_MIDDLE, FIM_SUFFIX, FIM_PAD],
-     "pad_token": EOD,
- })

- tokenizer = AutoTokenizer.from_pretrained("Deci/DeciCoder-1b", use_auth_token=token, force_download=True)

- model = AutoModelForCausalLM.from_pretrained("Deci/DeciCoder-1b", trust_remote_code=True, use_auth_token=token, force_download=True).to(device)

- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

  def post_processing(prompt: str, completion: str) -> str:
      """
@@ -55,108 +88,20 @@ def post_processing(prompt: str, completion: str) -> str:
      prompt = "<span style='color: #7484b7;'>" + prompt + "</span>"
      code_html = f"<br><hr><br><pre style='font-size: 12px'><code>{prompt}{completion}</code></pre><br><hr>"
      return GENERATION_TITLE + code_html
-
-
- def post_processing_fim(prefix: str, middle: str, suffix: str) -> str:
-     """
-     Post-processes the FIM (fill in the middle) generated code with HTML styling.
-
-     Args:
-         prefix (str): The prefix part of the code.
-         middle (str): The generated middle part of the code.
-         suffix (str): The suffix part of the code.
-
-     Returns:
-         str: The HTML-styled code with prefix, middle, and suffix.
-     """
-     prefix = "<span style='color: #7484b7;'>" + prefix + "</span>"
-     middle = "<span style='color: #ff5b86;'>" + middle + "</span>"
-     suffix = "<span style='color: #7484b7;'>" + suffix + "</span>"
-     code_html = f"<br><hr><br><pre style='font-size: 12px'><code>{prefix}{middle}{suffix}</code></pre><br><hr>"
-     return GENERATION_TITLE + code_html
-
- def fim_generation(prompt: str, max_new_tokens: int, temperature: float) -> str:
-     """
-     Generates code for FIM (fill in the middle) task.
-
-     Args:
-         prompt (str): The input code prompt with <FILL_HERE> token.
-         max_new_tokens (int): Maximum number of tokens to generate.
-         temperature (float): Sampling temperature for generation.
-
-     Returns:
-         str: The HTML-styled code with filled missing part.
-     """
-     prefix = prompt.split("<FILL_HERE>")[0]
-     suffix = prompt.split("<FILL_HERE>")[1]
-     [middle] = infill((prefix, suffix), max_new_tokens, temperature)
-     return post_processing_fim(prefix, middle, suffix)
-
- def extract_fim_part(s: str) -> str:
-     """
-     Extracts the FIM (fill in the middle) part from the generated string.
-
-     Args:
-         s (str): The generated string with FIM tokens.
-
-     Returns:
-         str: The extracted FIM part.
-     """
-     # Find the index of FIM_MIDDLE
-     start = s.find(FIM_MIDDLE) + len(FIM_MIDDLE)
-     stop = s.find(EOD, start) or len(s)
-     return s[start:stop]
-
- def infill(prefix_suffix_tuples: Union[Tuple[str, str], List[Tuple[str, str]]], max_new_tokens: int, temperature: float) -> List[str]:
-     """
-     Generates the infill for the given prefix and suffix tuples.
-
-     Args:
-         prefix_suffix_tuples (Union[Tuple[str, str], List[Tuple[str, str]]]): Prefix and suffix tuples.
-         max_new_tokens (int): Maximum number of tokens to generate.
-         temperature (float): Sampling temperature for generation.
-
-     Returns:
-         List[str]: The list of generated infill strings.
-     """
-     if type(prefix_suffix_tuples) == tuple:
-         prefix_suffix_tuples = [prefix_suffix_tuples]
-
-     prompts = [f"{FIM_PREFIX}{prefix}{FIM_SUFFIX}{suffix}{FIM_MIDDLE}" for prefix, suffix in prefix_suffix_tuples]
-     # `return_token_type_ids=False` is essential, or we get nonsense output.
-     inputs = tokenizer_fim(prompts, return_tensors="pt", padding=True, return_token_type_ids=False).to(device)
-     with torch.no_grad():
-         outputs = model.generate(
-             **inputs,
-             do_sample=True,
-             temperature=temperature,
-             max_new_tokens=max_new_tokens,
-             pad_token_id=tokenizer.pad_token_id
-         )
-     # WARNING: cannot use skip_special_tokens, because it blows away the FIM special tokens.
-     return [
-         extract_fim_part(tokenizer_fim.decode(tensor, skip_special_tokens=False)) for tensor in outputs
-     ]
-
- def code_generation(prompt: str, max_new_tokens: int, temperature: float = 0.2, seed: int = 42) -> str:
      """
      Generates code based on the given prompt. Handles both regular and FIM (Fill-In-Missing) generation.

      Args:
          prompt (str): The input code prompt.
-         max_new_tokens (int): Maximum number of tokens to generate.
-         temperature (float, optional): Sampling temperature for generation. Defaults to 0.2.
-         seed (int, optional): Random seed for reproducibility. Defaults to 42.

      Returns:
          str: The HTML-styled generated code.
      """
-     if "<FILL_HERE>" in prompt:
-         return fim_generation(prompt, max_new_tokens, temperature=temperature)
-     else:
-         completion = pipe(prompt, do_sample=True, top_p=0.95, temperature=temperature, max_new_tokens=max_new_tokens)[0]['generated_text']
-         completion = completion[len(prompt):]
-         return post_processing(prompt, completion)

  demo = gr.Blocks(
      css=".gradio-container {background-color: #FAFBFF; color: #292b47}"
@@ -167,31 +112,11 @@ with demo:
      with colum_2:
          gr.Markdown(value=description)
          code = gr.Code(lines=5, language="python", label="Input code", value="def nth_element_in_fibonnaci(element):\n \"\"\"Returns the nth element of the Fibonnaci sequence.\"\"\"")
-
-         with gr.Accordion("Additional settings", open=True):
-             max_new_tokens= gr.Slider(
-                 minimum=8,
-                 maximum=2048,
-                 step=1,
-                 value=80,
-                 label="Number of tokens to generate",
-             )
-             temperature = gr.Slider(
-                 minimum=0.1,
-                 maximum=2.5,
-                 step=0.01,
-                 value=0.2,
-                 label="Temperature",
-             )
-             seed = gr.inputs.Number(
-                 default=42,
-                 label="Enter a seed value (integer)"
-             )
          run = gr.Button(value="👨🏽‍💻 Generate code", size='lg')
          output = gr.HTML(label="💻 Your generated code")


-     event = run.click(code_generation, [code, max_new_tokens, temperature, seed], output, api_name="predict")
      gr.HTML(label="Keep in touch", value="<img src='https://huggingface.co/spaces/Deci/DeciCoder-Demo/resolve/main/deci-coder-banner.png' alt='Keep in touch' style='display: block; color: #292b47; margin: auto; max-width: 800px;'>")

  demo.launch()
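
For reference, the removed infilling path rewrote a `<FILL_HERE>` prompt into DeciCoder-1b's FIM token order before calling `model.generate`. Below is a minimal sketch of that prompt assembly, shown standalone without the model call; `build_fim_prompt` and the example prompt are illustrative and not part of the original file:

```python
# Sketch of the FIM prompt construction performed by the removed fim_generation()/infill() pair.
FIM_PREFIX = "<fim_prefix>"
FIM_MIDDLE = "<fim_middle>"
FIM_SUFFIX = "<fim_suffix>"

def build_fim_prompt(prompt: str) -> str:
    """Rewrite a prompt containing <FILL_HERE> into prefix/suffix/middle FIM order."""
    prefix, suffix = prompt.split("<FILL_HERE>", 1)
    return f"{FIM_PREFIX}{prefix}{FIM_SUFFIX}{suffix}{FIM_MIDDLE}"

# Example: ask the model to fill in a function body.
example = 'def add(a, b):\n    """Add two integers."""\n    <FILL_HERE>\n'
print(build_fim_prompt(example))
```

The text generated after the `<fim_middle>` token (up to `<|endoftext|>`) is what `extract_fim_part` returned as the infilled middle. The updated version of app.py added by this commit follows.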
 
 
 
  import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig

+ description = """# <p style="text-align: center; color: #292b47;"> 🏎️ <span style='color: #3264ff;'>DeciCoder-6B:</span> A Fast Code Generation Model 💨 </p>
+ <span style='color: #292b47;'>Welcome to <a href="https://huggingface.co/Deci/DeciCoder-6B" style="color: #3264ff;">DeciCoder</a>!
+ DeciCoder-6B was trained on the Python, Java, JavaScript, Rust, C++, C, and C# subset of the StarCoder Training Dataset, and it's released under the Apache 2.0 license. This model is capable of code completion and instruction following. It surpasses CodeGen 2.5 7B, CodeLlama 7B, and StarCoder 7B in its supported languages on HumanEval, and leads by 3 points in Python over StarCoderBase 15.5B."""

+ GENERATION_TITLE= "<p style='font-size: 24px; color: #292b47;'>💻 Your generated code:</p>"

+ def instantiate_huggingface_model(
+     model_name,
+     quantization_config=None,
+     device_map="auto",
+     use_cache=True,
+     trust_remote_code=None,
+     pad_token=None,
+     padding_side="left"
+ ):
+     """
+     Instantiate a HuggingFace model with optional quantization using the BitsAndBytes library.
+
+     Parameters:
+     - model_name (str): The name of the model to load from HuggingFace's model hub.
+     - quantization_config (BitsAndBytesConfig, optional): Configuration for model quantization.
+       If None, defaults to a pre-defined configuration for 8-bit quantization.
+     - device_map (str, optional): Device placement strategy for model layers ('auto' by default).
+     - use_cache (bool, optional): Whether to cache model outputs (True by default).
+     - trust_remote_code (bool, optional): Whether to trust remote code for custom layers (None by default).
+     - pad_token (str, optional): The pad token to be used by the tokenizer. If None, uses the EOS token.
+     - padding_side (str, optional): The side on which to pad the sequences ('left' by default).
+
+     Returns:
+     - model (PreTrainedModel): The instantiated model ready for inference or fine-tuning.
+     - tokenizer (PreTrainedTokenizer): The tokenizer associated with the model.
+
+     The function will throw an exception if model loading fails.
+     """

+     # If quantization_config is not provided, use the default configuration
+     if quantization_config is None:
+         quantization_config = BitsAndBytesConfig(
+             load_in_8bit=True,
+             low_cpu_mem_usage=True,
+         )

+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         quantization_config=quantization_config,
+         device_map=device_map,
+         use_cache=use_cache,
+         trust_remote_code=trust_remote_code
+     )

+     tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                               trust_remote_code=trust_remote_code)

+     if pad_token is not None:
+         tokenizer.pad_token = pad_token
+     else:
+         tokenizer.pad_token = tokenizer.eos_token
+
+     tokenizer.padding_side = padding_side

+     return model, tokenizer

+ model, tokenizer = instantiate_huggingface_model("Deci-early-access/DeciCoder-6B", trust_remote_code=True)
+
+ pipe = pipeline("text-generation",
+                 model=model,
+                 tokenizer=tokenizer,
+                 device_map="auto",
+                 max_length=2048,
+                 temperature=1e-3,
+                 )

  def post_processing(prompt: str, completion: str) -> str:
      """

      prompt = "<span style='color: #7484b7;'>" + prompt + "</span>"
      code_html = f"<br><hr><br><pre style='font-size: 12px'><code>{prompt}{completion}</code></pre><br><hr>"
      return GENERATION_TITLE + code_html

+ def code_generation(prompt: str) -> str:
      """
      Generates code based on the given prompt. Handles both regular and FIM (Fill-In-Missing) generation.

      Args:
          prompt (str): The input code prompt.

      Returns:
          str: The HTML-styled generated code.
      """
+     completion = pipe(prompt)[0]['generated_text']
+     completion = completion[len(prompt):]
+     return post_processing(prompt, completion)

  demo = gr.Blocks(
      css=".gradio-container {background-color: #FAFBFF; color: #292b47}"

      with colum_2:
          gr.Markdown(value=description)
          code = gr.Code(lines=5, language="python", label="Input code", value="def nth_element_in_fibonnaci(element):\n \"\"\"Returns the nth element of the Fibonnaci sequence.\"\"\"")
          run = gr.Button(value="👨🏽‍💻 Generate code", size='lg')
          output = gr.HTML(label="💻 Your generated code")


+     event = run.click(code_generation, [code], output)
      gr.HTML(label="Keep in touch", value="<img src='https://huggingface.co/spaces/Deci/DeciCoder-Demo/resolve/main/deci-coder-banner.png' alt='Keep in touch' style='display: block; color: #292b47; margin: auto; max-width: 800px;'>")

  demo.launch()
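
For completeness, the loading path added above can also be exercised outside the Gradio UI, for example to swap the helper's default 8-bit quantization for 4-bit. This is a minimal sketch, assuming the `instantiate_huggingface_model` helper from the updated app.py is importable and a CUDA GPU with bitsandbytes is available; the 4-bit override and the sample prompt are illustrative, not part of this commit:

```python
import torch
from transformers import BitsAndBytesConfig, pipeline

# Optional 4-bit override of the helper's default 8-bit BitsAndBytesConfig.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Helper defined in the updated app.py above.
model, tokenizer = instantiate_huggingface_model(
    "Deci-early-access/DeciCoder-6B",
    quantization_config=nf4_config,
    trust_remote_code=True,
)

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

prompt = 'def fibonacci(n):\n    """Return the nth Fibonacci number."""\n'
completion = generator(prompt, max_new_tokens=128)[0]["generated_text"]
print(completion[len(prompt):])  # strip the echoed prompt, mirroring code_generation()
```

Because the model is loaded with a device_map, the pipeline runs it wherever the quantized weights were placed, so no extra `device` argument is needed.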