menouar committed on
Commit
e75ffde
·
1 Parent(s): 592b663

Update the generated Notebook to push properly to HF

Browse files
app.py CHANGED
@@ -68,6 +68,10 @@ def change_model_selection(model_id):
68
  return None
69
 
70
 
 
 
 
 
71
  def check_valid_input(value):
72
  if isinstance(value, str):
73
  return value and value.strip()
@@ -186,10 +190,12 @@ def generate_code(components: dict[Component, Any]):
186
 
187
  create_merge_lora_cells(notebook['cells'], output_dir)
188
 
 
 
189
  if push_to_hub:
190
  if not should_login:
191
  create_login_hf_cells(notebook['cells'])
192
- push_merged_model_cells(notebook['cells'], output_dir)
193
 
194
  file_name = f"{finetuning_notebook}.ipynb"
195
 
@@ -279,7 +285,10 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
279
  gr.HTML("<h2 style='text-align: center;'>Outputs</h2>")
280
  with gr.Row():
281
  with centered_column():
282
- all_components.update(add_outputs())
 
 
 
283
  with centered_column():
284
  all_components.update(add_outputs1())
285
 
@@ -308,4 +317,25 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
308
  outputs=version_selection
309
  )
310
 
 
 
 
 
 
 
311
  demo.launch(allowed_paths=["/"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  return None
69
 
70
 
71
def display_push_type(value):
    """Toggle the push-type radio's visibility to mirror the checkbox state."""
    updated_radio = gr.Radio(visible=value)
    return updated_radio
73
+
74
+
75
  def check_valid_input(value):
76
  if isinstance(value, str):
77
  return value and value.strip()
 
190
 
191
  create_merge_lora_cells(notebook['cells'], output_dir)
192
 
193
+ push_type_value = get_value(components, PUSH_TYPE_ID)
194
+
195
  if push_to_hub:
196
  if not should_login:
197
  create_login_hf_cells(notebook['cells'])
198
+ push_merged_model_cells(notebook['cells'], output_dir, push_type_value)
199
 
200
  file_name = f"{finetuning_notebook}.ipynb"
201
 
 
285
  gr.HTML("<h2 style='text-align: center;'>Outputs</h2>")
286
  with gr.Row():
287
  with centered_column():
288
+ output_dir_cmp, push_to_hub_cmp = add_outputs()
289
+ all_components.update({output_dir_cmp, push_to_hub_cmp})
290
+ push_type_cmp = add_push_type_cmp()
291
+ all_components.update({push_type_cmp})
292
  with centered_column():
293
  all_components.update(add_outputs1())
294
 
 
317
  outputs=version_selection
318
  )
319
 
320
+ push_to_hub_cmp.change(
321
+ fn=display_push_type,
322
+ inputs=push_to_hub_cmp,
323
+ outputs=push_type_cmp
324
+ )
325
+
326
  demo.launch(allowed_paths=["/"])
327
+
328
+ # Upload metrics to the hub....
329
+ """
330
+ import os
331
+ from huggingface_hub import Repository
332
+
333
+ # Create a repository object
334
+ repo = Repository("Menouar/ft-phi-1")
335
+
336
+ # Push the runs directory
337
+ os.system(f"git -C {repo.local_dir} add output_dir/runs")
338
+ repo.git_commit("Adding TensorBoard logs")
339
+ repo.push_to_hub(commit_message="Adding TensorBoard logs")
340
+
341
+ """
utils/__init__.py CHANGED
@@ -32,8 +32,14 @@ LEARNING_RATE_ID = "learning_rate"
32
  MAX_GRAD_NORM_ID = "max_grad_norm"
33
  WARMUP_RATIO_ID = "warmup_ratio"
34
  LR_SCHEDULER_TYPE_ID = "lr_scheduler_type"
 
35
  OUTPUT_DIR_ID = "output_dir"
 
36
  PUSH_TO_HUB_ID = "push_to_hub"
 
 
 
 
37
  REPORT_TO_ID = "report_to"
38
 
39
  MAX_SEQ_LENGTH_ID = "max_seq_length"
 
32
  MAX_GRAD_NORM_ID = "max_grad_norm"
33
  WARMUP_RATIO_ID = "warmup_ratio"
34
  LR_SCHEDULER_TYPE_ID = "lr_scheduler_type"
35
+
36
  OUTPUT_DIR_ID = "output_dir"
37
+
38
  PUSH_TO_HUB_ID = "push_to_hub"
39
+ PUSH_TYPE_ID = "push_type"
40
+ PUSH_TYPES_ALL = "Push all the outputs"
41
+ PUSH_TYPES_ONLY_MODEL = "Push only the Model and Tokenizer"
42
+
43
  REPORT_TO_ID = "report_to"
44
 
45
  MAX_SEQ_LENGTH_ID = "max_seq_length"
utils/components_creator.py CHANGED
@@ -167,7 +167,7 @@ def add_training_args_3() -> Set[Component]:
167
  return out_components
168
 
169
 
170
- def add_outputs() -> Set[Component]:
171
  output_dir = gr.Textbox(interactive=True,
172
  label="output_dir",
173
  info='The output directory where the model predictions and checkpoints will be written.',
@@ -178,10 +178,18 @@ def add_outputs() -> Set[Component]:
178
  "True, you must specify 'HF_TOKEN'.",
179
  elem_id=PUSH_TO_HUB_ID)
180
 
181
- out_components: Set[Component] = set()
182
- out_components.add(output_dir)
183
- out_components.add(push_to_hub)
184
- return out_components
 
 
 
 
 
 
 
 
185
 
186
 
187
  def add_outputs1() -> Set[Component]:
 
167
  return out_components
168
 
169
 
170
+ def add_outputs() -> (Component, Component):
171
  output_dir = gr.Textbox(interactive=True,
172
  label="output_dir",
173
  info='The output directory where the model predictions and checkpoints will be written.',
 
178
  "True, you must specify 'HF_TOKEN'.",
179
  elem_id=PUSH_TO_HUB_ID)
180
 
181
+ return output_dir, push_to_hub
182
+
183
+
184
+ def add_push_type_cmp() -> Component:
185
+ push_type = gr.Radio([PUSH_TYPES_ONLY_MODEL, PUSH_TYPES_ALL],
186
+ label="Output Push Option",
187
+ info="Select whether to push only the Model and Tokenizer or all the outputs.",
188
+ interactive=True,
189
+ visible=False,
190
+ value=PUSH_TYPES_ALL,
191
+ elem_id=PUSH_TYPE_ID)
192
+ return push_type
193
 
194
 
195
  def add_outputs1() -> Set[Component]:
utils/notebook_generator.py CHANGED
@@ -2,7 +2,7 @@ from typing import Optional
2
 
3
  import nbformat as nbf
4
 
5
- from utils import FTDataSet
6
 
7
 
8
  def create_install_libraries_cells(cells: list):
@@ -389,7 +389,7 @@ merged_model.save_pretrained("{output_dir}", safe_serialization=True, max_shard_
389
  cells.append(code_cell)
390
 
391
 
392
- def push_merged_model_cells(cells: list, output_dir):
393
  text_cell = nbf.v4.new_markdown_cell(
394
  """### Push the Merged model as well as the Tokenizer to HF hub""")
395
 
@@ -398,6 +398,33 @@ merged_model.push_to_hub("{output_dir}", use_temp_dir=False)
398
 
399
  tokenizer.push_to_hub("{output_dir}", use_temp_dir=False)
400
  """
401
- code_cell = nbf.v4.new_code_cell(code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  cells.append(text_cell)
403
  cells.append(code_cell)
 
2
 
3
  import nbformat as nbf
4
 
5
+ from utils import FTDataSet, PUSH_TYPES_ONLY_MODEL
6
 
7
 
8
  def create_install_libraries_cells(cells: list):
 
389
  cells.append(code_cell)
390
 
391
 
392
def push_merged_model_cells(cells: list, output_dir, push_type_value):
    """Append notebook cells that push the fine-tuned outputs to the HF Hub.

    Depending on push_type_value, the generated code cell either pushes only
    the merged model and tokenizer, or uploads the whole output directory
    (checkpoints, TensorBoard runs, ...) as a model repository.

    :param cells: list of nbformat cells to append to (mutated in place).
    :param output_dir: output directory name, also used as the Hub repo id.
    :param push_type_value: one of PUSH_TYPES_ONLY_MODEL / PUSH_TYPES_ALL.
    """
    text_cell = nbf.v4.new_markdown_cell(
        """### Push the Merged model as well as the Tokenizer to HF hub""")

    # Generated cell for the "model + tokenizer only" option.
    # NOTE(review): reconstructed from diff context (the assignment opener was
    # elided by the hunk) — confirm against the original file.
    code = f"""
merged_model.push_to_hub("{output_dir}", use_temp_dir=False)

tokenizer.push_to_hub("{output_dir}", use_temp_dir=False)
"""

    # Generated cell for the "push everything" option.
    # Fixes to the generated code: the original hard-coded the author's test
    # repo ("Menouar/test") instead of using output_dir, forgot to import
    # Repository, used create_repo's deprecated `name=` kwarg (and would fail
    # if the repo already exists), and called repo.git_add() with
    # `commit_message`/`git_push` arguments it does not accept.
    code_all = f"""
from huggingface_hub import HfApi, HfFolder, Repository

# Instantiate the HfApi class
api = HfApi()

# Your Hugging Face repository
repo_name = "{output_dir}"

# Create a repository on the Hugging Face Hub (no-op if it already exists)
api.create_repo(repo_id=repo_name, token=HfFolder.get_token(), repo_type="model", exist_ok=True)

# Path to your local folder
folder_path = "{output_dir}"

# Create a repository object
repo = Repository(local_dir=folder_path, clone_from=repo_name)

# Commit and push your changes
repo.push_to_hub(commit_message="Initial commit")
"""

    if push_type_value == PUSH_TYPES_ONLY_MODEL:
        code_cell = nbf.v4.new_code_cell(code)
    else:
        code_cell = nbf.v4.new_code_cell(code_all)
    cells.append(text_cell)
    cells.append(code_cell)