Spaces:
Runtime error
Runtime error
menouar
committed on
Commit
·
e75ffde
1
Parent(s):
592b663
Update the generated Notebook to push properly to HF
Browse files
- app.py +32 -2
- utils/__init__.py +6 -0
- utils/components_creator.py +13 -5
- utils/notebook_generator.py +30 -3
app.py
CHANGED
@@ -68,6 +68,10 @@ def change_model_selection(model_id):
|
|
68 |
return None
|
69 |
|
70 |
|
|
|
|
|
|
|
|
|
71 |
def check_valid_input(value):
|
72 |
if isinstance(value, str):
|
73 |
return value and value.strip()
|
@@ -186,10 +190,12 @@ def generate_code(components: dict[Component, Any]):
|
|
186 |
|
187 |
create_merge_lora_cells(notebook['cells'], output_dir)
|
188 |
|
|
|
|
|
189 |
if push_to_hub:
|
190 |
if not should_login:
|
191 |
create_login_hf_cells(notebook['cells'])
|
192 |
-
push_merged_model_cells(notebook['cells'], output_dir)
|
193 |
|
194 |
file_name = f"{finetuning_notebook}.ipynb"
|
195 |
|
@@ -279,7 +285,10 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
|
|
279 |
gr.HTML("<h2 style='text-align: center;'>Outputs</h2>")
|
280 |
with gr.Row():
|
281 |
with centered_column():
|
282 |
-
|
|
|
|
|
|
|
283 |
with centered_column():
|
284 |
all_components.update(add_outputs1())
|
285 |
|
@@ -308,4 +317,25 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
|
|
308 |
outputs=version_selection
|
309 |
)
|
310 |
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
demo.launch(allowed_paths=["/"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
return None
|
69 |
|
70 |
|
71 |
+
def display_push_type(value):
    """Return a ``gr.Radio`` whose visibility follows ``value``.

    Used as a Gradio event callback: the returned component carries only the
    ``visible`` property, set to whatever ``value`` was passed in.

    Args:
        value: Passed straight through as the ``visible`` argument
            (presumably the boolean state of the push-to-hub control;
            confirm against the event wiring).

    Returns:
        A ``gr.Radio`` constructed with ``visible=value``.
    """
    radio_update = gr.Radio(visible=value)
    return radio_update
|
73 |
+
|
74 |
+
|
75 |
def check_valid_input(value):
|
76 |
if isinstance(value, str):
|
77 |
return value and value.strip()
|
|
|
190 |
|
191 |
create_merge_lora_cells(notebook['cells'], output_dir)
|
192 |
|
193 |
+
push_type_value = get_value(components, PUSH_TYPE_ID)
|
194 |
+
|
195 |
if push_to_hub:
|
196 |
if not should_login:
|
197 |
create_login_hf_cells(notebook['cells'])
|
198 |
+
push_merged_model_cells(notebook['cells'], output_dir, push_type_value)
|
199 |
|
200 |
file_name = f"{finetuning_notebook}.ipynb"
|
201 |
|
|
|
285 |
gr.HTML("<h2 style='text-align: center;'>Outputs</h2>")
|
286 |
with gr.Row():
|
287 |
with centered_column():
|
288 |
+
output_dir_cmp, push_to_hub_cmp = add_outputs()
|
289 |
+
all_components.update({output_dir_cmp, push_to_hub_cmp})
|
290 |
+
push_type_cmp = add_push_type_cmp()
|
291 |
+
all_components.update({push_type_cmp})
|
292 |
with centered_column():
|
293 |
all_components.update(add_outputs1())
|
294 |
|
|
|
317 |
outputs=version_selection
|
318 |
)
|
319 |
|
320 |
+
push_to_hub_cmp.change(
|
321 |
+
fn=display_push_type,
|
322 |
+
inputs=push_to_hub_cmp,
|
323 |
+
outputs=push_type_cmp
|
324 |
+
)
|
325 |
+
|
326 |
demo.launch(allowed_paths=["/"])
|
327 |
+
|
328 |
+
# Upload metrics to the hub....
|
329 |
+
"""
|
330 |
+
import os
|
331 |
+
from huggingface_hub import Repository
|
332 |
+
|
333 |
+
# Create a repository object
|
334 |
+
repo = Repository("Menouar/ft-phi-1")
|
335 |
+
|
336 |
+
# Push the runs directory
|
337 |
+
os.system(f"git -C {repo.local_dir} add output_dir/runs")
|
338 |
+
repo.git_commit("Adding TensorBoard logs")
|
339 |
+
repo.push_to_hub(commit_message="Adding TensorBoard logs")
|
340 |
+
|
341 |
+
"""
|
utils/__init__.py
CHANGED
@@ -32,8 +32,14 @@ LEARNING_RATE_ID = "learning_rate"
|
|
32 |
MAX_GRAD_NORM_ID = "max_grad_norm"
|
33 |
WARMUP_RATIO_ID = "warmup_ratio"
|
34 |
LR_SCHEDULER_TYPE_ID = "lr_scheduler_type"
|
|
|
35 |
OUTPUT_DIR_ID = "output_dir"
|
|
|
36 |
PUSH_TO_HUB_ID = "push_to_hub"
|
|
|
|
|
|
|
|
|
37 |
REPORT_TO_ID = "report_to"
|
38 |
|
39 |
MAX_SEQ_LENGTH_ID = "max_seq_length"
|
|
|
32 |
MAX_GRAD_NORM_ID = "max_grad_norm"
|
33 |
WARMUP_RATIO_ID = "warmup_ratio"
|
34 |
LR_SCHEDULER_TYPE_ID = "lr_scheduler_type"
|
35 |
+
|
36 |
OUTPUT_DIR_ID = "output_dir"
|
37 |
+
|
38 |
PUSH_TO_HUB_ID = "push_to_hub"
|
39 |
+
PUSH_TYPE_ID = "push_type"
|
40 |
+
PUSH_TYPES_ALL = "Push all the outputs"
|
41 |
+
PUSH_TYPES_ONLY_MODEL = "Push only the Model and Tokenizer"
|
42 |
+
|
43 |
REPORT_TO_ID = "report_to"
|
44 |
|
45 |
MAX_SEQ_LENGTH_ID = "max_seq_length"
|
utils/components_creator.py
CHANGED
@@ -167,7 +167,7 @@ def add_training_args_3() -> Set[Component]:
|
|
167 |
return out_components
|
168 |
|
169 |
|
170 |
-
def add_outputs() ->
|
171 |
output_dir = gr.Textbox(interactive=True,
|
172 |
label="output_dir",
|
173 |
info='The output directory where the model predictions and checkpoints will be written.',
|
@@ -178,10 +178,18 @@ def add_outputs() -> Set[Component]:
|
|
178 |
"True, you must specify 'HF_TOKEN'.",
|
179 |
elem_id=PUSH_TO_HUB_ID)
|
180 |
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
|
186 |
|
187 |
def add_outputs1() -> Set[Component]:
|
|
|
167 |
return out_components
|
168 |
|
169 |
|
170 |
+
def add_outputs() -> (Component, Component):
|
171 |
output_dir = gr.Textbox(interactive=True,
|
172 |
label="output_dir",
|
173 |
info='The output directory where the model predictions and checkpoints will be written.',
|
|
|
178 |
"True, you must specify 'HF_TOKEN'.",
|
179 |
elem_id=PUSH_TO_HUB_ID)
|
180 |
|
181 |
+
return output_dir, push_to_hub
|
182 |
+
|
183 |
+
|
184 |
+
def add_push_type_cmp() -> Component:
    """Create the radio button that selects what is pushed.

    The radio offers two choices (``PUSH_TYPES_ONLY_MODEL`` and
    ``PUSH_TYPES_ALL``), is pre-selected on ``PUSH_TYPES_ALL``, and is created
    with ``visible=False``.

    Returns:
        The newly created ``gr.Radio`` component, tagged with
        ``elem_id=PUSH_TYPE_ID``.
    """
    # Order matters for display: "only model" is listed first, "all" second.
    choices = [PUSH_TYPES_ONLY_MODEL, PUSH_TYPES_ALL]
    return gr.Radio(
        choices,
        label="Output Push Option",
        info="Select whether to push only the Model and Tokenizer or all the outputs.",
        interactive=True,
        visible=False,
        value=PUSH_TYPES_ALL,
        elem_id=PUSH_TYPE_ID,
    )
|
193 |
|
194 |
|
195 |
def add_outputs1() -> Set[Component]:
|
utils/notebook_generator.py
CHANGED
@@ -2,7 +2,7 @@ from typing import Optional
|
|
2 |
|
3 |
import nbformat as nbf
|
4 |
|
5 |
-
from utils import FTDataSet
|
6 |
|
7 |
|
8 |
def create_install_libraries_cells(cells: list):
|
@@ -389,7 +389,7 @@ merged_model.save_pretrained("{output_dir}", safe_serialization=True, max_shard_
|
|
389 |
cells.append(code_cell)
|
390 |
|
391 |
|
392 |
-
def push_merged_model_cells(cells: list, output_dir):
|
393 |
text_cell = nbf.v4.new_markdown_cell(
|
394 |
"""### Push the Merged model as well as the Tokenizer to HF hub""")
|
395 |
|
@@ -398,6 +398,33 @@ merged_model.push_to_hub("{output_dir}", use_temp_dir=False)
|
|
398 |
|
399 |
tokenizer.push_to_hub("{output_dir}", use_temp_dir=False)
|
400 |
"""
|
401 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
402 |
cells.append(text_cell)
|
403 |
cells.append(code_cell)
|
|
|
2 |
|
3 |
import nbformat as nbf
|
4 |
|
5 |
+
from utils import FTDataSet, PUSH_TYPES_ONLY_MODEL
|
6 |
|
7 |
|
8 |
def create_install_libraries_cells(cells: list):
|
|
|
389 |
cells.append(code_cell)
|
390 |
|
391 |
|
392 |
+
def push_merged_model_cells(cells: list, output_dir, push_type_value):
|
393 |
text_cell = nbf.v4.new_markdown_cell(
|
394 |
"""### Push the Merged model as well as the Tokenizer to HF hub""")
|
395 |
|
|
|
398 |
|
399 |
tokenizer.push_to_hub("{output_dir}", use_temp_dir=False)
|
400 |
"""
|
401 |
+
|
402 |
+
code_all = f"""
|
403 |
+
|
404 |
+
from huggingface_hub import HfApi, HfFolder
|
405 |
+
|
406 |
+
# Instantiate the HfApi class
|
407 |
+
api = HfApi()
|
408 |
+
|
409 |
+
# Your Hugging Face repository
|
410 |
+
repo_name = "Menouar/test"
|
411 |
+
|
412 |
+
# Create a repository on the Hugging Face Hub
|
413 |
+
api.create_repo(token=HfFolder.get_token(), name=repo_name, repo_type="model")
|
414 |
+
|
415 |
+
# Path to your local folder
|
416 |
+
folder_path = "{output_dir}"
|
417 |
+
|
418 |
+
# Create a repository object
|
419 |
+
repo = Repository(local_dir=folder_path, clone_from=repo_name)
|
420 |
+
|
421 |
+
# Commit and push your changes
|
422 |
+
repo.git_add(commit_message="Initial commit", git_push=True)
|
423 |
+
"""
|
424 |
+
|
425 |
+
if push_type_value == PUSH_TYPES_ONLY_MODEL:
|
426 |
+
code_cell = nbf.v4.new_code_cell(code)
|
427 |
+
else:
|
428 |
+
code_cell = nbf.v4.new_code_cell(code_all)
|
429 |
cells.append(text_cell)
|
430 |
cells.append(code_cell)
|