Spaces:

Menouar
/

LLM-FineTuning-Notebook-Generator

Runtime error

App Files Files Community

menouar committed on Mar 10, 2024

Commit

e75ffde

1 Parent(s): 592b663

Update the generated Notebook to push properly to HF

Browse files

Files changed (4) hide show

app.py +32 -2
utils/__init__.py +6 -0
utils/components_creator.py +13 -5
utils/notebook_generator.py +30 -3

app.py CHANGED Viewed

@@ -68,6 +68,10 @@ def change_model_selection(model_id):
     return None
 def check_valid_input(value):
     if isinstance(value, str):
         return value and value.strip()
@@ -186,10 +190,12 @@ def generate_code(components: dict[Component, Any]):
     create_merge_lora_cells(notebook['cells'], output_dir)
     if push_to_hub:
         if not should_login:
             create_login_hf_cells(notebook['cells'])
-        push_merged_model_cells(notebook['cells'], output_dir)
     file_name = f"{finetuning_notebook}.ipynb"
@@ -279,7 +285,10 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
     gr.HTML("<h2 style='text-align: center;'>Outputs</h2>")
     with gr.Row():
         with centered_column():
-            all_components.update(add_outputs())
         with centered_column():
             all_components.update(add_outputs1())
@@ -308,4 +317,25 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
         outputs=version_selection
     )
 demo.launch(allowed_paths=["/"])

     return None
+def display_push_type(value):  # change-handler wired to push_to_hub_cmp; `value` is that component's current value
+    return gr.Radio(visible=value)  # Radio update for push_type_cmp: its visibility follows `value`
 def check_valid_input(value):
     if isinstance(value, str):
         return value and value.strip()
     create_merge_lora_cells(notebook['cells'], output_dir)
+    push_type_value = get_value(components, PUSH_TYPE_ID)
     if push_to_hub:
         if not should_login:
             create_login_hf_cells(notebook['cells'])
+        push_merged_model_cells(notebook['cells'], output_dir, push_type_value)
     file_name = f"{finetuning_notebook}.ipynb"
     gr.HTML("<h2 style='text-align: center;'>Outputs</h2>")
     with gr.Row():
         with centered_column():
+            output_dir_cmp, push_to_hub_cmp = add_outputs()
+            all_components.update({output_dir_cmp, push_to_hub_cmp})
+            push_type_cmp = add_push_type_cmp()
+            all_components.update({push_type_cmp})
         with centered_column():
             all_components.update(add_outputs1())
         outputs=version_selection
     )
+    push_to_hub_cmp.change(
+        fn=display_push_type,
+        inputs=push_to_hub_cmp,
+        outputs=push_type_cmp
+    )
 demo.launch(allowed_paths=["/"])
+# Example only (kept in an unused string literal, never executed): push the TensorBoard runs directory to the Hub.
+"""
+import os
+from huggingface_hub import Repository
+# Create a repository object
+repo = Repository("Menouar/ft-phi-1")
+# Push the runs directory
+os.system(f"git -C {repo.local_dir} add output_dir/runs")
+repo.git_commit("Adding TensorBoard logs")
+repo.push_to_hub(commit_message="Adding TensorBoard logs")
+"""

utils/__init__.py CHANGED Viewed

@@ -32,8 +32,14 @@ LEARNING_RATE_ID = "learning_rate"
 MAX_GRAD_NORM_ID = "max_grad_norm"
 WARMUP_RATIO_ID = "warmup_ratio"
 LR_SCHEDULER_TYPE_ID = "lr_scheduler_type"
 OUTPUT_DIR_ID = "output_dir"
 PUSH_TO_HUB_ID = "push_to_hub"
 REPORT_TO_ID = "report_to"
 MAX_SEQ_LENGTH_ID = "max_seq_length"

 MAX_GRAD_NORM_ID = "max_grad_norm"
 WARMUP_RATIO_ID = "warmup_ratio"
 LR_SCHEDULER_TYPE_ID = "lr_scheduler_type"
 OUTPUT_DIR_ID = "output_dir"
 PUSH_TO_HUB_ID = "push_to_hub"
+PUSH_TYPE_ID = "push_type"
+PUSH_TYPES_ALL = "Push all the outputs"
+PUSH_TYPES_ONLY_MODEL = "Push only the Model and Tokenizer"
 REPORT_TO_ID = "report_to"
 MAX_SEQ_LENGTH_ID = "max_seq_length"

utils/components_creator.py CHANGED Viewed

@@ -167,7 +167,7 @@ def add_training_args_3() -> Set[Component]:
     return out_components
-def add_outputs() -> Set[Component]:
     output_dir = gr.Textbox(interactive=True,
                             label="output_dir",
                             info='The output directory where the model predictions and checkpoints will be written.',
@@ -178,10 +178,18 @@ def add_outputs() -> Set[Component]:
                                    "True, you must specify 'HF_TOKEN'.",
                               elem_id=PUSH_TO_HUB_ID)
-    out_components: Set[Component] = set()
-    out_components.add(output_dir)
-    out_components.add(push_to_hub)
-    return out_components
 def add_outputs1() -> Set[Component]:

     return out_components
+def add_outputs() -> (Component, Component):
     output_dir = gr.Textbox(interactive=True,
                             label="output_dir",
                             info='The output directory where the model predictions and checkpoints will be written.',
                                    "True, you must specify 'HF_TOKEN'.",
                               elem_id=PUSH_TO_HUB_ID)
+    return output_dir, push_to_hub
+def add_push_type_cmp() -> Component:  # builds the radio that selects what is pushed to the Hub
+    push_type = gr.Radio([PUSH_TYPES_ONLY_MODEL, PUSH_TYPES_ALL],  # the two selectable push modes
+                         label="Output Push Option",
+                         info="Select whether to push only the Model and Tokenizer or all the outputs.",
+                         interactive=True,
+                         visible=False,  # created hidden; display_push_type toggles visibility on push_to_hub changes
+                         value=PUSH_TYPES_ALL,  # default selection: "Push all the outputs"
+                         elem_id=PUSH_TYPE_ID)  # same id generate_code passes to get_value(components, PUSH_TYPE_ID)
+    return push_type
 def add_outputs1() -> Set[Component]:

utils/notebook_generator.py CHANGED Viewed

@@ -2,7 +2,7 @@ from typing import Optional
 import nbformat as nbf
-from utils import FTDataSet
 def create_install_libraries_cells(cells: list):
@@ -389,7 +389,7 @@ merged_model.save_pretrained("{output_dir}", safe_serialization=True, max_shard_
     cells.append(code_cell)
-def push_merged_model_cells(cells: list, output_dir):
     text_cell = nbf.v4.new_markdown_cell(
         """### Push the Merged model as well as the Tokenizer to HF hub""")
@@ -398,6 +398,33 @@ merged_model.push_to_hub("{output_dir}", use_temp_dir=False)
 tokenizer.push_to_hub("{output_dir}", use_temp_dir=False)
 """
-    code_cell = nbf.v4.new_code_cell(code)
     cells.append(text_cell)
     cells.append(code_cell)

 import nbformat as nbf
+from utils import FTDataSet, PUSH_TYPES_ONLY_MODEL
 def create_install_libraries_cells(cells: list):
     cells.append(code_cell)
+def push_merged_model_cells(cells: list, output_dir, push_type_value):
     text_cell = nbf.v4.new_markdown_cell(
         """### Push the Merged model as well as the Tokenizer to HF hub""")
 tokenizer.push_to_hub("{output_dir}", use_temp_dir=False)
 """
+    code_all = f"""
+from huggingface_hub import HfApi, HfFolder
+# Instantiate the HfApi class
+api = HfApi()
+# Your Hugging Face repository
+repo_name = "Menouar/test"
+# Create a repository on the Hugging Face Hub
+api.create_repo(token=HfFolder.get_token(), name=repo_name, repo_type="model")
+# Path to your local folder
+folder_path = "{output_dir}"
+# Create a repository object
+repo = Repository(local_dir=folder_path, clone_from=repo_name)
+# Commit and push your changes
+repo.git_add(commit_message="Initial commit", git_push=True)
+"""
+    if push_type_value == PUSH_TYPES_ONLY_MODEL:
+        code_cell = nbf.v4.new_code_cell(code)
+    else:
+        code_cell = nbf.v4.new_code_cell(code_all)
     cells.append(text_cell)
     cells.append(code_cell)