multimodalart HF staff committed on
Commit
173552f
·
2 Parent(s): b33503b 16f66dc

Merge branch 'new-version'

Browse files
Files changed (5) hide show
  1. app.py +142 -72
  2. mix.zip +2 -2
  3. requirements-local.txt +18 -0
  4. requirements.txt +1 -1
  5. train_dreambooth.py +8 -3
app.py CHANGED
@@ -28,32 +28,40 @@ css = '''
28
  '''
29
  maximum_concepts = 3
30
 
31
- #Pre download the files even if we don't use it here
32
- model_to_load = snapshot_download(repo_id="multimodalart/sd-fine-tunable")
 
 
33
  safety_checker = snapshot_download(repo_id="multimodalart/sd-sc")
34
 
35
- def zipdir(path, ziph):
36
- # ziph is zipfile handle
37
- for root, dirs, files in os.walk(path):
38
- for file in files:
39
- ziph.write(os.path.join(root, file),
40
- os.path.relpath(os.path.join(root, file),
41
- os.path.join(path, '..')))
42
 
43
  def swap_text(option):
44
  mandatory_liability = "You must have the right to do so and you are liable for the images you use, example:"
45
  if(option == "object"):
46
  instance_prompt_example = "cttoy"
47
- freeze_for = 50
48
- return [f"You are going to train `object`(s), upload 5-10 images of each object you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/cat-toy.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for]
49
  elif(option == "person"):
50
  instance_prompt_example = "julcto"
51
- freeze_for = 100
52
- return [f"You are going to train a `person`(s), upload 10-20 images of each person you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/person.png" />''', f"You should name the files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for]
53
  elif(option == "style"):
54
  instance_prompt_example = "trsldamrl"
55
  freeze_for = 10
56
- return [f"You are going to train a `style`, upload 10-20 images of the style you are planning on training on. Name the files with the words you would like {mandatory_liability}:", '''<img src="file/trsl_style.png" />''', f"You should name your files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for]
 
 
 
 
 
 
 
 
 
57
 
58
  def count_files(*inputs):
59
  file_counter = 0
@@ -70,6 +78,10 @@ def count_files(*inputs):
70
  Training_Steps = int(inputs[-3])
71
  else:
72
  Training_Steps = file_counter*200
 
 
 
 
73
  if(is_spaces):
74
  summary_sentence = f'''You are going to train {concept_counter} {type_of_thing}(s), with {file_counter} images for {Training_Steps} steps. The training should take around {round(Training_Steps/1.1, 2)} seconds, or {round((Training_Steps/1.1)/60, 2)} minutes.
75
  The setup, compression and uploading the model can take up to 20 minutes.<br>As the T4-Small GPU costs US$0.60 for 1h, <span style="font-size: 120%"><b>the estimated cost for this training is US${round((((Training_Steps/1.1)/3600)+0.3+0.1)*0.60, 2)}.</b></span><br><br>
@@ -79,6 +91,13 @@ def count_files(*inputs):
79
 
80
  return([gr.update(visible=True), gr.update(visible=True, value=summary_sentence)])
81
 
 
 
 
 
 
 
 
82
  def pad_image(image):
83
  w, h = image.size
84
  if w == h:
@@ -98,7 +117,9 @@ def train(*inputs):
98
 
99
  torch.cuda.empty_cache()
100
  if 'pipe' in globals():
 
101
  del pipe
 
102
  gc.collect()
103
 
104
  if os.path.exists("output_model"): shutil.rmtree('output_model')
@@ -127,57 +148,92 @@ def train(*inputs):
127
  os.makedirs('output_model',exist_ok=True)
128
  uses_custom = inputs[-1]
129
  type_of_thing = inputs[-4]
130
-
131
  remove_attribution_after = inputs[-6]
132
-
 
133
  if(uses_custom):
134
  Training_Steps = int(inputs[-3])
135
  Train_text_encoder_for = int(inputs[-2])
136
  else:
137
- Training_Steps = file_counter*200
138
  if(type_of_thing == "object"):
139
  Train_text_encoder_for=30
140
- elif(type_of_thing == "person"):
141
- Train_text_encoder_for=60
142
  elif(type_of_thing == "style"):
143
  Train_text_encoder_for=15
144
-
145
- class_data_dir = None
 
 
 
 
 
 
 
 
146
  stptxt = int((Training_Steps*Train_text_encoder_for)/100)
147
- args_general = argparse.Namespace(
148
- image_captions_filename = True,
149
- train_text_encoder = True,
150
- stop_text_encoder_training = stptxt,
151
- save_n_steps = 0,
152
- pretrained_model_name_or_path = model_to_load,
153
- instance_data_dir="instance_images",
154
- class_data_dir=class_data_dir,
155
- output_dir="output_model",
156
- instance_prompt="",
157
- seed=42,
158
- resolution=512,
159
- mixed_precision="fp16",
160
- train_batch_size=1,
161
- gradient_accumulation_steps=1,
162
- use_8bit_adam=True,
163
- learning_rate=2e-6,
164
- lr_scheduler="polynomial",
165
- lr_warmup_steps = 0,
166
- max_train_steps=Training_Steps,
167
- )
168
- print("Starting training...")
169
- lock_file = open("intraining.lock", "w")
170
- lock_file.close()
171
- run_training(args_general)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  gc.collect()
173
  torch.cuda.empty_cache()
174
- print("Adding Safety Checker to the model...")
175
- shutil.copytree(f"{safety_checker}/feature_extractor", "output_model/feature_extractor")
176
- shutil.copytree(f"{safety_checker}/safety_checker", "output_model/safety_checker")
177
- shutil.copy(f"model_index.json", "output_model/model_index.json")
 
178
 
179
- #with zipfile.ZipFile('diffusers_model.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
180
- # zipdir('output_model/', zipf)
181
  if(not remove_attribution_after):
182
  print("Archiving model file...")
183
  with tarfile.open("diffusers_model.tar", "w") as tar:
@@ -198,22 +254,27 @@ def train(*inputs):
198
  hf_token = inputs[-5]
199
  model_name = inputs[-7]
200
  where_to_upload = inputs[-8]
201
- push(model_name, where_to_upload, hf_token, True)
202
  hardware_url = f"https://huggingface.co/spaces/{os.environ['SPACE_ID']}/hardware"
203
  headers = { "authorization" : f"Bearer {hf_token}"}
204
  body = {'flavor': 'cpu-basic'}
205
  requests.post(hardware_url, json = body, headers=headers)
206
 
207
- def generate(prompt):
 
208
  torch.cuda.empty_cache()
209
  from diffusers import StableDiffusionPipeline
210
- global pipe
211
- pipe = StableDiffusionPipeline.from_pretrained("./output_model", torch_dtype=torch.float16)
212
- pipe = pipe.to("cuda")
213
- image = pipe(prompt).images[0]
 
 
 
 
214
  return(image)
215
 
216
- def push(model_name, where_to_upload, hf_token, comes_from_automated=False):
217
  if(not os.path.exists("model.ckpt")):
218
  convert("output_model", "model.ckpt")
219
  from huggingface_hub import HfApi, HfFolder, CommitOperationAdd
@@ -247,7 +308,7 @@ license: creativeml-openrail-m
247
  tags:
248
  - text-to-image
249
  ---
250
- ### {model_name} Dreambooth model trained by {api.whoami(token=hf_token)["name"]} with [Hugging Face Dreambooth Training Space](https://huggingface.co/spaces/multimodalart/dreambooth-training)
251
 
252
  You run your new concept via `diffusers` [Colab Notebook for Inference](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_inference.ipynb). Don't forget to use the concept prompts!
253
 
@@ -368,21 +429,24 @@ with gr.Blocks(css=css) as demo:
368
  top_description = gr.HTML(f'''
369
  <div class="gr-prose" style="max-width: 80%">
370
  <h2>You have successfully cloned the Dreambooth Training Space locally 🎉</h2>
371
- <p>If you are having problems with the requirements, try installing xformers with `%pip install git+https://github.com/facebookresearch/xformers@1d31a3a#egg=xformers`</p>
372
  </div>
373
  ''')
374
- gr.Markdown("# Dreambooth Training UI")
375
- gr.Markdown("Customize Stable Diffusion by training it on a few examples of concepts, up to 3 concepts on the same model. This Space is based on TheLastBen's [fast-DreamBooth Colab](https://colab.research.google.com/github/TheLastBen/fast-stable-diffusion/blob/main/fast-DreamBooth.ipynb) with [🧨 diffusers](https://github.com/huggingface/diffusers)")
376
 
377
  with gr.Row() as what_are_you_training:
378
  type_of_thing = gr.Dropdown(label="What would you like to train?", choices=["object", "person", "style"], value="object", interactive=True)
379
-
 
380
  #Very hacky approach to emulate dynamically created Gradio components
381
  with gr.Row() as upload_your_concept:
382
  with gr.Column():
383
- thing_description = gr.Markdown("You are going to train an `object`, please upload 5-10 images of the object you are planning on training on from different angles/perspectives. You must have the right to do so and you are liable for the images you use, example:")
 
384
  thing_image_example = gr.HTML('''<img src="file/cat-toy.png" />''')
385
  things_naming = gr.Markdown("You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `cttoy` here). Images will be automatically cropped to 512x512.")
 
386
  with gr.Column():
387
  file_collection = []
388
  concept_collection = []
@@ -428,10 +492,10 @@ with gr.Blocks(css=css) as demo:
428
 
429
  with gr.Accordion("Custom Settings", open=False):
430
  swap_auto_calculated = gr.Checkbox(label="Use custom settings")
431
- gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style` as follows: The number of steps is calculated by number of images uploaded multiplied by 20. The text-encoder is frozen after 10% of the steps for a style, 30% of the steps for an object and is fully trained for persons.")
432
- steps = gr.Number(label="How many steps", value=800)
433
  perc_txt_encoder = gr.Number(label="Percentage of the training steps the text-encoder should be trained as well", value=30)
434
-
435
  with gr.Box(visible=False) as training_summary:
436
  training_summary_text = gr.HTML("", visible=False, label="Training Summary")
437
  is_advanced_visible = True if is_spaces else False
@@ -454,6 +518,7 @@ with gr.Blocks(css=css) as demo:
454
  gr.Markdown("## Try your model")
455
  prompt = gr.Textbox(label="Type your prompt")
456
  result_image = gr.Image()
 
457
  generate_button = gr.Button("Generate Image")
458
 
459
  with gr.Box(visible=False) as push_to_hub:
@@ -470,11 +535,16 @@ with gr.Blocks(css=css) as demo:
470
  convert_button = gr.Button("Convert to CKPT", visible=False)
471
 
472
  #Swap the examples and the % of text encoder trained depending if it is an object, person or style
473
- type_of_thing.change(fn=swap_text, inputs=[type_of_thing], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder], queue=False, show_progress=False)
474
 
 
 
 
475
  #Update the summary box below the UI according to how many images are uploaded and whether users are using custom settings or not
476
  for file in file_collection:
 
477
  file.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
 
478
  steps.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
479
  perc_txt_encoder.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
480
 
@@ -485,12 +555,12 @@ with gr.Blocks(css=css) as demo:
485
  train_btn.click(lambda:gr.update(visible=True), inputs=None, outputs=training_ongoing)
486
 
487
  #The main train function
488
- train_btn.click(fn=train, inputs=is_visible+concept_collection+file_collection+[training_summary_where_to_upload]+[training_summary_model_name]+[training_summary_checkbox]+[training_summary_token]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[result, try_your_model, push_to_hub, convert_button, training_ongoing, completed_training], queue=False)
489
 
490
  #Button to generate an image from your trained model after training
491
- generate_button.click(fn=generate, inputs=prompt, outputs=result_image, queue=False)
492
  #Button to push the model to the Hugging Face Hub
493
- push_button.click(fn=push, inputs=[model_name, where_to_upload, hf_token], outputs=[success_message_upload, result], queue=False)
494
  #Button to convert the model to ckpt format
495
  convert_button.click(fn=convert_to_ckpt, inputs=[], outputs=result, queue=False)
496
 
 
28
  '''
29
  maximum_concepts = 3
30
 
31
+ #Pre download the files
32
+ model_v1 = snapshot_download(repo_id="multimodalart/sd-fine-tunable")
33
+ #model_v2 = snapshot_download(repo_id="stabilityai/stable-diffusion-2")
34
+ model_v2_512 = snapshot_download(repo_id="stabilityai/stable-diffusion-2-base")
35
  safety_checker = snapshot_download(repo_id="multimodalart/sd-sc")
36
 
37
+ model_to_load = model_v1
38
+
39
+ with zipfile.ZipFile("mix.zip", 'r') as zip_ref:
40
+ zip_ref.extractall(".")
 
 
 
41
 
42
  def swap_text(option):
43
  mandatory_liability = "You must have the right to do so and you are liable for the images you use, example:"
44
  if(option == "object"):
45
  instance_prompt_example = "cttoy"
46
+ freeze_for = 30
47
+ return [f"You are going to train `object`(s), upload 5-10 images of each object you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/cat-toy.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
48
  elif(option == "person"):
49
  instance_prompt_example = "julcto"
50
+ freeze_for = 70
51
+ return [f"You are going to train a `person`(s), upload 10-20 images of each person you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/person.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=True)]
52
  elif(option == "style"):
53
  instance_prompt_example = "trsldamrl"
54
  freeze_for = 10
55
+ return [f"You are going to train a `style`, upload 10-20 images of the style you are planning on training on. Name the files with the words you would like {mandatory_liability}:", '''<img src="file/trsl_style.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
56
+
57
def swap_base_model(selected_model):
    """Point the module-level ``model_to_load`` at the chosen base model.

    "v1-5" selects ``model_v1``; every other value selects ``model_v2_512``.
    Nothing is returned.
    """
    global model_to_load
    # The "v2-768" option (model_v2) is currently disabled, so anything
    # other than "v1-5" resolves to the 512px v2 base model.
    # NOTE(review): this is process-wide state; the value persists until the
    # dropdown changes again - confirm that is acceptable for shared use.
    model_to_load = model_v1 if selected_model == "v1-5" else model_v2_512
65
 
66
  def count_files(*inputs):
67
  file_counter = 0
 
78
  Training_Steps = int(inputs[-3])
79
  else:
80
  Training_Steps = file_counter*200
81
+ if(Training_Steps > 2400):
82
+ Training_Steps=2400
83
+ elif(Training_Steps < 1400):
84
+ Training_Steps=1400
85
  if(is_spaces):
86
  summary_sentence = f'''You are going to train {concept_counter} {type_of_thing}(s), with {file_counter} images for {Training_Steps} steps. The training should take around {round(Training_Steps/1.1, 2)} seconds, or {round((Training_Steps/1.1)/60, 2)} minutes.
87
  The setup, compression and uploading the model can take up to 20 minutes.<br>As the T4-Small GPU costs US$0.60 for 1h, <span style="font-size: 120%"><b>the estimated cost for this training is US${round((((Training_Steps/1.1)/3600)+0.3+0.1)*0.60, 2)}.</b></span><br><br>
 
91
 
92
  return([gr.update(visible=True), gr.update(visible=True, value=summary_sentence)])
93
 
94
def update_steps(*files_list):
    """Return a ``gr.update`` whose value is 200 times the number of uploaded files.

    Each positional argument is one upload group; empty or missing groups
    are ignored when counting.
    """
    uploaded_total = sum(len(group) for group in files_list if group)
    return gr.update(value=uploaded_total * 200)
100
+
101
  def pad_image(image):
102
  w, h = image.size
103
  if w == h:
 
117
 
118
  torch.cuda.empty_cache()
119
  if 'pipe' in globals():
120
+ global pipe, pipe_is_set
121
  del pipe
122
+ pipe_is_set = False
123
  gc.collect()
124
 
125
  if os.path.exists("output_model"): shutil.rmtree('output_model')
 
148
  os.makedirs('output_model',exist_ok=True)
149
  uses_custom = inputs[-1]
150
  type_of_thing = inputs[-4]
 
151
  remove_attribution_after = inputs[-6]
152
+ experimental_face_improvement = inputs[-9]
153
+ which_model = inputs[-10]
154
  if(uses_custom):
155
  Training_Steps = int(inputs[-3])
156
  Train_text_encoder_for = int(inputs[-2])
157
  else:
 
158
  if(type_of_thing == "object"):
159
  Train_text_encoder_for=30
160
+
 
161
  elif(type_of_thing == "style"):
162
  Train_text_encoder_for=15
163
+
164
+ elif(type_of_thing == "person"):
165
+ Train_text_encoder_for=75
166
+
167
+ Training_Steps = file_counter*200
168
+ if(Training_Steps > 2400):
169
+ Training_Steps=2400
170
+ elif(Training_Steps < 1400):
171
+ Training_Steps=1400
172
+
173
  stptxt = int((Training_Steps*Train_text_encoder_for)/100)
174
+ if (type_of_thing == "object" or type_of_thing == "style" or (type_of_thing == "person" and not experimental_face_improvement)):
175
+ args_general = argparse.Namespace(
176
+ image_captions_filename = True,
177
+ train_text_encoder = True if stptxt > 0 else False,
178
+ stop_text_encoder_training = stptxt,
179
+ save_n_steps = 0,
180
+ pretrained_model_name_or_path = model_to_load,
181
+ instance_data_dir="instance_images",
182
+ class_data_dir=None,
183
+ output_dir="output_model",
184
+ instance_prompt="",
185
+ seed=42,
186
+ resolution=512,
187
+ mixed_precision="fp16",
188
+ train_batch_size=1,
189
+ gradient_accumulation_steps=1,
190
+ use_8bit_adam=True,
191
+ learning_rate=2e-6,
192
+ lr_scheduler="polynomial",
193
+ lr_warmup_steps = 0,
194
+ max_train_steps=Training_Steps,
195
+ )
196
+ print("Starting single training...")
197
+ lock_file = open("intraining.lock", "w")
198
+ lock_file.close()
199
+ run_training(args_general)
200
+ else:
201
+ args_general = argparse.Namespace(
202
+ image_captions_filename = True,
203
+ train_text_encoder = True if stptxt > 0 else False,
204
+ stop_text_encoder_training = stptxt,
205
+ save_n_steps = 0,
206
+ pretrained_model_name_or_path = model_to_load,
207
+ instance_data_dir="instance_images",
208
+ class_data_dir="Mix",
209
+ output_dir="output_model",
210
+ with_prior_preservation=True,
211
+ prior_loss_weight=1.0,
212
+ instance_prompt="",
213
+ seed=42,
214
+ resolution=512,
215
+ mixed_precision="fp16",
216
+ train_batch_size=1,
217
+ gradient_accumulation_steps=1,
218
+ use_8bit_adam=True,
219
+ learning_rate=2e-6,
220
+ lr_scheduler="polynomial",
221
+ lr_warmup_steps = 0,
222
+ max_train_steps=Training_Steps,
223
+ num_class_images=200,
224
+ )
225
+ print("Starting multi-training...")
226
+ lock_file = open("intraining.lock", "w")
227
+ lock_file.close()
228
+ run_training(args_general)
229
  gc.collect()
230
  torch.cuda.empty_cache()
231
+ if(which_model == "v1-5"):
232
+ print("Adding Safety Checker to the model...")
233
+ shutil.copytree(f"{safety_checker}/feature_extractor", "output_model/feature_extractor")
234
+ shutil.copytree(f"{safety_checker}/safety_checker", "output_model/safety_checker")
235
+ shutil.copy(f"model_index.json", "output_model/model_index.json")
236
 
 
 
237
  if(not remove_attribution_after):
238
  print("Archiving model file...")
239
  with tarfile.open("diffusers_model.tar", "w") as tar:
 
254
  hf_token = inputs[-5]
255
  model_name = inputs[-7]
256
  where_to_upload = inputs[-8]
257
+ push(model_name, where_to_upload, hf_token, which_model, True)
258
  hardware_url = f"https://huggingface.co/spaces/{os.environ['SPACE_ID']}/hardware"
259
  headers = { "authorization" : f"Bearer {hf_token}"}
260
  body = {'flavor': 'cpu-basic'}
261
  requests.post(hardware_url, json = body, headers=headers)
262
 
263
+ pipe_is_set = False
264
def generate(prompt, steps):
    """Generate one image from the freshly trained model.

    The pipeline is loaded from ``./output_model`` in half precision and
    moved to CUDA only on the first call; later calls reuse the module-level
    ``pipe`` while ``pipe_is_set`` stays true.

    Args:
        prompt: Text prompt passed to the pipeline.
        steps: Number of inference steps; coerced to ``int`` before use.

    Returns:
        The first image produced by the pipeline.
    """
    torch.cuda.empty_cache()
    from diffusers import StableDiffusionPipeline

    global pipe, pipe_is_set
    if not pipe_is_set:
        pipe = StableDiffusionPipeline.from_pretrained("./output_model", torch_dtype=torch.float16)
        pipe = pipe.to("cuda")
        pipe_is_set = True

    # The step count comes from a UI slider and may presumably arrive as a
    # float; the pipeline expects a whole number of steps.
    image = pipe(prompt, num_inference_steps=int(steps)).images[0]
    return image
276
 
277
+ def push(model_name, where_to_upload, hf_token, which_model, comes_from_automated=False):
278
  if(not os.path.exists("model.ckpt")):
279
  convert("output_model", "model.ckpt")
280
  from huggingface_hub import HfApi, HfFolder, CommitOperationAdd
 
308
  tags:
309
  - text-to-image
310
  ---
311
+ ### {model_name} Dreambooth model trained by {api.whoami(token=hf_token)["name"]} with [Hugging Face Dreambooth Training Space](https://huggingface.co/spaces/multimodalart/dreambooth-training) with the {which_model} base model
312
 
313
  You run your new concept via `diffusers` [Colab Notebook for Inference](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_inference.ipynb). Don't forget to use the concept prompts!
314
 
 
429
  top_description = gr.HTML(f'''
430
  <div class="gr-prose" style="max-width: 80%">
431
  <h2>You have successfully cloned the Dreambooth Training Space locally 🎉</h2>
432
+ <p>Do a <code>pip install requirements-local.txt</code></p>
433
  </div>
434
  ''')
435
+ gr.Markdown("# Dreambooth Training UI 💭")
436
+ gr.Markdown("Customize Stable Diffusion v1 or v2 (new!) by training it on a few examples of concepts, up to 3 concepts on the same model. This Space is based on TheLastBen's [fast-DreamBooth Colab](https://colab.research.google.com/github/TheLastBen/fast-stable-diffusion/blob/main/fast-DreamBooth.ipynb) with [🧨 diffusers](https://github.com/huggingface/diffusers)")
437
 
438
  with gr.Row() as what_are_you_training:
439
  type_of_thing = gr.Dropdown(label="What would you like to train?", choices=["object", "person", "style"], value="object", interactive=True)
440
+ base_model_to_use = gr.Dropdown(label="Which base model would you like to use?", choices=["v1-5", "v2-512"], value="v1-5", interactive=True)
441
+
442
  #Very hacky approach to emulate dynamically created Gradio components
443
  with gr.Row() as upload_your_concept:
444
  with gr.Column():
445
+ thing_description = gr.Markdown("You are going to train an `object`, please upload 5-10 images of the object you are planning on training on from different angles/perspectives. You must have the right to do so and you are liable for the images you use, example")
446
+ thing_experimental = gr.Checkbox(label="Improve faces (prior preservation) - can take longer training but can improve faces", visible=False, value=False)
447
  thing_image_example = gr.HTML('''<img src="file/cat-toy.png" />''')
448
  things_naming = gr.Markdown("You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `cttoy` here). Images will be automatically cropped to 512x512.")
449
+
450
  with gr.Column():
451
  file_collection = []
452
  concept_collection = []
 
492
 
493
  with gr.Accordion("Custom Settings", open=False):
494
  swap_auto_calculated = gr.Checkbox(label="Use custom settings")
495
+ gr.Markdown("If not checked, the % of frozen encoder will be tuned automatically to whether you are training an `object`, `person` or `style`. The text-encoder is frozen after 10% of the steps for a style, 30% of the steps for an object and 75% trained for persons. The number of steps varies between 1400 and 2400 depending on how many images uploaded. If you see too many artifacts in your output, it means it may have overfit and you need less steps. If your results aren't really what you wanted, it may be underfitting and you need more steps.")
496
+ steps = gr.Number(label="How many steps", value=2400)
497
  perc_txt_encoder = gr.Number(label="Percentage of the training steps the text-encoder should be trained as well", value=30)
498
+
499
  with gr.Box(visible=False) as training_summary:
500
  training_summary_text = gr.HTML("", visible=False, label="Training Summary")
501
  is_advanced_visible = True if is_spaces else False
 
518
  gr.Markdown("## Try your model")
519
  prompt = gr.Textbox(label="Type your prompt")
520
  result_image = gr.Image()
521
+ inference_steps = gr.Slider(minimum=1, maximum=150, value=50, step=1)
522
  generate_button = gr.Button("Generate Image")
523
 
524
  with gr.Box(visible=False) as push_to_hub:
 
535
  convert_button = gr.Button("Convert to CKPT", visible=False)
536
 
537
  #Swap the examples and the % of text encoder trained depending if it is an object, person or style
538
+ type_of_thing.change(fn=swap_text, inputs=[type_of_thing], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder, thing_experimental], queue=False, show_progress=False)
539
 
540
+ #Swap the base model
541
+ base_model_to_use.change(fn=swap_base_model, inputs=base_model_to_use, outputs=[])
542
+
543
  #Update the summary box below the UI according to how many images are uploaded and whether users are using custom settings or not
544
  for file in file_collection:
545
+ #file.change(fn=update_steps,inputs=file_collection, outputs=steps)
546
  file.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
547
+
548
  steps.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
549
  perc_txt_encoder.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
550
 
 
555
  train_btn.click(lambda:gr.update(visible=True), inputs=None, outputs=training_ongoing)
556
 
557
  #The main train function
558
+ train_btn.click(fn=train, inputs=is_visible+concept_collection+file_collection+[base_model_to_use]+[thing_experimental]+[training_summary_where_to_upload]+[training_summary_model_name]+[training_summary_checkbox]+[training_summary_token]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[result, try_your_model, push_to_hub, convert_button, training_ongoing, completed_training], queue=False)
559
 
560
  #Button to generate an image from your trained model after training
561
+ generate_button.click(fn=generate, inputs=[prompt, inference_steps], outputs=result_image, queue=False)
562
  #Button to push the model to the Hugging Face Hub
563
+ push_button.click(fn=push, inputs=[model_name, where_to_upload, hf_token, base_model_to_use], outputs=[success_message_upload, result], queue=False)
564
  #Button to convert the model to ckpt format
565
  convert_button.click(fn=convert_to_ckpt, inputs=[], outputs=result, queue=False)
566
 
mix.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e76812bdb3b21d1ef3050c6c6a09b09a16a21ae23476e92cd3dd1dfa2e846b22
3
- size 61281901
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09207c4e95fcf5296eb0ff708fdc672da960aeb2864d298810db5094b072a0d4
3
+ size 28022653
requirements-local.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu113
2
+ torch==1.12.1+cu113
3
+ torchvision==0.13.1+cu113
4
+ diffusers==0.9.0
5
+ accelerate==0.12.0
6
+ OmegaConf
7
+ wget
8
+ pytorch_lightning
9
+ huggingface_hub
10
+ ftfy
11
+ transformers
12
+ pyfiglet
13
+ triton==2.0.0.dev20220701
14
+ bitsandbytes
15
+ python-slugify
16
+ requests
17
+ tensorboard
18
+ git+https://github.com/facebookresearch/xformers@7e4c02c#egg=xformers
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
  --extra-index-url https://download.pytorch.org/whl/cu113
2
  torch==1.12.1+cu113
3
  torchvision==0.13.1+cu113
4
- git+https://github.com/TheLastBen/diffusers
5
  accelerate==0.12.0
6
  OmegaConf
7
  wget
 
1
  --extra-index-url https://download.pytorch.org/whl/cu113
2
  torch==1.12.1+cu113
3
  torchvision==0.13.1+cu113
4
+ diffusers==0.9.0
5
  accelerate==0.12.0
6
  OmegaConf
7
  wget
train_dreambooth.py CHANGED
@@ -6,7 +6,8 @@ from pathlib import Path
6
  from typing import Optional
7
  import subprocess
8
  import sys
9
- import gc
 
10
 
11
  import torch
12
  import torch.nn.functional as F
@@ -54,7 +55,7 @@ def parse_args():
54
  "--class_data_dir",
55
  type=str,
56
  default=None,
57
- required=False,
58
  help="A folder containing the training data of class images.",
59
  )
60
  parser.add_argument(
@@ -301,6 +302,7 @@ class DreamBoothDataset(Dataset):
301
  self.class_data_root = Path(class_data_root)
302
  self.class_data_root.mkdir(parents=True, exist_ok=True)
303
  self.class_images_path = list(self.class_data_root.iterdir())
 
304
  self.num_class_images = len(self.class_images_path)
305
  self._length = max(self.num_class_images, self.num_instance_images)
306
  self.class_prompt = class_prompt
@@ -334,6 +336,7 @@ class DreamBoothDataset(Dataset):
334
  pt=pt.replace("_"," ")
335
  pt=pt.replace("(","")
336
  pt=pt.replace(")","")
 
337
  instance_prompt = pt
338
  sys.stdout.write(" " +instance_prompt+" ")
339
  sys.stdout.flush()
@@ -746,7 +749,7 @@ def run_training(args_imported):
746
  pipeline.text_encoder.save_pretrained(frz_dir)
747
 
748
  if args.save_n_steps >= 200:
749
- if global_step < args.max_train_steps-100 and global_step+1==i:
750
  ckpt_name = "_step_" + str(global_step+1)
751
  save_dir = Path(args.output_dir+ckpt_name)
752
  save_dir=str(save_dir)
@@ -770,6 +773,7 @@ def run_training(args_imported):
770
  subprocess.call('cp -f '+frz_dir +'/*.* '+ save_dir+'/text_encoder', shell=True)
771
  chkpth=args.Session_dir+"/"+inst+".ckpt"
772
  subprocess.call('python /content/diffusers/scripts/convert_diffusers_to_original_stable_diffusion.py --model_path ' + save_dir + ' --checkpoint_path ' + chkpth + ' --half', shell=True)
 
773
  i=i+args.save_n_steps
774
 
775
  accelerator.wait_for_everyone()
@@ -819,3 +823,4 @@ def run_training(args_imported):
819
  if __name__ == "__main__":
820
  pass
821
  #main()
 
 
6
  from typing import Optional
7
  import subprocess
8
  import sys
9
+ import gc
10
+ import random
11
 
12
  import torch
13
  import torch.nn.functional as F
 
55
  "--class_data_dir",
56
  type=str,
57
  default=None,
58
+ #required=False,
59
  help="A folder containing the training data of class images.",
60
  )
61
  parser.add_argument(
 
302
  self.class_data_root = Path(class_data_root)
303
  self.class_data_root.mkdir(parents=True, exist_ok=True)
304
  self.class_images_path = list(self.class_data_root.iterdir())
305
+ random.shuffle(self.class_images_path)
306
  self.num_class_images = len(self.class_images_path)
307
  self._length = max(self.num_class_images, self.num_instance_images)
308
  self.class_prompt = class_prompt
 
336
  pt=pt.replace("_"," ")
337
  pt=pt.replace("(","")
338
  pt=pt.replace(")","")
339
+ pt=pt.replace("-","")
340
  instance_prompt = pt
341
  sys.stdout.write(" " +instance_prompt+" ")
342
  sys.stdout.flush()
 
749
  pipeline.text_encoder.save_pretrained(frz_dir)
750
 
751
  if args.save_n_steps >= 200:
752
+ if global_step < args.max_train_steps and global_step+1==i:
753
  ckpt_name = "_step_" + str(global_step+1)
754
  save_dir = Path(args.output_dir+ckpt_name)
755
  save_dir=str(save_dir)
 
773
  subprocess.call('cp -f '+frz_dir +'/*.* '+ save_dir+'/text_encoder', shell=True)
774
  chkpth=args.Session_dir+"/"+inst+".ckpt"
775
  subprocess.call('python /content/diffusers/scripts/convert_diffusers_to_original_stable_diffusion.py --model_path ' + save_dir + ' --checkpoint_path ' + chkpth + ' --half', shell=True)
776
+ subprocess.call('rm -r '+ save_dir, shell=True)
777
  i=i+args.save_n_steps
778
 
779
  accelerator.wait_for_everyone()
 
823
  if __name__ == "__main__":
824
  pass
825
  #main()
826
+