1inkusFace committed (verified)
Commit c261e86 · Parent(s): 3d6350f

Update app.py

Files changed (1): app.py (+14 -9)
app.py CHANGED
@@ -9,7 +9,7 @@ import numpy as np
 import random
 
 import torch
-from diffusers import StableDiffusion3Pipeline
+from diffusers import StableDiffusion3Pipeline, SD3Transformer2DModel, AutoencoderKL
 from transformers import CLIPTextModelWithProjection, T5EncoderModel
 from transformers import CLIPTokenizer, T5TokenizerFast
 
@@ -22,7 +22,6 @@ from huggingface_hub import hf_hub_download
 import datetime
 import cyper
 
-from diffusers import AutoencoderKL
 #from models.transformer_sd3 import SD3Transformer2DModel
 #from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
 
@@ -82,6 +81,7 @@ pipe = StableDiffusion3Pipeline.from_pretrained(
     # text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
     #tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
     #tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
+    transformer=None,
     tokenizer_3=T5TokenizerFast.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=False, use_fast=True, subfolder="tokenizer_3", token=True),
     #torch_dtype=torch.bfloat16,
     #use_safetensors=False,
@@ -89,12 +89,13 @@ pipe = StableDiffusion3Pipeline.from_pretrained(
 text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
 text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
 text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
-
+ll_transformer=SD3Transformer2DModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='transformer',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
+pipe.transformer=ll_transformer
 pipe.load_lora_weights("ford442/sdxl-vae-bf16", weight_name="LoRA/UltraReal.safetensors")
 
 pipe.to(device=device, dtype=torch.bfloat16)
 #pipe.to(device)
-pipe.vae=vaeX.to('cpu')
+#pipe.vae=vaeX.to('cpu')
 upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device('cpu'))
 
 MAX_SEED = np.iinfo(np.int32).max
@@ -113,7 +114,8 @@ def infer_30(
     num_inference_steps,
     progress=gr.Progress(track_tqdm=True),
 ):
-    pipe.vae.to('cpu')
+    pipe.vae=vaeX.to('cpu')
+    pipe.transformer=ll_transformer
     pipe.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
@@ -163,7 +165,8 @@ def infer_60(
     num_inference_steps,
     progress=gr.Progress(track_tqdm=True),
 ):
-    pipe.vae.to('cpu')
+    pipe.vae=vaeX.to('cpu')
+    pipe.transformer=ll_transformer
     pipe.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
@@ -213,7 +216,8 @@ def infer_90(
     num_inference_steps,
     progress=gr.Progress(track_tqdm=True),
 ):
-    pipe.vae.to('cpu')
+    pipe.vae=vaeX.to('cpu')
+    pipe.transformer=ll_transformer
     pipe.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
@@ -263,7 +267,8 @@ def infer_100(
     num_inference_steps,
     progress=gr.Progress(track_tqdm=True),
 ):
-    pipe.vae.to('cpu')
+    pipe.vae=vaeX.to('cpu')
+    pipe.transformer=ll_transformer
     pipe.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
@@ -307,7 +312,7 @@ body{background-color: blue;}
 
 with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # StableDiffusion 3.5 Large with UltraReal lora")
+        gr.Markdown(" # StableDiffusion 3.5 Large with UltraReal lora test")
         expanded_prompt_output = gr.Textbox(label="Prompt", lines=1) # Add this line
         with gr.Row():
             prompt = gr.Text(
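
In summary, the commit stops loading the SD3 transformer implicitly: from_pretrained is now called with transformer=None, an SD3Transformer2DModel is loaded from the same repo onto cuda:0 in bfloat16 and attached as pipe.transformer, and each of the four infer_* handlers re-attaches both that transformer and the CPU-resident VAE (vaeX) before generating. The sketch below illustrates the pattern as a standalone script; the diff shows neither how vaeX is created nor how latents get decoded with a CPU VAE, so those parts (and the infer helper itself) are assumptions, not the app's code.

import torch
from diffusers import StableDiffusion3Pipeline, SD3Transformer2DModel, AutoencoderKL

REPO = "ford442/stable-diffusion-3.5-large-bf16"  # repo ID taken from the commit

# Build the pipeline without its transformer so from_pretrained skips
# loading those weights; everything else loads normally.
pipe = StableDiffusion3Pipeline.from_pretrained(REPO, transformer=None, torch_dtype=torch.bfloat16)

# Load the transformer explicitly onto the GPU in bf16 and attach it
# (the commit names it ll_transformer).
ll_transformer = SD3Transformer2DModel.from_pretrained(
    REPO, subfolder="transformer", torch_dtype=torch.bfloat16
).to("cuda:0")
pipe.transformer = ll_transformer

pipe.to(device="cuda:0", dtype=torch.bfloat16)

# Assumption: app.py defines vaeX along these lines outside the diff.
vaeX = AutoencoderKL.from_pretrained(REPO, subfolder="vae")

def infer(prompt: str):
    # Re-attach the swapped components on every call, as the commit's
    # infer_* handlers do, so a component moved by an earlier call is restored.
    pipe.vae = vaeX.to("cpu")
    pipe.transformer = ll_transformer
    # Denoise on the GPU but stop at latents, since the VAE lives on the CPU.
    latents = pipe(prompt, output_type="latent").images
    latents = latents.to("cpu", dtype=vaeX.dtype)
    # SD3 latents are scaled and shifted; undo both before decoding,
    # mirroring the decode step inside the diffusers SD3 pipeline.
    latents = latents / vaeX.config.scaling_factor + vaeX.config.shift_factor
    image = vaeX.decode(latents).sample
    return pipe.image_processor.postprocess(image, output_type="pil")[0]

Parking the VAE on the CPU trades slower decodes for VRAM headroom, and re-assigning the components inside every handler makes each call self-correcting if a previous call moved or replaced them.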