ShoufaChen committed on
Commit
3d5f671
·
verified ·
1 Parent(s): 218c9e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -16
app.py CHANGED
@@ -8,7 +8,7 @@ from omegaconf import OmegaConf
8
  from huggingface_hub import snapshot_download
9
 
10
  import torch
11
- from transformers import T5EncoderModel, AutoTokenizer
12
 
13
  from pixelflow.scheduling_pixelflow import PixelFlowScheduler
14
  from pixelflow.pipeline_pixelflow import PixelFlowPipeline
@@ -22,12 +22,12 @@ parser.add_argument('--class_cond', action='store_true', help='use class conditi
22
  args = parser.parse_args()
23
 
24
  # deploy
25
- args.checkpoint = "pixelflow_t2i"
26
- args.class_cond = False
27
 
28
- output_dir = args.checkpoint
29
 
30
  if args.class_cond:
 
31
  if not os.path.exists(output_dir):
32
  snapshot_download(repo_id="ShoufaChen/PixelFlow-Class2Image", local_dir=output_dir)
33
  config = OmegaConf.load(f"{output_dir}/config.yaml")
@@ -39,13 +39,12 @@ if args.class_cond:
39
  resolution = 256
40
  NUM_EXAMPLES = 4
41
  else:
42
- if not os.path.exists(output_dir):
43
- snapshot_download(repo_id="ShoufaChen/PixelFlow-Text2Image", local_dir=output_dir)
44
  config = OmegaConf.load(f"{output_dir}/config.yaml")
45
- model = config_utils.instantiate_from_config(config.model)
46
  print(f"Num of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
47
  ckpt = torch.load(f"{output_dir}/model.pt", map_location="cpu", weights_only=True)
48
- text_encoder = T5EncoderModel.from_pretrained("google/flan-t5-xl")
49
  tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
50
  resolution = 1024
51
  NUM_EXAMPLES = 1
@@ -55,7 +54,6 @@ model.eval()
55
  print(f"outside space.GPU. {torch.cuda.is_available()=}")
56
  if torch.cuda.is_available():
57
  model = model.cuda()
58
- text_encoder = text_encoder.cuda() if text_encoder else None
59
  device = torch.device("cuda")
60
  else:
61
  raise ValueError("No GPU")
@@ -70,8 +68,8 @@ pipeline = PixelFlowPipeline(
70
  max_token_length=512,
71
  )
72
 
73
- @spaces.GPU(duration=120)
74
- def infer(noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
75
  print(f"inside space.GPU. {torch.cuda.is_available()=}")
76
  seed_everything(seed)
77
  with torch.autocast("cuda", dtype=torch.bfloat16), torch.no_grad():
@@ -83,7 +81,7 @@ def infer(noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
83
  guidance_scale=cfg_scale, # The guidance for the first frame, set it to 7 for 384p variant
84
  device=device,
85
  shift=noise_shift,
86
- use_ode_dopri5=False,
87
  )
88
  samples = (samples * 255).round().astype("uint8")
89
  samples = [Image.fromarray(sample) for sample in samples]
@@ -108,8 +106,8 @@ with gr.Blocks(css=css) as demo:
108
  gr.Markdown("# PixelFlow: Pixel-Space Generative Models with Flow")
109
  gr.HTML("""
110
  <div class="follow-link">
111
- For online class-to-image generation, please try
112
- <a href="https://huggingface.co/spaces/ShoufaChen/PixelFlow-Class2Image">class-to-image</a>.
113
  For more details, refer to our
114
  <a href="https://arxiv.org/abs/2504.07963">arXiv paper</a> and <a href="https://github.com/ShoufaChen/PixelFlow">GitHub repo</a>.
115
  </div>
@@ -129,6 +127,7 @@ with gr.Blocks(css=css) as demo:
129
  else:
130
  # text input
131
  user_input = gr.Textbox(label='Enter your prompt', show_label=False, max_lines=1, placeholder="Enter your prompt",)
 
132
  noise_shift = gr.Slider(minimum=1.0, maximum=100.0, step=1, value=1.0, label='Noise Shift')
133
  cfg_scale = gr.Slider(minimum=1, maximum=25, step=0.1, value=4.0, label='Classifier-free Guidance Scale')
134
  num_steps_per_stage = []
@@ -139,6 +138,6 @@ with gr.Blocks(css=css) as demo:
139
  button = gr.Button("Generate", variant="primary")
140
  with gr.Column():
141
  output = gr.Gallery(label='Generated Images', height=700)
142
- button.click(infer, inputs=[noise_shift, cfg_scale, user_input, seed, *num_steps_per_stage], outputs=[output])
143
  demo.queue()
144
- demo.launch(share=True, debug=True)
 
8
  from huggingface_hub import snapshot_download
9
 
10
  import torch
11
+ # from transformers import T5EncoderModel, AutoTokenizer
12
 
13
  from pixelflow.scheduling_pixelflow import PixelFlowScheduler
14
  from pixelflow.pipeline_pixelflow import PixelFlowPipeline
 
22
  args = parser.parse_args()
23
 
24
  # deploy
25
+ args.checkpoint = "pixelflow_c2i"
26
+ args.class_cond = True
27
 
 
28
 
29
  if args.class_cond:
30
+ output_dir = args.checkpoint
31
  if not os.path.exists(output_dir):
32
  snapshot_download(repo_id="ShoufaChen/PixelFlow-Class2Image", local_dir=output_dir)
33
  config = OmegaConf.load(f"{output_dir}/config.yaml")
 
39
  resolution = 256
40
  NUM_EXAMPLES = 4
41
  else:
42
+ raise NotImplementedError("Please run locally.")
 
43
  config = OmegaConf.load(f"{output_dir}/config.yaml")
44
+ model = config_utils.instantiate_from_config(config.model).to(device)
45
  print(f"Num of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
46
  ckpt = torch.load(f"{output_dir}/model.pt", map_location="cpu", weights_only=True)
47
+ text_encoder = T5EncoderModel.from_pretrained("google/flan-t5-xl").to(device)
48
  tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
49
  resolution = 1024
50
  NUM_EXAMPLES = 1
 
54
  print(f"outside space.GPU. {torch.cuda.is_available()=}")
55
  if torch.cuda.is_available():
56
  model = model.cuda()
 
57
  device = torch.device("cuda")
58
  else:
59
  raise ValueError("No GPU")
 
68
  max_token_length=512,
69
  )
70
 
71
+ @spaces.GPU
72
+ def infer(use_ode_dopri5, noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
73
  print(f"inside space.GPU. {torch.cuda.is_available()=}")
74
  seed_everything(seed)
75
  with torch.autocast("cuda", dtype=torch.bfloat16), torch.no_grad():
 
81
  guidance_scale=cfg_scale, # The guidance for the first frame, set it to 7 for 384p variant
82
  device=device,
83
  shift=noise_shift,
84
+ use_ode_dopri5=use_ode_dopri5,
85
  )
86
  samples = (samples * 255).round().astype("uint8")
87
  samples = [Image.fromarray(sample) for sample in samples]
 
106
  gr.Markdown("# PixelFlow: Pixel-Space Generative Models with Flow")
107
  gr.HTML("""
108
  <div class="follow-link">
109
+ For online text-to-image generation, please try
110
+ <a href="https://huggingface.co/spaces/ShoufaChen/PixelFlow-Text2Image">text-to-image</a>.
111
  For more details, refer to our
112
  <a href="https://arxiv.org/abs/2504.07963">arXiv paper</a> and <a href="https://github.com/ShoufaChen/PixelFlow">GitHub repo</a>.
113
  </div>
 
127
  else:
128
  # text input
129
  user_input = gr.Textbox(label='Enter your prompt', show_label=False, max_lines=1, placeholder="Enter your prompt",)
130
+ ode_dopri5 = gr.Checkbox(label="Dopri5 ODE", info="Use Dopri5 ODE solver")
131
  noise_shift = gr.Slider(minimum=1.0, maximum=100.0, step=1, value=1.0, label='Noise Shift')
132
  cfg_scale = gr.Slider(minimum=1, maximum=25, step=0.1, value=4.0, label='Classifier-free Guidance Scale')
133
  num_steps_per_stage = []
 
138
  button = gr.Button("Generate", variant="primary")
139
  with gr.Column():
140
  output = gr.Gallery(label='Generated Images', height=700)
141
+ button.click(infer, inputs=[ode_dopri5, noise_shift, cfg_scale, user_input, seed, *num_steps_per_stage], outputs=[output])
142
  demo.queue()
143
+ demo.launch(share=True, debug=True)