Spaces:

ShoufaChen
/

PixelFlow

Running on Zero

App Files Files Community

ShoufaChen committed on Apr 13

Commit

3d5f671

verified ·

1 Parent(s): 218c9e9

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -16

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from omegaconf import OmegaConf
 from huggingface_hub import snapshot_download
 import torch
-from transformers import T5EncoderModel, AutoTokenizer
 from pixelflow.scheduling_pixelflow import PixelFlowScheduler
 from pixelflow.pipeline_pixelflow import PixelFlowPipeline
@@ -22,12 +22,12 @@ parser.add_argument('--class_cond', action='store_true', help='use class conditi
 args = parser.parse_args()
 # deploy
-args.checkpoint = "pixelflow_t2i"
-args.class_cond = False
-output_dir = args.checkpoint
 if args.class_cond:
     if not os.path.exists(output_dir):
         snapshot_download(repo_id="ShoufaChen/PixelFlow-Class2Image", local_dir=output_dir)
     config = OmegaConf.load(f"{output_dir}/config.yaml")
@@ -39,13 +39,12 @@ if args.class_cond:
     resolution = 256
     NUM_EXAMPLES = 4
 else:
-    if not os.path.exists(output_dir):
-        snapshot_download(repo_id="ShoufaChen/PixelFlow-Text2Image", local_dir=output_dir)
     config = OmegaConf.load(f"{output_dir}/config.yaml")
-    model = config_utils.instantiate_from_config(config.model)
     print(f"Num of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
     ckpt = torch.load(f"{output_dir}/model.pt", map_location="cpu", weights_only=True)
-    text_encoder = T5EncoderModel.from_pretrained("google/flan-t5-xl")
     tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
     resolution = 1024
     NUM_EXAMPLES = 1
@@ -55,7 +54,6 @@ model.eval()
 print(f"outside space.GPU. {torch.cuda.is_available()=}")
 if torch.cuda.is_available():
     model = model.cuda()
-    text_encoder = text_encoder.cuda() if text_encoder else None
     device = torch.device("cuda")
 else:
     raise ValueError("No GPU")
@@ -70,8 +68,8 @@ pipeline = PixelFlowPipeline(
     max_token_length=512,
 )
-@spaces.GPU(duration=120)
-def infer(noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
     print(f"inside space.GPU. {torch.cuda.is_available()=}")
     seed_everything(seed)
     with torch.autocast("cuda", dtype=torch.bfloat16), torch.no_grad():
@@ -83,7 +81,7 @@ def infer(noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
             guidance_scale=cfg_scale,         # The guidance for the first frame, set it to 7 for 384p variant
             device=device,
             shift=noise_shift,
-            use_ode_dopri5=False,
         )
     samples = (samples * 255).round().astype("uint8")
     samples = [Image.fromarray(sample) for sample in samples]
@@ -108,8 +106,8 @@ with gr.Blocks(css=css) as demo:
     gr.Markdown("# PixelFlow: Pixel-Space Generative Models with Flow")
     gr.HTML("""
         <div class="follow-link">
-            For online class-to-image generation, please try
-            <a href="https://huggingface.co/spaces/ShoufaChen/PixelFlow-Class2Image">class-to-image</a>.
             For more details, refer to our
                 <a href="https://arxiv.org/abs/2504.07963">arXiv paper</a> and <a href="https://github.com/ShoufaChen/PixelFlow">GitHub repo</a>.
         </div>
@@ -129,6 +127,7 @@ with gr.Blocks(css=css) as demo:
                         else:
                             # text input
                             user_input = gr.Textbox(label='Enter your prompt', show_label=False, max_lines=1, placeholder="Enter your prompt",)
                     noise_shift = gr.Slider(minimum=1.0, maximum=100.0, step=1, value=1.0, label='Noise Shift')
                     cfg_scale = gr.Slider(minimum=1, maximum=25, step=0.1, value=4.0, label='Classifier-free Guidance Scale')
                     num_steps_per_stage = []
@@ -139,6 +138,6 @@ with gr.Blocks(css=css) as demo:
                     button = gr.Button("Generate", variant="primary")
                 with gr.Column():
                     output = gr.Gallery(label='Generated Images', height=700)
-                    button.click(infer, inputs=[noise_shift, cfg_scale, user_input, seed, *num_steps_per_stage], outputs=[output])
     demo.queue()
-    demo.launch(share=True, debug=True)

 from huggingface_hub import snapshot_download
 import torch
+# from transformers import T5EncoderModel, AutoTokenizer
 from pixelflow.scheduling_pixelflow import PixelFlowScheduler
 from pixelflow.pipeline_pixelflow import PixelFlowPipeline
 args = parser.parse_args()
 # deploy
+args.checkpoint = "pixelflow_c2i"
+args.class_cond = True
 if args.class_cond:
+    output_dir = args.checkpoint
     if not os.path.exists(output_dir):
         snapshot_download(repo_id="ShoufaChen/PixelFlow-Class2Image", local_dir=output_dir)
     config = OmegaConf.load(f"{output_dir}/config.yaml")
     resolution = 256
     NUM_EXAMPLES = 4
 else:
+    raise NotImplementedError("Please run locally.")
     config = OmegaConf.load(f"{output_dir}/config.yaml")
+    model = config_utils.instantiate_from_config(config.model).to(device)
     print(f"Num of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
     ckpt = torch.load(f"{output_dir}/model.pt", map_location="cpu", weights_only=True)
+    text_encoder = T5EncoderModel.from_pretrained("google/flan-t5-xl").to(device)
     tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
     resolution = 1024
     NUM_EXAMPLES = 1
 print(f"outside space.GPU. {torch.cuda.is_available()=}")
 if torch.cuda.is_available():
     model = model.cuda()
     device = torch.device("cuda")
 else:
     raise ValueError("No GPU")
     max_token_length=512,
 )
+@spaces.GPU
+def infer(use_ode_dopri5, noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
     print(f"inside space.GPU. {torch.cuda.is_available()=}")
     seed_everything(seed)
     with torch.autocast("cuda", dtype=torch.bfloat16), torch.no_grad():
             guidance_scale=cfg_scale,         # The guidance for the first frame, set it to 7 for 384p variant
             device=device,
             shift=noise_shift,
+            use_ode_dopri5=use_ode_dopri5,
         )
     samples = (samples * 255).round().astype("uint8")
     samples = [Image.fromarray(sample) for sample in samples]
     gr.Markdown("# PixelFlow: Pixel-Space Generative Models with Flow")
     gr.HTML("""
         <div class="follow-link">
+            For online text-to-image generation, please try
+            <a href="https://huggingface.co/spaces/ShoufaChen/PixelFlow-Text2Image">text-to-image</a>.
             For more details, refer to our
                 <a href="https://arxiv.org/abs/2504.07963">arXiv paper</a> and <a href="https://github.com/ShoufaChen/PixelFlow">GitHub repo</a>.
         </div>
                         else:
                             # text input
                             user_input = gr.Textbox(label='Enter your prompt', show_label=False, max_lines=1, placeholder="Enter your prompt",)
+                    ode_dopri5 = gr.Checkbox(label="Dopri5 ODE", info="Use Dopri5 ODE solver")
                     noise_shift = gr.Slider(minimum=1.0, maximum=100.0, step=1, value=1.0, label='Noise Shift')
                     cfg_scale = gr.Slider(minimum=1, maximum=25, step=0.1, value=4.0, label='Classifier-free Guidance Scale')
                     num_steps_per_stage = []
                     button = gr.Button("Generate", variant="primary")
                 with gr.Column():
                     output = gr.Gallery(label='Generated Images', height=700)
+                    button.click(infer, inputs=[ode_dopri5, noise_shift, cfg_scale, user_input, seed, *num_steps_per_stage], outputs=[output])
     demo.queue()
+    demo.launch(share=True, debug=True)