Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from omegaconf import OmegaConf
|
|
8 |
from huggingface_hub import snapshot_download
|
9 |
|
10 |
import torch
|
11 |
-
from transformers import T5EncoderModel, AutoTokenizer
|
12 |
|
13 |
from pixelflow.scheduling_pixelflow import PixelFlowScheduler
|
14 |
from pixelflow.pipeline_pixelflow import PixelFlowPipeline
|
@@ -22,12 +22,12 @@ parser.add_argument('--class_cond', action='store_true', help='use class conditi
|
|
22 |
args = parser.parse_args()
|
23 |
|
24 |
# deploy
|
25 |
-
args.checkpoint = "
|
26 |
-
args.class_cond =
|
27 |
|
28 |
-
output_dir = args.checkpoint
|
29 |
|
30 |
if args.class_cond:
|
|
|
31 |
if not os.path.exists(output_dir):
|
32 |
snapshot_download(repo_id="ShoufaChen/PixelFlow-Class2Image", local_dir=output_dir)
|
33 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
@@ -39,13 +39,12 @@ if args.class_cond:
|
|
39 |
resolution = 256
|
40 |
NUM_EXAMPLES = 4
|
41 |
else:
|
42 |
-
|
43 |
-
snapshot_download(repo_id="ShoufaChen/PixelFlow-Text2Image", local_dir=output_dir)
|
44 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
45 |
-
model = config_utils.instantiate_from_config(config.model)
|
46 |
print(f"Num of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
|
47 |
ckpt = torch.load(f"{output_dir}/model.pt", map_location="cpu", weights_only=True)
|
48 |
-
text_encoder = T5EncoderModel.from_pretrained("google/flan-t5-xl")
|
49 |
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
|
50 |
resolution = 1024
|
51 |
NUM_EXAMPLES = 1
|
@@ -55,7 +54,6 @@ model.eval()
|
|
55 |
print(f"outside space.GPU. {torch.cuda.is_available()=}")
|
56 |
if torch.cuda.is_available():
|
57 |
model = model.cuda()
|
58 |
-
text_encoder = text_encoder.cuda() if text_encoder else None
|
59 |
device = torch.device("cuda")
|
60 |
else:
|
61 |
raise ValueError("No GPU")
|
@@ -70,8 +68,8 @@ pipeline = PixelFlowPipeline(
|
|
70 |
max_token_length=512,
|
71 |
)
|
72 |
|
73 |
-
@spaces.GPU
|
74 |
-
def infer(noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
|
75 |
print(f"inside space.GPU. {torch.cuda.is_available()=}")
|
76 |
seed_everything(seed)
|
77 |
with torch.autocast("cuda", dtype=torch.bfloat16), torch.no_grad():
|
@@ -83,7 +81,7 @@ def infer(noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
|
|
83 |
guidance_scale=cfg_scale, # The guidance for the first frame, set it to 7 for 384p variant
|
84 |
device=device,
|
85 |
shift=noise_shift,
|
86 |
-
use_ode_dopri5=
|
87 |
)
|
88 |
samples = (samples * 255).round().astype("uint8")
|
89 |
samples = [Image.fromarray(sample) for sample in samples]
|
@@ -108,8 +106,8 @@ with gr.Blocks(css=css) as demo:
|
|
108 |
gr.Markdown("# PixelFlow: Pixel-Space Generative Models with Flow")
|
109 |
gr.HTML("""
|
110 |
<div class="follow-link">
|
111 |
-
For online
|
112 |
-
<a href="https://huggingface.co/spaces/ShoufaChen/PixelFlow-
|
113 |
For more details, refer to our
|
114 |
<a href="https://arxiv.org/abs/2504.07963">arXiv paper</a> and <a href="https://github.com/ShoufaChen/PixelFlow">GitHub repo</a>.
|
115 |
</div>
|
@@ -129,6 +127,7 @@ with gr.Blocks(css=css) as demo:
|
|
129 |
else:
|
130 |
# text input
|
131 |
user_input = gr.Textbox(label='Enter your prompt', show_label=False, max_lines=1, placeholder="Enter your prompt",)
|
|
|
132 |
noise_shift = gr.Slider(minimum=1.0, maximum=100.0, step=1, value=1.0, label='Noise Shift')
|
133 |
cfg_scale = gr.Slider(minimum=1, maximum=25, step=0.1, value=4.0, label='Classifier-free Guidance Scale')
|
134 |
num_steps_per_stage = []
|
@@ -139,6 +138,6 @@ with gr.Blocks(css=css) as demo:
|
|
139 |
button = gr.Button("Generate", variant="primary")
|
140 |
with gr.Column():
|
141 |
output = gr.Gallery(label='Generated Images', height=700)
|
142 |
-
button.click(infer, inputs=[noise_shift, cfg_scale, user_input, seed, *num_steps_per_stage], outputs=[output])
|
143 |
demo.queue()
|
144 |
-
demo.launch(share=True, debug=True)
|
|
|
8 |
from huggingface_hub import snapshot_download
|
9 |
|
10 |
import torch
|
11 |
+
# from transformers import T5EncoderModel, AutoTokenizer
|
12 |
|
13 |
from pixelflow.scheduling_pixelflow import PixelFlowScheduler
|
14 |
from pixelflow.pipeline_pixelflow import PixelFlowPipeline
|
|
|
22 |
args = parser.parse_args()
|
23 |
|
24 |
# deploy
|
25 |
+
args.checkpoint = "pixelflow_c2i"
|
26 |
+
args.class_cond = True
|
27 |
|
|
|
28 |
|
29 |
if args.class_cond:
|
30 |
+
output_dir = args.checkpoint
|
31 |
if not os.path.exists(output_dir):
|
32 |
snapshot_download(repo_id="ShoufaChen/PixelFlow-Class2Image", local_dir=output_dir)
|
33 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
|
|
39 |
resolution = 256
|
40 |
NUM_EXAMPLES = 4
|
41 |
else:
|
42 |
+
raise NotImplementedError("Please run locally.")
|
|
|
43 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
44 |
+
model = config_utils.instantiate_from_config(config.model).to(device)
|
45 |
print(f"Num of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
|
46 |
ckpt = torch.load(f"{output_dir}/model.pt", map_location="cpu", weights_only=True)
|
47 |
+
text_encoder = T5EncoderModel.from_pretrained("google/flan-t5-xl").to(device)
|
48 |
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
|
49 |
resolution = 1024
|
50 |
NUM_EXAMPLES = 1
|
|
|
54 |
print(f"outside space.GPU. {torch.cuda.is_available()=}")
|
55 |
if torch.cuda.is_available():
|
56 |
model = model.cuda()
|
|
|
57 |
device = torch.device("cuda")
|
58 |
else:
|
59 |
raise ValueError("No GPU")
|
|
|
68 |
max_token_length=512,
|
69 |
)
|
70 |
|
71 |
+
@spaces.GPU
|
72 |
+
def infer(use_ode_dopri5, noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
|
73 |
print(f"inside space.GPU. {torch.cuda.is_available()=}")
|
74 |
seed_everything(seed)
|
75 |
with torch.autocast("cuda", dtype=torch.bfloat16), torch.no_grad():
|
|
|
81 |
guidance_scale=cfg_scale, # The guidance for the first frame, set it to 7 for 384p variant
|
82 |
device=device,
|
83 |
shift=noise_shift,
|
84 |
+
use_ode_dopri5=use_ode_dopri5,
|
85 |
)
|
86 |
samples = (samples * 255).round().astype("uint8")
|
87 |
samples = [Image.fromarray(sample) for sample in samples]
|
|
|
106 |
gr.Markdown("# PixelFlow: Pixel-Space Generative Models with Flow")
|
107 |
gr.HTML("""
|
108 |
<div class="follow-link">
|
109 |
+
For online text-to-image generation, please try
|
110 |
+
<a href="https://huggingface.co/spaces/ShoufaChen/PixelFlow-Text2Image">text-to-image</a>.
|
111 |
For more details, refer to our
|
112 |
<a href="https://arxiv.org/abs/2504.07963">arXiv paper</a> and <a href="https://github.com/ShoufaChen/PixelFlow">GitHub repo</a>.
|
113 |
</div>
|
|
|
127 |
else:
|
128 |
# text input
|
129 |
user_input = gr.Textbox(label='Enter your prompt', show_label=False, max_lines=1, placeholder="Enter your prompt",)
|
130 |
+
ode_dopri5 = gr.Checkbox(label="Dopri5 ODE", info="Use Dopri5 ODE solver")
|
131 |
noise_shift = gr.Slider(minimum=1.0, maximum=100.0, step=1, value=1.0, label='Noise Shift')
|
132 |
cfg_scale = gr.Slider(minimum=1, maximum=25, step=0.1, value=4.0, label='Classifier-free Guidance Scale')
|
133 |
num_steps_per_stage = []
|
|
|
138 |
button = gr.Button("Generate", variant="primary")
|
139 |
with gr.Column():
|
140 |
output = gr.Gallery(label='Generated Images', height=700)
|
141 |
+
button.click(infer, inputs=[ode_dopri5, noise_shift, cfg_scale, user_input, seed, *num_steps_per_stage], outputs=[output])
|
142 |
demo.queue()
|
143 |
+
demo.launch(share=True, debug=True)
|