# Page-header residue from the Hugging Face Spaces listing ("Spaces: Sleeping");
# not part of the script.
import os | |
import torch | |
import streamlit as st | |
from diffusers import AutoencoderKL, DDIMScheduler | |
from transformers import CLIPTextModel, CLIPTokenizer | |
from src.mgd_pipelines.mgd_pipe import MGDPipe | |
from src.mgd_pipelines.mgd_pipe_disentangled import MGDPipeDisentangled | |
from src.utils.image_from_pipe import generate_images_from_mgd_pipe | |
from accelerate import Accelerator | |
from diffusers.utils import check_min_version | |
from src.utils.set_seeds import set_seed | |
# Fail fast when the installed diffusers is older than the minimum required
# version. Remove this guard at your own risk.
check_min_version("0.10.0.dev0")

# Environment variables for running on Hugging Face Spaces.
os.environ.update(
    {
        "TOKENIZERS_PARALLELISM": "true",
        "WANDB_START_METHOD": "thread",
    }
)
# ---- Streamlit UI: page title followed by the generation parameters ----
st.title("Fashion Image Generation with Multimodal Garment Designer")

# The three guidance sliders share one range, default and step.
_guidance_slider_opts = dict(min_value=0.1, max_value=20.0, value=7.5, step=0.1)
# The two conditioning-rate sliders share one range, default and step.
_cond_rate_slider_opts = dict(min_value=0.1, max_value=1.0, value=0.5, step=0.05)

# Widgets are created in the order they should appear on the page.
category = st.selectbox(
    "Select Category",
    ["dresses", "upper_body", "lower_body", "all"],
)
guidance_scale = st.slider("Guidance Scale", **_guidance_slider_opts)
guidance_scale_pose = st.slider("Guidance Scale (Pose)", **_guidance_slider_opts)
guidance_scale_sketch = st.slider("Guidance Scale (Sketch)", **_guidance_slider_opts)
sketch_cond_rate = st.slider("Sketch Conditioning Rate", **_cond_rate_slider_opts)
start_cond_rate = st.slider("Start Conditioning Rate", **_cond_rate_slider_opts)
seed = st.number_input("Seed", min_value=1, value=42)
# Dataset variant used both to pick the pretrained MGD UNet weights and to
# drive the generation helper; a single constant keeps the two in agreement.
_MGD_DATASET = "dresscode"

# Base checkpoint that supplies the scheduler, tokenizer, text encoder and VAE.
# NOTE(review): the UNet is loaded separately through torch.hub; confirm that
# this checkpoint's text encoder and VAE are compatible with that UNet.
_BASE_MODEL_NAME = "stabilityai/stable-diffusion-2-1-base"


def _build_mgd_pipeline(
    device: torch.device,
    dataset: str = _MGD_DATASET,
    model_name: str = _BASE_MODEL_NAME,
) -> MGDPipe:
    """Load every model component and assemble an ``MGDPipe`` on ``device``.

    Args:
        device: Device the scheduler timesteps and the pipeline are placed on.
        dataset: Dataset variant forwarded to the ``mgd`` torch.hub entrypoint.
        model_name: Hugging Face model id providing the scheduler, tokenizer,
            text encoder and VAE sub-folders.

    Returns:
        The assembled pipeline, moved to ``device``.
    """
    scheduler = DDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
    scheduler.set_timesteps(50, device=device)
    tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder="tokenizer")
    text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder="text_encoder")
    vae = AutoencoderKL.from_pretrained(model_name, subfolder="vae")

    # Extra keyword arguments are forwarded by torch.hub to the `mgd`
    # entrypoint. `dataset` must name a dataset variant; it was previously
    # given the GitHub repository name, which disagreed with the "dresscode"
    # value passed to the generation call.
    unet = torch.hub.load(
        repo_or_dir="aimagelab/multimodal-garment-designer",
        source="github",
        model="mgd",
        dataset=dataset,
        pretrained=True,
    )

    # Inference only: freeze the VAE and the text encoder.
    vae.requires_grad_(False)
    text_encoder.requires_grad_(False)

    pipeline = MGDPipe(
        text_encoder=text_encoder,
        vae=vae,
        unet=unet.to(vae.dtype),  # keep the UNet in the same dtype as the VAE
        tokenizer=tokenizer,
        scheduler=scheduler,
    )
    return pipeline.to(device)


# Button to run the image generation
if st.button("Generate Image"):
    # Initialize Accelerator (for mixed precision, etc.) and pick its device.
    accelerator = Accelerator()
    device = accelerator.device

    # Seed before any model is loaded, as the original script did.
    set_seed(seed)

    val_pipe = _build_mgd_pipeline(device)

    # Run image generation without tracking gradients.
    with torch.no_grad():
        images = generate_images_from_mgd_pipe(
            test_order="test",  # or some predefined order
            pipe=val_pipe,
            # NOTE(review): no dataloader is supplied; confirm the helper
            # accepts None, or plug in a real dataset here.
            test_dataloader=None,
            save_name="generated_image",
            dataset=_MGD_DATASET,
            output_dir=".",  # save location
            guidance_scale=guidance_scale,
            guidance_scale_pose=guidance_scale_pose,
            guidance_scale_sketch=guidance_scale_sketch,
            sketch_cond_rate=sketch_cond_rate,
            start_cond_rate=start_cond_rate,
            no_pose=False,
            disentagle=False,  # spelling matches the helper's keyword as called here
            seed=seed,
        )

    # Display the generated image.
    # NOTE(review): assumes the helper returns an indexable collection of
    # images — confirm against its implementation.
    st.image(images[0], caption="Generated Fashion Image", use_column_width=True)