|
import gradio as gr |
|
import numpy as np |
|
from PIL import Image, ImageDraw |
|
from gradio_client import Client, handle_file |
|
import random |
|
import tempfile |
|
import os |
|
import logging |
|
import torch |
|
from diffusers import AutoencoderKL, TCDScheduler |
|
from diffusers.models.model_loading_utils import load_state_dict |
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
try: |
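    # `spaces` provides the @spaces.GPU decorator on Hugging Face ZeroGPU Spaces;
    # a no-op stand-in is defined below for local runs.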
|
import spaces |
|
except ImportError:
|
|
|
class spaces: |
|
@staticmethod |
|
def GPU(duration=None): |
|
def decorator(func): |
|
return func |
|
return decorator |
|
|
|
|
|
os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1" |
|
|
|
|
|
try: |
|
import mmaudio |
|
from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video, |
|
setup_eval_logging) |
|
from mmaudio.model.flow_matching import FlowMatching |
|
from mmaudio.model.networks import MMAudio, get_my_mmaudio |
|
from mmaudio.model.sequence_config import SequenceConfig |
|
from mmaudio.model.utils.features_utils import FeaturesUtils |
|
MMAUDIO_AVAILABLE = True |
|
except ImportError: |
|
MMAUDIO_AVAILABLE = False |
|
logging.warning("MMAudio not available. Sound generation will be disabled.") |
|
|
|
|
|
try: |
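    # Load the SDXL outpainting stack: ControlNet-Union (promax) weights, the fp16-fixed SDXL VAE,
    # and RealVisXL V5.0 Lightning wired into a fill pipeline with a TCD scheduler.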
|
from controlnet_union import ControlNetModel_Union |
|
from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline |
|
|
|
|
|
config_file = hf_hub_download( |
|
"xinsir/controlnet-union-sdxl-1.0", |
|
filename="config_promax.json", |
|
) |
|
|
|
config = ControlNetModel_Union.load_config(config_file) |
|
controlnet_model = ControlNetModel_Union.from_config(config) |
|
|
|
model_file = hf_hub_download( |
|
"xinsir/controlnet-union-sdxl-1.0", |
|
filename="diffusion_pytorch_model_promax.safetensors", |
|
) |
|
state_dict = load_state_dict(model_file) |
|
loaded_keys = list(state_dict.keys()) |
|
|
|
result = ControlNetModel_Union._load_pretrained_model( |
|
controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys |
|
) |
|
|
|
model = result[0] |
|
model = model.to(device="cuda", dtype=torch.float16) |
|
|
|
|
|
vae = AutoencoderKL.from_pretrained( |
|
"madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16 |
|
).to("cuda") |
|
|
|
|
|
pipe = StableDiffusionXLFillPipeline.from_pretrained( |
|
"SG161222/RealVisXL_V5.0_Lightning", |
|
torch_dtype=torch.float16, |
|
vae=vae, |
|
controlnet=model, |
|
variant="fp16", |
|
).to("cuda") |
|
|
|
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config) |
|
|
|
OUTPAINT_MODEL_LOADED = True |
|
except Exception as e: |
|
logging.error(f"Failed to load outpainting models: {str(e)}") |
|
OUTPAINT_MODEL_LOADED = False |
|
|
|
|
|
if MMAUDIO_AVAILABLE: |
|
try: |
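        # Select the compute device and enable TF32 / cuDNN autotuning on CUDA.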
|
|
|
if torch.cuda.is_available(): |
|
device = torch.device("cuda") |
|
torch.backends.cuda.matmul.allow_tf32 = True |
|
torch.backends.cudnn.allow_tf32 = True |
|
torch.backends.cudnn.benchmark = True |
|
else: |
|
device = torch.device("cpu") |
|
|
|
dtype = torch.bfloat16 |
|
|
|
|
|
model_cfg: ModelConfig = all_model_cfg['large_44k_v2'] |
|
model_cfg.download_if_needed() |
|
|
|
setup_eval_logging() |
|
|
|
|
|
def get_mmaudio_model(): |
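            # Build the MMAudio network and FeaturesUtils (VAE, Synchformer, vocoder); called once at import time.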
|
with torch.cuda.device(device): |
|
seq_cfg = model_cfg.seq_cfg |
|
net: MMAudio = get_my_mmaudio(model_cfg.model_name).to(device, dtype).eval() |
|
net.load_weights(torch.load(model_cfg.model_path, map_location=device, weights_only=True)) |
|
logging.info(f'Loaded MMAudio weights from {model_cfg.model_path}') |
|
|
|
feature_utils = FeaturesUtils( |
|
tod_vae_ckpt=model_cfg.vae_path, |
|
synchformer_ckpt=model_cfg.synchformer_ckpt, |
|
enable_conditions=True, |
|
mode=model_cfg.mode, |
|
bigvgan_vocoder_ckpt=model_cfg.bigvgan_16k_path, |
|
need_vae_encoder=False |
|
).to(device, dtype).eval() |
|
|
|
return net, feature_utils, seq_cfg |
|
|
|
mmaudio_net, mmaudio_feature_utils, mmaudio_seq_cfg = get_mmaudio_model() |
|
MMAUDIO_LOADED = True |
|
except Exception as e: |
|
logging.error(f"Failed to load MMAudio models: {str(e)}") |
|
MMAUDIO_LOADED = False |
|
else: |
|
MMAUDIO_LOADED = False |
|
|
|
|
|
TEXT2IMG_API_URL = "http://211.233.58.201:7896" |
|
VIDEO_API_URL = "http://211.233.58.201:7875" |
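# Both endpoints are external Gradio apps accessed through gradio_client.Client.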
|
|
|
|
|
logging.basicConfig(level=logging.INFO) |
|
|
|
|
|
IMAGE_PRESETS = { |
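    # Label -> pixel dimensions for the size presets offered in both tabs.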
|
"์ปค์คํ
": {"width": 1024, "height": 1024}, |
|
"1:1 ์ ์ฌ๊ฐํ": {"width": 1024, "height": 1024}, |
|
"4:3 ํ์ค": {"width": 1024, "height": 768}, |
|
"16:9 ์์ด๋์คํฌ๋ฆฐ": {"width": 1024, "height": 576}, |
|
"9:16 ์ธ๋กํ": {"width": 576, "height": 1024}, |
|
"6:19 ํน์ ์ธ๋กํ": {"width": 324, "height": 1024}, |
|
"Instagram ์ ์ฌ๊ฐํ": {"width": 1080, "height": 1080}, |
|
"Instagram ์คํ ๋ฆฌ": {"width": 1080, "height": 1920}, |
|
"Instagram ๊ฐ๋กํ": {"width": 1080, "height": 566}, |
|
"Facebook ์ปค๋ฒ": {"width": 820, "height": 312}, |
|
"Twitter ํค๋": {"width": 1500, "height": 500}, |
|
"YouTube ์ธ๋ค์ผ": {"width": 1280, "height": 720}, |
|
"LinkedIn ๋ฐฐ๋": {"width": 1584, "height": 396}, |
|
} |
|
|
|
def update_dimensions(preset): |
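    # Return the (width, height) for a preset, falling back to 1024x1024.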
|
if preset in IMAGE_PRESETS: |
|
return IMAGE_PRESETS[preset]["width"], IMAGE_PRESETS[preset]["height"] |
|
return 1024, 1024 |
|
|
|
def generate_text_to_image(prompt, width, height, guidance, inference_steps, seed): |
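    # Call the remote text-to-image endpoint and return (image, seed-info message).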
|
if not prompt: |
|
        return None, "프롬프트를 입력해주세요"
|
|
|
try: |
|
client = Client(TEXT2IMG_API_URL) |
|
if seed == -1: |
|
seed = random.randint(0, 9999999) |
|
|
|
result = client.predict( |
|
prompt=prompt, |
|
width=int(width), |
|
height=int(height), |
|
guidance=float(guidance), |
|
inference_steps=int(inference_steps), |
|
seed=int(seed), |
|
do_img2img=False, |
|
init_image=None, |
|
image2image_strength=0.8, |
|
resize_img=True, |
|
api_name="/generate_image" |
|
) |
|
return result[0], f"์ฌ์ฉ๋ ์๋: {result[1]}" |
|
except Exception as e: |
|
logging.error(f"Image generation error: {str(e)}") |
|
return None, f"์ค๋ฅ: {str(e)}" |
|
|
|
@spaces.GPU(duration=60) |
|
@torch.inference_mode() |
|
def video_to_audio(video_path, prompt, negative_prompt="music", seed=0, num_steps=25, cfg_strength=4.5, target_duration=8.0): |
|
"""๋น๋์ค์ ์ฌ์ด๋๋ฅผ ์ถ๊ฐํ๋ ํจ์""" |
|
if not MMAUDIO_LOADED: |
|
logging.error("MMAudio model not loaded") |
|
return video_path |
|
|
|
try: |
|
rng = torch.Generator(device=device) |
|
rng.manual_seed(seed) |
|
fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps) |
|
|
|
|
|
clip_frames, sync_frames, actual_duration = load_video(video_path, target_duration) |
|
clip_frames = clip_frames.unsqueeze(0) |
|
sync_frames = sync_frames.unsqueeze(0) |
|
mmaudio_seq_cfg.duration = actual_duration |
|
mmaudio_net.update_seq_lengths(mmaudio_seq_cfg.latent_seq_len, mmaudio_seq_cfg.clip_seq_len, mmaudio_seq_cfg.sync_seq_len) |
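        # Generate the waveform conditioned on the video frames and the text prompt.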
|
|
|
|
|
audios = generate(clip_frames, |
|
sync_frames, [prompt], |
|
negative_text=[negative_prompt], |
|
feature_utils=mmaudio_feature_utils, |
|
net=mmaudio_net, |
|
fm=fm, |
|
rng=rng, |
|
cfg_strength=cfg_strength) |
|
audio = audios.float().cpu()[0] |
|
|
|
|
|
video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name |
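        # Mux the generated audio onto the original video and write the result to a temp file.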
|
make_video(video_path, |
|
video_save_path, |
|
audio, |
|
sampling_rate=mmaudio_seq_cfg.sampling_rate, |
|
duration_sec=mmaudio_seq_cfg.duration) |
|
|
|
return video_save_path |
|
except Exception as e: |
|
logging.error(f"Video to audio error: {str(e)}") |
|
import traceback |
|
traceback.print_exc() |
|
return video_path |
|
|
|
def generate_video_from_image(image, prompt="", length=4.0, sound_generation="์ฌ์ด๋ ์์", sound_prompt="", sound_negative_prompt="music"): |
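    # Send the image to the remote image-to-video endpoint; optionally add an MMAudio soundtrack afterwards.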
|
if image is None: |
|
return None |
|
|
|
try: |
|
|
|
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as fp: |
|
temp_path = fp.name |
|
Image.fromarray(image).save(temp_path) |
|
|
|
|
|
client = Client(VIDEO_API_URL) |
|
result = client.predict( |
|
input_image=handle_file(temp_path), |
|
prompt=prompt if prompt else "Generate natural motion", |
|
n_prompt="", |
|
seed=random.randint(0, 9999999), |
|
use_teacache=True, |
|
video_length=float(length), |
|
api_name="/process" |
|
) |
|
|
|
os.unlink(temp_path) |
|
|
|
if result and len(result) > 0: |
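            # The endpoint returns a list whose first element is a dict holding the video file path.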
|
video_dict = result[0] |
|
video_path = video_dict.get("video") if isinstance(video_dict, dict) else None |
|
|
|
|
|
if video_path: |
|
|
|
if sound_generation == "์ฌ์ด๋ ์์ฑ" and MMAUDIO_LOADED: |
|
try: |
|
|
|
if not sound_prompt: |
|
sound_prompt = prompt if prompt else "ambient sound" |
|
|
|
|
|
video_with_sound = video_to_audio( |
|
video_path=video_path, |
|
prompt=sound_prompt, |
|
negative_prompt=sound_negative_prompt, |
|
seed=random.randint(0, 9999999), |
|
num_steps=25, |
|
cfg_strength=4.5, |
|
target_duration=length |
|
) |
|
return video_with_sound |
|
except Exception as e: |
|
logging.error(f"Sound generation error: {str(e)}") |
|
|
|
return video_path |
|
|
|
|
|
return video_path |
|
|
|
except Exception as e: |
|
logging.error(f"Video generation error: {str(e)}") |
|
return None |
|
|
|
def prepare_image_and_mask(image, width, height, overlap_percentage, alignment): |
|
"""์ด๋ฏธ์ง์ ๋ง์คํฌ๋ฅผ ์ค๋นํ๋ ํจ์""" |
|
if image is None: |
|
return None, None |
|
|
|
|
|
if isinstance(image, np.ndarray): |
|
image = Image.fromarray(image).convert('RGB') |
|
|
|
target_size = (width, height) |
|
|
|
|
|
scale_factor = min(target_size[0] / image.width, target_size[1] / image.height) |
|
new_width = int(image.width * scale_factor) |
|
new_height = int(image.height * scale_factor) |
|
|
|
|
|
source = image.resize((new_width, new_height), Image.LANCZOS) |
|
|
|
|
|
overlap_x = int(new_width * (overlap_percentage / 100)) |
|
overlap_y = int(new_height * (overlap_percentage / 100)) |
|
overlap_x = max(overlap_x, 1) |
|
overlap_y = max(overlap_y, 1) |
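    # Place the source image on the target canvas according to the chosen alignment.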
|
|
|
|
|
if alignment == "๊ฐ์ด๋ฐ": |
|
margin_x = (target_size[0] - new_width) // 2 |
|
margin_y = (target_size[1] - new_height) // 2 |
|
elif alignment == "์ผ์ชฝ": |
|
margin_x = 0 |
|
margin_y = (target_size[1] - new_height) // 2 |
|
elif alignment == "์ค๋ฅธ์ชฝ": |
|
margin_x = target_size[0] - new_width |
|
margin_y = (target_size[1] - new_height) // 2 |
|
elif alignment == "์": |
|
margin_x = (target_size[0] - new_width) // 2 |
|
margin_y = 0 |
|
elif alignment == "์๋": |
|
margin_x = (target_size[0] - new_width) // 2 |
|
margin_y = target_size[1] - new_height |
|
|
|
|
|
background = Image.new('RGB', target_size, (255, 255, 255)) |
|
background.paste(source, (margin_x, margin_y)) |
|
|
|
|
|
mask = Image.new('L', target_size, 255) |
|
mask_draw = ImageDraw.Draw(mask) |
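    # The mask is white (255) where new content will be generated; the rectangle below
    # protects the pasted source region minus the overlap band.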
|
|
|
|
|
white_gaps_patch = 2 |
|
|
|
left_overlap = margin_x + overlap_x if alignment != "์ผ์ชฝ" else margin_x |
|
right_overlap = margin_x + new_width - overlap_x if alignment != "์ค๋ฅธ์ชฝ" else margin_x + new_width |
|
top_overlap = margin_y + overlap_y if alignment != "์" else margin_y |
|
bottom_overlap = margin_y + new_height - overlap_y if alignment != "์๋" else margin_y + new_height |
|
|
|
mask_draw.rectangle([ |
|
(left_overlap, top_overlap), |
|
(right_overlap, bottom_overlap) |
|
], fill=0) |
|
|
|
return background, mask |
|
|
|
@spaces.GPU(duration=24) |
|
def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8): |
|
"""์ด๋ฏธ์ง ์์ํ์ธํ
์คํ""" |
|
if image is None: |
|
return None |
|
|
|
if not OUTPAINT_MODEL_LOADED: |
|
return Image.new('RGB', (width, height), (200, 200, 200)) |
|
|
|
try: |
|
|
|
background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, alignment) |
|
if background is None: |
|
return None |
|
|
|
|
|
cnet_image = background.copy() |
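        # Black out the region to be generated so the ControlNet conditioning image only contains known pixels.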
|
cnet_image.paste(0, (0, 0), mask) |
|
|
|
|
|
final_prompt = f"{prompt}, high quality, 4k" if prompt else "high quality, 4k" |
|
|
|
|
|
with torch.autocast(device_type="cuda", dtype=torch.float16): |
|
( |
|
prompt_embeds, |
|
negative_prompt_embeds, |
|
pooled_prompt_embeds, |
|
negative_pooled_prompt_embeds, |
|
) = pipe.encode_prompt(final_prompt, "cuda", True) |
|
|
|
|
|
for generated_image in pipe( |
|
prompt_embeds=prompt_embeds, |
|
negative_prompt_embeds=negative_prompt_embeds, |
|
pooled_prompt_embeds=pooled_prompt_embeds, |
|
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, |
|
image=cnet_image, |
|
num_inference_steps=num_steps |
|
): |
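            # The fill pipeline yields progressively denoised previews; only the last yielded image is kept.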
|
|
|
pass |
|
|
|
|
|
final_image = generated_image |
|
|
|
|
|
final_image = final_image.convert("RGBA") |
|
cnet_image.paste(final_image, (0, 0), mask) |
|
|
|
return cnet_image |
|
|
|
except Exception as e: |
|
logging.error(f"Outpainting error: {str(e)}") |
|
return background if 'background' in locals() else None |
|
|
|
|
|
css = """ |
|
:root { |
|
--primary-color: #f8c3cd; |
|
--secondary-color: #b3e5fc; |
|
--background-color: #f5f5f7; |
|
--card-background: #ffffff; |
|
--text-color: #424242; |
|
--accent-color: #ffb6c1; |
|
--success-color: #c8e6c9; |
|
--warning-color: #fff9c4; |
|
--shadow-color: rgba(0, 0, 0, 0.1); |
|
--border-radius: 12px; |
|
} |
|
.gradio-container { |
|
max-width: 1200px !important; |
|
margin: 0 auto !important; |
|
} |
|
.panel-box { |
|
border-radius: var(--border-radius) !important; |
|
box-shadow: 0 8px 16px var(--shadow-color) !important; |
|
background-color: var(--card-background) !important; |
|
padding: 20px !important; |
|
margin-bottom: 20px !important; |
|
} |
|
#generate-btn, #video-btn, #outpaint-btn { |
|
background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important; |
|
font-size: 1.1rem !important; |
|
padding: 12px 24px !important; |
|
margin-top: 10px !important; |
|
width: 100% !important; |
|
} |
|
.tabitem { |
|
min-height: 700px !important; |
|
} |
|
""" |
|
|
|
|
|
demo = gr.Blocks(css=css, title="AI ์ด๋ฏธ์ง & ๋น๋์ค ์์ฑ๊ธฐ") |
|
|
|
with demo: |
|
gr.Markdown("# ๐จ Ginigen ์คํ๋์ค") |
|
|
|
with gr.Tabs() as tabs: |
|
|
|
with gr.Tab("ํ
์คํธโ์ด๋ฏธ์งโ๋น๋์ค", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ ์ด๋ฏธ์ง ์์ฑ ์ค์ ") |
|
|
|
prompt = gr.Textbox( |
|
label="ํ๋กฌํํธ(ํ๊ธ/์์ด ๊ฐ๋ฅ)", |
|
placeholder="์์ฑํ๊ณ ์ถ์ ์ด๋ฏธ์ง๋ฅผ ์ค๋ช
ํ์ธ์...", |
|
lines=3 |
|
) |
|
|
|
size_preset = gr.Dropdown( |
|
choices=list(IMAGE_PRESETS.keys()), |
|
value="1:1 ์ ์ฌ๊ฐํ", |
|
label="ํฌ๊ธฐ ํ๋ฆฌ์
" |
|
) |
|
|
|
with gr.Row(): |
|
width = gr.Slider(256, 2048, 1024, step=64, label="๋๋น") |
|
height = gr.Slider(256, 2048, 1024, step=64, label="๋์ด") |
|
|
|
with gr.Row(): |
|
guidance = gr.Slider(1.0, 20.0, 3.5, step=0.1, label="๊ฐ์ด๋์ค") |
|
                            steps = gr.Slider(1, 50, 30, step=1, label="스텝")
|
|
|
seed = gr.Number(label="์๋ (-1=๋๋ค)", value=-1) |
|
|
|
generate_btn = gr.Button("๐จ ์ด๋ฏธ์ง ์์ฑ", variant="primary", elem_id="generate-btn") |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ฌ ๋น๋์ค ์์ฑ ์ค์ ") |
|
|
|
video_prompt = gr.Textbox( |
|
label="(์ ํ) ๋น๋์ค ํ๋กฌํํธ(์์ด๋ก ์
๋ ฅ)", |
|
placeholder="๋น๋์ค์ ์์ง์์ ์ค๋ช
ํ์ธ์... (๋น์๋๋ฉด ๊ธฐ๋ณธ ์์ง์ ์ ์ฉ)", |
|
lines=2 |
|
) |
|
|
|
video_length = gr.Slider( |
|
minimum=1, |
|
maximum=60, |
|
value=4, |
|
step=0.5, |
|
label="๋น๋์ค ๊ธธ์ด (์ด)", |
|
info="1์ด์์ 60์ด๊น์ง ์ ํ ๊ฐ๋ฅํฉ๋๋ค" |
|
) |
|
|
|
|
|
sound_generation = gr.Radio( |
|
choices=["์ฌ์ด๋ ์์", "์ฌ์ด๋ ์์ฑ"], |
|
value="์ฌ์ด๋ ์์", |
|
label="์ฌ์ด๋ ์ต์
", |
|
info="๋น๋์ค์ ์ฌ์ด๋๋ฅผ ์ถ๊ฐํ ์ง ์ ํํ์ธ์" |
|
) |
|
|
|
|
|
with gr.Column(visible=False) as sound_options: |
|
sound_prompt = gr.Textbox( |
|
label="์ฌ์ด๋ ํ๋กฌํํธ (์ ํ)", |
|
placeholder="์์ฑํ ์ฌ์ด๋๋ฅผ ์ค๋ช
ํ์ธ์... (๋น์๋๋ฉด ๋น๋์ค ํ๋กฌํํธ ์ฌ์ฉ)", |
|
lines=2 |
|
) |
|
sound_negative_prompt = gr.Textbox( |
|
label="์ฌ์ด๋ ๋ค๊ฑฐํฐ๋ธ ํ๋กฌํํธ", |
|
value="music", |
|
lines=1 |
|
) |
|
|
|
video_btn = gr.Button("๐ฌ ๋น๋์ค๋ก ๋ณํ", variant="secondary", elem_id="video-btn") |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ผ๏ธ ์์ฑ ๊ฒฐ๊ณผ") |
|
|
|
output_image = gr.Image(label="์์ฑ๋ ์ด๋ฏธ์ง", type="numpy") |
|
output_seed = gr.Textbox(label="์๋ ์ ๋ณด") |
|
output_video = gr.Video(label="์์ฑ๋ ๋น๋์ค") |
|
|
|
|
|
with gr.Tab("์ด๋ฏธ์ง ๋น์จ ๋ณ๊ฒฝ/์์ฑ", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ผ๏ธ ์ด๋ฏธ์ง ์
๋ก๋") |
|
|
|
input_image = gr.Image( |
|
label="์๋ณธ ์ด๋ฏธ์ง", |
|
type="numpy" |
|
) |
|
|
|
outpaint_prompt = gr.Textbox( |
|
label="ํ๋กฌํํธ (์ ํ)", |
|
placeholder="ํ์ฅํ ์์ญ์ ๋ํ ์ค๋ช
...", |
|
lines=2 |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### โ๏ธ ์์ํ์ธํ
์ค์ ") |
|
|
|
outpaint_size_preset = gr.Dropdown( |
|
choices=list(IMAGE_PRESETS.keys()), |
|
value="16:9 ์์ด๋์คํฌ๋ฆฐ", |
|
label="๋ชฉํ ํฌ๊ธฐ ํ๋ฆฌ์
" |
|
) |
|
|
|
with gr.Row(): |
|
outpaint_width = gr.Slider(256, 2048, 1280, step=64, label="๋ชฉํ ๋๋น") |
|
outpaint_height = gr.Slider(256, 2048, 720, step=64, label="๋ชฉํ ๋์ด") |
|
|
|
alignment = gr.Dropdown( |
|
choices=["๊ฐ์ด๋ฐ", "์ผ์ชฝ", "์ค๋ฅธ์ชฝ", "์", "์๋"], |
|
value="๊ฐ์ด๋ฐ", |
|
label="์ ๋ ฌ" |
|
) |
|
|
|
overlap_percentage = gr.Slider( |
|
minimum=1, |
|
maximum=50, |
|
value=10, |
|
step=1, |
|
label="๋ง์คํฌ ์ค๋ฒ๋ฉ (%)" |
|
) |
|
|
|
outpaint_steps = gr.Slider( |
|
minimum=4, |
|
maximum=12, |
|
value=8, |
|
step=1, |
|
label="์ถ๋ก ์คํ
" |
|
) |
|
|
|
                        outpaint_btn = gr.Button("🎨 아웃페인팅 실행", variant="primary", elem_id="outpaint-btn")
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ผ๏ธ ๊ฒฐ๊ณผ") |
|
|
|
                        outpaint_result = gr.Image(label="아웃페인팅 결과")
|
|
|
|
|
size_preset.change(update_dimensions, [size_preset], [width, height]) |
|
|
|
generate_btn.click( |
|
generate_text_to_image, |
|
[prompt, width, height, guidance, steps, seed], |
|
[output_image, output_seed] |
|
) |
|
|
|
|
|
def toggle_sound_options(choice): |
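        # Show the sound-prompt fields only when sound generation is selected.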
|
return gr.update(visible=(choice == "์ฌ์ด๋ ์์ฑ")) |
|
|
|
sound_generation.change( |
|
toggle_sound_options, |
|
[sound_generation], |
|
[sound_options] |
|
) |
|
|
|
video_btn.click( |
|
generate_video_from_image, |
|
[output_image, video_prompt, video_length, sound_generation, sound_prompt, sound_negative_prompt], |
|
[output_video] |
|
) |
|
|
|
|
|
outpaint_size_preset.change(update_dimensions, [outpaint_size_preset], [outpaint_width, outpaint_height]) |
|
|
|
outpaint_btn.click( |
|
outpaint_image, |
|
[input_image, outpaint_prompt, outpaint_width, outpaint_height, overlap_percentage, alignment, outpaint_steps], |
|
[outpaint_result] |
|
) |
|
|
|
demo.launch() |