LouisLi committed
Commit 2757a07 · verified · 1 Parent(s): 1c15d66

Update app.py

Files changed (1)
  1. app.py +201 -154
app.py CHANGED
@@ -34,13 +34,19 @@ import asyncio
 
- import uuid
- from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
- from diffusers.utils import export_to_video
- from safetensors.torch import load_file
  #from diffusers.models.modeling_outputs import Transformer2DModelOutput
 
- # import spaces #
 
  import imageio
@@ -292,82 +298,106 @@ def make3d(images):
  ###############################################################################
 
  ###############################################################################
- ############# this part is for text to video #############
  ###############################################################################
 
- MORE = """ ## TRY Other Models
- ### JARVIS: Your VOICE Assistant -> https://huggingface.co/spaces/KingNish/JARVIS
- ### Instant Image: 4k images in 5 Second -> https://huggingface.co/spaces/KingNish/Instant-Image
- """
-
- # Constants
- bases = {
-     "Cartoon": "frankjoshua/toonyou_beta6",
-     "Realistic": "emilianJR/epiCRealism",
-     "3d": "Lykon/DreamShaper",
-     "Anime": "Yntec/mistoonAnime2"
- }
- step_loaded = None
- base_loaded = "Realistic"
- motion_loaded = None
-
- # Ensure model and scheduler are initialized in GPU-enabled function
- if not torch.cuda.is_available():
-     raise NotImplementedError("No GPU detected!")
-
- device = "cuda"
- dtype = torch.float16
- pipe = AnimateDiffPipeline.from_pretrained(bases[base_loaded], torch_dtype=dtype).to(device)
- pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
-
- # Safety checkers
- from transformers import CLIPFeatureExtractor
-
- feature_extractor = CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32")
-
- # Function
- #@spaces.GPU(duration=60,queue=False)
- def generate_image(prompt, base="Realistic", motion="", step=8, progress=gr.Progress()):
-     global step_loaded
-     global base_loaded
-     global motion_loaded
-     print(prompt, base, step)
-
-     if step_loaded != step:
-         repo = "ByteDance/AnimateDiff-Lightning"
-         ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
-         pipe.unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device), strict=False)
-         step_loaded = step
-
-     if base_loaded != base:
-         pipe.unet.load_state_dict(torch.load(hf_hub_download(bases[base], "unet/diffusion_pytorch_model.bin"), map_location=device), strict=False)
-         base_loaded = base
-
-     if motion_loaded != motion:
-         pipe.unload_lora_weights()
-         if motion != "":
-             pipe.load_lora_weights(motion, adapter_name="motion")
-             pipe.set_adapters(["motion"], [0.7])
-         motion_loaded = motion
-
-     progress((0, step))
-     def progress_callback(i, t, z):
-         progress((i+1, step))
-
-     output = pipe(prompt=prompt, guidance_scale=1.2, num_inference_steps=step, callback=progress_callback, callback_steps=1)
-
-     name = str(uuid.uuid4()).replace("-", "")
-     path = f"/tmp/{name}.mp4"
-     export_to_video(output.frames[0], path, fps=10)
-     return path
-
 
  ###############################################################################
- ############# above part is for text to video #############
  ###############################################################################
 
@@ -1353,99 +1383,116 @@ def create_ui():
  ###############################################################################
 
  ###############################################################################
- ############# this part is for text to video #############
  ###############################################################################
 
- with gr.Row(variant="panel") as text2video_model:
-     with gr.Column():
-         with gr.Row():
-             prompt = gr.Textbox(
-                 label='Prompt'
              )
          with gr.Row():
-             select_base = gr.Dropdown(
-                 label='Base model',
-                 choices=[
-                     "Cartoon",
-                     "Realistic",
-                     "3d",
-                     "Anime",
-                 ],
-                 value=base_loaded,
-                 interactive=True
-             )
-             select_motion = gr.Dropdown(
-                 label='Motion',
-                 choices=[
-                     ("Default", ""),
-                     ("Zoom in", "guoyww/animatediff-motion-lora-zoom-in"),
-                     ("Zoom out", "guoyww/animatediff-motion-lora-zoom-out"),
-                     ("Tilt up", "guoyww/animatediff-motion-lora-tilt-up"),
-                     ("Tilt down", "guoyww/animatediff-motion-lora-tilt-down"),
-                     ("Pan left", "guoyww/animatediff-motion-lora-pan-left"),
-                     ("Pan right", "guoyww/animatediff-motion-lora-pan-right"),
-                     ("Roll left", "guoyww/animatediff-motion-lora-rolling-anticlockwise"),
-                     ("Roll right", "guoyww/animatediff-motion-lora-rolling-clockwise"),
-                 ],
-                 value="guoyww/animatediff-motion-lora-zoom-in",
-                 interactive=True
-             )
-             select_step = gr.Dropdown(
-                 label='Inference steps',
-                 choices=[
-                     ('1-Step', 1),
-                     ('2-Step', 2),
-                     ('4-Step', 4),
-                     ('8-Step', 8),
-                 ],
-                 value=4,
-                 interactive=True
-             )
-             submit = gr.Button(
-                 scale=1,
-                 variant='primary'
              )
-     with gr.Column():
-         with gr.Row():
-             video = gr.Video(
-                 label='AnimateDiff-Lightning',
-                 autoplay=True,
-                 height=512,
-                 width=512,
-                 elem_id="video_output"
              )
-
-     prompt.submit(
-         fn=generate_image,
-         inputs=[prompt, select_base, select_motion, select_step],
-         outputs=video,
      )
-     submit.click(
-         fn=generate_image,
-         inputs=[prompt, select_base, select_motion, select_step],
-         outputs=video,
      )
 
-     gr.Examples(
-         examples=[
-             ["Focus: Eiffel Tower (Animate: Clouds moving)"], #Atmosphere Movement Example
-             ["Focus: Trees In forest (Animate: Lion running)"], #Object Movement Example
-             ["Focus: Astronaut in Space"], #Normal
-             ["Focus: Group of Birds in sky (Animate: Birds Moving) (Shot From distance)"], #Camera distance
-             ["Focus: Statue of liberty (Shot from Drone) (Animate: Drone coming toward statue)"], #Camera Movement
-             ["Focus: Panda in Forest (Animate: Drinking Tea)"], #Doing Something
-             ["Focus: Kids Playing (Season: Winter)"], #Atmosphere or Season
-             {"Focus: Cars in Street (Season: Rain, Daytime) (Shot from Distance) (Movement: Cars running)"} #Mixture
-         ],
-         fn=generate_image,
-         inputs=[prompt],
-         outputs=video,
-         cache_examples=True,
-     )
 
 
  ###############################################################################
- ############# above part is for text to video #############
  ###############################################################################
  def clear_tts_fields():
      return [gr.update(value=""), gr.update(value=""), None, None, gr.update(value=False), gr.update(value=True), None, None]
 
@@ -34,13 +34,19 @@ import asyncio
 
+ # import uuid
+ # from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
+ # from diffusers.utils import export_to_video
+ # from safetensors.torch import load_file
  #from diffusers.models.modeling_outputs import Transformer2DModelOutput
 
+
+ import random
+ import uuid
+ import json
+ from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
+
+
 
  import imageio
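The new imports above swap the AnimateDiff text-to-video stack for SDXL text-to-image. As a minimal, illustrative sketch of what they are used for (not part of app.py; it assumes a CUDA GPU and the same sd-community/sdxl-flash checkpoint that the rest of this commit loads):

import torch
from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler

# Load the SDXL checkpoint and switch to the Euler-ancestral sampler,
# mirroring the setup added later in this commit.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "sd-community/sdxl-flash",
    torch_dtype=torch.float16,
    use_safetensors=True,
).to("cuda")
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

# Few-step generation, which the "flash" distillation is intended to allow.
image = pipe(
    "a cat eating a piece of cheese",
    num_inference_steps=8,
    guidance_scale=3.0,
).images[0]
image.save("cat.png")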
 
@@ -292,82 +298,106 @@ def make3d(images):
  ###############################################################################
 
  ###############################################################################
+ ############# this part is for text to image #############
  ###############################################################################
 
+ # Use environment variables for flexibility
+ MODEL_ID = os.getenv("MODEL_ID", "sd-community/sdxl-flash")
+ MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
+ USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
+ ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
+ BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1"))  # Allow generating multiple images at once
+
+ # Determine device and load model outside of function for efficiency
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ pipe = StableDiffusionXLPipeline.from_pretrained(
+     MODEL_ID,
+     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+     use_safetensors=True,
+     add_watermarker=False,
+ ).to(device)
+ pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
+
+ # Torch compile for potential speedup (experimental)
+ if USE_TORCH_COMPILE:
+     pipe.compile()
+
+ # CPU offloading for larger RAM capacity (experimental)
+ if ENABLE_CPU_OFFLOAD:
+     pipe.enable_model_cpu_offload()
+
+ MAX_SEED = np.iinfo(np.int32).max
+
+ def save_image(img):
+     unique_name = str(uuid.uuid4()) + ".png"
+     img.save(unique_name)
+     return unique_name
+
+ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+     return seed
+
+ # @spaces.GPU(duration=30, queue=False)
+ def generate(
+     prompt: str,
+     negative_prompt: str = "",
+     use_negative_prompt: bool = False,
+     seed: int = 1,
+     width: int = 1024,
+     height: int = 1024,
+     guidance_scale: float = 3,
+     num_inference_steps: int = 30,
+     randomize_seed: bool = False,
+     use_resolution_binning: bool = True,
+     num_images: int = 1,  # Number of images to generate
+     progress=gr.Progress(track_tqdm=True),
+ ):
+     seed = int(randomize_seed_fn(seed, randomize_seed))
+     generator = torch.Generator(device=device).manual_seed(seed)
+
+     # Improved options handling
+     options = {
+         "prompt": [prompt] * num_images,
+         "negative_prompt": [negative_prompt] * num_images if use_negative_prompt else None,
+         "width": width,
+         "height": height,
+         "guidance_scale": guidance_scale,
+         "num_inference_steps": num_inference_steps,
+         "generator": generator,
+         "output_type": "pil",
+     }
 
+     # Use resolution binning for faster generation with less VRAM usage
+     if use_resolution_binning:
+         options["use_resolution_binning"] = True
+
+     # Generate images potentially in batches
+     images = []
+     for i in range(0, num_images, BATCH_SIZE):
+         batch_options = options.copy()
+         batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
+         if "negative_prompt" in batch_options:
+             batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE]
+         images.extend(pipe(**batch_options).images)
+
+     image_paths = [save_image(img) for img in images]
+     return image_paths, seed
+
+ examples = [
+     "a cat eating a piece of cheese",
+     "a ROBOT riding a BLUE horse on Mars, photorealistic, 4k",
+     "Ironman VS Hulk, ultrarealistic",
+     "Astronaut in a jungle, cold color palette, oil pastel, detailed, 8k",
+     "An alien holding a sign board containing the word 'Flash', futuristic, neonpunk",
+     "Kids going to school, Anime style"
+ ]
 
 
  ###############################################################################
+ ############# above part is for text to image #############
  ###############################################################################
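For reference, a sketch of calling the new generate() entry point directly, outside the Gradio UI (not part of the commit; it assumes the pipeline above loaded successfully, and the argument values mirror the UI defaults wired up further down):

# Direct call to generate(); it returns the saved image paths and the seed used.
# A negative prompt is passed here because, as written above, use_negative_prompt=False
# leaves options["negative_prompt"] as None, which the batching loop would then slice.
image_paths, used_seed = generate(
    prompt="Astronaut in a jungle, cold color palette, oil pastel, detailed, 8k",
    negative_prompt="blurry, low quality",
    use_negative_prompt=True,
    seed=0,
    width=1024,
    height=1024,
    guidance_scale=3.0,
    num_inference_steps=8,
    randomize_seed=True,
    use_resolution_binning=False,  # SDXL's __call__ may not recognize this kwarg, so skip it here
    num_images=1,
)
print(used_seed, image_paths)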
 
 
@@ -1353,99 +1383,116 @@ def create_ui():
  ###############################################################################
 
  ###############################################################################
+ ############# this part is for text to image #############
  ###############################################################################
 
+ with gr.Row(variant="panel") as text2image_model:
+     with gr.Row():
+         prompt = gr.Text(
+             label="Prompt",
+             show_label=False,
+             max_lines=1,
+             placeholder="Enter your prompt",
+             container=False,
+         )
+         run_button = gr.Button("Run", scale=0)
+     result = gr.Gallery(label="Result", columns=1, show_label=False)
+     with gr.Accordion("Advanced options", open=False):
+         num_images = gr.Slider(
+             label="Number of Images",
+             minimum=1,
+             maximum=4,
+             step=1,
+             value=1,
+         )
+         with gr.Row():
+             use_negative_prompt = gr.Checkbox(label="Use negative prompt", value=True)
+             negative_prompt = gr.Text(
+                 label="Negative prompt",
+                 max_lines=5,
+                 lines=4,
+                 placeholder="Enter a negative prompt",
+                 value="(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers:1.4), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation, NSFW",
+                 visible=True,
+             )
+         seed = gr.Slider(
+             label="Seed",
+             minimum=0,
+             maximum=MAX_SEED,
+             step=1,
+             value=0,
+         )
+         randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+         with gr.Row(visible=True):
+             width = gr.Slider(
+                 label="Width",
+                 minimum=512,
+                 maximum=MAX_IMAGE_SIZE,
+                 step=64,
+                 value=1024,
+             )
+             height = gr.Slider(
+                 label="Height",
+                 minimum=512,
+                 maximum=MAX_IMAGE_SIZE,
+                 step=64,
+                 value=1024,
              )
          with gr.Row():
+             guidance_scale = gr.Slider(
+                 label="Guidance Scale",
+                 minimum=0.1,
+                 maximum=6,
+                 step=0.1,
+                 value=3.0,
              )
+             num_inference_steps = gr.Slider(
+                 label="Number of inference steps",
+                 minimum=1,
+                 maximum=15,
+                 step=1,
+                 value=8,
              )
+
+     gr.Examples(
+         examples=examples,
+         inputs=prompt,
+         cache_examples=False
      )
+
+     use_negative_prompt.change(
+         fn=lambda x: gr.update(visible=x),
+         inputs=use_negative_prompt,
+         outputs=negative_prompt,
+         api_name=False,
      )
 
+     gr.on(
+         triggers=[
+             prompt.submit,
+             negative_prompt.submit,
+             run_button.click,
+         ],
+         fn=generate,
+         inputs=[
+             prompt,
+             negative_prompt,
+             use_negative_prompt,
+             seed,
+             width,
+             height,
+             guidance_scale,
+             num_inference_steps,
+             randomize_seed,
+             num_images
+         ],
+         outputs=[result, seed],
+         api_name="run",
+     )
 
 
  ###############################################################################
+ ############# above part is for text to image #############
  ###############################################################################
  def clear_tts_fields():
      return [gr.update(value=""), gr.update(value=""), None, None, gr.update(value=False), gr.update(value=True), None, None]
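The gr.on(...) block above binds three triggers (prompt submit, negative-prompt submit, and the Run button click) to the single generate handler. A self-contained sketch of that Gradio pattern, using illustrative names rather than the ones in app.py:

import gradio as gr

def echo(text):
    return f"You typed: {text}"

with gr.Blocks() as demo:
    box = gr.Textbox(label="Prompt")
    btn = gr.Button("Run")
    out = gr.Textbox(label="Result")

    # Pressing Enter in the textbox and clicking the button both call echo().
    gr.on(
        triggers=[box.submit, btn.click],
        fn=echo,
        inputs=box,
        outputs=out,
    )

if __name__ == "__main__":
    demo.launch()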