Vijish committed on
Commit 682647c · verified · 1 Parent(s): c39c22f

Create app.py

Files changed (1): app.py +393 -0
app.py ADDED
@@ -0,0 +1,393 @@
+ import gradio as gr
+ from diffusers import DiffusionPipeline, ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL, UniPCMultistepScheduler
+ from stable_diffusion_xl_reference import StableDiffusionXLReferencePipeline
+ from controlnet_aux import OpenposeDetector
+ #from transformers import DPTFeatureExtractor, DPTForDepthEstimation
+ from controlnet_aux import MidasDetector, ZoeDetector
+ from tqdm import tqdm
+
+ import torch
+ import numpy as np
+ import cv2
+ from PIL import Image
+ import os
+ import random
+ import gc
+
+
+ def clear_memory():
+     gc.collect()
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+         torch.cuda.ipc_collect()
+
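+ # Note: torch.cuda.empty_cache() returns cached CUDA blocks to the driver and
+ # torch.cuda.ipc_collect() frees memory held through CUDA IPC handles; both
+ # matter here because pipelines are repeatedly swapped between CPU and GPU.
+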
+ # Global variable definitions
+ controlnet_pipe = None
+ reference_pipe = None
+ pipe = None
+
+ # Load the base model
+ model = "aicollective1/aicollective"
+ pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=torch.float16)
+ pipe.to("cuda")
+
+ # Placeholder for ControlNet models to be loaded dynamically
+ controlnet_models = {
+     "Canny": None,
+     "Depth": None,
+     "OpenPose": None,
+     "Reference": None
+ }
+
+ # Load necessary models and feature extractors for depth estimation and OpenPose
+ #feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
+ #depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
+
+ processor_zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
+ processor_midas = MidasDetector.from_pretrained("lllyasviel/Annotators")
+
+ openpose_processor = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
+
+ vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, use_safetensors=True)
+ controlnet_pipe = None  # Initial placeholder, will be loaded dynamically
+ reference_pipe = None  # Initial placeholder for reference pipeline
+
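+ # The fp16-fix VAE is used because the stock SDXL VAE tends to produce
+ # NaNs/black images when decoding in float16.
+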
+ # Define the prompts and negative prompts for each style
+ styles = {
+     "Anime Studio Dance": {
+         "prompt": ("anime screencap of a man wearing a white helmet with pointed ears,\n"
+                    "Outfit: closed animal print shirt,\n"
+                    "Action: anime style, looking at viewer, solo, upper body,\n"
+                    "((masterpiece)), (best quality), (extremely detailed), depth of field, sketch, "
+                    "dark intense shadows, sharp focus, soft lighting, hdr, colorful, good composition, spectacular,"),
+         "negative_prompt": ("realistic, (painting by bad-artist-anime:0.9), (painting by bad-artist:0.9), watermark, "
+                             "text, error, blurry, jpeg artifacts, cropped, worst quality, low quality, normal quality, "
+                             "jpeg artifacts, signature, watermark, username, artist name, (worst quality, low quality:1.4), "
+                             "bad anatomy, watermark, signature, text, logo")
+     },
+     "Vintage Realistic": {
+         "prompt": ("a masterpiece close up shoot photography of a man wearing an animal print helmet with pointed ears,\n"
+                    "Outfit: wearing a big oversized outfit, white leather jacket,\n"
+                    "Action: sitting on steps,\n"
+                    "hyper realistic with detailed textures, cinematic film still of Photorealism, realistic skin texture, "
+                    "subsurface scattering, skinny, Photorealism, often for highly detailed representation, photographic accuracy, "
+                    "shallow depth of field, vignette, highly detailed, bokeh, epic, gorgeous, sharp, perfect hands,\n"
+                    "<lora:add-detail-xl:1> <lora:Vintage_Street_Photo:0.9>"),
+         "negative_prompt": ("deformed skin, skin veins, black skin, blurry, text, yellow, deformed, (worst quality, low resolution, "
+                             "bad hands, open mouth), text, watermark, artist name, distorted, twisted, watermark, 3d render, "
+                             "distorted, twisted, watermark, anime, cartoon, graphic, text, painting, crayon, graphite, abstract, "
+                             "glitch, deformed, mutated, ugly, disfigured, photoshopped skin, airbrushed skin, glossy skin, canvas frame, "
+                             "(high contrast:1.2), (over saturated:1.2), (glossy:1.1), cartoon, 3d, disfigured, Photoshop, video game, "
+                             "ugly, tiling, poorly drawn hands, 3d render, impressionism, digital art")
+     },
+     "Anime 90's Aesthetic": {
+         "prompt": ("a man wearing a white helmet with pointed ears, perfect chin,\n"
+                    "Outfit: wearing oversized hoodie, animal print pants,\n"
+                    "Action: dancing in nature, music production, music instruments made of wood,\n"
+                    "A screengrab of an anime, 90's aesthetic,"),
+         "negative_prompt": ("photo, real, realistic, blurry, text, yellow, deformed, (worst quality, low resolution, bad hands,), "
+                             "text, watermark, artist name, distorted, twisted, watermark, 3d render, distorted, twisted, watermark, "
+                             "text, abstract, glitch, deformed, mutated, ugly, disfigured, photoshopped skin, airbrushed skin, glossy skin, "
+                             "canvas frame, (high contrast:1.2), (over saturated:1.2), (glossy:1.1), disfigured, Photoshop, video game, "
+                             "ugly, tiling, poorly drawn hands, 3d render, impressionism, eyes, mouth, black skin, pale skin, hair, beard")
+     },
+     "Anime Style": {
+         "prompt": ("A man wearing a white helmet with pointed ears sitting on the steps of an Asian street shop,\n"
+                    "Outfit: wearing blue pants and a yellow jacket with a red backpack, in the anime style with detailed "
+                    "character design in the style of Atey Ghailan, featured in CGSociety, character concept art in the style of Katsuhiro Otomo"),
+         "negative_prompt": ("real, deformed fingers, chin, deformed hands, blurry, text, yellow, deformed, (worst quality, low resolution, "
+                             "bad hands, open mouth), text, watermark, artist name, distorted, twisted, watermark, 3d, distorted, twisted, "
+                             "watermark, anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, "
+                             "ugly, disfigured, photoshopped skin, airbrushed skin, glossy skin, canvas frame, (high contrast:1.2), "
+                             "(over saturated:1.2), (glossy:1.1), cartoon, 3d, disfigured, Photoshop, video game, ugly, tiling, "
+                             "poorly drawn hands, 3d render, impressionism, digital art")
+     },
+     "Real 70s": {
+         "prompt": ("a masterpiece close up shoot photography of a man wearing a white helmet with pointed ears,\n"
+                    "Outfit: wearing an oversized trippy 70s shirt and scarf,\n"
+                    "Action: standing on the ocean,\n"
+                    "shot in the style of Erwin Olaf, hyper realistic with detailed textures, cinematic film still of Photorealism, "
+                    "realistic skin texture, subsurface scattering, skinny, Photorealism, often for highly detailed representation, "
+                    "photographic accuracy, shallow depth of field, vignette, highly detailed, bokeh, epic, gorgeous, sharp,"),
+         "negative_prompt": ("deformed skin, skin veins, black skin, blurry, text, yellow, deformed, (worst quality, low resolution, "
+                             "bad hands, open mouth), text, watermark, artist name, distorted, twisted, watermark, 3d render, distorted, "
+                             "twisted, watermark, anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, "
+                             "mutated, ugly, disfigured, photoshopped skin, airbrushed skin, glossy skin, canvas frame, (high contrast:1.2), "
+                             "(over saturated:1.2), (glossy:1.1), cartoon, 3d, disfigured, Photoshop, video game, ugly, tiling, "
+                             "poorly drawn hands, 3d render, impressionism, digital art")
+     }
+ }
+
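+ # Note: the (term:1.2) / ((term)) emphasis and <lora:...> tags above are
+ # A1111-style conventions; a vanilla diffusers pipeline passes them through
+ # as literal text unless a prompt-weighting helper (e.g. compel) is added.
+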
+ # Define the style images
+ style_images = {
+     "Anime Studio Dance": "style/Anime Studio Dance.png",
+     "Vintage Realistic": "style/Vintage Realistic.png",
+     "Anime 90's Aesthetic": "style/Anime 90's Aesthetic.png",
+     "Anime Style": "style/Anime Style.png",
+     "Real 70s": "style/Real 70s.png"
+ }
+
+ # Function to load ControlNet models dynamically
+ def load_controlnet_model(controlnet_type):
+     global controlnet_pipe, pipe, reference_pipe, controlnet_models, vae, model
+
+     clear_memory()
+
+     if controlnet_models[controlnet_type] is None:
+         if controlnet_type in ["Canny", "Depth", "OpenPose"]:
+             controlnet_models[controlnet_type] = ControlNetModel.from_pretrained(
+                 "xinsir/controlnet-union-sdxl-1.0", torch_dtype=torch.float16, use_safetensors=True
+             )
+         elif controlnet_type == "Reference":
+             controlnet_models[controlnet_type] = StableDiffusionXLReferencePipeline.from_pretrained(
+                 model, torch_dtype=torch.float16, use_safetensors=True
+             )
+
+     # Drop any previously active pipelines before building the new one
+     if controlnet_pipe is not None:
+         controlnet_pipe.to("cpu")
+         controlnet_pipe = None
+
+     if reference_pipe is not None:
+         reference_pipe.to("cpu")
+         reference_pipe = None
+
+     if pipe is not None:
+         pipe.to("cpu")
+
+     clear_memory()
+
+     if controlnet_type == "Reference":
+         reference_pipe = controlnet_models[controlnet_type]
+         reference_pipe.scheduler = UniPCMultistepScheduler.from_config(reference_pipe.scheduler.config)
+         reference_pipe.to("cuda")
+     else:
+         controlnet_pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+             model, controlnet=controlnet_models[controlnet_type], vae=vae, torch_dtype=torch.float16, use_safetensors=True
+         )
+         controlnet_pipe.scheduler = UniPCMultistepScheduler.from_config(controlnet_pipe.scheduler.config)
+         controlnet_pipe.to("cuda")
+
+     clear_memory()
+     return f"Loaded {controlnet_type} model."
+
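+ # Canny/Depth/OpenPose all share a single "union" ControlNet checkpoint
+ # (xinsir/controlnet-union-sdxl-1.0), which supports multiple condition
+ # types, so only one ControlNet download covers the three modes. E.g.
+ # load_controlnet_model("Canny") returns "Loaded Canny model."
+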
+
+
+ # Preprocessing functions for each ControlNet type
+ def preprocess_canny(image):
+     if isinstance(image, Image.Image):
+         image = np.array(image)
+     if image.dtype != np.uint8:
+         image = (image * 255).astype(np.uint8)
+     image = cv2.Canny(image, 100, 200)
+     image = image[:, :, None]
+     image = np.concatenate([image, image, image], axis=2)
+     return Image.fromarray(image)
+
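+ # The Canny map is single-channel, so it is stacked to three channels:
+ # the ControlNet pipeline expects an RGB conditioning image. 100/200 are
+ # the commonly used low/high hysteresis thresholds.
+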
+
+ def preprocess_depth(image, target_size=(1024, 1024)):
+     if isinstance(image, Image.Image):
+         img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+     else:
+         img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+     depth_img = processor_zoe(img, output_type='cv2') if random.random() > 0.5 else processor_midas(img, output_type='cv2')
+
+     height, width = depth_img.shape[:2]
+     ratio = min(target_size[0] / width, target_size[1] / height)
+     new_width, new_height = int(width * ratio), int(height * ratio)
+     depth_img_resized = cv2.resize(depth_img, (new_width, new_height))
+
+     return Image.fromarray(depth_img_resized)
+
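+ # Alternating randomly between ZoeDepth and MiDaS gives some variety in the
+ # depth maps; both detectors load from the same lllyasviel/Annotators repo.
+ # The resize preserves aspect ratio so the longer side fits target_size.
+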
+ def preprocess_openpose(image):
+     if isinstance(image, Image.Image):
+         image = np.array(image)
+     image = openpose_processor(image, hand_and_face=False, output_type='cv2')
+     height, width = image.shape[:2]
+     ratio = np.sqrt(1024. * 1024. / (width * height))
+     new_width, new_height = int(width * ratio), int(height * ratio)
+     image = cv2.resize(image, (new_width, new_height))
+     return Image.fromarray(image)
+
+ def process_image_batch(images, pipe, prompt, negative_prompt, progress, batch_size=2):
+     all_processed_images = []
+     for i in range(0, len(images), batch_size):
+         batch = images[i:i+batch_size]
+         batch_prompt = [prompt] * len(batch)
+         batch_negative_prompt = [negative_prompt] * len(batch)
+
+         if isinstance(pipe, StableDiffusionXLReferencePipeline):
+             processed_batch = []
+             for img in batch:
+                 result = pipe(
+                     prompt=prompt,
+                     negative_prompt=negative_prompt,
+                     ref_image=img,
+                     num_inference_steps=20
+                 ).images
+                 processed_batch.extend(result)
+         else:
+             processed_batch = pipe(
+                 prompt=batch_prompt,
+                 negative_prompt=batch_negative_prompt,
+                 image=batch,
+                 num_inference_steps=20
+             ).images
+
+         all_processed_images.extend(processed_batch)
+         progress(min(1.0, (i + batch_size) / len(images)))  # Update progress bar, clamped so the last partial batch cannot exceed 1.0
+         clear_memory()  # Clear memory after each batch
+     return all_processed_images
+
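+ # The reference pipeline takes one ref_image per call, so it is invoked
+ # image-by-image; the ControlNet pipeline accepts lists and is batched.
+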
+
+ # Define the function to generate images
+ def generate_images_with_progress(prompt, negative_prompt, batch_count, use_controlnet, controlnet_type, mode, control_image, batch_images_input, progress=gr.Progress(track_tqdm=True)):
+     global controlnet_pipe, pipe, reference_pipe
+
+     clear_memory()
+
+     if use_controlnet:
+         if controlnet_type not in controlnet_models or controlnet_models[controlnet_type] is None:
+             raise ValueError(f"{controlnet_type} model not loaded. Please load the model first.")
+
+         if mode == "Single Image":
+             control_images = [control_image]
+         else:
+             control_images = [Image.open(img).convert("RGB") for img in batch_images_input]
+
+         preprocessed_images = []
+         for img in tqdm(control_images, desc="Preprocessing images"):
+             if controlnet_type == "Canny":
+                 preprocessed_images.append(preprocess_canny(img))
+             elif controlnet_type == "Depth":
+                 preprocessed_images.append(preprocess_depth(img))
+             elif controlnet_type == "OpenPose":
+                 preprocessed_images.append(preprocess_openpose(img))
+             else:  # Reference
+                 preprocessed_images.append(img)
+
+         if controlnet_type == "Reference":
+             images = process_image_batch(preprocessed_images, reference_pipe, prompt, negative_prompt, progress)
+         else:
+             images = process_image_batch(preprocessed_images, controlnet_pipe, prompt, negative_prompt, progress)
+     else:
+         # Free any ControlNet/reference pipelines before plain generation
+         if controlnet_pipe is not None:
+             controlnet_pipe.to("cpu")
+             controlnet_pipe = None
+
+         if reference_pipe is not None:
+             reference_pipe.to("cpu")
+             reference_pipe = None
+
+         clear_memory()
+
+         if pipe is None:
+             pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=torch.float16)
+         pipe.to("cuda")  # load_controlnet_model may have offloaded the base pipe to CPU
+
+         images = []
+         for i in tqdm(range(batch_count), desc="Generating images"):
+             generated = pipe(prompt=[prompt], negative_prompt=[negative_prompt], num_inference_steps=20, width=1024, height=1024).images
+             images.extend(generated)
+             progress((i + 1) / batch_count)  # Update progress bar
+             clear_memory()  # Clear memory after each image, even in single image mode
+
+     clear_memory()
+     return images
+
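+ # gr.Progress(track_tqdm=True) mirrors the tqdm bars above in the Gradio UI,
+ # so both preprocessing and generation report progress to the browser.
+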
+
+
+ # Function to extract PNG metadata
+ def extract_png_info(image):
+     # gr.Image(type='pil') hands us a PIL image; .info holds its metadata dict
+     metadata = image.info
+     return metadata
+
+ # Define the Gradio interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# Image Generation with Custom Prompts and Styles")
+
+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(label="Prompt", lines=8, interactive=True)
+             with gr.Accordion("Negative Prompt (Minimize/Expand)", open=False):
+                 negative_prompt = gr.Textbox(
+                     label="Negative Prompt",
+                     value="",
+                     lines=5
+                 )
+             batch_count = gr.Slider(minimum=1, maximum=10, step=1, label="Batch Count", value=1)
+             use_controlnet = gr.Checkbox(label="Use ControlNet", value=False)
+             controlnet_type = gr.Dropdown(choices=["Canny", "Depth", "OpenPose", "Reference"], label="ControlNet Type")
+             controlnet_status = gr.Textbox(label="ControlNet Status", value="", interactive=False)
+             mode = gr.Radio(choices=["Single Image", "Batch"], label="Mode", value="Single Image")
+
+             with gr.Tabs() as tabs:
+                 with gr.TabItem("Single Image"):
+                     control_image = gr.Image(label="Control Image", type='pil')
+
+                 with gr.TabItem("Batch"):
+                     batch_images_input = gr.File(label="Upload Images", file_count='multiple')
+
+                 with gr.TabItem("Extract Metadata"):
+                     png_image = gr.Image(label="Upload PNG Image", type='pil')
+                     metadata_output = gr.JSON(label="PNG Metadata")
+
+         with gr.Column(scale=2):
+             style_images_gallery = gr.Gallery(
+                 label="Choose a Style",
+                 value=list(style_images.values()),
+                 interactive=True,
+                 elem_id="style-gallery",
+                 columns=5,
+                 object_fit="contain",
+                 allow_preview=False
+             )
+             gallery = gr.Gallery(label="Generated Images", show_label=False, elem_id="gallery", height="auto")
+
+     selected_style = gr.State(value="Anime Studio Dance")
+
+     def select_style(evt: gr.SelectData):
+         style_names = list(styles.keys())
+         if evt.index < 0 or evt.index >= len(style_names):
+             raise ValueError(f"Invalid index: {evt.index}")
+         selected_style = style_names[evt.index]
+         return styles[selected_style]["prompt"], styles[selected_style]["negative_prompt"], selected_style
+
+     style_images_gallery.select(fn=select_style, inputs=[], outputs=[prompt, negative_prompt, selected_style])
+
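+     # The gallery and the styles dict rely on matching insertion order: the
+     # .select() event only carries the clicked index (via the gr.SelectData
+     # annotation, so no explicit inputs are needed), and the index is mapped
+     # back to a style name here.
+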
+     def update_controlnet(controlnet_type):
+         status = load_controlnet_model(controlnet_type)
+         return status
+
+     controlnet_type.change(fn=update_controlnet, inputs=controlnet_type, outputs=controlnet_status)
+
+     generate_button = gr.Button("Generate Images")
+     generate_button.click(
+         generate_images_with_progress,
+         inputs=[prompt, negative_prompt, batch_count, use_controlnet, controlnet_type, mode, control_image, batch_images_input],
+         outputs=gallery
+     )
+
+     metadata_button = gr.Button("Extract Metadata")
+     metadata_button.click(
+         fn=extract_png_info,
+         inputs=png_image,
+         outputs=metadata_output
+     )
+
+
+
+
+ if __name__ == "__main__":
+     demo.launch(debug=True)
+     clear_memory()