Spaces:

willsh1997
/

littlefreaksgradio

Running

App Files Files Community

willsh1997 commited on Dec 10, 2024

Commit

c94b544

1 Parent(s): 4f9b231

:sparkles: :clown_face: init commit of little freaks

Browse files

Files changed (14) hide show

.gitignore +162 -0
README.md +6 -11
app.py +427 -0
requirements.txt +7 -0
willpaint-imgs/chaosclicker-willpaint.png +0 -0
willpaint-imgs/contentconnoisseur-willpaint.png +0 -0
willpaint-imgs/digitaldaydreamer-willpaint.png +0 -0
willpaint-imgs/ecoexplorer-willpaint.png +0 -0
willpaint-imgs/fandomfox-willpaint.png +0 -0
willpaint-imgs/mememaven-willpaint.png +0 -0
willpaint-imgs/newnerd-willpaint.png +0 -0
willpaint-imgs/nostalgicnavigator-willpaint.png +0 -0
willpaint-imgs/scrollseeker-willpaint.png +0 -0
willpaint-imgs/trendtracker-willpaint.png +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,162 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

README.md CHANGED Viewed

@@ -1,14 +1,9 @@
----
-title: Littlefreaksgradio
-emoji: 👁
-colorFrom: red
-colorTo: red
 sdk: gradio
-sdk_version: 5.8.0
 app_file: app.py
 pinned: false
-license: apache-2.0
-short_description: make little freaks at the click of a button!
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+title: little freaks gradio
+emoji: :clown_face:
+colorFrom: purple
+colorTo: purple
 sdk: gradio
+sdk_version: 3.1.7
 app_file: app.py
 pinned: false
+license: apache-2.0

app.py ADDED Viewed

	@@ -0,0 +1,427 @@

+### BACKEND
+import requests
+import torch
+from PIL import Image
+from io import BytesIO
+from diffusers import StableUnCLIPImg2ImgPipeline, UnCLIPImageVariationPipeline, ImagePipelineOutput
+import inspect
+from typing import List, Optional, Union
+import PIL.Image
+import torch
+from torch.nn import functional as F
+from transformers import (
+    CLIPImageProcessor,
+    CLIPTextModelWithProjection,
+    CLIPTokenizer,
+    CLIPVisionModelWithProjection,
+)
+import gradio as gr
+class customUnClipPipeline(UnCLIPImageVariationPipeline):
+    def _encode_prompt(self, prompt,  device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt = "",):
+        batch_size = len(prompt) if isinstance(prompt, list) else 1
+        # get prompt text embeddings
+        text_inputs = self.tokenizer(
+            prompt,
+            padding="max_length",
+            max_length=self.tokenizer.model_max_length,
+            return_tensors="pt",
+        )
+        text_input_ids = text_inputs.input_ids
+        text_mask = text_inputs.attention_mask.bool().to(device)
+        text_encoder_output = self.text_encoder(text_input_ids.to(device))
+        prompt_embeds = text_encoder_output.text_embeds
+        text_encoder_hidden_states = text_encoder_output.last_hidden_state
+        prompt_embeds = prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
+        text_encoder_hidden_states = text_encoder_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
+        text_mask = text_mask.repeat_interleave(num_images_per_prompt, dim=0)
+        if do_classifier_free_guidance:
+            uncond_tokens = [negative_prompt] * batch_size
+            max_length = text_input_ids.shape[-1]
+            uncond_input = self.tokenizer(
+                uncond_tokens,
+                padding="max_length",
+                max_length=max_length,
+                truncation=True,
+                return_tensors="pt",
+            )
+            uncond_text_mask = uncond_input.attention_mask.bool().to(device)
+            negative_prompt_embeds_text_encoder_output = self.text_encoder(uncond_input.input_ids.to(device))
+            negative_prompt_embeds = negative_prompt_embeds_text_encoder_output.text_embeds
+            uncond_text_encoder_hidden_states = negative_prompt_embeds_text_encoder_output.last_hidden_state
+            # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
+            seq_len = negative_prompt_embeds.shape[1]
+            negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt)
+            negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len)
+            seq_len = uncond_text_encoder_hidden_states.shape[1]
+            uncond_text_encoder_hidden_states = uncond_text_encoder_hidden_states.repeat(1, num_images_per_prompt, 1)
+            uncond_text_encoder_hidden_states = uncond_text_encoder_hidden_states.view(
+                batch_size * num_images_per_prompt, seq_len, -1
+            )
+            uncond_text_mask = uncond_text_mask.repeat_interleave(num_images_per_prompt, dim=0)
+            # done duplicates
+            # For classifier free guidance, we need to do two forward passes.
+            # Here we concatenate the unconditional and text embeddings into a single batch
+            # to avoid doing two forward passes
+            prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
+            text_encoder_hidden_states = torch.cat([uncond_text_encoder_hidden_states, text_encoder_hidden_states])
+            text_mask = torch.cat([uncond_text_mask, text_mask])
+        return prompt_embeds, text_encoder_hidden_states, text_mask
+    @torch.no_grad()
+    def __call__(
+        self,
+        text_input: str = "",
+        negative_prompt: str = "",
+        image: Optional[Union[PIL.Image.Image, List[PIL.Image.Image], torch.Tensor]] = None,
+        num_images_per_prompt: int = 1,
+        decoder_num_inference_steps: int = 25,
+        super_res_num_inference_steps: int = 7,
+        generator: Optional[torch.Generator] = None,
+        decoder_latents: Optional[torch.Tensor] = None,
+        super_res_latents: Optional[torch.Tensor] = None,
+        image_embeddings: Optional[torch.Tensor] = None,
+        decoder_guidance_scale: float = 8.0,
+        output_type: Optional[str] = "pil",
+        return_dict: bool = True,
+    ):
+        """
+        The call function to the pipeline for generation.
+        Args:
+            image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.Tensor`):
+                `Image` or tensor representing an image batch to be used as the starting point. If you provide a
+                tensor, it needs to be compatible with the [`CLIPImageProcessor`]
+                [configuration](https://huggingface.co/fusing/karlo-image-variations-diffusers/blob/main/feature_extractor/preprocessor_config.json).
+                Can be left as `None` only when `image_embeddings` are passed.
+            num_images_per_prompt (`int`, *optional*, defaults to 1):
+                The number of images to generate per prompt.
+            decoder_num_inference_steps (`int`, *optional*, defaults to 25):
+                The number of denoising steps for the decoder. More denoising steps usually lead to a higher quality
+                image at the expense of slower inference.
+            super_res_num_inference_steps (`int`, *optional*, defaults to 7):
+                The number of denoising steps for super resolution. More denoising steps usually lead to a higher
+                quality image at the expense of slower inference.
+            generator (`torch.Generator`, *optional*):
+                A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
+                generation deterministic.
+            decoder_latents (`torch.Tensor` of shape (batch size, channels, height, width), *optional*):
+                Pre-generated noisy latents to be used as inputs for the decoder.
+            super_res_latents (`torch.Tensor` of shape (batch size, channels, super res height, super res width), *optional*):
+                Pre-generated noisy latents to be used as inputs for the decoder.
+            decoder_guidance_scale (`float`, *optional*, defaults to 4.0):
+                A higher guidance scale value encourages the model to generate images closely linked to the text
+                `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
+            image_embeddings (`torch.Tensor`, *optional*):
+                Pre-defined image embeddings that can be derived from the image encoder. Pre-defined image embeddings
+                can be passed for tasks like image interpolations. `image` can be left as `None`.
+            output_type (`str`, *optional*, defaults to `"pil"`):
+                The output format of the generated image. Choose between `PIL.Image` or `np.array`.
+            return_dict (`bool`, *optional*, defaults to `True`):
+                Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
+        Returns:
+            [`~pipelines.ImagePipelineOutput`] or `tuple`:
+                If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
+                returned where the first element is a list with the generated images.
+        """
+        if image is not None:
+            if isinstance(image, PIL.Image.Image):
+                batch_size = 1
+            elif isinstance(image, list):
+                batch_size = len(image)
+            else:
+                batch_size = image.shape[0]
+        else:
+            batch_size = image_embeddings.shape[0]
+        prompt = [text_input] * batch_size
+        device = self._execution_device
+        batch_size = batch_size * num_images_per_prompt
+        do_classifier_free_guidance = decoder_guidance_scale > 1.0
+        prompt_embeds, text_encoder_hidden_states, text_mask = self._encode_prompt(
+            prompt,  device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt ,
+        )
+        image_embeddings = self._encode_image(image, device, num_images_per_prompt, image_embeddings)
+        # decoder
+        text_encoder_hidden_states, additive_clip_time_embeddings = self.text_proj(
+            image_embeddings=image_embeddings,
+            prompt_embeds=prompt_embeds,
+            text_encoder_hidden_states=text_encoder_hidden_states,
+            do_classifier_free_guidance=do_classifier_free_guidance,
+        )
+        if device.type == "mps":
+            # HACK: MPS: There is a panic when padding bool tensors,
+            # so cast to int tensor for the pad and back to bool afterwards
+            text_mask = text_mask.type(torch.int)
+            decoder_text_mask = F.pad(text_mask, (self.text_proj.clip_extra_context_tokens, 0), value=1)
+            decoder_text_mask = decoder_text_mask.type(torch.bool)
+        else:
+            decoder_text_mask = F.pad(text_mask, (self.text_proj.clip_extra_context_tokens, 0), value=True)
+        self.decoder_scheduler.set_timesteps(decoder_num_inference_steps, device=device)
+        decoder_timesteps_tensor = self.decoder_scheduler.timesteps
+        num_channels_latents = self.decoder.config.in_channels
+        height = self.decoder.config.sample_size
+        width = self.decoder.config.sample_size
+        if decoder_latents is None:
+            decoder_latents = self.prepare_latents(
+                (batch_size, num_channels_latents, height, width),
+                text_encoder_hidden_states.dtype,
+                device,
+                generator,
+                decoder_latents,
+                self.decoder_scheduler,
+            )
+        for i, t in enumerate(self.progress_bar(decoder_timesteps_tensor)):
+            # expand the latents if we are doing classifier free guidance
+            latent_model_input = torch.cat([decoder_latents] * 2) if do_classifier_free_guidance else decoder_latents
+            noise_pred = self.decoder(
+                sample=latent_model_input,
+                timestep=t,
+                encoder_hidden_states=text_encoder_hidden_states,
+                class_labels=additive_clip_time_embeddings,
+                attention_mask=decoder_text_mask,
+            ).sample
+            if do_classifier_free_guidance:
+                noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+                noise_pred_uncond, _ = noise_pred_uncond.split(latent_model_input.shape[1], dim=1)
+                noise_pred_text, predicted_variance = noise_pred_text.split(latent_model_input.shape[1], dim=1)
+                noise_pred = noise_pred_uncond + decoder_guidance_scale * (noise_pred_text - noise_pred_uncond)
+                noise_pred = torch.cat([noise_pred, predicted_variance], dim=1)
+            if i + 1 == decoder_timesteps_tensor.shape[0]:
+                prev_timestep = None
+            else:
+                prev_timestep = decoder_timesteps_tensor[i + 1]
+            # compute the previous noisy sample x_t -> x_t-1
+            decoder_latents = self.decoder_scheduler.step(
+                noise_pred, t, decoder_latents, prev_timestep=prev_timestep, generator=generator
+            ).prev_sample
+        decoder_latents = decoder_latents.clamp(-1, 1)
+        image_small = decoder_latents
+        # done decoder
+        # super res
+        self.super_res_scheduler.set_timesteps(super_res_num_inference_steps, device=device)
+        super_res_timesteps_tensor = self.super_res_scheduler.timesteps
+        channels = self.super_res_first.config.in_channels // 2
+        height = self.super_res_first.config.sample_size
+        width = self.super_res_first.config.sample_size
+        if super_res_latents is None:
+            super_res_latents = self.prepare_latents(
+                (batch_size, channels, height, width),
+                image_small.dtype,
+                device,
+                generator,
+                super_res_latents,
+                self.super_res_scheduler,
+            )
+        if device.type == "mps":
+            # MPS does not support many interpolations
+            image_upscaled = F.interpolate(image_small, size=[height, width])
+        else:
+            interpolate_antialias = {}
+            if "antialias" in inspect.signature(F.interpolate).parameters:
+                interpolate_antialias["antialias"] = True
+            image_upscaled = F.interpolate(
+                image_small, size=[height, width], mode="bicubic", align_corners=False, **interpolate_antialias
+            )
+        for i, t in enumerate(self.progress_bar(super_res_timesteps_tensor)):
+            # no classifier free guidance
+            if i == super_res_timesteps_tensor.shape[0] - 1:
+                unet = self.super_res_last
+            else:
+                unet = self.super_res_first
+            latent_model_input = torch.cat([super_res_latents, image_upscaled], dim=1)
+            noise_pred = unet(
+                sample=latent_model_input,
+                timestep=t,
+            ).sample
+            if i + 1 == super_res_timesteps_tensor.shape[0]:
+                prev_timestep = None
+            else:
+                prev_timestep = super_res_timesteps_tensor[i + 1]
+            # compute the previous noisy sample x_t -> x_t-1
+            super_res_latents = self.super_res_scheduler.step(
+                noise_pred, t, super_res_latents, prev_timestep=prev_timestep, generator=generator
+            ).prev_sample
+        image = super_res_latents
+        # done super res
+        self.maybe_free_model_hooks()
+        # post processing
+        image = image * 0.5 + 0.5
+        image = image.clamp(0, 1)
+        image = image.cpu().permute(0, 2, 3, 1).float().numpy()
+        if output_type == "pil":
+            image = self.numpy_to_pil(image)
+        if not return_dict:
+            return (image,)
+        return ImagePipelineOutput(images=image)
+### ADDITIONAL PIPELINE CODE FOR KARLO
+pipe = customUnClipPipeline.from_pretrained("kakaobrain/karlo-v1-alpha-image-variations", torch_dtype=torch.float16, trust_remote_code=True, accelerator='ort', device='cpu')
+# pipe.enable_model_cpu_offload()
+# func for getting tensor embeddings from cand image
+def load_image(image_dir):
+    image = Image.open(image_dir).convert("RGB")
+    return image
+def load_img_from_URL(URL):
+    response = requests.get(URL)
+    init_image = Image.open(BytesIO(response.content)).convert("RGB")
+    return init_image
+def embed_img(input_image):
+    tokens = pipe.feature_extractor(input_image)
+    img_model = pipe.image_encoder.to('cpu')
+    with torch.no_grad():
+        embeds = img_model(torch.tensor(tokens.pixel_values[0]).unsqueeze(0))
+    return embeds.image_embeds.to('cpu')
+def localimg_2_embed(image_dir):
+    embeds = embed_img(load_image(image_dir))
+    return embeds
+def URLimg_2_embed(URL):
+    embeds = embed_img(load_img_from_URL(URL))
+    return embeds
+# random generator for softmaxxed outputs
+def random_probdist(num_cands):
+    random_numbers = torch.randn(num_cands)
+    softmax_output = torch.nn.functional.softmax(random_numbers, dim=0).reshape((num_cands,1))
+    return softmax_output
+def scalesum_candtensors(list_scale, cand_tensors):
+    '''
+    quick note - just make sure your list_scale is the same length as ur cand_tensors, and also adds up to 1
+    '''
+    assert sum(list_scale) == 1, f"you didn't input a valid probability distribution - make sure your scales add up to 1, currently it adds up to {sum(list_scale)}"
+    assert len(list_scale) == len(cand_tensors), f"your scale list is not the same length as your list of candidate tensors. len list = {len(list_scale)}, len cand tensors = {len(cand_tensors)}"
+    scaled = torch.tensor(list_scale), cand_tensors
+    output = scaled.sum(dim=0)
+    return output
+def random_candtensor(cand_tensors):
+    scaled = random_probdist(len(cand_tensors)) * cand_tensors
+    output = scaled.sum(dim=0)
+    return output
+# for displaying images
+def image_grid(imgs, rows, cols):
+    assert len(imgs) == rows*cols
+    w, h = imgs[0].size
+    grid = Image.new('RGB', size=(cols*w, rows*h))
+    grid_w, grid_h = grid.size
+    for i, img in enumerate(imgs):
+        grid.paste(img, box=(i%cols*w, i//cols*h))
+    return grid
+chaosclicker_willtensor = localimg_2_embed('willpaint-imgs/chaosclicker-willpaint.png').to('cpu')
+contentcnsr_willtensor = localimg_2_embed('willpaint-imgs/contentconnoisseur-willpaint.png').to('cpu')
+digdaydrmr_willtensor = localimg_2_embed('willpaint-imgs/digitaldaydreamer-willpaint.png').to('cpu')
+ecoexplr_willtensor = localimg_2_embed('willpaint-imgs/ecoexplorer-willpaint.png').to('cpu')
+fandomfox_willtensor = localimg_2_embed('willpaint-imgs/fandomfox-willpaint.png').to('cpu')
+mememaven_willtensor = localimg_2_embed('willpaint-imgs/mememaven-willpaint.png').to('cpu')
+newsnerd_willtensor = localimg_2_embed('willpaint-imgs/newnerd-willpaint.png').to('cpu')
+nostalgicnvgtr_willtensor = localimg_2_embed('willpaint-imgs/nostalgicnavigator-willpaint.png').to('cpu')
+scrollseeker_willtensor = localimg_2_embed('willpaint-imgs/scrollseeker-willpaint.png').to('cpu')
+trendtracker_willtensor = localimg_2_embed('willpaint-imgs/trendtracker-willpaint.png').to('cpu')
+will_cand_tensors = torch.cat([chaosclicker_willtensor,
+                                contentcnsr_willtensor ,
+                                digdaydrmr_willtensor,
+                                ecoexplr_willtensor,
+                                fandomfox_willtensor,
+                                mememaven_willtensor,
+                                newsnerd_willtensor,
+                                nostalgicnvgtr_willtensor,
+                                scrollseeker_willtensor,
+                                trendtracker_willtensor,], dim=0)
+### FUNCTION FOR EXECUTION
+def generate_freak():
+    will_randomised_input = random_candtensor(will_cand_tensors).unsqueeze(0).half()
+    output = pipe(image_embeddings=will_randomised_input, num_images_per_prompt=1)
+    return output.images[0]
+### GRADIO BACKEND
+gr.Interface(
+    generate_freak,
+    inputs=None,
+    outputs=gr.Image(),
+    title="Make a little freak!",
+    description="click the button and make a freak!"
+).launch()