BioMike committed (verified)
Commit 2c480a0 · Parent(s): 5d66e0c

Upload 16 files

app.py ADDED
@@ -0,0 +1,14 @@
+import gradio as gr
+from encoding import get_interface as encoding_page
+from generation import get_interface as generation_page
+from interpolation import get_interface as interpolation_page
+
+with gr.Blocks() as demo:
+    with gr.Tab("Encode & Reconstruct"):
+        encoding_page()
+    with gr.Tab("Generate from Noise"):
+        generation_page()
+    with gr.Tab("Interpolate"):
+        interpolation_page()
+
+demo.launch()
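
Run locally with python app.py; demo.launch() serves the three tabs on Gradio's default port, 7860.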
encoding.py ADDED
@@ -0,0 +1,51 @@
+import torch
+import gradio as gr
+from torchvision import transforms
+from PIL import Image
+import numpy as np
+from model import model
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+resize_input = transforms.Resize((32, 32))
+to_tensor = transforms.ToTensor()
+
+def reconstruct_image(image):
+    image = Image.fromarray(image).convert('RGB')
+    image_32 = resize_input(image)
+    image_tensor = to_tensor(image_32).unsqueeze(0).to(device)
+    with torch.no_grad():
+        mu, _ = model.encode(image_tensor)
+        recon = model.decode(mu)
+    recon_np = recon.squeeze(0).permute(1, 2, 0).cpu().numpy()
+    recon_img = Image.fromarray((recon_np * 255).astype(np.uint8)).resize((512, 512))
+    orig_resized = image_32.resize((512, 512))
+    return orig_resized, recon_img
+
+def get_interface():
+    with gr.Blocks() as iface:
+        gr.Markdown("## Encoding & Reconstruction")
+        with gr.Row():
+            input_image = gr.Image(label="Input (Downsampled to 32x32)", type="numpy")
+            output_image = gr.Image(label="Reconstructed", type="pil")
+        run_button = gr.Button("Run Reconstruction")
+
+        run_button.click(fn=reconstruct_image, inputs=input_image, outputs=[input_image, output_image])
+
+        examples = [
+            ["example_images/image1.jpg"],
+            ["example_images/image2.jpg"],
+            ["example_images/image3.jpg"],
+            ["example_images/image10.jpg"],
+            ["example_images/image4.jpg"],
+            ["example_images/image5.jpg"],
+            ["example_images/image6.jpg"],
+            ["example_images/image7.jpg"],
+            ["example_images/image8.jpg"],
+        ]
+
+        gr.Examples(
+            examples=examples,
+            inputs=[input_image],
+        )
+    return iface
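
A minimal standalone sketch of the encode-decode path, outside the Gradio UI. It assumes the repo's example images are on disk and the BioMike/emoji-vae-init weights load; the filename is taken from the examples list above.

    import numpy as np
    from PIL import Image
    from encoding import reconstruct_image

    # reconstruct_image expects a numpy array, as the gr.Image component supplies
    arr = np.array(Image.open("example_images/image1.jpg").convert("RGB"))
    orig_512, recon_512 = reconstruct_image(arr)  # both come back as 512x512 PIL images
    recon_512.save("reconstruction.png")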
example_images/image0.jpg ADDED
example_images/image1.jpg ADDED
example_images/image10.jpg ADDED
example_images/image2.jpg ADDED
example_images/image3.jpg ADDED
example_images/image4.jpg ADDED
example_images/image5.jpg ADDED
example_images/image6.jpg ADDED
example_images/image7.jpg ADDED
example_images/image8.jpg ADDED
generation.py ADDED
@@ -0,0 +1,34 @@
+import torch
+import gradio as gr
+from torchvision import transforms
+from PIL import Image
+from model import model
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+latent_dim = model.config.latent_dim
+
+def generate_from_noise():
+    z = torch.randn(1, latent_dim).to(device)
+    with torch.no_grad():
+        generated = model.decode(z)
+    gen_img = generated.squeeze(0).permute(1, 2, 0).cpu().numpy()
+    gen_pil = Image.fromarray((gen_img * 255).astype("uint8")).resize((512, 512))
+    return gen_pil
+
+def get_interface():
+    with gr.Blocks() as iface:
+        gr.Markdown("## Generate from Random Noise")
+        generate_button = gr.Button("Generate Image")
+        output_image = gr.Image(label="Generated Image", type="pil")
+        generate_button.click(fn=generate_from_noise, inputs=[], outputs=output_image)
+
+        examples = [[]]
+
+        gr.Examples(
+            examples=examples,
+            inputs=[],
+            outputs=output_image,
+            fn=generate_from_noise,
+            cache_examples=False
+        )
+    return iface
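
A quick smoke test for the sampler, assuming the model weights load: generate_from_noise draws z ~ N(0, I) in the latent_dim-dimensional latent space and decodes it.

    from generation import generate_from_noise

    img = generate_from_noise()  # 512x512 PIL image decoded from a random latent
    img.save("sample.png")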
interpolation.py ADDED
@@ -0,0 +1,84 @@
+import torch
+import gradio as gr
+from torchvision import transforms
+from PIL import Image
+import numpy as np
+from model import model
+import tempfile
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+transform = transforms.Compose([
+    transforms.Resize((32, 32)),
+    transforms.ToTensor()
+])
+
+resize_output = transforms.Resize((512, 512))
+
+def interpolate_vectors(v1, v2, num_steps):
+    return [v1 * (1 - alpha) + v2 * alpha for alpha in np.linspace(0, 1, num_steps)]
+
+def to_pil(img_tensor):
+    img = img_tensor.squeeze(0).permute(1, 2, 0).cpu().numpy()
+    img = (img * 255).clip(0, 255).astype(np.uint8)
+    return Image.fromarray(img)
+
+def interpolate_images_gif(img1, img2, num_interpolations=10, duration=100):
+    img1 = Image.fromarray(img1).convert('RGB')
+    img2 = Image.fromarray(img2).convert('RGB')
+    img1_tensor = transform(img1).unsqueeze(0).to(device)
+    img2_tensor = transform(img2).unsqueeze(0).to(device)
+
+    with torch.no_grad():
+        mu1, _ = model.encode(img1_tensor)
+        mu2, _ = model.encode(img2_tensor)
+        interpolated = interpolate_vectors(mu1, mu2, num_interpolations)
+        decoded_images = []
+        for z in interpolated:
+            out = model.decode(z)
+            img = to_pil(out)
+            img_resized = resize_output(img)
+            decoded_images.append(img_resized)
+
+    tmp_file = tempfile.NamedTemporaryFile(suffix=".gif", delete=False)
+    decoded_images[0].save(
+        tmp_file.name,
+        save_all=True,
+        append_images=decoded_images[1:],
+        duration=duration,
+        loop=0
+    )
+    return tmp_file.name
+
+def get_interface():
+    with gr.Blocks() as iface:
+        gr.Markdown("## Latent Space Interpolation (GIF Output)")
+        with gr.Row():
+            img1 = gr.Image(label="First Image", type="numpy")
+            img2 = gr.Image(label="Second Image", type="numpy")
+        slider_steps = gr.Slider(5, 30, value=10, step=1, label="Number of Interpolations")
+        slider_duration = gr.Slider(50, 500, value=100, step=10, label="Duration per Frame (ms)")
+        output_gif = gr.Image(label="Interpolation GIF")
+        run_button = gr.Button("Interpolate")
+
+        run_button.click(
+            fn=interpolate_images_gif,
+            inputs=[img1, img2, slider_steps, slider_duration],
+            outputs=output_gif
+        )
+
+        examples = [
+            ["example_images/image1.jpg", "example_images/image2.jpg", 10, 100],
+            ["example_images/image3.jpg", "example_images/image4.jpg", 15, 150],
+            ["example_images/image5.jpg", "example_images/image6.jpg", 20, 200],
+            ["example_images/image7.jpg", "example_images/image8.jpg", 25, 250],
+        ]
+
+        gr.Examples(
+            examples=examples,
+            inputs=[img1, img2, slider_steps, slider_duration],
+            outputs=output_gif,
+            fn=interpolate_images_gif,
+            cache_examples=False
+        )
+    return iface
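
The same interpolation can be driven outside the UI; a hedged sketch, assuming the example images and weights are available. Note the frames follow a straight line between the two posterior means, since interpolate_vectors is linear rather than spherical.

    import numpy as np
    from PIL import Image
    from interpolation import interpolate_images_gif

    a = np.array(Image.open("example_images/image1.jpg").convert("RGB"))
    b = np.array(Image.open("example_images/image2.jpg").convert("RGB"))
    path = interpolate_images_gif(a, b, num_interpolations=16, duration=80)
    print(path)  # temp-file path to a 16-frame looping GIF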
model.py ADDED
@@ -0,0 +1,87 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import PreTrainedModel, PretrainedConfig
+
+
+class BaseVAE(nn.Module):
+    def __init__(self, latent_dim=16):
+        super(BaseVAE, self).__init__()
+        self.latent_dim = latent_dim
+
+        self.encoder = nn.Sequential(
+            nn.Conv2d(3, 32, 4, 2, 1),    # 32x32 -> 16x16
+            nn.BatchNorm2d(32),
+            nn.ReLU(),
+            nn.Conv2d(32, 64, 4, 2, 1),   # 16x16 -> 8x8
+            nn.BatchNorm2d(64),
+            nn.ReLU(),
+            nn.Conv2d(64, 128, 4, 2, 1),  # 8x8 -> 4x4
+            nn.BatchNorm2d(128),
+            nn.ReLU(),
+            nn.Flatten()
+        )
+        self.fc_mu = nn.Linear(128 * 4 * 4, latent_dim)
+        self.fc_logvar = nn.Linear(128 * 4 * 4, latent_dim)
+
+        self.decoder_input = nn.Linear(latent_dim, 128 * 4 * 4)
+        self.decoder = nn.Sequential(
+            nn.ConvTranspose2d(128, 64, 4, 2, 1),  # 4x4 -> 8x8
+            nn.BatchNorm2d(64),
+            nn.ReLU(),
+            nn.ConvTranspose2d(64, 32, 4, 2, 1),   # 8x8 -> 16x16
+            nn.BatchNorm2d(32),
+            nn.ReLU(),
+            nn.ConvTranspose2d(32, 3, 4, 2, 1),    # 16x16 -> 32x32
+            nn.Sigmoid()
+        )
+
+    def encode(self, x):
+        x = self.encoder(x)
+        mu = self.fc_mu(x)
+        logvar = self.fc_logvar(x)
+        return mu, logvar
+
+    def reparameterize(self, mu, logvar):
+        std = torch.exp(0.5 * logvar)
+        eps = torch.randn_like(std)
+        return mu + eps * std
+
+    def decode(self, z):
+        x = self.decoder_input(z)
+        x = x.view(-1, 128, 4, 4)
+        return self.decoder(x)
+
+    def forward(self, x):
+        mu, logvar = self.encode(x)
+        z = self.reparameterize(mu, logvar)
+        recon = self.decode(z)
+        return recon, mu, logvar
+
+class VAEConfig(PretrainedConfig):
+    model_type = "vae"
+
+    def __init__(self, latent_dim=16, **kwargs):
+        super().__init__(**kwargs)
+        self.latent_dim = latent_dim
+
+class VAEModel(PreTrainedModel):
+    config_class = VAEConfig
+
+    def __init__(self, config):
+        super().__init__(config)
+        self.vae = BaseVAE(latent_dim=config.latent_dim)
+        self.post_init()
+
+    def forward(self, x):
+        return self.vae(x)
+
+    def encode(self, x):
+        return self.vae.encode(x)
+
+    def decode(self, z):
+        return self.vae.decode(z)
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = VAEModel.from_pretrained("BioMike/emoji-vae-init").to(device)
+model.eval()
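
The training loop is not part of this commit, but reparameterize implies the standard VAE objective. A hedged sketch of that loss, with BCE matching the Sigmoid decoder output and the closed-form KL term against N(0, I); the beta weight and dummy batch are illustrative only.

    import torch
    import torch.nn.functional as F
    from model import model

    def vae_loss(x, recon, mu, logvar, beta=1.0):
        # Reconstruction term: BCE suits decoder outputs in [0, 1]
        recon_loss = F.binary_cross_entropy(recon, x, reduction="sum")
        # KL( N(mu, sigma^2) || N(0, I) ) in closed form
        kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return recon_loss + beta * kl

    x = torch.rand(4, 3, 32, 32, device=model.device)  # dummy batch in [0, 1]
    recon, mu, logvar = model(x)                       # forward samples z via reparameterize
    loss = vae_loss(x, recon, mu, logvar)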
requirements.txt ADDED
@@ -0,0 +1,8 @@
+torch
+torchvision
+gradio
+transformers
+datasets
+huggingface_hub
+pillow
+numpy