Spaces:

ds1david
/

sculpt

Runtime error

App Files Files Community

ds1david committed on Mar 18

Commit

d160dc6

1 Parent(s): 4a3fe77

fixing bugs

Browse files

Files changed (2) hide show

app.py +48 -82
utils.py +29 -31

app.py CHANGED Viewed

@@ -1,16 +1,19 @@
 import gradio as gr
-import torch
 import jax
 import jax.numpy as jnp
 import numpy as np
 from PIL import Image
-import pickle
-import warnings
-import logging
-from huggingface_hub import hf_hub_download
 from diffusers import StableDiffusionXLImg2ImgPipeline
 from transformers import DPTImageProcessor, DPTForDepthEstimation
 from model import build_thera
 # Configuração de logging
 logging.basicConfig(
@@ -23,111 +26,84 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
-# Configurações e supressão de avisos
-warnings.filterwarnings("ignore", category=FutureWarning)
-warnings.filterwarnings("ignore", category=UserWarning)
-# Configurar dispositivos
 JAX_DEVICE = jax.devices("cpu")[0]
 TORCH_DEVICE = "cpu"
-# 1. Carregar modelos do Thera ----------------------------------------------------------------
 def load_thera_model(repo_id, filename):
     try:
-        logger.info(f"Carregando modelo Thera de {repo_id}")
         model_path = hf_hub_download(repo_id=repo_id, filename=filename)
         with open(model_path, 'rb') as fh:
             check = pickle.load(fh)
             variables = check['model']
             backbone, size = check['backbone'], check['size']
-        model = build_thera(3, backbone, size)
-        return model, variables
     except Exception as e:
-        logger.error(f"Erro ao carregar modelo: {str(e)}")
         raise
-logger.info("Carregando Thera EDSR...")
 model_edsr, variables_edsr = load_thera_model("prs-eth/thera-edsr-pro", "model.pkl")
-# 2. Carregar SDXL + LoRA ---------------------------------------------------------------------
-try:
-    logger.info("Carregando SDXL + LoRA...")
-    pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
-        "stabilityai/stable-diffusion-xl-base-1.0",
-        torch_dtype=torch.float32
-    ).to(TORCH_DEVICE)
-    pipe.load_lora_weights("KappaNeuro/bas-relief", weight_name="BAS-RELIEF.safetensors")
-except Exception as e:
-    logger.error(f"Erro ao carregar SDXL: {str(e)}")
-    raise
-# 3. Carregar modelo de profundidade ----------------------------------------------------------
-try:
-    logger.info("Carregando DPT Depth...")
-    feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
-    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(TORCH_DEVICE)
-except Exception as e:
-    logger.error(f"Erro ao carregar DPT: {str(e)}")
-    raise
 def adjust_size(size):
-    """Garante que o tamanho seja divisível por 8"""
-    return (size // 8) * 8
 def full_pipeline(image, prompt, scale_factor=2.0, progress=gr.Progress()):
     try:
-        progress(0.1, desc="Pré-processamento...")
-        # Converter e verificar imagem
         image = image.convert("RGB")
         source = np.array(image) / 255.0
-        # Adicionar dimensão de batch se necessário
-        if source.ndim == 3:
-            source = source[np.newaxis, ...]
-        # Ajustar tamanho alvo
         target_shape = (
             adjust_size(int(image.height * scale_factor)),
             adjust_size(int(image.width * scale_factor))
         )
-        progress(0.3, desc="Super-resolução...")
-        source_jax = jax.device_put(source, JAX_DEVICE)
         t = jnp.array([1.0 / (scale_factor ** 2)], dtype=jnp.float32)
-        # Processar com Thera
         upscaled = model_edsr.apply(
             variables_edsr,
             source_jax,
             t,
             target_shape
         )
-        # Remover dimensão de batch se necessário
-        if upscaled.ndim == 4:
-            upscaled = upscaled[0]
-        upscaled_pil = Image.fromarray((np.array(upscaled) * 255).astype(np.uint8))
-        progress(0.6, desc="Gerando Bas-Relief...")
-        full_prompt = f"BAS-RELIEF {prompt}, ultra detailed engraving, 16K resolution"
         bas_relief = pipe(
-            prompt=full_prompt,
             image=upscaled_pil,
             strength=0.7,
             num_inference_steps=25
         ).images[0]
         progress(0.8, desc="Calculando profundidade...")
         inputs = feature_extractor(bas_relief, return_tensors="pt").to(TORCH_DEVICE)
         with torch.no_grad():
-            outputs = depth_model(**inputs)
-            depth = outputs.predicted_depth
         depth_map = torch.nn.functional.interpolate(
             depth.unsqueeze(1),
@@ -141,34 +117,24 @@ def full_pipeline(image, prompt, scale_factor=2.0, progress=gr.Progress()):
         return upscaled_pil, bas_relief, depth_pil
     except Exception as e:
-        logger.error(f"Erro: {str(e)}", exc_info=True)
-        raise gr.Error(f"Erro: {str(e)}")
-# Interface Gradio ----------------------------------------------------------------------------
 with gr.Blocks(title="SuperRes + BasRelief") as app:
-    gr.Markdown("## 🖼️ Super Resolução + Bas-Relief + Mapa de Profundidade")
     with gr.Row():
         with gr.Column():
-            img_input = gr.Image(type="pil", label="Imagem de Entrada")
-            prompt = gr.Textbox(
-                label="Descrição",
-                value="insanely detailed and complex engraving relief, ultra-high definition"
-            )
-            scale = gr.Slider(1.0, 4.0, value=2.0, label="Fator de Escala")
-            btn = gr.Button("Processar")
         with gr.Column():
-            img_upscaled = gr.Image(label="Super Resolvida")
             img_basrelief = gr.Image(label="Bas-Relief")
             img_depth = gr.Image(label="Profundidade")
-    btn.click(
-        full_pipeline,
-        inputs=[img_input, prompt, scale],
-        outputs=[img_upscaled, img_basrelief, img_depth]
-    )
 if __name__ == "__main__":
-    app.launch()  # Sem compartilhamento público

+import logging
+import pickle
+import warnings
 import gradio as gr
 import jax
 import jax.numpy as jnp
 import numpy as np
+import torch
 from PIL import Image
 from diffusers import StableDiffusionXLImg2ImgPipeline
+from huggingface_hub import hf_hub_download
 from transformers import DPTImageProcessor, DPTForDepthEstimation
 from model import build_thera
+from utils import make_grid
 # Configuração de logging
 logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
+# Configurações
+warnings.filterwarnings("ignore")
 JAX_DEVICE = jax.devices("cpu")[0]
 TORCH_DEVICE = "cpu"
 def load_thera_model(repo_id, filename):
     # Fetch a pickled Thera checkpoint from the Hugging Face Hub and return a
     # (model, variables) pair: the module built by build_thera and the
     # checkpoint's 'model' entry. Any failure is logged, then re-raised as-is.
     try:
         model_path = hf_hub_download(repo_id=repo_id, filename=filename)
         with open(model_path, 'rb') as fh:
             # NOTE(review): unpickling can execute arbitrary code, so repo_id
             # must only ever name a trusted repository.
             check = pickle.load(fh)
             variables = check['model']
             backbone, size = check['backbone'], check['size']
         # NOTE(review): the literal 3 is presumably the output channel count
         # (RGB) -- confirm against build_thera's signature.
+        return build_thera(3, backbone, size), variables
     except Exception as e:
+        logger.error(f"Erro ao carregar Thera: {str(e)}")
         # Bare raise keeps the original exception type and traceback.
         raise
+logger.info("Carregando modelos...")
 model_edsr, variables_edsr = load_thera_model("prs-eth/thera-edsr-pro", "model.pkl")
+pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0",
+    torch_dtype=torch.float32
+).to(TORCH_DEVICE)
+pipe.load_lora_weights("KappaNeuro/bas-relief", weight_name="BAS-RELIEF.safetensors")
+feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
+depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(TORCH_DEVICE)
 def adjust_size(size):
     # Round size down to the nearest multiple of 8, but never return less
     # than 8 (so inputs below 8 no longer collapse to 0).
+    return max(8, (size // 8) * 8)
 def full_pipeline(image, prompt, scale_factor=2.0, progress=gr.Progress()):
     try:
+        progress(0.1, desc="Iniciando...")
         image = image.convert("RGB")
         source = np.array(image) / 255.0
+        # Ajuste de dimensões
         target_shape = (
             adjust_size(int(image.height * scale_factor)),
             adjust_size(int(image.width * scale_factor))
         )
+        logger.info(f"Transformação: {image.size} → {target_shape}")
+        # Gerar grid
+        coords = make_grid(target_shape)
+        logger.debug(f"Coords shape: {coords.shape}")
+        # Super-resolução
+        progress(0.3, desc="Processando super-resolução...")
+        source_jax = jax.device_put(source[np.newaxis, ...], JAX_DEVICE)
         t = jnp.array([1.0 / (scale_factor ** 2)], dtype=jnp.float32)
         upscaled = model_edsr.apply(
             variables_edsr,
             source_jax,
             t,
             target_shape
         )
+        upscaled_pil = Image.fromarray((np.array(upscaled[0]) * 255).astype(np.uint8))
+        # Bas-Relief
+        progress(0.6, desc="Gerando relevo...")
         bas_relief = pipe(
+            prompt=f"BAS-RELIEF {prompt}, ultra detailed engraving, 16K resolution",
             image=upscaled_pil,
             strength=0.7,
             num_inference_steps=25
         ).images[0]
+        # Depth Map
         progress(0.8, desc="Calculando profundidade...")
         inputs = feature_extractor(bas_relief, return_tensors="pt").to(TORCH_DEVICE)
         with torch.no_grad():
+            depth = depth_model(**inputs).predicted_depth
         depth_map = torch.nn.functional.interpolate(
             depth.unsqueeze(1),
         return upscaled_pil, bas_relief, depth_pil
     except Exception as e:
+        logger.error(f"ERRO: {str(e)}", exc_info=True)
+        raise gr.Error(f"Erro no processamento: {str(e)}")
+# Interface
 with gr.Blocks(title="SuperRes + BasRelief") as app:
+    gr.Markdown("## 🖼️ Super Resolução + 🗿 Bas-Relief + 🗺️ Mapa de Profundidade")
     with gr.Row():
         with gr.Column():
+            img_input = gr.Image(type="pil", label="Entrada")
+            prompt = gr.Textbox("Escultura detalhada em mármore, alto relevo", label="Descrição")
+            scale = gr.Slider(1.0, 4.0, value=2.0, label="Escala")
+            btn = gr.Button("Processar ▶️")
         with gr.Column():
+            img_upscaled = gr.Image(label="Super Resolução")
             img_basrelief = gr.Image(label="Bas-Relief")
             img_depth = gr.Image(label="Profundidade")
+    btn.click(full_pipeline, [img_input, prompt, scale], [img_upscaled, img_basrelief, img_depth])
 if __name__ == "__main__":
+    app.launch()

utils.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from functools import partial
 import jax
 import numpy as np
@@ -14,46 +14,44 @@ def repeat_vmap(fun, in_axes=None):
 def make_grid(patch_size: int | tuple[int, int]):
     if isinstance(patch_size, int):
-        patch_size = (patch_size, patch_size)
     offset_h, offset_w = 1 / (2 * np.array(patch_size))
     space_h = np.linspace(-0.5 + offset_h, 0.5 - offset_h, patch_size[0])
     space_w = np.linspace(-0.5 + offset_w, 0.5 - offset_w, patch_size[1])
-    return np.stack(np.meshgrid(space_h, space_w, indexing='ij'), axis=-1)  # [h, w]
 def interpolate_grid(coords, grid, order=0):
-    """
-    Args:
         coords: Tensor de shape (B, H, W, 2) ou (H, W, 2)
         grid: Tensor de shape (B, H', W', C)
     """
-    # Adicionar dimensão de batch se necessário
-    if coords.ndim == 3:
-        coords = coords[np.newaxis, ...]
-    # Verificar dimensões
-    assert coords.ndim == 4, f"Dimensões inválidas para coords: {coords.shape}"
-    assert grid.ndim == 4, f"Dimensões inválidas para grid: {grid.shape}"
-    # Ajustar transposição de forma segura
-    try:
         coords = coords.transpose((0, 3, 1, 2))
-    except ValueError as e:
-        raise ValueError(f"Falha na transposição: {coords.shape} → (0,3,1,2)") from e
-    # Conversão de coordenadas
-    coords = coords.at[:, 0].set(coords[:, 0] * grid.shape[-3] + (grid.shape[-3] - 1) / 2)
-    coords = coords.at[:, 1].set(coords[:, 1] * grid.shape[-2] + (grid.shape[-2] - 1) / 2)
-    # Interpolação com JAX
-    map_coordinates = partial(jax.scipy.ndimage.map_coordinates,
-                              order=order,
-                              mode='nearest')
-    return jax.vmap(  # Sobre batches
-        jax.vmap(  # Sobre canais
-            map_coordinates,
-            in_axes=(2, None),  # (C, H', W'), (B, 2, H, W)
             out_axes=2
-        )
-    )(grid, coords)

 from functools import partial
 import jax
+import jax.numpy as jnp
 import numpy as np
 def make_grid(patch_size: int | tuple[int, int]):
     # Build a grid of cell-centre coordinates for an (H, W) patch.
     # Each axis is split into equal cells spanning [-0.5, 0.5]; the returned
     # array has shape (1, H, W, 2) with [..., 0] the row (h) coordinate and
     # [..., 1] the column (w) coordinate.
     if isinstance(patch_size, int):
         # A bare int means a square patch; it is clamped to at least 1.
         # Tuple inputs are used as given, without clamping.
+        patch_size = (max(1, patch_size), max(1, patch_size))
     # Half a cell width per axis, so samples sit on cell centres rather than
     # on the outer edges of the [-0.5, 0.5] interval.
     offset_h, offset_w = 1 / (2 * np.array(patch_size))
     space_h = np.linspace(-0.5 + offset_h, 0.5 - offset_h, patch_size[0])
     space_w = np.linspace(-0.5 + offset_w, 0.5 - offset_w, patch_size[1])
     # indexing='ij' keeps the (row, column) ordering, giving shape (H, W, 2).
+    grid = np.stack(np.meshgrid(space_h, space_w, indexing='ij'), axis=-1)
+    return grid[np.newaxis, ...]  # Add a leading batch dimension
 def interpolate_grid(coords, grid, order=0):
+    """Args:
         coords: Tensor de shape (B, H, W, 2) ou (H, W, 2)
         grid: Tensor de shape (B, H', W', C)
+        order: default 0
     """
     # Sample `grid` at the normalised positions in `coords` using
     # jax.scipy.ndimage.map_coordinates (mode='nearest', spline `order`).
     # coords: shape (B, H, W, 2) or (H, W, 2); the last axis holds (h, w).
     # grid:   shape (B, H', W', C).
     # Returns an array of shape (B, H, W, C).
     # Every exception raised in the body, including the ValueError from the
     # shape check, reaches the caller wrapped in a RuntimeError.
+    try:
+        # Convert to a JAX array and prepend axes until coords is 4-D
+        coords = jnp.asarray(coords)
+        while coords.ndim < 4:
+            coords = coords[jnp.newaxis, ...]
+        # Final shape check: exactly 4-D with a trailing axis of size 2
+        if coords.shape[-1] != 2 or coords.ndim != 4:
+            raise ValueError(f"Formato inválido: {coords.shape}. Esperado (B, H, W, 2)")
+        # Move the coordinate axis forward: (B, H, W, 2) -> (B, 2, H, W)
         coords = coords.transpose((0, 3, 1, 2))
         # Map a normalised coordinate c on an axis of length n to the array
         # index c * n + (n - 1) / 2; n is H' for channel 0 and W' for channel 1.
+        coords = coords.at[:, 0].set(coords[:, 0] * grid.shape[-3] + (grid.shape[-3] - 1) / 2)
+        coords = coords.at[:, 1].set(coords[:, 1] * grid.shape[-2] + (grid.shape[-2] - 1) / 2)
+        # Vectorised interpolation: the outer vmap runs over the leading
+        # (batch) axis of both arguments, the inner one over grid channels.
         # NOTE(review): the outer vmap presumably requires grid and coords to
         # have the same leading size -- confirm against callers.
+        map_fn = jax.vmap(jax.vmap(
+            partial(jax.scipy.ndimage.map_coordinates, order=order, mode='nearest'),
             # Inner vmap: iterate over axis 2 (C) of a single (H', W', C)
             # sample while sharing the same (2, H, W) coordinates.
+            in_axes=(2, None),
             out_axes=2
+        ))
+        return map_fn(grid, coords)
+    except Exception as e:
+        raise RuntimeError(f"Falha na interpolação: {str(e)}") from e