ds1david committed
Commit a02c6d7 · 1 Parent(s): eb02bc3
Files changed (2):
  1. app.py +6 -6
  2. super_resolve.py +87 -118
app.py CHANGED
@@ -25,15 +25,15 @@ def load_thera_model(repo_id, filename):
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    with open(model_path, 'rb') as fh:
        check = pickle.load(fh)
-     # Adjust the parameter structure
-     params = check['model']['params']  # Access the parameters correctly
+     # Load the full variables structure
+     variables = check['model']  # Should contain {'params': ...}
    backbone, size = check['backbone'], check['size']
    model = build_thera(3, backbone, size)
-     return model, params
+     return model, variables


print("Carregando Thera EDSR...")
- model_edsr, params_edsr = load_thera_model("prs-eth/thera-edsr-pro", "model.pkl")
+ model_edsr, variables_edsr = load_thera_model("prs-eth/thera-edsr-pro", "model.pkl")

# 2. Load SDXL + LoRA ---------------------------------------------------------------------
print("Carregando SDXL + LoRA...")
@@ -60,9 +60,9 @@ def full_pipeline(image, prompt, scale_factor=2.0):
        source_jax = jax.device_put(source, JAX_DEVICE)
        t = jnp.array([1.0 / (scale_factor ** 2)], dtype=jnp.float32)

-         # Corrected call with the proper parameter structure
+         # Corrected call with the correct variables structure
        upscaled = model_edsr.apply(
-             params_edsr,  # Correctly structured parameters
+             variables_edsr,  # Full structure {'params': ...}
            source_jax,
            t,
            target_shape
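
The app.py change above hands `model_edsr.apply` the entire checkpoint variables mapping rather than the bare `params` subtree. As a minimal Flax sketch of why `apply` expects `{'params': ...}` — using a hypothetical `nn.Dense` stand-in rather than the actual Thera module, so read it as an assumption about the Flax API shape, not project code:

import jax
import jax.numpy as jnp
import flax.linen as nn

# Hypothetical stand-in for the model returned by build_thera(...)
model = nn.Dense(features=4)

# init() returns the full variables mapping, i.e. {'params': {...}}
variables = model.init(jax.random.PRNGKey(0), jnp.ones((1, 3)))

# apply() expects that same mapping back; passing variables['params'] alone
# drops the 'params' collection name and makes the parameter lookup fail.
y = model.apply(variables, jnp.ones((1, 3)))
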
super_resolve.py CHANGED
@@ -1,130 +1,99 @@
- import gradio as gr
- import torch
import jax
import jax.numpy as jnp
import numpy as np
from PIL import Image
- import pickle
- import warnings
- from huggingface_hub import hf_hub_download
- from diffusers import StableDiffusionXLImg2ImgPipeline
- from transformers import DPTImageProcessor, DPTForDepthEstimation
from model import build_thera

- # Settings and warning suppression
- warnings.filterwarnings("ignore", category=FutureWarning)
- warnings.filterwarnings("ignore", category=UserWarning)

- # Configure devices
- JAX_DEVICE = jax.devices("cpu")[0]
- TORCH_DEVICE = "cpu"


- # 1. Load the Thera models ----------------------------------------------------------------
- def load_thera_model(repo_id, filename):
-     model_path = hf_hub_download(repo_id=repo_id, filename=filename)
-     with open(model_path, 'rb') as fh:
        check = pickle.load(fh)
-     # Load the full variables structure
-     variables = check['model']  # Should contain {'params': ...}
-     backbone, size = check['backbone'], check['size']
    model = build_thera(3, backbone, size)
-     return model, variables
-
-
- print("Carregando Thera EDSR...")
- model_edsr, variables_edsr = load_thera_model("prs-eth/thera-edsr-pro", "model.pkl")
-
- # 2. Load SDXL + LoRA ---------------------------------------------------------------------
- print("Carregando SDXL + LoRA...")
- pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
-     "stabilityai/stable-diffusion-xl-base-1.0",
-     torch_dtype=torch.float32
- ).to(TORCH_DEVICE)
- pipe.load_lora_weights("KappaNeuro/bas-relief", weight_name="BAS-RELIEF.safetensors")
-
- # 3. Load the depth model ----------------------------------------------------------
- print("Carregando DPT Depth...")
- feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
- depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(TORCH_DEVICE)
-
-
- # Main pipeline --------------------------------------------------------------------------
- def full_pipeline(image, prompt, scale_factor=2.0):
-     try:
-         # 1. Super-resolution with Thera
-         image = image.convert("RGB")
-         source = np.array(image) / 255.0
-         target_shape = (int(image.height * scale_factor), int(image.width * scale_factor))
-
-         source_jax = jax.device_put(source, JAX_DEVICE)
-         t = jnp.array([1.0 / (scale_factor ** 2)], dtype=jnp.float32)
-
-         # Corrected call with the correct variables structure
-         upscaled = model_edsr.apply(
-             variables_edsr,  # Full structure {'params': ...}
-             source_jax,
-             t,
-             target_shape
-         )

-         upscaled_pil = Image.fromarray((np.array(upscaled) * 255).astype(np.uint8))
-
-         # 2. Generate the bas-relief
-         full_prompt = f"BAS-RELIEF {prompt}, insanely detailed and complex engraving relief, ultra-high definition, rich in detail, 16K resolution"
-         bas_relief = pipe(
-             prompt=full_prompt,
-             image=upscaled_pil,
-             strength=0.7,
-             num_inference_steps=25,
-             guidance_scale=7.5
-         ).images[0]
-
-         # 3. Compute the depth map
-         inputs = feature_extractor(bas_relief, return_tensors="pt").to(TORCH_DEVICE)
-         with torch.no_grad():
-             outputs = depth_model(**inputs)
-             depth = outputs.predicted_depth
-
-         depth_map = torch.nn.functional.interpolate(
-             depth.unsqueeze(1),
-             size=bas_relief.size[::-1],
-             mode="bicubic"
-         ).squeeze().cpu().numpy()
-
-         depth_normalized = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
-         depth_pil = Image.fromarray((depth_normalized * 255).astype(np.uint8))
-
-         return upscaled_pil, bas_relief, depth_pil
-
-     except Exception as e:
-         raise gr.Error(f"Erro no processamento: {str(e)}")
-
-
- # Gradio interface ----------------------------------------------------------------------------
- with gr.Blocks(title="Super Res + Bas-Relief") as app:
-     gr.Markdown("## 🔍 Super Resolução + 🗿 Bas-Relief + 🗺️ Profundidade")
-
-     with gr.Row():
-         with gr.Column():
-             img_input = gr.Image(type="pil", label="Imagem de Entrada")
-             prompt = gr.Textbox(
-                 label="Descrição do Relevo",
-                 value="insanely detailed and complex engraving relief, ultra-high definition, rich in detail, and 16K resolution."
-             )
-             scale = gr.Slider(1.0, 4.0, value=2.0, label="Fator de Escala")
-             btn = gr.Button("Processar")
-
-         with gr.Column():
-             img_upscaled = gr.Image(label="Imagem Super Resolvida")
-             img_basrelief = gr.Image(label="Resultado Bas-Relief")
-             img_depth = gr.Image(label="Mapa de Profundidade")
-
-     btn.click(
-         full_pipeline,
-         inputs=[img_input, prompt, scale],
-         outputs=[img_upscaled, img_basrelief, img_depth]
-     )
-
- if __name__ == "__main__":
-     app.launch(share=false)
+ #!/usr/bin/env python
+
+ from argparse import ArgumentParser, Namespace
+ import pickle
+
import jax
+ from jax import jit
import jax.numpy as jnp
import numpy as np
from PIL import Image
+
from model import build_thera
+ from utils import make_grid, interpolate_grid
+
+ MEAN = jnp.array([.4488, .4371, .4040])
+ VAR = jnp.array([.25, .25, .25])
+ PATCH_SIZE = 256
+
+
+ def process_single(source, apply_encoder, apply_decoder, params, target_shape):
+     t = jnp.float32((target_shape[0] / source.shape[1])**-2)[None]
+     coords_nearest = jnp.asarray(make_grid(target_shape)[None])
+     source_up = interpolate_grid(coords_nearest, source[None])
+     source = jax.nn.standardize(source, mean=MEAN, variance=VAR)[None]
+
+     encoding = apply_encoder(params, source)
+     coords = jnp.asarray(make_grid(source_up.shape[1:3])[None])  # global sampling coords
+     out = jnp.full_like(source_up, jnp.nan, dtype=jnp.float32)
+
+     for h_min in range(0, coords.shape[1], PATCH_SIZE):
+         h_max = min(h_min + PATCH_SIZE, coords.shape[1])
+         for w_min in range(0, coords.shape[2], PATCH_SIZE):
+             # apply decoder with one patch of coordinates
+             w_max = min(w_min + PATCH_SIZE, coords.shape[2])
+             coords_patch = coords[:, h_min:h_max, w_min:w_max]
+             out_patch = apply_decoder(params, encoding, coords_patch, t)
+             out = out.at[:, h_min:h_max, w_min:w_max].set(out_patch)
+
+     out = out * jnp.sqrt(VAR)[None, None, None] + MEAN[None, None, None]
+     out += source_up
+     return out
+

+ def process(source, model, params, target_shape, do_ensemble=True):
+     apply_encoder = jit(model.apply_encoder)
+     apply_decoder = jit(model.apply_decoder)

+     outs = []
+     for i_rot in range(4 if do_ensemble else 1):
+         source_ = jnp.rot90(source, k=i_rot, axes=(-3, -2))
+         target_shape_ = tuple(reversed(target_shape)) if i_rot % 2 else target_shape
+         out = process_single(source_, apply_encoder, apply_decoder, params, target_shape_)
+         outs.append(jnp.rot90(out, k=i_rot, axes=(-2, -3)))

+     out = jnp.stack(outs).mean(0).clip(0., 1.)
+     return jnp.rint(out[0] * 255).astype(jnp.uint8)

+
+ def main(args: Namespace):
+     source = np.asarray(Image.open(args.in_file)) / 255.
+
+     if args.scale is not None:
+         if args.size is not None:
+             raise ValueError('Cannot specify both size and scale')
+         target_shape = (
+             round(source.shape[0] * args.scale),
+             round(source.shape[1] * args.scale),
+         )
+     elif args.size is not None:
+         target_shape = args.size
+     else:
+         raise ValueError('Must specify either size or scale')
+
+     with open(args.checkpoint, 'rb') as fh:
        check = pickle.load(fh)
+     params, backbone, size = check['model'], check['backbone'], check['size']
+
    model = build_thera(3, backbone, size)

+     out = process(source, model, params, target_shape, not args.no_ensemble)
+
+     Image.fromarray(np.asarray(out)).save(args.out_file)
+
+
+ def parse_args() -> Namespace:
+     parser = ArgumentParser()
+     parser.add_argument('in_file')
+     parser.add_argument('out_file')
+     parser.add_argument('--scale', type=float, help='Scale factor for super-resolution')
+     parser.add_argument('--size', type=int, nargs=2,
+                         help='Target size (h, w), mutually exclusive with --scale')
+     parser.add_argument('--checkpoint', help='Path to checkpoint file')
+     parser.add_argument('--no-ensemble', action='store_true', help='Disable geo-ensemble')
+     return parser.parse_args()
+
+
+ if __name__ == '__main__':
+     args = parse_args()
+     main(args)
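
Based on the arguments defined in parse_args above, the new super_resolve.py would be invoked roughly like this; the image and checkpoint paths are placeholders, not files shipped with this commit:

# upscale by a fixed factor with a local Thera checkpoint
python super_resolve.py input.png output.png --scale 3.5 --checkpoint thera-edsr-pro.pkl

# or request an explicit (height, width) and skip the geometric self-ensemble
python super_resolve.py input.png output.png --size 1024 1024 --checkpoint thera-edsr-pro.pkl --no-ensemble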