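"""Gradio demo: Thera super resolution -> DPT depth estimation -> SDXL bas-relief.

The Thera torch.hub entry point is hypothetical (see load_thera_model), and the
danube2024/* checkpoints are assumed to be available on the Hugging Face Hub.
"""
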
import gradio as gr
import torch
import numpy as np
from PIL import Image
from peft import PeftModel
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from diffusers import StableDiffusionXLControlNetImg2ImgPipeline, ControlNetModel
from torchvision import transforms

# Initial configuration
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32


# --- Model Loading ---

# 1. Thera: super resolution
def load_thera_model():
    # Hypothetical hub entry point; adjust to match the actual Thera implementation
    model = torch.hub.load('prs-eth/thera', 'thera', trust_repo=True)
    return model.to(DEVICE)


# 2. Depth map with PEFT
def load_depth_model():
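    """Load DPT-Large and attach the dpt-peft-lora adapter via PEFT."""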
    base_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
    model = PeftModel.from_pretrained(base_model, "danube2024/dpt-peft-lora")
    return model.to(DEVICE).eval()


# 3. Bas-relief with ControlNet
def load_controlnet():
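    """Assemble the SDXL img2img pipeline with the bas-relief ControlNet and LoRA weights."""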
    controlnet = ControlNetModel.from_pretrained(
        "danube2024/controlnet-bas-relief",
        torch_dtype=TORCH_DTYPE
    )
    # The img2img variant is needed because the call below passes both an init image and a control image
    pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        controlnet=controlnet,
        torch_dtype=TORCH_DTYPE
    )
    pipe.load_lora_weights("danube2024/bas-relief-lora")
    return pipe.to(DEVICE)


# --- Processing ---

def run_thera(image, model):
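    """Upscale with Thera: normalize to [-1, 1], run the model, map back to [0, 1]."""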
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ])

    input_tensor = transform(image).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        output = model(input_tensor)

    output_img = transforms.ToPILImage()(output.squeeze().cpu().clamp(-1, 1) * 0.5 + 0.5)
    return output_img


def create_depth_map(image, model, feature_extractor):
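    """Predict a depth map with DPT and bicubically upsample it to the input image size."""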
    inputs = feature_extractor(images=image, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )
    return prediction.squeeze().cpu().numpy()


def create_bas_relief(prompt, image, depth_map, pipe):
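    """Run the SDXL img2img pipeline, conditioning on the depth map through ControlNet."""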
    # Convert the normalized depth map to an RGB control image for the ControlNet
    control_image = Image.fromarray((depth_map * 255).astype(np.uint8)).convert("RGB")

    image = image.resize((1024, 1024))
    control_image = control_image.resize((1024, 1024))

    result = pipe(
        prompt=prompt,
        image=image,
        control_image=control_image,
        strength=0.8,
        num_inference_steps=30
    ).images[0]

    return result


# --- Interface Gradio ---

with gr.Blocks() as app:
    gr.Markdown("# 🖼️ Super Resolução + Depth Map + Bas-Relief")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            prompt = gr.Textbox(
                "high quality bas-relief sculpture, intricate details",
                label="Prompt"
            )
            submit_btn = gr.Button("Process")

        with gr.Column():
            upscaled_output = gr.Image(label="Upscaled Image")
            depth_output = gr.Image(label="Depth Map")
            basrelief_output = gr.Image(label="Bas-Relief Result")


    def process(image, prompt):
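        """Run the full chain: super resolution -> depth estimation -> bas-relief generation."""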
        # Load models (reloaded on every request for simplicity)
        thera_model = load_thera_model()
        depth_model = load_depth_model()
        feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
        basrelief_pipe = load_controlnet()

        # 1. Super resolution
        upscaled = run_thera(image, thera_model)

        # 2. Depth Map
        depth = create_depth_map(upscaled, depth_model, feature_extractor)
        # Normalize to [0, 1]; the epsilon guards against a constant depth map
        depth_normalized = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)

        # 3. Bas-Relief
        basrelief = create_bas_relief(prompt, upscaled, depth_normalized, basrelief_pipe)

        return upscaled, depth_normalized, basrelief


    submit_btn.click(
        process,
        inputs=[input_image, prompt],
        outputs=[upscaled_output, depth_output, basrelief_output]
    )

if __name__ == "__main__":
    app.launch()