import gradio as gr
import torch
import numpy as np
from diffusers import StableDiffusionXLImg2ImgPipeline
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from PIL import Image, ImageEnhance, ImageOps

device = "cpu"  # or "cuda" if you have a GPU
torch_dtype = torch.float32

print("Loading SDXL Img2Img model...")
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch_dtype
).to(device)

print("Loading bas-relief LoRA weights with PEFT...")
# diffusers routes LoRA loading through the PEFT backend automatically
# when the `peft` package is installed; no extra keyword is needed.
pipe.load_lora_weights(
    "KappaNeuro/bas-relief",
    weight_name="BAS-RELIEF.safetensors"
)

print("Loading DPT Depth Model...")
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)


def enhance_depth_map(depth_arr: np.ndarray) -> Image.Image:
    # Stretch the raw depth values to the full 0-255 range
    d_min, d_max = depth_arr.min(), depth_arr.max()
    depth_stretched = (depth_arr - d_min) / (d_max - d_min + 1e-8)
    depth_stretched = (depth_stretched * 255).astype(np.uint8)

    depth_pil = Image.fromarray(depth_stretched)
    depth_pil = ImageOps.autocontrast(depth_pil)

    # Sharpen the map so relief edges read more clearly
    enhancer = ImageEnhance.Sharpness(depth_pil)
    depth_pil = enhancer.enhance(2.0)

    return depth_pil


def generate_bas_relief_and_depth(input_image: Image.Image):
    # Resize the image to the size the pipeline expects
    input_image = input_image.resize((512, 512))

    # Fixed prompt that triggers the LoRA style
    prompt = "BAS-RELIEF"

    print("Generating bas-relief styled image...")
    result = pipe(
        prompt=prompt,
        image=input_image,
        strength=0.7,  # Controls the intensity of the transformation
        num_inference_steps=15,
        guidance_scale=7.5
    )
    generated_image = result.images[0]

    print("Computing depth map...")
    inputs = feature_extractor(generated_image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = depth_model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Upsample the prediction to the image resolution; PIL's .size is
    # (width, height), so reverse it to get (height, width) for interpolate
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=generated_image.size[::-1],
        mode="bicubic",
        align_corners=False
    ).squeeze()

    depth_map_pil = enhance_depth_map(prediction.cpu().numpy())

    return generated_image, depth_map_pil


title = "Bas-Relief Converter (SDXL + LoRA) with Depth Map"
description = (
    "Loads stable-diffusion-xl-base-1.0 on the CPU, applies the 'KappaNeuro/bas-relief' "
    "LoRA to turn images into bas-reliefs, and computes the corresponding depth map."
)

iface = gr.Interface(
    fn=generate_bas_relief_and_depth,
    inputs=gr.Image(label="Input Image", type="pil"),
    outputs=[
        gr.Image(label="Bas-Relief Image"),
        gr.Image(label="Depth Map")
    ],
    title=title,
    description=description
)

if __name__ == "__main__":
    iface.launch()
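
# --- Usage sketch (hypothetical, kept commented out) ---
# A minimal way to exercise the pipeline without the Gradio UI, assuming a
# local file named "input.jpg" exists; the filename and output paths are
# placeholders, not part of the app above. Note that iface.launch() blocks,
# so run this instead of (not after) launching the interface.
#
# from PIL import Image
# relief, depth = generate_bas_relief_and_depth(Image.open("input.jpg").convert("RGB"))
# relief.save("relief.png")
# depth.save("depth.png")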