# sculpt/app.py
import gradio as gr
import torch
import numpy as np
from diffusers import StableDiffusionXLImg2ImgPipeline
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from PIL import Image, ImageEnhance, ImageOps
device = "cpu" # or "cuda" if you have a GPU
torch_dtype = torch.float32
print("Loading SDXL Img2Img model...")
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch_dtype
).to(device)
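# Note: the first run downloads several GB of SDXL weights, and CPU generation can
# take minutes per image. If the process runs out of RAM, pipe.enable_attention_slicing()
# can lower peak memory at a small speed cost.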
print("Loading bas-relief LoRA weights with PEFT...")
pipe.load_lora_weights(
    "KappaNeuro/bas-relief",
    weight_name="BAS-RELIEF.safetensors",
    peft_backend="peft"
)
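# The LoRA responds to the trigger phrase "BAS-RELIEF"; the fixed prompt in
# generate_bas_relief_and_depth() below uses it to activate the style.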
print("Loading DPT Depth Model...")
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)
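# DPT-Large predicts relative depth (arbitrary scale, not metric distances);
# enhance_depth_map() below normalizes it to 8-bit grayscale for display.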
def enhance_depth_map(depth_arr: np.ndarray) -> Image.Image:
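    """Normalize a raw depth array to 8-bit grayscale and boost its contrast."""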
    # Min-max normalize to [0, 1], guarding against a flat (constant) depth map
    d_min, d_max = depth_arr.min(), depth_arr.max()
    depth_stretched = (depth_arr - d_min) / (d_max - d_min + 1e-8)
    depth_stretched = (depth_stretched * 255).astype(np.uint8)

    depth_pil = Image.fromarray(depth_stretched)
    depth_pil = ImageOps.autocontrast(depth_pil)

    # Sharpen so relief contours read more clearly
    enhancer = ImageEnhance.Sharpness(depth_pil)
    depth_pil = enhancer.enhance(2.0)
    return depth_pil

def generate_bas_relief_and_depth(input_image: Image.Image):
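    """Stylize the input image as a bas-relief with SDXL + LoRA, then estimate its depth map."""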
    # Resize to 512x512 (SDXL's native size is 1024x1024; 512 keeps CPU inference manageable)
    input_image = input_image.resize((512, 512))

    # Fixed prompt containing the LoRA trigger phrase
    prompt = "BAS-RELIEF"
    print("Generating bas-relief styled image...")
    result = pipe(
        prompt=prompt,
        image=input_image,
        strength=0.7,  # how strongly the original image is transformed
        num_inference_steps=15,
        guidance_scale=7.5
    )
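    # In img2img, roughly strength * num_inference_steps denoising steps actually
    # run (about 10 here), trading some quality for lower CPU latency.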
    generated_image = result.images[0]

    print("Computing depth map...")
    inputs = feature_extractor(generated_image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = depth_model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Upsample the low-resolution depth prediction back to the image size;
    # PIL's .size is (width, height), while interpolate expects (height, width)
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=generated_image.size[::-1],
        mode="bicubic",
        align_corners=False
    ).squeeze()

    depth_map_pil = enhance_depth_map(prediction.cpu().numpy())
    return generated_image, depth_map_pil

title = "Conversor para Baixo-relevo (SDXL + LoRA) com Mapa de Profundidade"
description = (
"Carrega stable-diffusion-xl-base-1.0 no CPU, aplica LoRA de 'KappaNeuro/bas-relief' "
"para transformar imagens em baixo-relevo e calcula o mapa de profundidade correspondente."
)
iface = gr.Interface(
    fn=generate_bas_relief_and_depth,
    inputs=gr.Image(label="Input Image", type="pil"),
    outputs=[
        gr.Image(label="Bas-Relief Image"),
        gr.Image(label="Depth Map")
    ],
    title=title,
    description=description
)
if __name__ == "__main__":
    iface.launch()