import gradio as gr
import torch
import numpy as np
from PIL import Image
from peft import PeftModel
# DPTFeatureExtractor is deprecated in recent transformers releases;
# DPTImageProcessor is the drop-in replacement if you hit the warning.
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
# The img2img variant is required: the generation step below passes an init
# image (`image`), a conditioning image (`control_image`), and a `strength`
# argument, which the plain text-to-image ControlNet pipeline does not accept.
from diffusers import StableDiffusionXLControlNetImg2ImgPipeline, ControlNetModel
from torchvision import transforms
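
# Pipeline overview:
#   1. Thera super-resolution upscales the input image.
#   2. A LoRA-tuned DPT model estimates a depth map from the upscaled image.
#   3. An SDXL ControlNet pipeline conditioned on that depth map renders the
#      final bas-relief.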

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32


def load_thera_model():
    # NOTE: assumes the prs-eth/thera repository exposes a torch.hub entrypoint
    # named "thera"; adjust the entrypoint name if the hub config differs.
    model = torch.hub.load("prs-eth/thera", "thera", trust_repo=True)
    return model.to(DEVICE)


def load_depth_model():
    # DPT-Large base weights with a LoRA adapter applied on top via PEFT.
    base_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
    model = PeftModel.from_pretrained(base_model, "danube2024/dpt-peft-lora")
    return model.to(DEVICE).eval()


def load_controlnet():
    controlnet = ControlNetModel.from_pretrained(
        "danube2024/controlnet-bas-relief",
        torch_dtype=TORCH_DTYPE,
    )
    pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        controlnet=controlnet,
        torch_dtype=TORCH_DTYPE,
    )
    pipe.load_lora_weights("danube2024/bas-relief-lora")
    return pipe.to(DEVICE)


def run_thera(image, model):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),  # map pixel values to [-1, 1]
    ])

    # Force RGB so the tensor always has three channels, regardless of input mode.
    input_tensor = transform(image.convert("RGB")).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        output = model(input_tensor)

    # Undo the normalization: clamp to [-1, 1], then rescale to [0, 1] for PIL.
    output_img = transforms.ToPILImage()(output.squeeze().cpu().clamp(-1, 1) * 0.5 + 0.5)
    return output_img


def create_depth_map(image, model, feature_extractor):
    inputs = feature_extractor(images=image, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Upsample the low-resolution prediction to the input size; PIL reports
    # (width, height) while interpolate expects (height, width).
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )
    return prediction.squeeze().cpu().numpy()


def create_bas_relief(prompt, image, depth_map, pipe):
    # ControlNet expects a 3-channel conditioning image, so replicate the
    # grayscale depth map across RGB.
    control_image = Image.fromarray((depth_map * 255).astype(np.uint8)).convert("RGB")

    # SDXL works at its native 1024x1024 resolution.
    image = image.resize((1024, 1024))
    control_image = control_image.resize((1024, 1024))

    result = pipe(
        prompt=prompt,
        image=image,
        control_image=control_image,
        strength=0.8,
        num_inference_steps=30,
    ).images[0]

    return result


with gr.Blocks() as app:
    gr.Markdown("# 🖼️ Super-Resolution + Depth Map + Bas-Relief")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            prompt = gr.Textbox(
                value="high quality bas-relief sculpture, intricate details",
                label="Prompt",
            )
            submit_btn = gr.Button("Process")

        with gr.Column():
            upscaled_output = gr.Image(label="Super-Resolved Image")
            depth_output = gr.Image(label="Depth Map")
            basrelief_output = gr.Image(label="Bas-Relief Result")

    def process(image, prompt):
        # NOTE: the models are reloaded on every click to keep the example
        # simple; cache them at module level to avoid the reload cost.
        thera_model = load_thera_model()
        depth_model = load_depth_model()
        feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
        basrelief_pipe = load_controlnet()

        # Step 1: super-resolve the input.
        upscaled = run_thera(image, thera_model)

        # Step 2: estimate depth and normalize to [0, 1]; the epsilon guards
        # against division by zero on a constant depth map.
        depth = create_depth_map(upscaled, depth_model, feature_extractor)
        depth_normalized = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)

        # Step 3: render the bas-relief conditioned on the depth map.
        basrelief = create_bas_relief(prompt, upscaled, depth_normalized, basrelief_pipe)

        # Convert the float depth map to uint8 for display.
        return upscaled, (depth_normalized * 255).astype(np.uint8), basrelief

    submit_btn.click(
        process,
        inputs=[input_image, prompt],
        outputs=[upscaled_output, depth_output, basrelief_output],
    )
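
# Run this file directly; Gradio serves the UI at http://127.0.0.1:7860 by default.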
if __name__ == "__main__":
    app.launch()