Spaces: Running on T4
Arrcttacsrks committed
Commit • 69f4568
1 Parent(s): 67f8d32

Update test_code/inference.py
test_code/inference.py CHANGED (+83 -132)
@@ -1,153 +1,104 @@
-import argparse
-import os, sys, cv2, shutil, warnings
-import torch
 import gradio as gr
-from torchvision.transforms import ToTensor
-from torchvision.utils import save_image
-warnings.simplefilter("default")
-os.environ["PYTHONWARNINGS"] = "default"
-
-
-# Import files from the local folder
-root_path = os.path.abspath('.')
-sys.path.append(root_path)
-from test_code.test_utils import load_grl, load_rrdb, load_cunet
-
-
-@torch.no_grad()    # Needed; otherwise inference runs out of memory
-def super_resolve_img(generator, input_path, output_path=None, weight_dtype=torch.float32, downsample_threshold=720, crop_for_4x=True):
-    ''' Super-resolve a low-resolution image
-    Args:
-        generator (torch.nn.Module):    the generator that is already loaded
-        input_path (str):               the path to the input lr image
-        output_path (str):              the path to store the generated image
-        weight_dtype (torch.dtype):     the weight type (float32/float16)
-        downsample_threshold (int):     the height/width threshold (short side) above which we downsample the input
-        crop_for_4x (bool):             whether we crop the lr image to match the 4x scale (needed in some situations)
-    '''
-    print("Processing image {}".format(input_path))
-
-    # Read the image and do the preprocessing
-    img_lr = cv2.imread(input_path)
-    h, w, c = img_lr.shape
-
-    # Downsample if the short side exceeds the threshold
-    short_side = min(h, w)
-    if short_side > downsample_threshold:
-        resize_ratio = short_side / downsample_threshold
-        img_lr = cv2.resize(img_lr, (int(w/resize_ratio), int(h/resize_ratio)), interpolation = cv2.INTER_LINEAR)
-
-    # Crop the height and width to multiples of 4
     if crop_for_4x:
-        h, w, _ = img_lr.shape
         if h % 4 != 0:
-            img_lr = img_lr[:4 * (h // 4), :, :]
         if w % 4 != 0:
-            img_lr = img_lr[:, :4 * (w // 4), :]
-
-    # Check if the size is out of the boundary
-    h, w, c = img_lr.shape
-    if h*w > 720*1280:
-        raise gr.Error("The input image size is too large. The largest area we support is 720x1280=921600 pixels!")
-
-    # Transform to tensor
-    img_lr = cv2.cvtColor(img_lr, cv2.COLOR_BGR2RGB)
-    img_lr = ToTensor()(img_lr).unsqueeze(0).cuda()     # Use tensor format
-    img_lr = img_lr.to(dtype=weight_dtype)
-
-    # Model inference
-    print("lr shape is ", img_lr.shape)
-    super_resolved_img = generator(img_lr)
-
-    # Store the generated result
-    if output_path is not None:
-        save_image(super_resolved_img, output_path)
-
-    return super_resolved_img
-
-
-[...]
-
-# Fundamental setting
-parser = argparse.ArgumentParser()
-parser.add_argument('--input_dir', type = str, default = '__assets__/lr_inputs', help="Can be either a single-image input or a folder input")
-parser.add_argument('--model', type = str, default = 'GRL', help=" 'GRL' || 'RRDB' (for ESRNET & ESRGAN) || 'CUNET' (for Real-ESRGAN) ")
-parser.add_argument('--scale', type = int, default = 4, help="Upscale factor")
-parser.add_argument('--weight_path', type = str, default = 'pretrained/4x_APISR_GRL_GAN_generator.pth', help="Weight path, usually under the saved_models folder")
-parser.add_argument('--store_dir', type = str, default = 'sample_outputs', help="The folder to store the super-resolved images")
-parser.add_argument('--float16_inference', type = bool, default = False, help="Float16 inference, only useful in RRDB now")    # Currently only supported in RRDB; there is a bug with the GRL model
-args = parser.parse_args()
-
-# Sample commands
-# 4x GRL (default): python test_code/inference.py --model GRL --scale 4 --weight_path pretrained/4x_APISR_GRL_GAN_generator.pth
-# 2x RRDB:          python test_code/inference.py --model RRDB --scale 2 --weight_path pretrained/2x_APISR_RRDB_GAN_generator.pth
-
-
-# Read the arguments and prepare the folders needed
-input_dir = args.input_dir
-model = args.model
-weight_path = args.weight_path
-store_dir = args.store_dir
-scale = args.scale
-float16_inference = args.float16_inference
-
-
-# Check the path of the weight
-if not os.path.exists(weight_path):
-    print("We cannot locate the weight path ", weight_path)
-    # TODO: I am not sure if I should automatically download the weight from the GitHub release based on the upscale factor and model name.
-    os._exit(0)
-
-
-# Prepare the store folder
-if os.path.exists(store_dir):
-    shutil.rmtree(store_dir)
-os.makedirs(store_dir)
-
-
-# Define the inference dtype
-if float16_inference:
-    torch.backends.cudnn.benchmark = True
-    weight_dtype = torch.float16
-else:
-    weight_dtype = torch.float32
-
-
-# Load the generator (GRL / RRDB / CUNET via load_grl / load_rrdb / load_cunet)
-[...]
-
-
-# Process a whole folder of images, or a single image
-if os.path.isdir(input_dir):
-    for filename in os.listdir(input_dir):
-        input_path = os.path.join(input_dir, filename)
-        output_path = os.path.join(store_dir, filename)
-        # By default, we automatically crop to match the 4x size
-        super_resolve_img(generator, input_path, output_path, weight_dtype, crop_for_4x=True)
-
-else:   # If the input is a single image, we process it directly and write to the same folder
-    filename = os.path.split(input_dir)[-1].split('.')[0]
-    output_path = os.path.join(store_dir, filename+"_"+str(scale)+"x.png")
-    # By default, we automatically crop to match the 4x size
-    super_resolve_img(generator, input_dir, output_path, weight_dtype, crop_for_4x=True)
+import os
+import cv2
+import numpy as np
+import onnxruntime as ort
 import gradio as gr
+from PIL import Image

+# Path to the model in the Hugging Face Space
+MODEL_PATH = "pretrained/4xGRL.onnx"  # Adjust this if the model is stored in a different location

+# Preprocessing function for images (similar to the original script)
+def preprocess_image(img, target_height=180, target_width=320, crop_for_4x=True, downsample_threshold=720):
+    ''' Preprocess the image to match model input expectations '''
+    img = np.array(img)

+    # Convert to RGB (OpenCV uses BGR by default)
+    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+    # Resize if necessary (downsample based on the downsample threshold)
+    h, w, _ = img_rgb.shape
+    short_side = min(h, w)

+    # Downsample if the short side exceeds the threshold
+    if short_side > downsample_threshold:
+        resize_ratio = short_side / downsample_threshold
+        img_rgb = cv2.resize(img_rgb, (int(w / resize_ratio), int(h / resize_ratio)), interpolation=cv2.INTER_LINEAR)
+
+    # Crop to match 4x scaling if needed
     if crop_for_4x:
+        h, w, _ = img_rgb.shape
         if h % 4 != 0:
+            img_rgb = img_rgb[:4 * (h // 4), :, :]
         if w % 4 != 0:
+            img_rgb = img_rgb[:, :4 * (w // 4), :]

+    # Resize the image to match the model's expected input size (e.g., 180x320)
+    img_resized = cv2.resize(img_rgb, (target_width, target_height))  # Resize to 180x320

+    return img_resized

+# Inference function to process the image using the ONNX model
+def inference(img, model_name="4xGRL"):
+    try:
+        # Ensure the correct dtype for ONNX
+        weight_dtype = np.float32  # ONNX uses numpy arrays, so use np.float32
+
+        if model_name == "4xGRL":
+            # Load the ONNX model
+            ort_session = ort.InferenceSession(MODEL_PATH)

+            # Preprocess the image (resize, crop, etc.)
+            img_resized = preprocess_image(img)

+            # Prepare the input in the format expected by the model (e.g., (N, C, H, W))
+            input_image = np.transpose(img_resized, (2, 0, 1))  # Convert to (C, H, W)
+            input_image = np.expand_dims(input_image, axis=0)  # Add the batch dimension
+            input_image = input_image.astype(weight_dtype)  # Convert to float32

+            # Run the model
+            ort_inputs = {ort_session.get_inputs()[0].name: input_image}
+            ort_outs = ort_session.run(None, ort_inputs)

+            # Post-process the output
+            output_image = ort_outs[0]  # Assuming the model output is in the first position
+            output_image = np.transpose(output_image.squeeze(), (1, 2, 0))  # Convert to (H, W, C)
+            output_image = np.clip(output_image, 0, 255).astype(np.uint8)  # Ensure a valid image range

+            # Convert the output to a PIL Image for Gradio
+            output_pil = Image.fromarray(output_image)

+            return output_pil

+        else:
+            raise Exception("Model not supported")

+    except Exception as error:
+        return f"An error occurred: {error}"

+# Gradio interface
+def create_interface():
+    with gr.Blocks() as demo:
+        gr.Markdown("# Anime Super-Resolution using ONNX")
+        gr.Markdown("Upload an anime image to enhance it using the 4xGRL model.")
+
+        # File input for the image
+        with gr.Row():
+            input_image = gr.Image(type="pil", label="Upload Image", interactive=True)
+
+        # Process button
+        with gr.Row():
+            process_button = gr.Button("Process Image")
+
+        # Output for the result image
+        with gr.Row():
+            result_image = gr.Image(type="pil", label="Processed Image")
+
+        # Functionality for processing the image
+        process_button.click(inference, inputs=input_image, outputs=result_image)

+    return demo

+# Launch the app
+demo = create_interface()
+demo.launch(share=True)
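Note on the fixed input size: the rewritten app resizes every input to 180x320 before inference, which suggests the ONNX graph was exported with static spatial dimensions. Below is a minimal sketch (not part of this commit) of how the expected size could be read from the session instead of hard-coded; the model path matches MODEL_PATH above, and the printed name/shape shown in the comment are illustrative:

import onnxruntime as ort

# Inspect the ONNX model's input metadata rather than assuming 180x320.
session = ort.InferenceSession("pretrained/4xGRL.onnx")
meta = session.get_inputs()[0]
print(meta.name, meta.shape)  # e.g. input [1, 3, 180, 320]

# Static dimensions come back as ints; dynamic axes come back as strings
# (e.g. "height"), so only trust integer values here.
_, _, h, w = meta.shape
if isinstance(h, int) and isinstance(w, int):
    target_height, target_width = h, w  # could be passed to preprocess_image

If both spatial axes come back as strings, the graph accepts variable sizes and the fixed 180x320 resize in preprocess_image would not be required.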
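The commit itself does not show how pretrained/4xGRL.onnx was produced. Here is a hypothetical export sketch, assuming the generator loads the way the removed script's imports suggest; the load_grl signature, the 180x320 input size, and the opset version are assumptions, not taken from this commit:

import torch
from test_code.test_utils import load_grl  # helper imported by the removed script

# Hypothetical: load the PyTorch GRL generator as the old inference.py did.
generator = load_grl("pretrained/4x_APISR_GRL_GAN_generator.pth", scale=4)  # signature assumed
generator.eval()

# Export with a static 1x3x180x320 input, matching the fixed resize in the new app.
dummy_input = torch.randn(1, 3, 180, 320)
torch.onnx.export(
    generator,
    dummy_input,
    "pretrained/4xGRL.onnx",
    input_names=["input"],
    output_names=["output"],
    opset_version=17,  # assumed; any opset supported by the deployed onnxruntime works
)

A static export like this would explain why the app pins its inputs to exactly 180x320.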