from PIL import Image
import cv2
import torch
from RealESRGAN import RealESRGAN
import tempfile
import numpy as np
from tqdm import tqdm
import os
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def infer_image(img: Image.Image, size_modifier: int) -> Image.Image:
    if img is None:
        raise ValueError("Image not uploaded")

    # Reject very large inputs to keep memory usage bounded
    width, height = img.size
    if width >= 5000 or height >= 5000:
        raise ValueError("The image is too large.")

    # Load the RealESRGAN weights matching the requested upscaling factor
    model = RealESRGAN(device, scale=size_modifier)
    model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)

    result = model.predict(img.convert('RGB'))
    print(f"Upscaled image x{size_modifier} on {device} ... OK")
    return result

def infer_video(video_filepath: str, size_modifier: int) -> str:
    model = RealESRGAN(device, scale=size_modifier)
    model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)
    
    # Extract the audio track from the original video into a temporary WAV file,
    # so it can be re-attached to the upscaled video at the end
    audio = AudioSegment.from_file(video_filepath, format="mp4")
    audio_tmpfile = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    audio_path = audio_tmpfile.name
    audio_tmpfile.close()
    audio.export(audio_path, format="wav")

    # Create a VideoCapture object for the video file
    cap = cv2.VideoCapture(video_filepath)

    # Create a temporary file for the output video
    tmpfile = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
    vid_output = tmpfile.name
    tmpfile.close()

    # Create a VideoWriter object for the output video
    vid_writer = cv2.VideoWriter(
        vid_output,
        fourcc=cv2.VideoWriter.fourcc(*'mp4v'),
        fps=cap.get(cv2.CAP_PROP_FPS),
        frameSize=(int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) * size_modifier, int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) * size_modifier)
    )

    # Process each frame of the video and write it to the output video
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    for _ in tqdm(range(n_frames)):
        # Read the next frame
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the frame to RGB and feed it to the model
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = Image.fromarray(frame)
        upscaled_frame = model.predict(frame.convert('RGB'))

        # Convert the upscaled frame back to BGR and write it to the output video
        upscaled_frame = np.array(upscaled_frame)
        upscaled_frame = cv2.cvtColor(upscaled_frame, cv2.COLOR_RGB2BGR)
        vid_writer.write(upscaled_frame)

    # Release the VideoCapture and VideoWriter objects
    cap.release()
    vid_writer.release()

    # Create a new VideoFileClip object from the output video
    output_clip = VideoFileClip(vid_output)

    # Re-attach the extracted audio track to the upscaled video
    audio_clip = AudioFileClip(audio_path)
    output_clip = output_clip.set_audio(audio_clip)

    # Save the final video next to the working directory; use only the basename
    # so an absolute input path does not produce an invalid output path
    output_path = f'output_{os.path.basename(video_filepath)}'
    output_clip.write_videofile(output_path)

    return output_path
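
# Example usage (a minimal sketch: it assumes the RealESRGAN weight files already
# exist under weights/, and 'input.jpg' / 'input.mp4' are placeholder filenames,
# not files shipped with this script).
if __name__ == '__main__':
    upscaled = infer_image(Image.open('input.jpg'), size_modifier=4)
    upscaled.save('upscaled.jpg')

    result_path = infer_video('input.mp4', size_modifier=2)
    print(f'Upscaled video written to: {result_path}')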