app.py CHANGED
@@ -2,16 +2,19 @@ import spaces
2
  import os
3
  import gradio as gr
4
  from video_super_resolution.scripts.inference_sr import STAR_sr
 
 
 
5
 
6
  # Example video and prompt pairs
7
  examples = [
8
- ["examples/023_klingai_reedit.mp4", "The video shows a panda strumming a guitar on a rock by a tranquil lake at sunset. With its black-and-white fur, the panda sits against a backdrop of mountains and a vibrant sky painted in orange and pink hues. The serene scene highlights relaxation and whimsy, with the panda, guitar, and lake harmoniously positioned. The natural landscape's depth and perspective enhance the focus on the panda's peaceful interaction with the guitar.", 4, 24, 250],
9
  ["examples/017_klingai_reedit.mp4", "The video depicts a majestic lion with eagle-like wings standing on a grassy hill against rolling green hills and a clear sky. The lion’s golden mane contrasts with the warm hues of the scene, and its intense gaze draws focus. The detailed, fully spread wings add a fantastical element. A 'PremiumBeat' watermark appears in the lower right, hinting at the image's source. The style blends realism with fantasy, showcasing the lion's mythical nature.", 4, 24, 250],
10
  ["examples/016_video.mp4", "The video is a black-and-white silent film featuring two men in wheelchairs on a pier. The foreground man, in a suit and hat, holds a sign reading 'HELP CRIPPLE.' The background shows a building and a boat, with early 20th-century clothing and image quality suggesting a narrative of disability and assistance.", 4, 24, 300],
11
  ]
12
 
13
  # Define a GPU-decorated function for enhancement
14
- @spaces.GPU()
15
  def enhance_with_gpu(input_video, input_text, upscale, max_chunk_len, chunk_size):
16
  """在每次调用时创建新的 STAR_sr 实例,确保参数正确传递"""
17
  star = STAR_sr(
@@ -49,7 +52,7 @@ def star_demo(result_dir="./tmp/"):
49
 
50
  gr.Examples(
51
  examples=examples,
52
- inputs=[input_video, input_text],
53
  outputs=[output_video],
54
  fn=enhance_with_gpu, # Use the GPU-decorated function
55
  cache_examples=False,
 
2
  import os
3
  import gradio as gr
4
  from video_super_resolution.scripts.inference_sr import STAR_sr
5
+ from huggingface_hub import hf_hub_download
6
+
7
+ hf_hub_download(repo_id="SherryX/STAR", filename="I2VGen-XL-based/heavy_deg.pt", local_dir="pretrained_weight")
8
 
9
  # Example video and prompt pairs
10
# Example video and prompt pairs.
# Each entry matches enhance_with_gpu's Gradio inputs, in order:
#   [input_video, input_text (caption/prompt), upscale, max_chunk_len, chunk_size]
examples = [
    ["examples/023_klingai_reedit.mp4", "The video shows a panda strumming a guitar on a rock by a tranquil lake at sunset. With its black-and-white fur, the panda sits against a backdrop of mountains and a vibrant sky painted in orange and pink hues. The serene scene highlights relaxation and whimsy, with the panda, guitar, and lake harmoniously positioned. The natural landscape's depth and perspective enhance the focus on the panda's peaceful interaction with the guitar.", 2, 24, 250],
    ["examples/017_klingai_reedit.mp4", "The video depicts a majestic lion with eagle-like wings standing on a grassy hill against rolling green hills and a clear sky. The lion’s golden mane contrasts with the warm hues of the scene, and its intense gaze draws focus. The detailed, fully spread wings add a fantastical element. A 'PremiumBeat' watermark appears in the lower right, hinting at the image's source. The style blends realism with fantasy, showcasing the lion's mythical nature.", 4, 24, 250],
    ["examples/016_video.mp4", "The video is a black-and-white silent film featuring two men in wheelchairs on a pier. The foreground man, in a suit and hat, holds a sign reading 'HELP CRIPPLE.' The background shows a building and a boat, with early 20th-century clothing and image quality suggesting a narrative of disability and assistance.", 4, 24, 300],
]
15
 
16
  # Define a GPU-decorated function for enhancement
17
+ @spaces.GPU(duration=180)
18
  def enhance_with_gpu(input_video, input_text, upscale, max_chunk_len, chunk_size):
19
  """在每次调用时创建新的 STAR_sr 实例,确保参数正确传递"""
20
  star = STAR_sr(
 
52
 
53
  gr.Examples(
54
  examples=examples,
55
+ inputs=[input_video, input_text, upscale, max_chunk_len, chunk_size],
56
  outputs=[output_video],
57
  fn=enhance_with_gpu, # Use the GPU-decorated function
58
  cache_examples=False,
video_super_resolution/scripts/inference_sr.py CHANGED
@@ -1,142 +1,142 @@
1
- import os
2
- import torch
3
- from argparse import ArgumentParser, Namespace
4
- import json
5
- from typing import Any, Dict, List, Mapping, Tuple
6
- from easydict import EasyDict
7
-
8
- from video_to_video.video_to_video_model import VideoToVideo_sr
9
- from video_to_video.utils.seed import setup_seed
10
- from video_to_video.utils.logger import get_logger
11
- from video_super_resolution.color_fix import adain_color_fix
12
-
13
- from inference_utils import *
14
-
15
- logger = get_logger()
16
-
17
-
18
- class STAR_sr():
19
- def __init__(self,
20
- result_dir='./results/',
21
- file_name='000_video.mp4',
22
- model_path='./pretrained_weight',
23
- solver_mode='fast',
24
- steps=15,
25
- guide_scale=7.5,
26
- upscale=4,
27
- max_chunk_len=32,
28
- variant_info=None,
29
- chunk_size=3,
30
- ):
31
- self.model_path=model_path
32
- logger.info('checkpoint_path: {}'.format(self.model_path))
33
-
34
- self.result_dir = result_dir
35
- self.file_name = file_name
36
- os.makedirs(self.result_dir, exist_ok=True)
37
-
38
- model_cfg = EasyDict(__name__='model_cfg')
39
- model_cfg.model_path = self.model_path
40
- model_cfg.chunk_size = chunk_size
41
- self.model = VideoToVideo_sr(model_cfg)
42
-
43
- steps = 15 if solver_mode == 'fast' else steps
44
- self.solver_mode=solver_mode
45
- self.steps=steps
46
- self.guide_scale=guide_scale
47
- self.upscale = upscale
48
- self.max_chunk_len=max_chunk_len
49
- self.variant_info=variant_info
50
-
51
- def enhance_a_video(self, video_path, prompt):
52
- logger.info('input video path: {}'.format(video_path))
53
- text = prompt
54
- logger.info('text: {}'.format(text))
55
- caption = text + self.model.positive_prompt
56
-
57
- input_frames, input_fps = load_video(video_path)
58
- in_f_num = len(input_frames)
59
- logger.info('input frames length: {}'.format(in_f_num))
60
- logger.info('input fps: {}'.format(input_fps))
61
-
62
- video_data = preprocess(input_frames)
63
- _, _, h, w = video_data.shape
64
- logger.info('input resolution: {}'.format((h, w)))
65
- target_h, target_w = h * self.upscale, w * self.upscale # adjust_resolution(h, w, up_scale=4)
66
- logger.info('target resolution: {}'.format((target_h, target_w)))
67
-
68
- pre_data = {'video_data': video_data, 'y': caption}
69
- pre_data['target_res'] = (target_h, target_w)
70
-
71
- total_noise_levels = 900
72
- setup_seed(666)
73
-
74
- with torch.no_grad():
75
- data_tensor = collate_fn(pre_data, 'cuda:0')
76
- output = self.model.test(data_tensor, total_noise_levels, steps=self.steps, \
77
- solver_mode=self.solver_mode, guide_scale=self.guide_scale, \
78
- max_chunk_len=self.max_chunk_len
79
- )
80
-
81
- output = tensor2vid(output)
82
-
83
- # Using color fix
84
- output = adain_color_fix(output, video_data)
85
-
86
- save_video(output, self.result_dir, self.file_name, fps=input_fps)
87
- return os.path.join(self.result_dir, self.file_name)
88
-
89
-
90
- def parse_args():
91
- parser = ArgumentParser()
92
-
93
- parser.add_argument("--input_path", required=True, type=str, help="input video path")
94
- parser.add_argument("--save_dir", type=str, default='results', help="save directory")
95
- parser.add_argument("--file_name", type=str, help="file name")
96
- parser.add_argument("--model_path", type=str, default='./pretrained_weight/model.pt', help="model path")
97
- parser.add_argument("--prompt", type=str, default='a good video', help="prompt")
98
- parser.add_argument("--upscale", type=int, default=4, help='up-scale')
99
- parser.add_argument("--max_chunk_len", type=int, default=32, help='max_chunk_len')
100
- parser.add_argument("--variant_info", type=str, default=None, help='information of inference strategy')
101
-
102
- parser.add_argument("--cfg", type=float, default=7.5)
103
- parser.add_argument("--solver_mode", type=str, default='fast', help='fast | normal')
104
- parser.add_argument("--steps", type=int, default=15)
105
-
106
- return parser.parse_args()
107
-
108
- def main():
109
-
110
- args = parse_args()
111
-
112
- input_path = args.input_path
113
- prompt = args.prompt
114
- model_path = args.model_path
115
- save_dir = args.save_dir
116
- file_name = args.file_name
117
- upscale = args.upscale
118
- max_chunk_len = args.max_chunk_len
119
-
120
- steps = args.steps
121
- solver_mode = args.solver_mode
122
- guide_scale = args.cfg
123
-
124
- assert solver_mode in ('fast', 'normal')
125
-
126
- star_sr = STAR_sr(
127
- result_dir=save_dir,
128
- file_name=file_name, # new added
129
- model_path=model_path,
130
- solver_mode=solver_mode,
131
- steps=steps,
132
- guide_scale=guide_scale,
133
- upscale=upscale,
134
- max_chunk_len=max_chunk_len,
135
- variant_info=None,
136
- )
137
-
138
- star_sr.enhance_a_video(input_path, prompt)
139
-
140
-
141
- if __name__ == '__main__':
142
- main()
 
1
+ import os
2
+ import torch
3
+ from argparse import ArgumentParser, Namespace
4
+ import json
5
+ from typing import Any, Dict, List, Mapping, Tuple
6
+ from easydict import EasyDict
7
+
8
+ from video_to_video.video_to_video_model import VideoToVideo_sr
9
+ from video_to_video.utils.seed import setup_seed
10
+ from video_to_video.utils.logger import get_logger
11
+ from video_super_resolution.color_fix import adain_color_fix
12
+
13
+ from inference_utils import *
14
+
15
+ logger = get_logger()
16
+
17
+
18
class STAR_sr():
    """Wrapper around VideoToVideo_sr that runs video super-resolution on one clip.

    Builds the diffusion SR model once in __init__ and exposes
    enhance_a_video() to upscale a single video file, writing the result
    to ``result_dir/file_name``.
    """

    def __init__(self,
                 result_dir='./results/',
                 file_name='000_video.mp4',
                 model_path='./pretrained_weight',
                 solver_mode='fast',
                 steps=15,
                 guide_scale=7.5,
                 upscale=4,
                 max_chunk_len=32,
                 variant_info=None,
                 chunk_size=3,
                 ):
        # Directory (or file path, depending on VideoToVideo_sr) of the checkpoint.
        self.model_path=model_path
        logger.info('checkpoint_path: {}'.format(self.model_path))

        self.result_dir = result_dir
        self.file_name = file_name
        os.makedirs(self.result_dir, exist_ok=True)

        # Minimal config object consumed by VideoToVideo_sr.
        model_cfg = EasyDict(__name__='model_cfg')
        model_cfg.model_path = self.model_path
        model_cfg.chunk_size = chunk_size
        self.model = VideoToVideo_sr(model_cfg)

        # 'fast' mode forces 15 sampling steps regardless of the caller's value.
        steps = 15 if solver_mode == 'fast' else steps
        self.solver_mode=solver_mode
        self.steps=steps
        self.guide_scale=guide_scale
        self.upscale = upscale
        self.max_chunk_len=max_chunk_len
        # NOTE(review): stored but never read in this class — presumably consumed
        # elsewhere; confirm before removing.
        self.variant_info=variant_info

    def enhance_a_video(self, video_path, prompt):
        """Upscale ``video_path`` guided by ``prompt``; return the output file path.

        The output keeps the input's frame rate and is written to
        ``self.result_dir/self.file_name``.
        """
        logger.info('input video path: {}'.format(video_path))
        text = prompt
        logger.info('text: {}'.format(text))
        # The model's fixed positive prompt is appended to the user caption.
        caption = text + self.model.positive_prompt

        input_frames, input_fps = load_video(video_path)
        in_f_num = len(input_frames)
        logger.info('input frames length: {}'.format(in_f_num))
        logger.info('input fps: {}'.format(input_fps))

        video_data = preprocess(input_frames)
        _, _, h, w = video_data.shape
        logger.info('input resolution: {}'.format((h, w)))
        # Target resolution is a plain multiple of the input; no aspect adjustment.
        target_h, target_w = h * self.upscale, w * self.upscale  # adjust_resolution(h, w, up_scale=4)
        logger.info('target resolution: {}'.format((target_h, target_w)))

        pre_data = {'video_data': video_data, 'y': caption}
        pre_data['target_res'] = (target_h, target_w)

        total_noise_levels = 900
        # Fixed seed makes results reproducible across runs.
        setup_seed(666)

        with torch.no_grad():
            # NOTE(review): device is hard-coded to the first GPU.
            data_tensor = collate_fn(pre_data, 'cuda:0')
            output = self.model.test(data_tensor, total_noise_levels, steps=self.steps, \
                            solver_mode=self.solver_mode, guide_scale=self.guide_scale, \
                            max_chunk_len=self.max_chunk_len
                            )

        output = tensor2vid(output)

        # Using color fix: match the output's color statistics to the input.
        output = adain_color_fix(output, video_data)

        save_video(output, self.result_dir, self.file_name, fps=input_fps)
        return os.path.join(self.result_dir, self.file_name)
88
+
89
+
90
def parse_args():
    """Define and parse the command-line interface for STAR video SR inference."""
    cli = ArgumentParser()

    # (flag, kwargs) pairs, kept in the order they appear on --help.
    arg_specs = [
        ("--input_path", dict(required=True, type=str, help="input video path")),
        ("--save_dir", dict(type=str, default='results', help="save directory")),
        ("--file_name", dict(type=str, help="file name")),
        ("--model_path", dict(type=str, default='./pretrained_weight/I2VGen-XL-based/heavy_deg.pt', help="model path")),
        ("--prompt", dict(type=str, default='a good video', help="prompt")),
        ("--upscale", dict(type=int, default=4, help='up-scale')),
        ("--max_chunk_len", dict(type=int, default=32, help='max_chunk_len')),
        ("--variant_info", dict(type=str, default=None, help='information of inference strategy')),
        ("--cfg", dict(type=float, default=7.5)),
        ("--solver_mode", dict(type=str, default='fast', help='fast | normal')),
        ("--steps", dict(type=int, default=15)),
    ]
    for flag, kwargs in arg_specs:
        cli.add_argument(flag, **kwargs)

    return cli.parse_args()
107
+
108
def main():
    """CLI entry point: parse arguments, build a STAR_sr runner, enhance one video.

    Raises:
        ValueError: if ``--solver_mode`` is not 'fast' or 'normal'.
    """
    args = parse_args()

    input_path = args.input_path
    prompt = args.prompt
    model_path = args.model_path
    save_dir = args.save_dir
    # --file_name has no default; fall back to the input's basename so that
    # save_video / os.path.join never receive None.
    file_name = args.file_name or os.path.basename(input_path)
    upscale = args.upscale
    max_chunk_len = args.max_chunk_len

    steps = args.steps
    solver_mode = args.solver_mode
    guide_scale = args.cfg

    # Validate explicitly: assert is stripped under `python -O`.
    if solver_mode not in ('fast', 'normal'):
        raise ValueError("solver_mode must be 'fast' or 'normal', got {!r}".format(solver_mode))

    star_sr = STAR_sr(
        result_dir=save_dir,
        file_name=file_name,
        model_path=model_path,
        solver_mode=solver_mode,
        steps=steps,
        guide_scale=guide_scale,
        upscale=upscale,
        max_chunk_len=max_chunk_len,
        # Bug fix: previously hard-coded to None, silently ignoring --variant_info.
        variant_info=args.variant_info,
    )

    star_sr.enhance_a_video(input_path, prompt)


if __name__ == '__main__':
    main()
video_super_resolution/scripts/inference_sr.sh CHANGED
@@ -45,7 +45,7 @@ for i in "${!mp4_files[@]}"; do
45
  --solver_mode 'fast' \
46
  --steps 15 \
47
  --input_path "${mp4_file}" \
48
- --model_path /mnt/bn/videodataset/VSR/pretrained_models/STAR/model.pt \
49
  --prompt "${line}" \
50
  --upscale 4 \
51
  --max_chunk_len ${frame_length} \
 
45
  --solver_mode 'fast' \
46
  --steps 15 \
47
  --input_path "${mp4_file}" \
48
+ --model_path /mnt/bn/videodataset/VSR/pretrained_models/STAR/heavy_deg.pt \
49
  --prompt "${line}" \
50
  --upscale 4 \
51
  --max_chunk_len ${frame_length} \
video_to_video/video_to_video_model.py CHANGED
@@ -17,10 +17,10 @@ from diffusers import AutoencoderKLTemporalDecoder
17
  import requests
18
 
19
  def download_model(url, model_path):
20
- if not os.path.exists(os.path.join(model_path, 'model.pt')):
21
  print(f"Model not found at {model_path}, downloading...")
22
  response = requests.get(url, stream=True)
23
- with open(os.path.join(model_path, 'model.pt'), 'wb') as f:
24
  for chunk in response.iter_content(chunk_size=1024):
25
  if chunk:
26
  f.write(chunk)
@@ -54,7 +54,7 @@ class VideoToVideo_sr():
54
  download_model(model_url, cfg.model_path)
55
 
56
  # 拼接完整路径
57
- model_file_path = os.path.join(cfg.model_path, 'model.pt')
58
  print('model_file_path:', model_file_path)
59
 
60
  # 加载模型
 
17
  import requests
18
 
19
def download_model(url, model_path):
    """Download the checkpoint to ``<model_path>/heavy_deg.pt`` if not already present.

    Args:
        url: Direct HTTP(S) URL of the checkpoint file.
        model_path: Directory the checkpoint lives in (created if missing).
    """
    target = os.path.join(model_path, 'heavy_deg.pt')
    if os.path.exists(target):
        return
    print(f"Model not found at {model_path}, downloading...")
    # The directory may not exist on a fresh environment.
    os.makedirs(model_path, exist_ok=True)
    response = requests.get(url, stream=True)
    # Fail loudly on HTTP errors instead of saving an error page as the model.
    response.raise_for_status()
    # Stream into a temp file and rename at the end, so an interrupted download
    # is not mistaken for a complete checkpoint by the existence check above.
    tmp_target = target + '.part'
    with open(tmp_target, 'wb') as f:
        # 1 MiB chunks: the original 1 KiB chunks are needlessly slow for a
        # multi-hundred-MB checkpoint.
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            if chunk:
                f.write(chunk)
    os.replace(tmp_target, target)
 
54
  download_model(model_url, cfg.model_path)
55
 
56
  # 拼接完整路径
57
+ model_file_path = os.path.join('pretrained_weight', 'I2VGen-XL-based', 'heavy_deg.pt')
58
  print('model_file_path:', model_file_path)
59
 
60
  # 加载模型