update model version 2
Files changed:
- app.py +34 -38
- models/fb_encoder.onnx +0 -3
- models/g_mapping.onnx +0 -3
- models/g_synthesis.onnx +0 -3
- models/waifu_dect.onnx +0 -3
- requirements.txt +1 -0
app.py
CHANGED
@@ -3,6 +3,7 @@ import imageio
 import numpy as np
 import onnx
 import onnxruntime as rt
+import huggingface_hub
 from numpy.random import RandomState
 from skimage import transform
 
@@ -74,55 +75,48 @@ def nms(pred, conf_thres, iou_thres, max_instance=20):  # pred (anchor_num, 5 +
 
 class Model:
     def __init__(self):
-        self.img_avg = None
         self.detector = None
         self.encoder = None
         self.g_synthesis = None
         self.g_mapping = None
-        self.w_avg = None
         self.detector_stride = None
         self.detector_imgsz = None
         self.detector_class_names = None
-        self.
+        self.w_avg = None
+        self.load_models("skytnt/fbanime-gan")
 
-    def load_models(self, model_dir):
+    def load_models(self, repo):
+        g_mapping_path = huggingface_hub.hf_hub_download(repo, "g_mapping.onnx")
+        g_synthesis_path = huggingface_hub.hf_hub_download(repo, "g_synthesis.onnx")
+        encoder_path = huggingface_hub.hf_hub_download(repo, "encoder.onnx")
+        detector_path = huggingface_hub.hf_hub_download(repo, "waifu_dect.onnx")
 
         providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
-        g_mapping = onnx.load(
+        g_mapping = onnx.load(g_mapping_path)
         w_avg = [x for x in g_mapping.graph.initializer if x.name == "w_avg"][0]
         w_avg = np.frombuffer(w_avg.raw_data, dtype=np.float32)[np.newaxis, :]
         w_avg = w_avg.repeat(16, axis=0)[np.newaxis, :]
         self.w_avg = w_avg
-        self.g_mapping = rt.InferenceSession(
-        self.g_synthesis = rt.InferenceSession(
-        self.encoder = rt.InferenceSession(
-        self.detector = rt.InferenceSession(
+        self.g_mapping = rt.InferenceSession(g_mapping_path, providers=providers)
+        self.g_synthesis = rt.InferenceSession(g_synthesis_path, providers=providers)
+        self.encoder = rt.InferenceSession(encoder_path, providers=providers)
+        self.detector = rt.InferenceSession(detector_path, providers=providers)
         detector_meta = self.detector.get_modelmeta().custom_metadata_map
         self.detector_stride = int(detector_meta['stride'])
         self.detector_imgsz = 1088
         self.detector_class_names = eval(detector_meta['names'])
 
-
-
-
-    def get_img(self, w):
-        img = self.g_synthesis.run(None, {'w': w})[0]
+    def get_img(self, w, noise=0):
+        img = self.g_synthesis.run(None, {'w': w, "noise": np.asarray([noise], dtype=np.float32)})[0]
         return (img.transpose(0, 2, 3, 1) * 127.5 + 128).clip(0, 255).astype(np.uint8)[0]
 
-    def get_w(self, z,
-        return self.g_mapping.run(None, {'z': z, 'psi': np.asarray([
+    def get_w(self, z, psi1, psi2):
+        return self.g_mapping.run(None, {'z': z, 'psi': np.asarray([psi1, psi2], dtype=np.float32)})[0]
 
-    def encode_img(self, img
-
+    def encode_img(self, img):
+        img = transform.resize(((img / 255 - 0.5) / 0.5), (256, 256)).transpose(2, 0, 1)[np.newaxis, :].astype(
             np.float32)
-
-        from_img = self.img_avg.copy()
-        for i in range(iteration):
-            dimg = np.concatenate([target_img, from_img], axis=1)
-            dw = self.encoder.run(None, {'dimg': dimg})[0]
-            w += dw
-            from_img = transform.resize(self.g_synthesis.run(None, {'w': w})[0][0].transpose(1, 2, 0),
-                                        (256, 256)).transpose(2, 0, 1)[np.newaxis, :]
-        return w
+        return self.encoder.run(None, {'img': img})[0] + self.w_avg
 
     def detect(self, im0, conf_thres, iou_thres, detail=False):
         if im0 is None:
@@ -217,11 +211,11 @@ class Model:
         imgs.append(temp_img)
         return imgs
 
-    def gen_video(self, w1, w2, path, frame_num=10):
+    def gen_video(self, w1, w2, noise, path, frame_num=10):
        video = imageio.get_writer(path, mode='I', fps=frame_num // 2, codec='libx264', bitrate='16M')
        lin = np.linspace(0, 1, frame_num)
        for i in range(0, frame_num):
-            img = self.get_img(((1 - lin[i]) * w1) + (lin[i] * w2))
+            img = self.get_img(((1 - lin[i]) * w1) + (lin[i] * w2), noise)
            video.append_data(img)
        video.close()
 
@@ -232,10 +226,10 @@ def get_thumbnail(img):
     return img_new
 
 
-def gen_fn(method, seed,
+def gen_fn(method, seed, psi1, psi2, noise):
     z = RandomState(int(seed) + 2 ** 31).randn(1, 512) if method == 1 else np.random.randn(1, 512)
-    w = model.get_w(z.astype(dtype=np.float32),
-    img_out = model.get_img(w)
+    w = model.get_w(z.astype(dtype=np.float32), psi1, psi2)
+    img_out = model.get_img(w, noise)
     return img_out, w, get_thumbnail(img_out)
 
@@ -250,11 +244,10 @@ def encode_img_fn(img):
     return "success", imgs[0], img_out, w, get_thumbnail(img_out)
 
 
-def gen_video_fn(w1, w2, frame):
+def gen_video_fn(w1, w2, noise, frame):
     if w1 is None or w2 is None:
         return None
-    model.gen_video(w1, w2, "video.mp4",
-                    int(frame))
+    model.gen_video(w1, w2, noise, "video.mp4", int(frame))
     return "video.mp4"
 
@@ -274,7 +267,9 @@ if __name__ == '__main__':
         with gr.Row():
             gen_input1 = gr.Radio(label="method", choices=["random", "use seed"], type="index")
             gen_input2 = gr.Number(value=1, label="seed ( int between -2^31 and 2^31 - 1 )")
-            gen_input3 = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.6, label="truncation psi")
+            gen_input3 = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.6, label="truncation psi 1")
+            gen_input4 = gr.Slider(minimum=0, maximum=1, step=0.01, value=1, label="truncation psi 2")
+            gen_input5 = gr.Slider(minimum=0, maximum=1, step=0.01, value=1, label="noise strength")
         with gr.Group():
             gen_submit = gr.Button("Generate", variant="primary")
         with gr.Column():
@@ -327,7 +322,7 @@ if __name__ == '__main__':
             generate_video_button = gr.Button("Generate", variant="primary")
         with gr.Column():
             generate_video_output = gr.Video(label="output video")
-    gen_submit.click(gen_fn, [gen_input1, gen_input2, gen_input3],
+    gen_submit.click(gen_fn, [gen_input1, gen_input2, gen_input3, gen_input4, gen_input5],
                      [gen_output1, select_img_input_w1, select_img_input_img1])
     encode_img_submit.click(encode_img_fn, [encode_img_input],
                             [encode_img_output1, encode_img_output2, encode_img_output3, select_img_input_w2,
@@ -341,6 +336,7 @@ if __name__ == '__main__':
                             [select_img2_dropdown, select_img_input_img1, select_img_input_img2,
                              select_img_input_w1, select_img_input_w2],
                             [select_img2_output_img, select_img2_output_w])
-    generate_video_button.click(gen_video_fn,
+    generate_video_button.click(gen_video_fn,
+                                [select_img1_output_w, select_img2_output_w, gen_input5, generate_video_frame],
                                 [generate_video_output])
     app.launch()
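In short, the commit stops shipping the four ONNX models inside the Space (the Git LFS files deleted below) and instead downloads them from the skytnt/fbanime-gan model repo at startup, while the generator gains a second truncation psi and an explicit noise-strength input. Below is a minimal sketch of the new generation path outside the Gradio app; the repo id, file names, and ONNX input names ('z', 'psi', 'w', 'noise') are taken from the diff above, while the variable names and the example seed/psi values are purely illustrative:

    # Sketch: exercise the relocated models directly, mirroring load_models/get_w/get_img.
    import numpy as np
    import onnxruntime as rt
    import huggingface_hub

    repo = "skytnt/fbanime-gan"
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    # hf_hub_download fetches the file into the local cache and returns its path
    g_mapping = rt.InferenceSession(
        huggingface_hub.hf_hub_download(repo, "g_mapping.onnx"), providers=providers)
    g_synthesis = rt.InferenceSession(
        huggingface_hub.hf_hub_download(repo, "g_synthesis.onnx"), providers=providers)

    z = np.random.RandomState(1 + 2 ** 31).randn(1, 512).astype(np.float32)
    psi = np.asarray([0.6, 1.0], dtype=np.float32)  # the two truncation psis
    w = g_mapping.run(None, {'z': z, 'psi': psi})[0]
    noise = np.asarray([1.0], dtype=np.float32)     # new explicit noise strength
    img = g_synthesis.run(None, {'w': w, 'noise': noise})[0]
    # NCHW float -> HWC uint8, same post-processing as Model.get_img
    img = (img.transpose(0, 2, 3, 1) * 127.5 + 128).clip(0, 255).astype(np.uint8)[0]

Since hf_hub_download caches downloads locally, the Space only pays the transfer cost on its first boot; later calls reuse the cached files.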
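The encoder changes in the same spirit: the old iterative loop (rendering from_img, concatenating it with the target into 'dimg', and accumulating dw) becomes a single forward pass whose output is offset by w_avg, giving encode_img a fixed cost per image. A sketch under the same assumptions, reusing repo and providers from the snippet above; the 'img' input name, the normalization, and the w_avg extraction mirror the diff, while the encode helper itself is hypothetical:

    import onnx
    from skimage import transform

    encoder = rt.InferenceSession(
        huggingface_hub.hf_hub_download(repo, "encoder.onnx"), providers=providers)
    # w_avg ships as an initializer inside g_mapping.onnx, as load_models shows
    gm = onnx.load(huggingface_hub.hf_hub_download(repo, "g_mapping.onnx"))
    w_avg = [x for x in gm.graph.initializer if x.name == "w_avg"][0]
    w_avg = np.frombuffer(w_avg.raw_data, dtype=np.float32)[np.newaxis, :]
    w_avg = w_avg.repeat(16, axis=0)[np.newaxis, :]  # tile to (1, 16, w_dim)

    def encode(img_uint8):
        # normalize to [-1, 1], resize to 256x256, HWC -> NCHW
        x = transform.resize((img_uint8 / 255 - 0.5) / 0.5, (256, 256))
        x = x.transpose(2, 0, 1)[np.newaxis, :].astype(np.float32)
        return encoder.run(None, {'img': x})[0] + w_avg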
models/fb_encoder.onnx
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1e8d206092b3e686b6d4798f8976e154413b12316161b5e1b077a493a41d75e4
-size 706114106

models/g_mapping.onnx
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b6e5918a214bb2b1cbdecb76f9c2124fd5fa2cb88e02de16d10530f7441fb205
-size 8410285

models/g_synthesis.onnx
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:12929197c3eeb423c5987303995ef640eb5e2e44638cd3c0657a8aed67fc2aab
-size 112794026

models/waifu_dect.onnx
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4a5de6949912bf94c3307f2b18ebc7b49f309e713b1799d29805ccd882e327d3
-size 83550422
requirements.txt
CHANGED
@@ -2,3 +2,4 @@ onnx
 onnxruntime-gpu
 scikit-image
 imageio-ffmpeg
+huggingface_hub