StevenZhang committed
Commit 04d1fd1 · 1 Parent(s): 97be4bd

init upload

README.md CHANGED
@@ -1,3 +1,55 @@
  ---
  license: apache-2.0
  ---
+
+ ```python
+ import os
+
+ # HF_ENDPOINT is read when huggingface_hub is imported, so point it at the
+ # mirror before importing transformers/diffusers.
+ os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
+
+ import torch
+ from transformers import UMT5EncoderModel
+ from diffusers import WanPipeline, WanTransformer3DModel
+ from diffusers.utils import export_to_video
+
+ pretrained_model_name_or_path = "./wan_t2v"
+
+ # Load the 3D transformer (fp32 weights) and the UMT5 text encoder in bf16.
+ transformer_t2v = WanTransformer3DModel.from_pretrained(
+     pretrained_model_name_or_path, subfolder="transformer"
+ )
+ text_encoder = UMT5EncoderModel.from_pretrained(
+     pretrained_model_name_or_path, subfolder="text_encoder", torch_dtype=torch.bfloat16
+ )
+
+ pipe = WanPipeline.from_pretrained(
+     pretrained_model_name_or_path,
+     transformer=transformer_t2v,
+     text_encoder=text_encoder,
+ )
+
+ # Standard Wan negative prompt, in Chinese: "vivid tones, overexposed, static,
+ # blurry details, subtitles, style, artwork, painting, frame, still, overall
+ # gray, worst quality, low quality, JPEG compression artifacts, ugly,
+ # mutilated, extra fingers, poorly drawn hands, poorly drawn face, deformed,
+ # disfigured, malformed limbs, fused fingers, motionless frame, cluttered
+ # background, three legs, many people in the background, walking backwards."
+ negative_prompt = '色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走'
+
+ device = "cuda"
+ seed = 0
+ generator = torch.Generator(device=device).manual_seed(seed)
+
+ inputs = {
+     # "Two anthropomorphic cats in comfortable boxing gear and bright gloves
+     # fight fiercely on a spotlit stage."
+     "prompt": "两只拟人化的猫咪身穿舒适的拳击装备,戴着鲜艳的手套,在聚光灯照射的舞台上激烈对战",
+     "negative_prompt": negative_prompt,
+     "generator": generator,
+     "num_inference_steps": 50,
+     "flow_shift": 5.0,  # flow-matching shift; 5.0 is typical for 720p
+     "guidance_scale": 5.0,
+     "height": 720,
+     "width": 1280,
+     "num_frames": 81,
+     "max_sequence_length": 512,
+     "output_type": "np",
+ }
+
+ # Offload idle sub-models to CPU so 720p generation fits on a single GPU.
+ pipe.enable_model_cpu_offload()
+
+ video = pipe(**inputs).frames[0]
+ export_to_video(video, "output.mp4", fps=16)
+ ```
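The `flow_shift` entry above is passed straight to the pipeline call; in stock diffusers releases the flow shift is instead configured on the scheduler. If your diffusers version rejects the call-time argument, the scheduler route below is a minimal sketch (using the standard `UniPCMultistepScheduler.from_config` override, with 5.0 as the shift typically used for 720p):

```python
from diffusers import UniPCMultistepScheduler

# Rebuild the scheduler with the desired flow shift before calling the
# pipeline, and drop "flow_shift" from the inputs dict.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=5.0)
```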
config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "_class_name": "WanxTransformer3DModel",
+   "_diffusers_version": "0.33.0.dev0",
+   "_name_or_path": "xxx/wanx_i2v",
+   "add_img_emb": true,
+   "added_kv_proj_dim": 5120,
+   "attention_head_dim": 128,
+   "cross_attn_norm": true,
+   "eps": 1e-06,
+   "ffn_dim": 13824,
+   "freq_dim": 256,
+   "in_channels": 36,
+   "num_attention_heads": 40,
+   "num_layers": 40,
+   "out_channels": 16,
+   "patch_size": [
+     1,
+     2,
+     2
+   ],
+   "qk_norm": true,
+   "text_dim": 4096,
+   "window_size": [
+     -1,
+     -1
+   ]
+ }
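A few derived numbers from this config: the transformer's hidden size is `num_attention_heads × attention_head_dim` = 40 × 128 = 5120, and `patch_size` [1, 2, 2] patchifies each latent frame 2×2 spatially with no temporal patching. The sketch below recomputes these from the file; the per-layer parameter estimate is a back-of-the-envelope approximation, not something stated in the repo:

```python
import json

with open("config.json") as f:
    cfg = json.load(f)

hidden = cfg["num_attention_heads"] * cfg["attention_head_dim"]  # 40 * 128 = 5120

# Rough per-layer count: self-attention (4·d²), cross-attention with the extra
# image k/v projections (6·d², since added_kv_proj_dim == d), FFN (2·d·ffn_dim).
per_layer = 4 * hidden**2 + 6 * hidden**2 + 2 * hidden * cfg["ffn_dim"]
total = per_layer * cfg["num_layers"]
print(f"hidden size: {hidden}, ~{total / 1e9:.1f}B params in the blocks alone")  # ~16.1B
```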
diffusion_pytorch_model-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:457639497b204e838c0b5e7f5955e8b1b0f9f04213bd9853e40cd77771569685
+ size 9952163512
diffusion_pytorch_model-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eda5a7f06db0164b852b52fc56db9cb82c502e963f0e7d407af8e13bac31826b
+ size 9797226656
diffusion_pytorch_model-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be4dfe5a12ac35c857fe307e91899d0f9c473551c2a828e80718f340489b27bd
+ size 9975437232
diffusion_pytorch_model-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:744a014df498e5b579d34a12e9ea836b2cd9adf9e0ef77b4f7378ad762091573
+ size 9975566544
diffusion_pytorch_model-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b49e16fed3592ffc07d65503f39517f3190d8e52130418706ea1ba678f207050
+ size 9902022768
diffusion_pytorch_model-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d173bad73ebcdde9fe4efef487d1df9c993dfc3590b041e05e7806418479be52
+ size 9902063944
diffusion_pytorch_model-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:68b4173328f39217e177fe40cf94e509a213955aea842b086f3fd17e6c286832
+ size 6075990120
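The seven LFS shards above total roughly 65.6 GB; at 4 bytes per parameter that is about 16.4B fp32 weights, consistent with the ~16.1B estimated from the config. A quick check using the sizes copied from the pointer files:

```python
# Byte sizes of the seven transformer shards, from the LFS pointers above.
shard_sizes = [
    9952163512, 9797226656, 9975437232, 9975566544,
    9902022768, 9902063944, 6075990120,
]
total = sum(shard_sizes)
print(f"{total / 1e9:.1f} GB -> ~{total / 4 / 1e9:.1f}B fp32 params")  # 65.6 GB -> ~16.4B
```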
diffusion_pytorch_model.safetensors.index.json ADDED
The diff for this file is too large to render.
 
wan_t2v_fp32_example.py ADDED
@@ -0,0 +1,48 @@
+ import os
+
+ # HF_ENDPOINT is read when huggingface_hub is imported, so point it at the
+ # mirror before importing transformers/diffusers.
+ os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
+
+ import torch
+ from transformers import UMT5EncoderModel
+ from diffusers import WanPipeline, WanTransformer3DModel
+ from diffusers.utils import export_to_video
+
+ pretrained_model_name_or_path = "./wan_t2v"
+
+ # Load the 3D transformer (fp32 weights) and the UMT5 text encoder in bf16.
+ transformer_t2v = WanTransformer3DModel.from_pretrained(
+     pretrained_model_name_or_path, subfolder="transformer"
+ )
+ text_encoder = UMT5EncoderModel.from_pretrained(
+     pretrained_model_name_or_path, subfolder="text_encoder", torch_dtype=torch.bfloat16
+ )
+
+ pipe = WanPipeline.from_pretrained(
+     pretrained_model_name_or_path,
+     transformer=transformer_t2v,
+     text_encoder=text_encoder,
+ )
+
+ # Standard Wan negative prompt, in Chinese: "vivid tones, overexposed, static,
+ # blurry details, subtitles, style, artwork, painting, frame, still, overall
+ # gray, worst quality, low quality, JPEG compression artifacts, ugly,
+ # mutilated, extra fingers, poorly drawn hands, poorly drawn face, deformed,
+ # disfigured, malformed limbs, fused fingers, motionless frame, cluttered
+ # background, three legs, many people in the background, walking backwards."
+ negative_prompt = '色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走'
+
+ device = "cuda"
+ seed = 0
+ generator = torch.Generator(device=device).manual_seed(seed)
+
+ inputs = {
+     # "Two anthropomorphic cats in comfortable boxing gear and bright gloves
+     # fight fiercely on a spotlit stage."
+     "prompt": "两只拟人化的猫咪身穿舒适的拳击装备,戴着鲜艳的手套,在聚光灯照射的舞台上激烈对战",
+     "negative_prompt": negative_prompt,
+     "generator": generator,
+     "num_inference_steps": 50,
+     "flow_shift": 5.0,  # flow-matching shift; 5.0 is typical for 720p
+     "guidance_scale": 5.0,
+     "height": 720,
+     "width": 1280,
+     "num_frames": 81,
+     "max_sequence_length": 512,
+     "output_type": "np",
+ }
+
+ # Offload idle sub-models to CPU so 720p generation fits on a single GPU.
+ pipe.enable_model_cpu_offload()
+
+ video = pipe(**inputs).frames[0]
+ export_to_video(video, "output.mp4", fps=16)
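For reference, 81 frames exported at fps=16 is a clip of just over five seconds. Wan's causal video VAE compresses time 4×, so `num_frames` is expected to be of the form 4k + 1 (here 81 = 4·20 + 1); the snippet below is a small sanity check of both facts:

```python
# Frame count and duration as used in the example above.
num_frames, fps = 81, 16
assert (num_frames - 1) % 4 == 0, "Wan expects num_frames of the form 4*k + 1"
print(f"{num_frames} frames @ {fps} fps -> {num_frames / fps:.2f} s")  # ~5.06 s
```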