Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -37,9 +37,9 @@ class NagWanTransformer3DModel(nn.Module):
|
|
37 |
self,
|
38 |
in_channels: int = 4,
|
39 |
out_channels: int = 4,
|
40 |
-
hidden_size: int =
|
41 |
-
num_layers: int =
|
42 |
-
num_heads: int =
|
43 |
):
|
44 |
super().__init__()
|
45 |
self.in_channels = in_channels
|
@@ -57,15 +57,15 @@ class NagWanTransformer3DModel(nn.Module):
|
|
57 |
})()
|
58 |
|
59 |
# Simple conv layers for demo
|
60 |
-
self.conv_in = nn.Conv3d(in_channels,
|
61 |
-
self.conv_mid = nn.Conv3d(
|
62 |
-
self.conv_out = nn.Conv3d(
|
63 |
|
64 |
# Time embedding
|
65 |
self.time_embed = nn.Sequential(
|
66 |
nn.Linear(1, hidden_size),
|
67 |
nn.SiLU(),
|
68 |
-
nn.Linear(hidden_size,
|
69 |
)
|
70 |
|
71 |
@staticmethod
|
@@ -428,10 +428,10 @@ DEFAULT_STEPS = 2
|
|
428 |
DEFAULT_SEED = 2025
|
429 |
DEFAULT_H_SLIDER_VALUE = 128
|
430 |
DEFAULT_W_SLIDER_VALUE = 128
|
431 |
-
NEW_FORMULA_MAX_AREA =
|
432 |
|
433 |
-
SLIDER_MIN_H, SLIDER_MAX_H = 128,
|
434 |
-
SLIDER_MIN_W, SLIDER_MAX_W = 128,
|
435 |
MAX_SEED = np.iinfo(np.int32).max
|
436 |
|
437 |
FIXED_FPS = 16
|
@@ -493,9 +493,9 @@ print("Creating simplified NAG transformer model...")
|
|
493 |
transformer = NagWanTransformer3DModel(
|
494 |
in_channels=4,
|
495 |
out_channels=4,
|
496 |
-
hidden_size=1280
|
497 |
-
num_layers=
|
498 |
-
num_heads=
|
499 |
)
|
500 |
|
501 |
print("Creating pipeline...")
|
@@ -511,8 +511,7 @@ pipe = NAGWanPipeline(
|
|
511 |
beta_end=0.012,
|
512 |
beta_schedule="scaled_linear",
|
513 |
clip_sample=False,
|
514 |
-
|
515 |
-
steps_offset=1,
|
516 |
)
|
517 |
)
|
518 |
|
|
|
37 |
self,
|
38 |
in_channels: int = 4,
|
39 |
out_channels: int = 4,
|
40 |
+
hidden_size: int = 64,
|
41 |
+
num_layers: int = 1,
|
42 |
+
num_heads: int = 4,
|
43 |
):
|
44 |
super().__init__()
|
45 |
self.in_channels = in_channels
|
|
|
57 |
})()
|
58 |
|
59 |
# Simple conv layers for demo
|
60 |
+
self.conv_in = nn.Conv3d(in_channels, hidden_size, kernel_size=3, padding=1)
|
61 |
+
self.conv_mid = nn.Conv3d(hidden_size, hidden_size, kernel_size=3, padding=1)
|
62 |
+
self.conv_out = nn.Conv3d(hidden_size, out_channels, kernel_size=3, padding=1)
|
63 |
|
64 |
# Time embedding
|
65 |
self.time_embed = nn.Sequential(
|
66 |
nn.Linear(1, hidden_size),
|
67 |
nn.SiLU(),
|
68 |
+
nn.Linear(hidden_size, hidden_size),
|
69 |
)
|
70 |
|
71 |
@staticmethod
|
|
|
428 |
DEFAULT_SEED = 2025
|
429 |
DEFAULT_H_SLIDER_VALUE = 128
|
430 |
DEFAULT_W_SLIDER_VALUE = 128
|
431 |
+
NEW_FORMULA_MAX_AREA = 128.0 * 128.0
|
432 |
|
433 |
+
SLIDER_MIN_H, SLIDER_MAX_H = 128, 256
|
434 |
+
SLIDER_MIN_W, SLIDER_MAX_W = 128, 256
|
435 |
MAX_SEED = np.iinfo(np.int32).max
|
436 |
|
437 |
FIXED_FPS = 16
|
|
|
493 |
transformer = NagWanTransformer3DModel(
|
494 |
in_channels=4,
|
495 |
out_channels=4,
|
496 |
+
hidden_size=64, # Reduced from 1280 for demo
|
497 |
+
num_layers=1, # Reduced for demo
|
498 |
+
num_heads=4 # Reduced for demo
|
499 |
)
|
500 |
|
501 |
print("Creating pipeline...")
|
|
|
511 |
beta_end=0.012,
|
512 |
beta_schedule="scaled_linear",
|
513 |
clip_sample=False,
|
514 |
+
prediction_type="epsilon",
|
|
|
515 |
)
|
516 |
)
|
517 |
|