sachin committed · 54103f9
1 Parent(s): ecf3eb5
update

- docs/issues.md +6 -0
- src/server/main.py +4 -4
docs/issues.md
ADDED
@@ -0,0 +1,6 @@
+ 2025-03-17 22:33:24,340 - parler_tts.modeling_parler_tts - WARNING - `prompt_attention_mask` is specified but `attention_mask` is not. A full `attention_mask` will be created. Make sure this is the intended behaviour.
+ W0317 22:33:36.322000 1 torch/_inductor/utils.py:1137] [0/0] Not enough SMs to use max_autotune_gemm mode
+ CUDAGraph supports dynamic shapes by recording a new graph for each distinct input size. Recording too many CUDAGraphs may lead to extra overhead. We have observed 51 distinct sizes. Please consider the following options for better performance: a) padding inputs to a few fixed number of shapes; or b) set torch._inductor.config.triton.cudagraph_skip_dynamic_graphs=True. Set torch._inductor.config.triton.cudagraph_dynamic_shape_warn_limit=None to silence this warning.
+
+
+
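For reference, the CUDAGraph warning above names its own remediations; here is a minimal sketch of option (b), with both flag names taken verbatim from the log (they must be set before the first compiled call runs):

import torch._inductor.config as inductor_config

# Option (b): fall back to non-CUDAGraph execution for dynamic shapes
# instead of recording a new CUDA graph per distinct input size.
inductor_config.triton.cudagraph_skip_dynamic_graphs = True

# Alternatively, keep recording graphs but silence the re-recording warning:
# inductor_config.triton.cudagraph_dynamic_shape_warn_limit = None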
src/server/main.py
CHANGED
@@ -97,17 +97,17 @@ class TTSModelManager:
 
 
         # TODO - temporary disable -torch.compile
-
+        '''
         # Update model configuration
         model.config.pad_token_id = tokenizer.pad_token_id
         # Update for deprecation: use max_batch_size instead of batch_size
         if hasattr(model.generation_config.cache_config, 'max_batch_size'):
             model.generation_config.cache_config.max_batch_size = 1
         model.generation_config.cache_implementation = "static"
-
+        '''
         # Compile the model
-
-        compile_mode = "reduce-overhead"
+        compile_mode = "default"
+        #compile_mode = "reduce-overhead"
 
         model.forward = torch.compile(model.forward, mode=compile_mode)
 
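Net effect of this hunk: the static-cache setup is commented out with a triple-quoted string, and torch.compile switches from "reduce-overhead" to "default". The "reduce-overhead" mode cuts Python overhead by using CUDA graphs, which is what produced the re-recording warning now captured in docs/issues.md; "default" compiles without CUDA graphs and sidesteps it. Should "reduce-overhead" be restored, the log's option (a), padding inputs to a few fixed shapes, could look like the sketch below (BUCKETS and pad_to_bucket are hypothetical, not part of this repo):

import torch

# Hypothetical bucket sizes; tune so most real inputs land in a few shapes.
BUCKETS = (128, 256, 512)

def pad_to_bucket(input_ids: torch.Tensor, pad_token_id: int) -> torch.Tensor:
    """Right-pad (batch, seq_len) token ids to the next bucket size so
    CUDAGraph records at most len(BUCKETS) graphs, not one per length."""
    seq_len = input_ids.shape[-1]
    target = next((b for b in BUCKETS if b >= seq_len), seq_len)
    if target == seq_len:
        return input_ids
    pad = input_ids.new_full((*input_ids.shape[:-1], target - seq_len), pad_token_id)
    return torch.cat([input_ids, pad], dim=-1)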