sachin committed
Commit 54103f9 · 1 Parent(s): ecf3eb5
Files changed (2)
  1. docs/issues.md +6 -0
  2. src/server/main.py +4 -4
docs/issues.md ADDED
@@ -0,0 +1,6 @@
+ 2025-03-17 22:33:24,340 - parler_tts.modeling_parler_tts - WARNING - `prompt_attention_mask` is specified but `attention_mask` is not. A full `attention_mask` will be created. Make sure this is the intended behaviour.
+ W0317 22:33:36.322000 1 torch/_inductor/utils.py:1137] [0/0] Not enough SMs to use max_autotune_gemm mode
+ CUDAGraph supports dynamic shapes by recording a new graph for each distinct input size. Recording too many CUDAGraphs may lead to extra overhead. We have observed 51 distinct sizes. Please consider the following options for better performance: a) padding inputs to a few fixed number of shapes; or b) set torch._inductor.config.triton.cudagraph_skip_dynamic_graphs=True. Set torch._inductor.config.triton.cudagraph_dynamic_shape_warn_limit=None to silence this warning.
+
+
+
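Note: the CUDAGraph warning above names its own mitigations. Below is a minimal sketch of how those torch._inductor flags could be set before compiling; the flag names are taken verbatim from the warning text, while the `pad_to_bucket` helper is a hypothetical illustration of option (a), not code from this repo.

```python
import torch
import torch._inductor.config as inductor_config

# Option (b) from the warning: skip CUDAGraph capture for dynamic-shape graphs.
inductor_config.triton.cudagraph_skip_dynamic_graphs = True

# Silence the "distinct sizes" warning entirely (also quoted in the warning text).
inductor_config.triton.cudagraph_dynamic_shape_warn_limit = None

# Option (a): pad variable-length inputs to a small set of fixed lengths so only
# a few CUDAGraphs get recorded. `pad_to_bucket` is a hypothetical helper.
def pad_to_bucket(length: int, buckets=(128, 256, 512, 1024)) -> int:
    """Return the smallest bucket size that fits `length`."""
    for b in buckets:
        if length <= b:
            return b
    return buckets[-1]
```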
src/server/main.py CHANGED
@@ -97,17 +97,17 @@ class TTSModelManager:
 
 
 # TODO - temporary disable -torch.compile
-
+'''
 # Update model configuration
 model.config.pad_token_id = tokenizer.pad_token_id
 # Update for deprecation: use max_batch_size instead of batch_size
 if hasattr(model.generation_config.cache_config, 'max_batch_size'):
     model.generation_config.cache_config.max_batch_size = 1
 model.generation_config.cache_implementation = "static"
-
+'''
 # Compile the model
-##compile_mode = "default"
-compile_mode = "reduce-overhead"
+compile_mode = "default"
+#compile_mode = "reduce-overhead"
 
 model.forward = torch.compile(model.forward, mode=compile_mode)
 
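For context on the change above, a hedged standalone sketch of the compile-mode switch; the `compile_forward` wrapper and its `model` argument are illustrative, not this repo's actual API. "reduce-overhead" runs the compiled forward under CUDA graphs, which records a new graph per distinct input shape and produces the warning captured in docs/issues.md, while "default" skips graph capture.

```python
import torch

def compile_forward(model, use_cuda_graphs: bool = False):
    # Hypothetical minimal reproduction of the compile-mode choice in main.py;
    # `model` stands in for the Parler-TTS model loaded by TTSModelManager.
    # "reduce-overhead" wraps execution in CUDA graphs; with many distinct
    # input shapes this records many graphs. "default" avoids that overhead.
    compile_mode = "reduce-overhead" if use_cuda_graphs else "default"
    model.forward = torch.compile(model.forward, mode=compile_mode)
    return model
```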