dwb2023 committed on
Commit
a161199
·
verified ·
1 Parent(s): ef0e867

switch to bfloat16

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -42,7 +42,8 @@ model = AutoModelForSpeechSeq2Seq.from_pretrained(
42
  use_cache=False,
43
  device_map="auto",
44
  low_cpu_mem_usage=True,
45
- attn_implementation="flash_attention_2"
 
46
  )
47
 
48
  # Flash Attention setup for memory and speed optimization if supported
@@ -64,7 +65,7 @@ pipe = pipeline(
64
  model=model,
65
  tokenizer=tokenizer,
66
  feature_extractor=feature_extractor,
67
- chunk_length_s=900, # Increased to 15 minutes
68
  )
69
 
70
  def reset_and_update_dataset(new_data):
 
42
  use_cache=False,
43
  device_map="auto",
44
  low_cpu_mem_usage=True,
45
+ attn_implementation="flash_attention_2",
46
+ torch_dtype=torch.bfloat16
47
  )
48
 
49
  # Flash Attention setup for memory and speed optimization if supported
 
65
  model=model,
66
  tokenizer=tokenizer,
67
  feature_extractor=feature_extractor,
68
+ chunk_length_s=30, # 30 seconds
69
  )
70
 
71
  def reset_and_update_dataset(new_data):