alakxender committed on
Commit
d2db553
·
1 Parent(s): 8e370dd
Files changed (2) hide show
  1. .gradio/cached_examples/16/log.csv +1 -1
  2. app.py +29 -11
.gradio/cached_examples/16/log.csv CHANGED
@@ -1,2 +1,2 @@
1
  component 0,timestamp
2
- Ka ކައެއް ބުނެދޭ އަޑު އަހަމާހޭ ނޭފަތް ބުނެޔޭ ވަސް ބަލަމާހޭ ތުންފަތް ބުނެޔޭ އެހިނި ތުންވެލަމުން އަތްތައް ގުޅުވާ އެކު އުޅެމާހޭ ލޯ ބުންޏޭ ބަލަމުން ބަލަމުންދާ The one who is in love with the are the birds. They are the birds.,2025-05-16 18:20:28.737297
 
1
  component 0,timestamp
2
+ Ka ކައެއް ބުނެދޭ އަޑު އަހަމާހޭ ނޭފަތް ބުނެޔޭ ވަސް ބަލަމާހޭ ތުންފަތް ބުނެޔޭ އެހިނި ތުންވެލަމުން އަތްތައް ގުޅުވާ އެކު އުޅެމާހޭ ލޯ ބުންޏޭ ބަލަމުން ބަލަމުންދާ It's a good thing that you're here. You're a good girl. You're a good girl. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place.,2025-05-16 18:41:56.105015
app.py CHANGED
@@ -43,16 +43,34 @@ pipe = pipeline(
43
 
44
  # Define the generation arguments
45
 
46
- generate_kwargs = {
47
- "max_new_tokens": model.config.max_target_positions-4,
48
- "num_beams": 4,
49
- "condition_on_prev_tokens": False,
50
- "compression_ratio_threshold": 1.35,
51
- #"temperature": (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
52
- #"logprob_threshold": -1.0,
53
- #"no_speech_threshold": 0.6,
54
- #"return_timestamps"=True
55
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  # IMPORTANT: Fix for forced_decoder_ids error
58
  # Remove forced_decoder_ids from the model's generation config
@@ -74,7 +92,7 @@ def transcribe(audio_input):
74
  # Use the defined generate_kwargs dictionary
75
  result = pipe(
76
  audio_input,
77
- generate_kwargs=generate_kwargs
78
  )
79
  return result["text"]
80
  except Exception as e:
 
43
 
44
  # Define the generation arguments
45
 
46
+ # Define optimized generation arguments
47
+ def get_generate_kwargs(is_short_audio=False):
48
+ """
49
+ Return the generate_kwargs dict handed to the pipeline, selected by audio length.
50
+ is_short_audio=True selects the short-audio settings; the default (False) selects the long-audio ones.
51
+ """
52
+ common_kwargs = {
53
+ "max_new_tokens": model.config.max_target_positions-4,
54
+ "num_beams": 4,
55
+ "condition_on_prev_tokens": False,
56
+ }
57
+
58
+ if is_short_audio:
59
+ # Short-audio settings: shared kwargs plus the overrides below
60
+ return {
61
+ **common_kwargs,
62
+ "compression_ratio_threshold": 1.5, # Looser than the 1.35 used for longer audio
63
+ "no_speech_threshold": 0.4, # NOTE(review): 0.4 is lower than the previously listed 0.6, not higher - confirm intent
64
+ "repetition_penalty": 1.5, # Stronger repetition penalty than the long-audio branch (1.2)
65
+ "return_timestamps": True, # Presumably requests timestamps for segmentation - TODO confirm the pipeline honors this inside generate_kwargs
66
+ }
67
+ else:
68
+ # Long-audio settings: shared kwargs plus the overrides below
69
+ return {
70
+ **common_kwargs,
71
+ "compression_ratio_threshold": 1.35, # Same value the previous static generate_kwargs used
72
+ "repetition_penalty": 1.2, # Lighter repetition penalty than the short-audio branch (1.5)
73
+ }
74
 
75
  # IMPORTANT: Fix for forced_decoder_ids error
76
  # Remove forced_decoder_ids from the model's generation config
 
92
  # Use the defined generate_kwargs dictionary
93
  result = pipe(
94
  audio_input,
95
+ generate_kwargs=get_generate_kwargs()
96
  )
97
  return result["text"]
98
  except Exception as e: