Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
d2db553
1
Parent(s):
8e370dd
- .gradio/cached_examples/16/log.csv +1 -1
- app.py +29 -11
.gradio/cached_examples/16/log.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
component 0,timestamp
|
2 |
-
Ka ކައެއް ބުނެދޭ އަޑު އަހަމާހޭ ނޭފަތް ބުނެޔޭ ވަސް ބަލަމާހޭ ތުންފަތް ބުނެޔޭ އެހިނި ތުންވެލަމުން އަތްތައް ގުޅުވާ އެކު އުޅެމާހޭ ލޯ ބުންޏޭ ބަލަމުން ބަލަމުންދާ
|
|
|
1 |
component 0,timestamp
|
2 |
+
Ka ކައެއް ބުނެދޭ އަޑު އަހަމާހޭ ނޭފަތް ބުނެޔޭ ވަސް ބަލަމާހޭ ތުންފަތް ބުނެޔޭ އެހިނި ތުންވެލަމުން އަތްތައް ގުޅުވާ އެކު އުޅެމާހޭ ލޯ ބުންޏޭ ބަލަމުން ބަލަމުންދާ It's a good thing that you're here. You're a good girl. You're a good girl. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place.,2025-05-16 18:41:56.105015
|
app.py
CHANGED
@@ -43,16 +43,34 @@ pipe = pipeline(
|
|
43 |
|
44 |
# Define the generation arguments
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
"
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
# IMPORTANT: Fix for forced_decoder_ids error
|
58 |
# Remove forced_decoder_ids from the model's generation config
|
@@ -74,7 +92,7 @@ def transcribe(audio_input):
|
|
74 |
# Use the defined generate_kwargs dictionary
|
75 |
result = pipe(
|
76 |
audio_input,
|
77 |
-
generate_kwargs=
|
78 |
)
|
79 |
return result["text"]
|
80 |
except Exception as e:
|
|
|
43 |
|
44 |
# Define the generation arguments
|
45 |
|
46 |
+
# Define optimized generation arguments
|
47 |
+
def get_generate_kwargs(is_short_audio=False):
    """Build the ``generate_kwargs`` dictionary passed to the ASR pipeline.

    Two profiles are available; both share the same base settings and
    differ only in the extra keys layered on top.

    Args:
        is_short_audio: When true, return the short-audio profile;
            otherwise (the default) return the long-audio profile.

    Returns:
        A newly created dict on every call, so callers may modify the
        result without affecting later calls.
    """
    # Settings shared by both profiles.
    # NOTE(review): the "- 4" presumably leaves room for the decoder prompt
    # tokens below ``max_target_positions`` -- confirm against the model.
    kwargs = {
        "max_new_tokens": model.config.max_target_positions - 4,
        "num_beams": 4,
        "condition_on_prev_tokens": False,
    }

    if not is_short_audio:
        # Long-audio profile: compression-ratio check plus a light
        # penalty on repeated tokens.
        kwargs["compression_ratio_threshold"] = 1.35
        kwargs["repetition_penalty"] = 1.2
        return kwargs

    # Short-audio profile: stronger repetition penalty, a no-speech
    # threshold, and timestamps requested from the model.
    # NOTE(review): the previous comment described 0.4 as a "higher"
    # no-speech threshold; verify what baseline that is relative to.
    kwargs.update(
        compression_ratio_threshold=1.5,
        no_speech_threshold=0.4,
        repetition_penalty=1.5,
        return_timestamps=True,
    )
    return kwargs
|
74 |
|
75 |
# IMPORTANT: Fix for forced_decoder_ids error
|
76 |
# Remove forced_decoder_ids from the model's generation config
|
|
|
92 |
# Use the defined generate_kwargs dictionary
|
93 |
result = pipe(
|
94 |
audio_input,
|
95 |
+
generate_kwargs=get_generate_kwargs()
|
96 |
)
|
97 |
return result["text"]
|
98 |
except Exception as e:
|