Spaces:

alakxender
/

asr-dhivehi-demo

Running on Zero

App Files Files Community

alakxender committed on May 16

Commit

d2db553

1 Parent(s): 8e370dd

f

Browse files

Files changed (2) hide show

.gradio/cached_examples/16/log.csv +1 -1
app.py +29 -11

.gradio/cached_examples/16/log.csv CHANGED Viewed

	@@ -1,2 +1,2 @@
1	component 0,timestamp
2	- Ka ކައެއް ބުނެދޭ އަޑު އަހަމާހޭ ނޭފަތް ބުނެޔޭ ވަސް ބަލަމާހޭ ތުންފަތް ބުނެޔޭ އެހިނި ތުންވެލަމުން އަތްތައް ގުޅުވާ އެކު އުޅެމާހޭ ލޯ ބުންޏޭ ބަލަމުން ބަލަމުންދާ ~~The~~ ~~one~~ ~~who~~ is in ~~love~~ ~~with~~ the ~~are~~ the ~~birds~~. They ~~are~~ the ~~birds~~.,2025-05-16 18:20:28.~~737297~~


1	component 0,timestamp
2	+ Ka ކައެއް ބުނެދޭ އަޑު އަހަމާހޭ ނޭފަތް ބުނެޔޭ ވަސް ބަލަމާހޭ ތުންފަތް ބުނެޔޭ އެހިނި ތުންވެލަމުން އަތްތައް ގުޅުވާ އެކު އުޅެމާހޭ ލޯ ބުންޏޭ ބަލަމުން ބަލަމުންދާ It's a good thing that you're here. You're a good girl. You're a good girl. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place. They're going to the wrong place.,2025-05-16 18:41:56.105015

app.py CHANGED Viewed

@@ -43,16 +43,34 @@ pipe = pipeline(
 # Define the generation arguments
-generate_kwargs = {
-    "max_new_tokens": model.config.max_target_positions-4,
-    "num_beams": 4,
-    "condition_on_prev_tokens": False,
-    "compression_ratio_threshold": 1.35,
-    #"temperature": (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
-    #"logprob_threshold": -1.0,
-    #"no_speech_threshold": 0.6,
-    #"return_timestamps"=True
-}
 # IMPORTANT: Fix for forced_decoder_ids error
 # Remove forced_decoder_ids from the model's generation config
@@ -74,7 +92,7 @@ def transcribe(audio_input):
         # Use the defined generate_kwargs dictionary
         result = pipe(
             audio_input,
-            generate_kwargs=generate_kwargs
         )
         return result["text"]
     except Exception as e:

 # Define the generation arguments
+# Define optimized generation arguments
+def get_generate_kwargs(is_short_audio=False):
+    """
+    Get appropriate generation parameters based on audio length.
+    Short audio transcription benefits from different parameters.
+    """
+    common_kwargs = {
+        "max_new_tokens": model.config.max_target_positions-4,  # 4 fewer than the model's configured max target positions
+        "num_beams": 4,
+        "condition_on_prev_tokens": False,
+    }
+    if is_short_audio:
+        # Short-audio overrides. NOTE(review): the call site visible in this diff passes no argument, so this branch is not reached from it.
+        return {
+            **common_kwargs,
+            "compression_ratio_threshold": 1.5,     # Higher than the 1.35 used in the long-audio branch
+            "no_speech_threshold": 0.4,             # Lower than the 0.6 that was previously commented out; NOTE(review): presumably flags more segments as silence - confirm
+            "repetition_penalty": 1.5,              # Stronger penalty than the 1.2 used in the long-audio branch
+            "return_timestamps": True,              # Only requested in this branch; the long-audio branch does not set it
+        }
+    else:
+        # Long-audio parameters; this is the default path because is_short_audio defaults to False.
+        return {
+            **common_kwargs,
+            "compression_ratio_threshold": 1.35,    # Same value the previous module-level generate_kwargs used
+            "repetition_penalty": 1.2,              # Lighter penalty than the 1.5 used in the short-audio branch
+        }
 # IMPORTANT: Fix for forced_decoder_ids error
 # Remove forced_decoder_ids from the model's generation config
         # Use the defined generate_kwargs dictionary
         result = pipe(
             audio_input,
+            generate_kwargs=get_generate_kwargs()
         )
         return result["text"]
     except Exception as e: