Spaces:

UcsTurkey
/

flare

Running

App Files Files Community

ciyidogan committed 17 days ago

Commit

5789d1c

verified ·

1 Parent(s): d846f5e

Update stt/stt_google.py

Browse files

Files changed (1) hide show

stt/stt_google.py +51 -51

stt/stt_google.py CHANGED Viewed

@@ -127,56 +127,56 @@ class GoogleSTT(STTInterface):
             log_error(f"❌ Error analyzing audio: {e}")
     def _trim_silence(self, audio_data: bytes) -> bytes:
-            """Trim silence from beginning and end of audio"""
-            try:
-                if len(audio_data) < 100:
-                    return audio_data
-                # Convert to samples
-                samples = list(struct.unpack(f'{len(audio_data)//2}h', audio_data))
-                # Silence threshold - daha düşük bir threshold kullan
-                silence_threshold = 200  # Daha düşük threshold
-                # Find first non-silent sample
-                start_idx = 0
-                for i, sample in enumerate(samples):
-                    if abs(sample) > silence_threshold:
-                        start_idx = i
-                        break
-                # Find last non-silent sample
-                end_idx = len(samples) - 1
-                for i in range(len(samples) - 1, -1, -1):
-                    if abs(samples[i]) > silence_threshold:
-                        end_idx = i
-                        break
-                # Ensure we have some audio
-                if start_idx >= end_idx:
-                    log_warning("⚠️ No audio content above silence threshold")
-                    return audio_data
-                # Add small padding (250ms = 4000 samples at 16kHz)
-                padding = 2000  # 125ms padding
-                start_idx = max(0, start_idx - padding)
-                end_idx = min(len(samples) - 1, end_idx + padding)
-                # Extract trimmed audio
-                trimmed_samples = samples[start_idx:end_idx + 1]
-                log_info(f"🔧 Silence trimming: {len(samples)} → {len(trimmed_samples)} samples")
-                log_info(f"🔧 Trimmed duration: {len(trimmed_samples)/16000:.2f}s")
-                # Convert back to bytes
-                trimmed_audio = struct.pack(f'{len(trimmed_samples)}h', *trimmed_samples)
-                return trimmed_audio
-            except Exception as e:
-                log_error(f"❌ Silence trimming failed: {e}")
                 return audio_data
     async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
         """Transcribe audio data using Google Cloud Speech API"""
         try:
@@ -194,8 +194,8 @@ class GoogleSTT(STTInterface):
             trimmed_audio = self._trim_silence(audio_data)
             # ✅ WAV formatında gönder - Google bu formatı daha iyi tanıyor
-            wav_audio = self._convert_to_wav_proper(audio_data, config.sample_rate)
-            log_info(f"🔧 WAV conversion: {len(audio_data)} PCM → {len(wav_audio)} WAV")
             import tempfile
             import os
@@ -203,7 +203,7 @@ class GoogleSTT(STTInterface):
             # Raw PCM kaydet
             pcm_file = tempfile.mktemp(suffix='.pcm')
             with open(pcm_file, 'wb') as f:
-                f.write(audio_data)
             log_info(f"🔍 Raw PCM saved to: {pcm_file}")
             # WAV kaydet

             log_error(f"❌ Error analyzing audio: {e}")
     def _trim_silence(self, audio_data: bytes) -> bytes:
         """Trim leading and trailing silence from raw 16-bit PCM audio.

         The buffer is decoded as signed 16-bit samples (native byte order),
         the first and last samples whose absolute amplitude exceeds the
         silence threshold are located, and the audio between them (plus a
         fixed padding on each side) is returned.

         Args:
             audio_data: Raw PCM bytes, two bytes per sample.

         Returns:
             The trimmed PCM bytes. The input is returned unchanged when it
             is shorter than 100 bytes, when no sample exceeds the silence
             threshold, or when trimming fails for any reason (best effort:
             this method never raises).
         """
         try:
             if len(audio_data) < 100:
                 return audio_data

             # Decode whole samples only. A dangling odd byte would make
             # struct.unpack raise and silently disable trimming altogether.
             # NOTE(review): 'h' uses native byte order; this assumes the PCM
             # stream matches the host (little-endian on typical hosts).
             sample_count = len(audio_data) // 2
             samples = struct.unpack(
                 f'{sample_count}h', audio_data[:sample_count * 2]
             )

             # Amplitudes at or below this value are treated as silence
             # (deliberately low so quiet speech is not cut off).
             silence_threshold = 200

             # First sample above the threshold; None means the whole buffer
             # is silent. A sentinel is needed because index 0 is a valid hit.
             start_idx = next(
                 (i for i, sample in enumerate(samples)
                  if abs(sample) > silence_threshold),
                 None,
             )
             if start_idx is None:
                 log_warning("⚠️ No audio content above silence threshold")
                 return audio_data

             # Last sample above the threshold; guaranteed to exist because
             # start_idx was found.
             end_idx = next(
                 i for i in range(sample_count - 1, -1, -1)
                 if abs(samples[i]) > silence_threshold
             )

             # Keep a little context around the speech so onsets and tails
             # are not clipped (2000 samples = 125ms at 16kHz).
             padding = 2000
             start_idx = max(0, start_idx - padding)
             end_idx = min(sample_count - 1, end_idx + padding)

             trimmed_count = end_idx - start_idx + 1
             log_info(f"🔧 Silence trimming: {sample_count} → {trimmed_count} samples")
             # Duration is reported assuming a 16kHz sample rate.
             log_info(f"🔧 Trimmed duration: {trimmed_count/16000:.2f}s")

             # Slice the original bytes (2 bytes per sample) instead of
             # re-packing the samples; the result is byte-identical.
             return bytes(audio_data[start_idx * 2:(end_idx + 1) * 2])
         except Exception as e:
             # Best effort: on any failure fall back to the untrimmed audio.
             log_error(f"❌ Silence trimming failed: {e}")
             return audio_data
     async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
         """Transcribe audio data using Google Cloud Speech API"""
         try:
             trimmed_audio = self._trim_silence(audio_data)
             # ✅ WAV formatında gönder - Google bu formatı daha iyi tanıyor
+            wav_audio = self._convert_to_wav_proper(trimmed_audio, config.sample_rate)
+            log_info(f"🔧 WAV conversion: {len(trimmed_audio)} PCM → {len(wav_audio)} WAV")
             import tempfile
             import os
             # Raw PCM kaydet
             pcm_file = tempfile.mktemp(suffix='.pcm')
             with open(pcm_file, 'wb') as f:
+                f.write(trimmed_audio)
             log_info(f"🔍 Raw PCM saved to: {pcm_file}")
             # WAV kaydet