Spaces:

UcsTurkey
/

flare

Building

App Files Community

ciyidogan committed 8 days ago

Commit

98d7635

verified ·

1 Parent(s): dcc497e

Update stt/stt_google.py

Browse files

Files changed (1) hide show

stt/stt_google.py +45 -15

stt/stt_google.py CHANGED Viewed

@@ -140,7 +140,9 @@ class GoogleSTT(STTInterface):
                 log_warning(f"⚠️ Audio is mostly zeros: {zero_count/total_samples:.1%}")
                 return None
-            # ✅ Configure recognition - RAW PCM için
             recognition_config = RecognitionConfig(
                 encoding=RecognitionConfig.AudioEncoding.LINEAR16,
                 sample_rate_hertz=16000,
@@ -150,6 +152,9 @@ class GoogleSTT(STTInterface):
                 enable_automatic_punctuation=True,
             )
             # ✅ RAW audio gönder, WAV conversion yapmadan
             audio = RecognitionAudio(content=audio_data)  # Direkt raw PCM
@@ -201,20 +206,45 @@ class GoogleSTT(STTInterface):
             return None
     def _convert_to_wav(self, audio_data: bytes, sample_rate: int) -> bytes:
-        """Convert raw PCM audio to WAV format"""
-        # Create WAV file in memory
-        wav_buffer = io.BytesIO()
-        with wave.open(wav_buffer, 'wb') as wav_file:
-            # Set WAV parameters
-            wav_file.setnchannels(1)  # Mono
-            wav_file.setsampwidth(2)  # 16-bit
-            wav_file.setframerate(sample_rate)
-            wav_file.writeframes(audio_data)
-        # Get WAV data
-        wav_buffer.seek(0)
-        return wav_buffer.read()
     def get_supported_languages(self) -> List[str]:
         """Get list of supported language codes"""

                 log_warning(f"⚠️ Audio is mostly zeros: {zero_count/total_samples:.1%}")
                 return None
+            wav_audio = self._convert_to_wav(audio_data, 16000)
+            # Configure recognition
             recognition_config = RecognitionConfig(
                 encoding=RecognitionConfig.AudioEncoding.LINEAR16,
                 sample_rate_hertz=16000,
                 enable_automatic_punctuation=True,
             )
+            # ✅ WAV audio gönder
+            audio = RecognitionAudio(content=wav_audio)
             # ✅ RAW audio gönder, WAV conversion yapmadan
             audio = RecognitionAudio(content=audio_data)  # Direkt raw PCM
             return None
     def _convert_to_wav(self, audio_data: bytes, sample_rate: int) -> bytes:
         """Wrap raw PCM bytes in a 44-byte RIFF/WAVE header.

         The header describes the payload as uncompressed PCM (AudioFormat 1),
         mono, 16 bits per sample, at ``sample_rate`` Hz.

         Args:
             audio_data: Raw PCM sample bytes without any container header.
             sample_rate: Sampling rate in Hz that is written into the header.

         Returns:
             The WAV header followed by the PCM payload. Trailing bytes that do
             not complete a 16-bit sample frame are dropped so the data chunk
             contains whole frames only. If building the WAV fails for any
             reason, the error is logged and ``audio_data`` is returned
             unchanged (best-effort fallback to raw PCM).
         """
         # Function-scope import: only this method needs struct.
         import struct

         try:
             # WAV file parameters
             channels = 1
             sample_width = 2  # bytes per sample (16-bit)
             frame_rate = sample_rate
             block_align = channels * sample_width

             # The data chunk size must be a multiple of BlockAlign; an odd
             # byte count would advertise a partial sample and leave the RIFF
             # chunk unpadded, so drop the incomplete trailing frame.
             remainder = len(audio_data) % block_align
             if remainder:
                 log_warning(
                     f"⚠️ PCM length {len(audio_data)} is not a multiple of "
                     f"{block_align}; dropping {remainder} trailing byte(s)"
                 )
                 pcm = audio_data[:-remainder]
             else:
                 pcm = audio_data
             audio_length = len(pcm)

             # Little-endian canonical header: RIFF descriptor, 16-byte PCM
             # "fmt " sub-chunk, then the "data" sub-chunk header.
             wav_header = struct.pack(
                 '<4sI4s4sIHHIIHH4sI',
                 b'RIFF',                   # ChunkID
                 36 + audio_length,         # ChunkSize (rest of header + data)
                 b'WAVE',                   # Format
                 b'fmt ',                   # Subchunk1ID
                 16,                        # Subchunk1Size (PCM)
                 1,                         # AudioFormat (PCM = 1)
                 channels,                  # NumChannels
                 frame_rate,                # SampleRate
                 frame_rate * block_align,  # ByteRate
                 block_align,               # BlockAlign
                 sample_width * 8,          # BitsPerSample
                 b'data',                   # Subchunk2ID
                 audio_length,              # Subchunk2Size
             )

             # Combine header and audio data
             wav_data = wav_header + pcm

             log_info(f"🔧 WAV conversion: {len(audio_data)} PCM → {len(wav_data)} WAV")
             log_info(f"🔧 WAV specs: {channels}ch, {frame_rate}Hz, {sample_width*8}bit")

             return wav_data

         except Exception as e:
             # Deliberate best-effort: hand back the raw PCM rather than fail
             # the whole recognition request.
             log_error(f"WAV conversion failed: {e}")
             return audio_data
     def get_supported_languages(self) -> List[str]:
         """Get list of supported language codes"""