Spaces:

UcsTurkey
/

flare

Building

App Files Files Community

ciyidogan committed 6 days ago

Commit

ae45ffa

verified ·

1 Parent(s): 9c58077

Update stt/stt_google.py

Browse files

Files changed (1) hide show

stt/stt_google.py +146 -52

stt/stt_google.py CHANGED Viewed

@@ -240,58 +240,152 @@ class GoogleSTT(STTInterface):
             return None
     def _convert_to_wav_proper(self, audio_data: bytes, sample_rate: int) -> bytes:
-        """Convert raw PCM to proper WAV format - EXACTLY like test code"""
-        try:
-            # ✅ Build the same WAV header as in the test code
-            length = len(audio_data)
-            buffer_size = 44 + length  # WAV header + data (NOTE(review): not read again in this block)
-            # ✅ Build the WAV in memory using BytesIO
-            wav_buffer = io.BytesIO()
-            # ✅ Header-writing helpers, same as in the test code
-            def write_string(data: str):
-                wav_buffer.write(data.encode('ascii'))
-            def write_uint32(value: int):
-                wav_buffer.write(struct.pack('<I', value))
-            def write_uint16(value: int):
-                wav_buffer.write(struct.pack('<H', value))
-            # RIFF header
-            write_string('RIFF')
-            write_uint32(36 + length)  # File size - 8
-            write_string('WAVE')
-            # fmt chunk
-            write_string('fmt ')
-            write_uint32(16)  # Subchunk1Size (PCM)
-            write_uint16(1)   # AudioFormat (PCM = 1)
-            write_uint16(1)   # NumChannels (mono)
-            write_uint32(sample_rate)  # SampleRate
-            write_uint32(sample_rate * 1 * 2)  # ByteRate
-            write_uint16(1 * 2)  # BlockAlign
-            write_uint16(16)  # BitsPerSample
-            # data chunk
-            write_string('data')
-            write_uint32(length)  # Subchunk2Size
-            # Audio data
-            wav_buffer.write(audio_data)
-            wav_data = wav_buffer.getvalue()
-            wav_buffer.close()
-            log_info(f"🔧 WAV specs: 1ch, {sample_rate}Hz, 16bit")
-            return wav_data
-        except Exception as e:
-            log_error(f"❌ WAV conversion failed: {e}")
-            # Fallback to raw PCM
-            return audio_data
     def get_supported_languages(self) -> List[str]:
         """Get list of supported language codes"""

             return None
     def _convert_to_wav_proper(self, audio_data: bytes, sample_rate: int) -> bytes:
+            """Convert raw PCM to proper WAV format - EXACTLY like test code"""
+            try:
+                # ✅ Build the same WAV header as in the test code
+                length = len(audio_data)
+                # ✅ Debug: inspect the first few bytes (10 little-endian 16-bit samples)
+                if length >= 20:
+                    first_samples = struct.unpack('<10h', audio_data[:20])
+                    log_info(f"🔍 First 10 PCM samples: {first_samples}")
+                    log_info(f"🔍 Max amplitude in first 10: {max(abs(s) for s in first_samples)}")
+                # ✅ Build the WAV in memory using BytesIO
+                wav_buffer = io.BytesIO()
+                # ✅ Header-writing helpers, same as in the test code
+                def write_string(data: str):
+                    wav_buffer.write(data.encode('ascii'))
+                def write_uint32(value: int):
+                    wav_buffer.write(struct.pack('<I', value))
+                def write_uint16(value: int):
+                    wav_buffer.write(struct.pack('<H', value))
+                # RIFF header
+                write_string('RIFF')
+                write_uint32(36 + length)  # File size - 8
+                write_string('WAVE')
+                # fmt chunk
+                write_string('fmt ')
+                write_uint32(16)  # Subchunk1Size (PCM)
+                write_uint16(1)   # AudioFormat (PCM = 1)
+                write_uint16(1)   # NumChannels (mono)
+                write_uint32(sample_rate)  # SampleRate
+                write_uint32(sample_rate * 1 * 2)  # ByteRate
+                write_uint16(1 * 2)  # BlockAlign
+                write_uint16(16)  # BitsPerSample
+                # data chunk
+                write_string('data')
+                write_uint32(length)  # Subchunk2Size
+                # Audio data
+                wav_buffer.write(audio_data)
+                wav_data = wav_buffer.getvalue()
+                wav_buffer.close()
+                # ✅ Debug: check the WAV header that was just written
+                if len(wav_data) >= 44:
+                    header_bytes = wav_data[:44]
+                    log_info(f"🔍 WAV header (first 44 bytes): {header_bytes.hex()}")
+                    # Parse the header fields back out of the 44-byte prefix
+                    riff = header_bytes[0:4].decode('ascii')
+                    file_size = struct.unpack('<I', header_bytes[4:8])[0]
+                    wave = header_bytes[8:12].decode('ascii')
+                    fmt_chunk = header_bytes[12:16].decode('ascii')
+                    fmt_size = struct.unpack('<I', header_bytes[16:20])[0]
+                    audio_format = struct.unpack('<H', header_bytes[20:22])[0]
+                    channels = struct.unpack('<H', header_bytes[22:24])[0]
+                    sample_rate_check = struct.unpack('<I', header_bytes[24:28])[0]
+                    byte_rate = struct.unpack('<I', header_bytes[28:32])[0]
+                    block_align = struct.unpack('<H', header_bytes[32:34])[0]
+                    bits_per_sample = struct.unpack('<H', header_bytes[34:36])[0]
+                    data_chunk = header_bytes[36:40].decode('ascii')
+                    data_size = struct.unpack('<I', header_bytes[40:44])[0]
+                    log_info(f"🔍 WAV Header Analysis:")
+                    log_info(f"  RIFF: {riff}")
+                    log_info(f"  File Size: {file_size}")
+                    log_info(f"  WAVE: {wave}")
+                    log_info(f"  FMT Chunk: {fmt_chunk}")
+                    log_info(f"  Audio Format: {audio_format} (should be 1)")
+                    log_info(f"  Channels: {channels} (should be 1)")
+                    log_info(f"  Sample Rate: {sample_rate_check} (should be {sample_rate})")
+                    log_info(f"  Byte Rate: {byte_rate}")
+                    log_info(f"  Block Align: {block_align}")
+                    log_info(f"  Bits Per Sample: {bits_per_sample}")
+                    log_info(f"  Data Chunk: {data_chunk}")
+                    log_info(f"  Data Size: {data_size} (should be {length})")
+                    # ✅ Validation: mismatches are only logged, they do not abort the conversion
+                    if riff != 'RIFF':
+                        log_error(f"❌ Invalid RIFF header: {riff}")
+                    if wave != 'WAVE':
+                        log_error(f"❌ Invalid WAVE header: {wave}")
+                    if audio_format != 1:
+                        log_error(f"❌ Invalid audio format: {audio_format}")
+                    if channels != 1:
+                        log_error(f"❌ Invalid channel count: {channels}")
+                    if sample_rate_check != sample_rate:
+                        log_error(f"❌ Invalid sample rate: {sample_rate_check}")
+                    if data_size != length:
+                        log_error(f"❌ Invalid data size: {data_size} vs {length}")
+                # ✅ Debug: temporarily save the WAV file (for testing)
+                import tempfile
+                import os
+                temp_file = tempfile.mktemp(suffix='.wav')  # NOTE(review): tempfile.mktemp is deprecated and race-prone; consider NamedTemporaryFile or wave.open on a BytesIO
+                try:
+                    with open(temp_file, 'wb') as f:
+                        f.write(wav_data)
+                    # Check that the WAV file is actually valid by re-reading it with the wave module
+                    import wave  # NOTE(review): rebinds the local name 'wave' that held the header tag string above
+                    with wave.open(temp_file, 'rb') as wav_file:
+                        wav_channels = wav_file.getnchannels()
+                        wav_sample_width = wav_file.getsampwidth()
+                        wav_sample_rate = wav_file.getframerate()
+                        wav_frames = wav_file.getnframes()
+                        log_info(f"🔍 WAV File Validation:")
+                        log_info(f"  Channels: {wav_channels}")
+                        log_info(f"  Sample Width: {wav_sample_width}")
+                        log_info(f"  Sample Rate: {wav_sample_rate}")
+                        log_info(f"  Frames: {wav_frames}")
+                        log_info(f"  Duration: {wav_frames / wav_sample_rate:.2f}s")
+                        # Read the first few frames
+                        first_frames = wav_file.readframes(10)
+                        if first_frames:
+                            first_samples_wav = struct.unpack('<10h', first_frames[:20])  # NOTE(review): raises struct.error when fewer than 10 frames were read; caught by the except below
+                            log_info(f"🔍 First 10 samples from WAV: {first_samples_wav}")
+                    log_info(f"✅ WAV file created and validated: {temp_file}")
+                except Exception as e:
+                    log_error(f"❌ WAV validation failed: {e}")
+                finally:
+                    # Cleanup
+                    if os.path.exists(temp_file):
+                        os.unlink(temp_file)
+                log_info(f"🔧 WAV specs: 1ch, {sample_rate}Hz, 16bit")
+                return wav_data
+            except Exception as e:
+                log_error(f"❌ WAV conversion failed: {e}")
+                import traceback
+                log_error(f"Traceback: {traceback.format_exc()}")
+                # Fallback to raw PCM
+                return audio_data
     def get_supported_languages(self) -> List[str]:
         """Get list of supported language codes"""