ciyidogan committed on
Commit
ae45ffa
·
verified ·
1 Parent(s): 9c58077

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +146 -52
stt/stt_google.py CHANGED
@@ -240,58 +240,152 @@ class GoogleSTT(STTInterface):
240
  return None
241
 
242
  def _convert_to_wav_proper(self, audio_data: bytes, sample_rate: int) -> bytes:
243
- """Convert raw PCM to proper WAV format - EXACTLY like test code"""
244
- try:
245
- # ✅ Test kodundan aynı WAV header oluşturma
246
- length = len(audio_data)
247
- buffer_size = 44 + length # WAV header + data
248
-
249
- # BytesIO kullanarak memory'de WAV oluştur
250
- wav_buffer = io.BytesIO()
251
-
252
- # Test kodundan aynı header yazma
253
- def write_string(data: str):
254
- wav_buffer.write(data.encode('ascii'))
255
-
256
- def write_uint32(value: int):
257
- wav_buffer.write(struct.pack('<I', value))
258
-
259
- def write_uint16(value: int):
260
- wav_buffer.write(struct.pack('<H', value))
261
-
262
- # RIFF header
263
- write_string('RIFF')
264
- write_uint32(36 + length) # File size - 8
265
- write_string('WAVE')
266
-
267
- # fmt chunk
268
- write_string('fmt ')
269
- write_uint32(16) # Subchunk1Size (PCM)
270
- write_uint16(1) # AudioFormat (PCM = 1)
271
- write_uint16(1) # NumChannels (mono)
272
- write_uint32(sample_rate) # SampleRate
273
- write_uint32(sample_rate * 1 * 2) # ByteRate
274
- write_uint16(1 * 2) # BlockAlign
275
- write_uint16(16) # BitsPerSample
276
-
277
- # data chunk
278
- write_string('data')
279
- write_uint32(length) # Subchunk2Size
280
-
281
- # Audio data
282
- wav_buffer.write(audio_data)
283
-
284
- wav_data = wav_buffer.getvalue()
285
- wav_buffer.close()
286
-
287
- log_info(f"🔧 WAV specs: 1ch, {sample_rate}Hz, 16bit")
288
-
289
- return wav_data
290
-
291
- except Exception as e:
292
- log_error(f"❌ WAV conversion failed: {e}")
293
- # Fallback to raw PCM
294
- return audio_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
  def get_supported_languages(self) -> List[str]:
297
  """Get list of supported language codes"""
 
240
  return None
241
 
242
  def _convert_to_wav_proper(self, audio_data: bytes, sample_rate: int) -> bytes:
243
+ """Convert raw PCM to proper WAV format - EXACTLY like test code"""
244
+ try:
245
+ # ✅ Test kodundan aynı WAV header oluşturma
246
+ length = len(audio_data)
247
+
248
+ # ✅ Debug: İlk birkaç byte'ı kontrol et
249
+ if length >= 20:
250
+ first_samples = struct.unpack('<10h', audio_data[:20])
251
+ log_info(f"🔍 First 10 PCM samples: {first_samples}")
252
+ log_info(f"🔍 Max amplitude in first 10: {max(abs(s) for s in first_samples)}")
253
+
254
+ # ✅ BytesIO kullanarak memory'de WAV oluştur
255
+ wav_buffer = io.BytesIO()
256
+
257
+ # ✅ Test kodundan aynı header yazma
258
+ def write_string(data: str):
259
+ wav_buffer.write(data.encode('ascii'))
260
+
261
+ def write_uint32(value: int):
262
+ wav_buffer.write(struct.pack('<I', value))
263
+
264
+ def write_uint16(value: int):
265
+ wav_buffer.write(struct.pack('<H', value))
266
+
267
+ # RIFF header
268
+ write_string('RIFF')
269
+ write_uint32(36 + length) # File size - 8
270
+ write_string('WAVE')
271
+
272
+ # fmt chunk
273
+ write_string('fmt ')
274
+ write_uint32(16) # Subchunk1Size (PCM)
275
+ write_uint16(1) # AudioFormat (PCM = 1)
276
+ write_uint16(1) # NumChannels (mono)
277
+ write_uint32(sample_rate) # SampleRate
278
+ write_uint32(sample_rate * 1 * 2) # ByteRate
279
+ write_uint16(1 * 2) # BlockAlign
280
+ write_uint16(16) # BitsPerSample
281
+
282
+ # data chunk
283
+ write_string('data')
284
+ write_uint32(length) # Subchunk2Size
285
+
286
+ # Audio data
287
+ wav_buffer.write(audio_data)
288
+
289
+ wav_data = wav_buffer.getvalue()
290
+ wav_buffer.close()
291
+
292
+ # ✅ Debug: WAV header'ını kontrol et
293
+ if len(wav_data) >= 44:
294
+ header_bytes = wav_data[:44]
295
+ log_info(f"🔍 WAV header (first 44 bytes): {header_bytes.hex()}")
296
+
297
+ # Header parse et
298
+ riff = header_bytes[0:4].decode('ascii')
299
+ file_size = struct.unpack('<I', header_bytes[4:8])[0]
300
+ wave = header_bytes[8:12].decode('ascii')
301
+ fmt_chunk = header_bytes[12:16].decode('ascii')
302
+ fmt_size = struct.unpack('<I', header_bytes[16:20])[0]
303
+ audio_format = struct.unpack('<H', header_bytes[20:22])[0]
304
+ channels = struct.unpack('<H', header_bytes[22:24])[0]
305
+ sample_rate_check = struct.unpack('<I', header_bytes[24:28])[0]
306
+ byte_rate = struct.unpack('<I', header_bytes[28:32])[0]
307
+ block_align = struct.unpack('<H', header_bytes[32:34])[0]
308
+ bits_per_sample = struct.unpack('<H', header_bytes[34:36])[0]
309
+ data_chunk = header_bytes[36:40].decode('ascii')
310
+ data_size = struct.unpack('<I', header_bytes[40:44])[0]
311
+
312
+ log_info(f"🔍 WAV Header Analysis:")
313
+ log_info(f" RIFF: {riff}")
314
+ log_info(f" File Size: {file_size}")
315
+ log_info(f" WAVE: {wave}")
316
+ log_info(f" FMT Chunk: {fmt_chunk}")
317
+ log_info(f" Audio Format: {audio_format} (should be 1)")
318
+ log_info(f" Channels: {channels} (should be 1)")
319
+ log_info(f" Sample Rate: {sample_rate_check} (should be {sample_rate})")
320
+ log_info(f" Byte Rate: {byte_rate}")
321
+ log_info(f" Block Align: {block_align}")
322
+ log_info(f" Bits Per Sample: {bits_per_sample}")
323
+ log_info(f" Data Chunk: {data_chunk}")
324
+ log_info(f" Data Size: {data_size} (should be {length})")
325
+
326
+ # ✅ Validation
327
+ if riff != 'RIFF':
328
+ log_error(f"❌ Invalid RIFF header: {riff}")
329
+ if wave != 'WAVE':
330
+ log_error(f"❌ Invalid WAVE header: {wave}")
331
+ if audio_format != 1:
332
+ log_error(f"❌ Invalid audio format: {audio_format}")
333
+ if channels != 1:
334
+ log_error(f"❌ Invalid channel count: {channels}")
335
+ if sample_rate_check != sample_rate:
336
+ log_error(f"❌ Invalid sample rate: {sample_rate_check}")
337
+ if data_size != length:
338
+ log_error(f"❌ Invalid data size: {data_size} vs {length}")
339
+
340
+ # ✅ Debug: WAV dosyasını geçici olarak kaydet (test için)
341
+ import tempfile
342
+ import os
343
+
344
+ temp_file = tempfile.mktemp(suffix='.wav')
345
+ try:
346
+ with open(temp_file, 'wb') as f:
347
+ f.write(wav_data)
348
+
349
+ # WAV dosyasının gerçekten valid olduğunu kontrol et
350
+ import wave
351
+ with wave.open(temp_file, 'rb') as wav_file:
352
+ wav_channels = wav_file.getnchannels()
353
+ wav_sample_width = wav_file.getsampwidth()
354
+ wav_sample_rate = wav_file.getframerate()
355
+ wav_frames = wav_file.getnframes()
356
+
357
+ log_info(f"🔍 WAV File Validation:")
358
+ log_info(f" Channels: {wav_channels}")
359
+ log_info(f" Sample Width: {wav_sample_width}")
360
+ log_info(f" Sample Rate: {wav_sample_rate}")
361
+ log_info(f" Frames: {wav_frames}")
362
+ log_info(f" Duration: {wav_frames / wav_sample_rate:.2f}s")
363
+
364
+ # İlk birkaç frame'i oku
365
+ first_frames = wav_file.readframes(10)
366
+ if first_frames:
367
+ first_samples_wav = struct.unpack('<10h', first_frames[:20])
368
+ log_info(f"🔍 First 10 samples from WAV: {first_samples_wav}")
369
+
370
+ log_info(f"✅ WAV file created and validated: {temp_file}")
371
+
372
+ except Exception as e:
373
+ log_error(f"❌ WAV validation failed: {e}")
374
+ finally:
375
+ # Cleanup
376
+ if os.path.exists(temp_file):
377
+ os.unlink(temp_file)
378
+
379
+ log_info(f"🔧 WAV specs: 1ch, {sample_rate}Hz, 16bit")
380
+
381
+ return wav_data
382
+
383
+ except Exception as e:
384
+ log_error(f"❌ WAV conversion failed: {e}")
385
+ import traceback
386
+ log_error(f"Traceback: {traceback.format_exc()}")
387
+ # Fallback to raw PCM
388
+ return audio_data
389
 
390
  def get_supported_languages(self) -> List[str]:
391
  """Get list of supported language codes"""