ciyidogan committed on
Commit
5789d1c
·
verified ·
1 Parent(s): d846f5e

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +51 -51
stt/stt_google.py CHANGED
@@ -127,56 +127,56 @@ class GoogleSTT(STTInterface):
127
  log_error(f"❌ Error analyzing audio: {e}")
128
 
129
  def _trim_silence(self, audio_data: bytes) -> bytes:
130
- """Trim silence from beginning and end of audio"""
131
- try:
132
- if len(audio_data) < 100:
133
- return audio_data
134
-
135
- # Convert to samples
136
- samples = list(struct.unpack(f'{len(audio_data)//2}h', audio_data))
137
-
138
- # Silence threshold - daha düşük bir threshold kullan
139
- silence_threshold = 200 # Daha düşük threshold
140
-
141
- # Find first non-silent sample
142
- start_idx = 0
143
- for i, sample in enumerate(samples):
144
- if abs(sample) > silence_threshold:
145
- start_idx = i
146
- break
147
-
148
- # Find last non-silent sample
149
- end_idx = len(samples) - 1
150
- for i in range(len(samples) - 1, -1, -1):
151
- if abs(samples[i]) > silence_threshold:
152
- end_idx = i
153
- break
154
-
155
- # Ensure we have some audio
156
- if start_idx >= end_idx:
157
- log_warning("⚠️ No audio content above silence threshold")
158
- return audio_data
159
-
160
- # Add small padding (250ms = 4000 samples at 16kHz)
161
- padding = 2000 # 125ms padding
162
- start_idx = max(0, start_idx - padding)
163
- end_idx = min(len(samples) - 1, end_idx + padding)
164
-
165
- # Extract trimmed audio
166
- trimmed_samples = samples[start_idx:end_idx + 1]
167
-
168
- log_info(f"🔧 Silence trimming: {len(samples)} → {len(trimmed_samples)} samples")
169
- log_info(f"🔧 Trimmed duration: {len(trimmed_samples)/16000:.2f}s")
170
-
171
- # Convert back to bytes
172
- trimmed_audio = struct.pack(f'{len(trimmed_samples)}h', *trimmed_samples)
173
-
174
- return trimmed_audio
175
-
176
- except Exception as e:
177
- log_error(f"❌ Silence trimming failed: {e}")
178
  return audio_data
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
181
  """Transcribe audio data using Google Cloud Speech API"""
182
  try:
@@ -194,8 +194,8 @@ class GoogleSTT(STTInterface):
194
  trimmed_audio = self._trim_silence(audio_data)
195
 
196
  # ✅ WAV formatında gönder - Google bu formatı daha iyi tanıyor
197
- wav_audio = self._convert_to_wav_proper(audio_data, config.sample_rate)
198
- log_info(f"🔧 WAV conversion: {len(audio_data)} PCM → {len(wav_audio)} WAV")
199
 
200
  import tempfile
201
  import os
@@ -203,7 +203,7 @@ class GoogleSTT(STTInterface):
203
  # Raw PCM kaydet
204
  pcm_file = tempfile.mktemp(suffix='.pcm')
205
  with open(pcm_file, 'wb') as f:
206
- f.write(audio_data)
207
  log_info(f"🔍 Raw PCM saved to: {pcm_file}")
208
 
209
  # WAV kaydet
 
127
  log_error(f"❌ Error analyzing audio: {e}")
128
 
129
  def _trim_silence(self, audio_data: bytes) -> bytes:
130
+ """Trim silence from beginning and end of audio"""
131
+ try:
132
+ if len(audio_data) < 100:
133
+ return audio_data
134
+
135
+ # Convert to samples
136
+ samples = list(struct.unpack(f'{len(audio_data)//2}h', audio_data))
137
+
138
+ # Silence threshold - daha düşük bir threshold kullan
139
+ silence_threshold = 200 # Daha düşük threshold
140
+
141
+ # Find first non-silent sample
142
+ start_idx = 0
143
+ for i, sample in enumerate(samples):
144
+ if abs(sample) > silence_threshold:
145
+ start_idx = i
146
+ break
147
+
148
+ # Find last non-silent sample
149
+ end_idx = len(samples) - 1
150
+ for i in range(len(samples) - 1, -1, -1):
151
+ if abs(samples[i]) > silence_threshold:
152
+ end_idx = i
153
+ break
154
+
155
+ # Ensure we have some audio
156
+ if start_idx >= end_idx:
157
+ log_warning("⚠️ No audio content above silence threshold")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  return audio_data
159
 
160
+ # Add small padding (250ms = 4000 samples at 16kHz)
161
+ padding = 2000 # 125ms padding
162
+ start_idx = max(0, start_idx - padding)
163
+ end_idx = min(len(samples) - 1, end_idx + padding)
164
+
165
+ # Extract trimmed audio
166
+ trimmed_samples = samples[start_idx:end_idx + 1]
167
+
168
+ log_info(f"🔧 Silence trimming: {len(samples)} → {len(trimmed_samples)} samples")
169
+ log_info(f"🔧 Trimmed duration: {len(trimmed_samples)/16000:.2f}s")
170
+
171
+ # Convert back to bytes
172
+ trimmed_audio = struct.pack(f'{len(trimmed_samples)}h', *trimmed_samples)
173
+
174
+ return trimmed_audio
175
+
176
+ except Exception as e:
177
+ log_error(f"❌ Silence trimming failed: {e}")
178
+ return audio_data
179
+
180
  async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
181
  """Transcribe audio data using Google Cloud Speech API"""
182
  try:
 
194
  trimmed_audio = self._trim_silence(audio_data)
195
 
196
  # ✅ WAV formatında gönder - Google bu formatı daha iyi tanıyor
197
+ wav_audio = self._convert_to_wav_proper(trimmed_audio, config.sample_rate)
198
+ log_info(f"🔧 WAV conversion: {len(trimmed_audio)} PCM → {len(wav_audio)} WAV")
199
 
200
  import tempfile
201
  import os
 
203
  # Raw PCM kaydet
204
  pcm_file = tempfile.mktemp(suffix='.pcm')
205
  with open(pcm_file, 'wb') as f:
206
+ f.write(trimmed_audio)
207
  log_info(f"🔍 Raw PCM saved to: {pcm_file}")
208
 
209
  # WAV kaydet