flare / stt /test_single_wav.py
ciyidogan's picture
Create test_single_wav.py
03ef174 verified
#!/usr/bin/env python3
"""
Single WAV file tester
Usage: python test_single_wav.py <wav_file>
"""
import sys
import wave
from google.cloud import speech
def test_wav_file(
    wav_file_path,
    *,
    credentials_path="./credentials/google-service-account.json",
    language_code="tr-TR",
):
    """Send one WAV file to Google Cloud Speech-to-Text and print the outcome.

    The file is read with the ``wave`` module, its raw PCM frames are sent in
    a single synchronous ``recognize`` request declared as LINEAR16, and the
    full response plus the top alternative of every result are printed.

    Args:
        wav_file_path: Path of the WAV file to transcribe.
        credentials_path: Service-account JSON file used to build the client.
        language_code: Language tag passed to the recognizer.

    Returns:
        True when the API returned at least one result. False when it
        returned none, when the file cannot be sent as LINEAR16, or when any
        exception was raised (the traceback is printed, not propagated).
    """
    try:
        print(f"Testing WAV file: {wav_file_path}")

        # Read the WAV header fields and the complete PCM payload.
        with wave.open(wav_file_path, 'rb') as wav_file:
            n_channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()
            sample_rate = wav_file.getframerate()
            n_frames = wav_file.getnframes()
            wav_audio = wav_file.readframes(n_frames)

        # A header with a zero frame rate would otherwise surface as an
        # unexplained ZeroDivisionError in the duration computation below.
        if sample_rate <= 0:
            print(f"Error: invalid sample rate in WAV header: {sample_rate}")
            return False

        print(f"WAV Info: {n_channels}ch, {sample_width*8}bit, {sample_rate}Hz, {n_frames} frames, {n_frames/sample_rate:.2f}s")

        # Only the raw frames are sent (no WAV header), and the request
        # declares them as LINEAR16, i.e. 16-bit samples. Any other sample
        # width would be misinterpreted by the recognizer.
        if sample_width != 2:
            print(f"Error: LINEAR16 requires 16-bit samples, got {sample_width*8}-bit audio")
            return False

        if not wav_audio:
            print("Error: WAV file contains no audio frames")
            return False

        # Google STT client
        client = speech.SpeechClient.from_service_account_file(credentials_path)

        # Config
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=sample_rate,
            language_code=language_code,
            audio_channel_count=n_channels,
            enable_separate_recognition_per_channel=False,
        )

        # Audio
        audio = speech.RecognitionAudio(content=wav_audio)

        # Recognize
        print("Sending to Google API...")
        response = client.recognize(config=config, audio=audio)

        print(f"Response: {response}")
        print(f"Results count: {len(response.results)}")

        if not response.results:
            print("No results returned")
            return False

        for i, result in enumerate(response.results):
            print(f"Result {i}: {result}")
            if result.alternatives:
                best = result.alternatives[0]
                print(f" Transcript: '{best.transcript}'")
                print(f" Confidence: {best.confidence}")
        return True

    except Exception as e:
        # Diagnostic script boundary: report everything and signal failure
        # through the return value instead of propagating.
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Exactly one positional argument (the WAV path) is expected.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python test_single_wav.py <wav_file>")
        sys.exit(1)

    # Map the boolean outcome onto a process exit status: 0 on success, 1 otherwise.
    if test_wav_file(cli_args[0]):
        sys.exit(0)
    sys.exit(1)