# amsa-smart-form / speech_to_text.py
import whisper
import speech_recognition as sr
from pydub import AudioSegment
# Function to transcribe audio using the SpeechRecognition library (Google Web Speech API, Spanish)
def transcribe_speech_recognition(file_path):
    r = sr.Recognizer()
    audio_file = sr.AudioFile(file_path)
    with audio_file as source:
        audio = r.record(source)
    transcription_result = r.recognize_google(audio, language="es")
    return transcription_result
# Function to convert an MP3 file to WAV
def convert_mp3_to_wav(mp3_path):
    audio = AudioSegment.from_mp3(mp3_path)
    wav_path = mp3_path.replace('.mp3', '.wav')
    audio.export(wav_path, format="wav")
    return wav_path
# Function to transcribe audio using OpenAI Whisper
def transcribe_whisper(file_path):
    # Available model sizes: 'tiny', 'base', 'small', 'medium', 'large'
    model = whisper.load_model("base")
    result = model.transcribe(file_path)
    return result["text"]
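
# Example usage: convert an MP3 file to WAV, then transcribe it with both back-ends.
# This is a minimal sketch; "sample_audio.mp3" is a hypothetical file name and is
# not part of the original repository.
if __name__ == "__main__":
    wav_path = convert_mp3_to_wav("sample_audio.mp3")
    print("Google Speech Recognition:", transcribe_speech_recognition(wav_path))
    print("Whisper:", transcribe_whisper(wav_path))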