import logging

import librosa
from transformers import pipeline

# Set up logging
logging.basicConfig(level=logging.DEBUG)

# Sampling rate (Hz) that audio is resampled to before it is passed to the model.
ASR_SAMPLING_RATE = 16_000

# Load the ASR pipeline once, at import time. If loading fails, ``pipe`` is
# bound to None (rather than left undefined) so that transcribe() can detect
# the condition and report it clearly instead of tripping over a NameError.
try:
    pipe = pipeline("automatic-speech-recognition", model="facebook/mms-1b-all")
    logging.info("ASR pipeline loaded successfully.")
except Exception as e:
    pipe = None
    # logging.exception keeps the traceback, which logging.error would drop.
    logging.exception("Error loading ASR pipeline: %s", e)


def transcribe(audio) -> str:
    """Transcribe an audio file to text with the module-level ASR pipeline.

    The audio is loaded with ``librosa.load``, resampled to
    ``ASR_SAMPLING_RATE`` and down-mixed to mono, then handed to ``pipe``.

    This function never raises: every failure is logged and reported through
    the returned string, which then starts with ``"ERROR"``.

    Args:
        audio: The audio source passed straight to ``librosa.load``
            (presumably a file path -- confirm against the caller), or
            ``None`` when no audio was supplied.

    Returns:
        The transcribed text on success, otherwise an ``"ERROR..."`` message
        describing what went wrong.
    """
    try:
        if audio is None:
            logging.error("No audio file provided")
            return "ERROR: You have to either use the microphone or upload an audio file"

        # Fail fast when the model could not be loaded at import time, so we
        # do not spend time decoding audio that cannot be transcribed anyway.
        if pipe is None:
            logging.error("ASR pipeline is not available")
            return "ERROR: ASR pipeline is not available - check the logs for the loading error"

        logging.info(f"Loading audio file: {audio}")

        # Try loading the audio file with librosa
        try:
            audio_samples, _ = librosa.load(audio, sr=ASR_SAMPLING_RATE, mono=True)
        except FileNotFoundError:
            logging.error("Audio file not found")
            return "ERROR: Audio file not found"
        except Exception as e:
            logging.exception("Error loading audio file with librosa: %s", e)
            return f"ERROR: Unable to load audio file - {e}"

        # Process the audio with the pipeline
        try:
            transcription = pipe(audio_samples)["text"]
        except Exception as e:
            logging.exception("Error during transcription with pipeline: %s", e)
            return f"ERROR: Transcription failed - {e}"

        logging.info("Transcription completed successfully.")
        return transcription
    except Exception as e:
        # Last-resort safety net so callers always get a string back.
        logging.exception("Error during transcription: %s", e)
        return "ERROR"