import os
import sys

import numpy as np
import pandas as pd
import whisper
# import torchaudio
# import librosa

# Restrict the process to a single GPU (index 2 on the host).
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

from tqdm.notebook import tqdm
# from whisper.normalizers import EnglishTextNormalizer


class Decoder:
    """Run ASR (speech-to-text) predictions with an openai-whisper model."""

    def __init__(self, model_type, language='mr'):
        """Load the whisper model and remember the transcription language.

        More details on the whisper model and its types can be found here:
        https://github.com/openai/whisper
        Convert HF model to openai-whisper:
        https://github.com/openai/whisper/discussions/830

        Args:
            model_type (str): Should be one of 'tiny', 'base', 'small',
                'medium', 'large', 'large-v2', 'large-v3'.
            language (str): Language code passed to whisper when decoding.
                Defaults to 'mr'.

        Raises:
            AssertionError: If ``model_type`` is not one of the names above.
        """
        assert model_type in ['tiny', 'base', 'small', 'medium', 'large', 'large-v2', 'large-v3'], "Wrong model type"
        print('Info: Loading model')
        self.model = whisper.load_model(model_type)
        # Kept so decode() honours the language chosen at construction time
        # instead of a hard-coded one.
        self.language = language
        self.decode_options = whisper.DecodingOptions(language=language, without_timestamps=True)
        self.device = "cuda"  # if torch.cuda.is_available() else "cpu"
        print("Info: Initialization done")

    def decode(self, filepath):
        """Transcribe one audio file in the language given at construction.

        Args:
            filepath (str): Path of the audio file.

        Returns:
            str: The "text" field of the whisper transcription result.
        """
        # Blank line before the call; presumably separates whisper's progress
        # output between files.
        print()
        result = self.model.transcribe(
            filepath,
            language=self.language,
            verbose=False,
            without_timestamps=True,
            fp16=False,
        )
        return result["text"]


def main():
    """Transcribe every ``.mp3`` file in the folder given on the command line.

    Writes ``transcripts_marathi.csv`` (columns ``MP3_File``, ``Transcript``)
    to the current directory.
    """
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if len(sys.argv) != 2:
        sys.exit("Ohh no, audio file seems to be missing")
    audio_folder_path = sys.argv[1]

    # Initialize the Decoder
    obj = Decoder('large-v3', language='mr')

    output_dir = "./"

    # Rows are collected in a plain list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []

    # Sorted so the CSV row order is reproducible (os.listdir order is arbitrary).
    mp3_files = [
        name for name in sorted(os.listdir(audio_folder_path))
        if name.endswith(".mp3")
    ]

    for count, filename in enumerate(mp3_files, start=1):
        mp3_file_path = os.path.join(audio_folder_path, filename)

        # Decode the MP3 file
        asr_output = obj.decode(mp3_file_path)
        # print(asr_output)

        rows.append({'MP3_File': filename, 'Transcript': asr_output})

        if count % 10 == 0:
            print(f'{count} files done')

        # Save the transcript to a text file
        # asr_save_path = os.path.join(output_dir, filename.replace(".mp3", ".txt"))
        # with open(asr_save_path, 'w') as f:
        #     f.write(asr_output)

    # Save the DataFrame to a CSV file
    transcripts_df = pd.DataFrame(rows, columns=['MP3_File', 'Transcript'])
    csv_save_path = os.path.join(output_dir, "transcripts_marathi.csv")
    transcripts_df.to_csv(csv_save_path, index=False)

    print("Transcription and CSV file creation completed.")


if __name__ == "__main__":
    main()