import random
import string
import unicodedata
from difflib import SequenceMatcher

import epitran
import numpy as np
import torchaudio
from umsc import UgMultiScriptConverter

# Lists of Uyghur short and long texts
short_texts = [
    "سالام",
    "رەھمەت",
    "ياخشىمۇسىز",
]

long_texts = [
    "مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.",
    "يېزا مەنزىرىسى ھەقىقەتەن گۈزەل.",
    "بىزنىڭ ئۆيدە تۆت تەكچە تۆتىلىسى تەكتەكچە",
]

# string.punctuation also lists ASCII symbols such as "$", "+" and "<" that
# Unicode classifies as symbols (S*), not punctuation (P*); keep stripping
# them so remove_punctuation stays a superset of its ASCII-only behaviour.
_ASCII_PUNCTUATION = frozenset(string.punctuation)


# Front-End Utils
def _pick_text(candidates, script_choice):
    """Pick a random text from *candidates*, converted if Latin is requested."""
    text = random.choice(candidates)
    if script_choice == "Uyghur Latin":
        # Build the converter only when it is actually needed.
        ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
        return ug_arab_to_latn(text)
    return text


def generate_short_text(script_choice):
    """Generate a random Uyghur short text based on the type.

    Args:
        script_choice (str): "Uyghur Latin" returns the text passed through
            the 'UAS' -> 'ULS' converter; any other value returns the text
            exactly as stored in ``short_texts``.

    Returns:
        str: One randomly chosen entry of ``short_texts``.
    """
    return _pick_text(short_texts, script_choice)


def generate_long_text(script_choice):
    """Generate a random Uyghur long text based on the type.

    Args:
        script_choice (str): "Uyghur Latin" returns the text passed through
            the 'UAS' -> 'ULS' converter; any other value returns the text
            exactly as stored in ``long_texts``.

    Returns:
        str: One randomly chosen entry of ``long_texts``.
    """
    return _pick_text(long_texts, script_choice)


# ASR Utils
def load_and_resample_audio(audio_data, target_rate):
    """Load audio and resample based on target sample rate.

    Args:
        audio_data: Either a ``(sampling_rate, samples)`` tuple (microphone
            input) or a ``str`` path to an audio file (file upload).
        target_rate (int): Sample rate the returned audio should have.

    Returns:
        tuple: ``(audio_input, target_rate)`` for supported inputs.
        str: A short error message naming the offending type when
            ``audio_data`` is neither a tuple nor a str. Callers should
            check for this before unpacking the result.
    """
    if isinstance(audio_data, tuple):  # microphone
        sampling_rate, audio_input = audio_data
        # Scale into float32; the 32768 divisor presumably assumes 16-bit
        # PCM samples -- TODO confirm against the recording component.
        audio_input = (audio_input / 32768.0).astype(np.float32)
    elif isinstance(audio_data, str):  # file upload
        audio_input, sampling_rate = torchaudio.load(audio_data)
    else:
        # The previous format string had no placeholder, so the type was
        # silently dropped and the result was always the literal "<>".
        return "<unsupported audio input type: {}>".format(
            type(audio_data).__name__
        )

    # Resample if needed
    if sampling_rate != target_rate:
        # NOTE(review): on the microphone path audio_input is a NumPy array
        # here, while the file path yields whatever torchaudio.load returns;
        # verify the resampler accepts both.
        resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
        audio_input = resampler(audio_input)

    return audio_input, target_rate


def calculate_pronunciation_accuracy(reference_text, output_text, language_code='uig-Arab'):
    """
    Calculate pronunciation accuracy between reference and ASR output text using Epitran.

    Both texts are stripped of punctuation, transliterated to IPA and then
    compared character by character with ``difflib.SequenceMatcher``.

    Args:
        reference_text (str): The ground truth text in Uyghur (Arabic script).
        output_text (str): The ASR output text in Uyghur (Arabic script).
        language_code (str): Epitran language code (default is 'uig-Arab' for Uyghur).

    Returns:
        tuple: ``(reference_ipa, output_ipa, comparison_html, pronunciation_accuracy)``

        - reference_ipa (str): IPA transliteration of the reference text.
        - output_ipa (str): IPA transliteration of the output text.
        - comparison_html (str): The aligned segments joined in order:
          reference segments for equal/replace/delete spans, output
          segments for insert spans.
        - pronunciation_accuracy (float): ``SequenceMatcher.ratio()`` of the
          two IPA strings, as a percentage.
    """
    # Initialize Epitran for the requested language
    ipa_converter = epitran.Epitran(language_code)

    # Remove punctuation from both texts
    reference_text_clean = remove_punctuation(reference_text)
    output_text_clean = remove_punctuation(output_text)

    # Transliterate both texts to IPA
    reference_ipa = ipa_converter.transliterate(reference_text_clean)
    output_ipa = ipa_converter.transliterate(output_text_clean)

    # Calculate pronunciation accuracy using SequenceMatcher
    matcher = SequenceMatcher(None, reference_ipa, output_ipa)
    pronunciation_accuracy = matcher.ratio() * 100  # ratio() is a 0..1 fraction

    # Generate HTML for comparison
    # NOTE(review): every branch emits the bare segment with no markup, so
    # matches, substitutions, deletions and insertions are indistinguishable
    # in the result -- confirm whether styling tags are meant to wrap them.
    segments = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        ref_segment = reference_ipa[i1:i2]
        out_segment = output_ipa[j1:j2]

        if opcode == 'equal':  # Matching characters
            segments.append(f'{ref_segment}')
        elif opcode == 'replace':  # Mismatched characters
            segments.append(f'{ref_segment}')
        elif opcode == 'delete':  # Characters in reference but not in output
            segments.append(f'{ref_segment}')
        elif opcode == 'insert':  # Characters in output but not in reference
            segments.append(f'{out_segment}')
    comparison_html = "".join(segments)

    return reference_ipa, output_ipa, comparison_html, pronunciation_accuracy


def remove_punctuation(text):
    """Helper function to remove punctuation from text.

    Removes every character listed in ``string.punctuation`` and, in
    addition, every character whose Unicode general category is a
    punctuation class (``P*``), so Arabic-script marks such as the Arabic
    comma and question mark are stripped as well.

    Args:
        text (str): Input text.

    Returns:
        str: ``text`` with punctuation characters removed; all other
        characters, including whitespace, are kept in order.
    """
    return "".join(
        ch
        for ch in text
        if ch not in _ASCII_PUNCTUATION
        and not unicodedata.category(ch).startswith("P")
    )