Abdurahman
app
534d3b0
raw
history blame
4.94 kB
import random
from umsc import UgMultiScriptConverter
import string
import epitran
from difflib import SequenceMatcher
# import httpcore
# setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
## Global Vars
# Practice prompts shown to the learner, stored in Uyghur Arabic script.
# Short prompts: single words and brief greetings.
short_texts = [
    "سالام",
    "رەھمەت",
    "ياخشىمۇسىز",
    "خۇش كېپسىز",
    "خەيرلىك كۈن",
    "خەير خوش",
]

# Long prompts: full sentences.
long_texts = [
    "مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.",
    "يېزا مەنزىرىسى ھەقىقەتەن گۈزەل.",
    "بىزنىڭ ئۆيدە تۆت تەكچە، تۆتىلىسى تەك-تەكچە",
    "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.",
]
# Initialize Uyghur script converters. Both objects are called like functions
# on a string elsewhere in this file.
# NOTE(review): 'UAS' / 'ULS' presumably stand for Uyghur Arabic Script and
# Uyghur Latin Script -- confirm against the umsc documentation.
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
# Initialize Epitran for Uyghur (Arabic script). Its transliterate() method is
# used to produce the IPA strings, so its input must be Arabic-script text.
ipa_converter = epitran.Epitran('uig-Arab')
## Front-End Utils
def generate_short_text(script_choice):
    """Pick a random short prompt, rendered in the requested script.

    The prompt is drawn from ``short_texts`` (Arabic script). It is converted
    to Latin script only when ``script_choice`` is exactly "Uyghur Latin";
    any other value returns the prompt unchanged.
    """
    prompt = random.choice(short_texts)
    if script_choice == "Uyghur Latin":
        return ug_arab_to_latn(prompt)
    return prompt
def generate_long_text(script_choice):
    """Pick a random long prompt, rendered in the requested script.

    The prompt is drawn from ``long_texts`` (Arabic script). It is converted
    to Latin script only when ``script_choice`` is exactly "Uyghur Latin";
    any other value returns the prompt unchanged.
    """
    prompt = random.choice(long_texts)
    if script_choice == "Uyghur Latin":
        return ug_arab_to_latn(prompt)
    return prompt
## ASR Utils
def remove_punctuation(text):
    """Return *text* with every punctuation character deleted.

    The removed set is ``string.punctuation`` (ASCII) plus a hand-picked list
    of additional marks. All other characters, including whitespace, are kept.
    """
    # Marks not covered by string.punctuation; add any further custom
    # punctuation from the training set here.
    custom_marks = "–؛;،؟?«»‹›−—¬”“"
    # Mapping a code point to None tells str.translate to drop that character.
    deletion_table = dict.fromkeys(map(ord, string.punctuation + custom_marks))
    return text.translate(deletion_table)
# def load_and_resample_audio(audio_data, target_rate):
# """Load audio and resample based on target sample rate"""
# if isinstance(audio_data, tuple):
# # microphone
# sampling_rate, audio_input = audio_data
# audio_input = (audio_input / 32768.0).astype(np.float32)
# elif isinstance(audio_data, str):
# # file upload
# audio_input, sampling_rate = torchaudio.load(audio_data)
# else:
# return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data))
# # Resample if needed
# if sampling_rate != target_rate:
# resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
# audio_input = resampler(audio_input)
# return audio_input, target_rate
def calculate_pronunciation_accuracy(reference_text, output_text, script):
    """
    Score an ASR transcript against a reference text and build visual feedback.

    Both texts are stripped of punctuation and transliterated to IPA with
    Epitran. The accuracy score and the coloured feedback are derived from a
    character-level ``SequenceMatcher`` alignment of the cleaned Arabic-script
    texts (not of the IPA strings).

    Args:
        reference_text (str): The ground truth text, written in the script
            named by ``script``.
        output_text (str): The ASR output text. It is never script-converted
            here, so it is expected to be in Uyghur Arabic script.
        script (str): ``'Uyghur Latin'`` when ``reference_text`` is in Latin
            script (it is then converted to Arabic script before processing);
            any other value leaves ``reference_text`` as-is.

    Returns:
        tuple: ``(reference_ipa, output_ipa, comparison_md, pronunciation_accuracy)``
            reference_ipa (str): IPA transliteration of the cleaned reference.
            output_ipa (str): IPA transliteration of the cleaned ASR output.
            comparison_md (str): HTML/Markdown snippet showing the reference
                text, blue where it matched the output and red where it did not.
            pronunciation_accuracy (float): ``SequenceMatcher.ratio()`` of the
                cleaned texts, expressed as a percentage (0-100).
    """
    is_latin = script == 'Uyghur Latin'

    # Make sure the reference is Arabic script for IPA conversion.
    if is_latin:
        reference_text = ug_latn_to_arab(reference_text)

    # Remove punctuation from both texts.
    reference_text_clean = remove_punctuation(reference_text)
    output_text_clean = remove_punctuation(output_text)

    # Transliterate both texts to IPA.
    reference_ipa = ipa_converter.transliterate(reference_text_clean)
    output_ipa = ipa_converter.transliterate(output_text_clean)

    # Align the cleaned texts; ratio() is the fraction of matching characters.
    matcher = SequenceMatcher(None, reference_text_clean, output_text_clean)
    pronunciation_accuracy = matcher.ratio() * 100

    # Build one coloured span per aligned piece of the reference text.
    spans = []
    for opcode, i1, i2, _j1, _j2 in matcher.get_opcodes():
        # The opcode indices refer to the Arabic-script string the matcher was
        # built on, so the slice must be taken from that same string.
        ref_segment = reference_text_clean[i1:i2]
        if not ref_segment:
            # 'insert' opcodes cover no reference characters; nothing to show.
            continue
        if is_latin:
            # Convert each piece back to the learner's script AFTER slicing.
            # Slicing an already-converted Latin string with Arabic-string
            # indices would misalign the colours, because the two scripts do
            # not have the same number of characters.
            # NOTE(review): converting piece-by-piece assumes the converter
            # gives sensible output on word fragments -- confirm with umsc.
            ref_segment = ug_arab_to_latn(ref_segment)
        # 'equal' pieces matched; 'replace' and 'delete' pieces did not.
        color = "blue" if opcode == 'equal' else "red"
        spans.append(
            f'<span style="color: {color}; font-size: 20px;">{ref_segment}</span>'
        )

    comparison_md = "<div>### Pronunciation Feedback\n" + "".join(spans) + "</div>"

    return reference_ipa, output_ipa, comparison_md, pronunciation_accuracy