Abdurahman committed on
Commit
f0e249a
·
1 Parent(s): f4d4d8e
Files changed (3) hide show
  1. asr.py +2 -7
  2. tts.py +2 -3
  3. util.py +23 -11
asr.py CHANGED
@@ -2,7 +2,6 @@ import numpy as np
2
  from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
  import torch
4
  import torchaudio
5
- from umsc import UgMultiScriptConverter
6
  import util
7
 
8
  # Model ID and setup
@@ -46,16 +45,12 @@ def asr(audio_data, target_rate = 16000):
46
  def check_pronunciation(input_text, script, user_audio):
47
  # Transcripts from user input audio
48
  transcript_ugLatn_box = asr(user_audio)
49
- ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
50
- transcript_ugArab_box = ug_latn_to_arab(transcript_ugLatn_box)
51
-
52
- if script == 'Uyghur Latin':
53
- input_text = ug_latn_to_arab(input_text) # make sure input text is arabic script for IPA conversion
54
 
55
  # Get IPA and Pronunciation Feedback
56
  machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
57
  reference_text = input_text,
58
  output_text = transcript_ugArab_box,
59
- language_code='uig-Arab')
60
 
61
  return transcript_ugArab_box, transcript_ugLatn_box, machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score
 
2
  from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
  import torch
4
  import torchaudio
 
5
  import util
6
 
7
  # Model ID and setup
 
45
  def check_pronunciation(input_text, script, user_audio):
46
  # Transcripts from user input audio
47
  transcript_ugLatn_box = asr(user_audio)
48
+ transcript_ugArab_box = util.ug_latn_to_arab(transcript_ugLatn_box)
 
 
 
 
49
 
50
  # Get IPA and Pronunciation Feedback
51
  machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
52
  reference_text = input_text,
53
  output_text = transcript_ugArab_box,
54
+ script=script)
55
 
56
  return transcript_ugArab_box, transcript_ugLatn_box, machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score
tts.py CHANGED
@@ -1,7 +1,7 @@
1
  from transformers import VitsModel, AutoTokenizer
2
  import torch
3
- from umsc import UgMultiScriptConverter
4
  import scipy.io.wavfile
 
5
 
6
  # Model ID and setup
7
  model_id = "facebook/mms-tts-uig-script_arabic"
@@ -17,9 +17,8 @@ def generate_audio(input_text, script):
17
  Generate audio for the given input text and script
18
  """
19
  # Convert text to Uyghur Arabic if needed
20
- ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
21
  if script != "Uyghur Arabic":
22
- input_text = ug_latn_to_arab(input_text)
23
 
24
  # Tokenize and move inputs to the same device as the model
25
  tts_inputs = tts_tokenizer(input_text, return_tensors="pt").to(device)
 
1
  from transformers import VitsModel, AutoTokenizer
2
  import torch
 
3
  import scipy.io.wavfile
4
+ import util
5
 
6
  # Model ID and setup
7
  model_id = "facebook/mms-tts-uig-script_arabic"
 
17
  Generate audio for the given input text and script
18
  """
19
  # Convert text to Uyghur Arabic if needed
 
20
  if script != "Uyghur Arabic":
21
+ input_text = util.ug_latn_to_arab(input_text)
22
 
23
  # Tokenize and move inputs to the same device as the model
24
  tts_inputs = tts_tokenizer(input_text, return_tensors="pt").to(device)
util.py CHANGED
@@ -4,6 +4,7 @@ import string
4
  import epitran
5
  from difflib import SequenceMatcher
6
 
 
7
  # Lists of Uyghur short and long texts
8
  short_texts = [
9
  "سالام", "رەھمەت", "ياخشىمۇسىز", "خۇش كېپسىز", "خەيرلىك كۈن", "خەير خوش"
@@ -15,8 +16,15 @@ long_texts = [
15
  "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
16
  ]
17
 
18
- # Front-End Utils
19
  ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
 
 
 
 
 
 
 
20
  def generate_short_text(script_choice):
21
  """Generate a random Uyghur short text based on the type."""
22
  text = random.choice(short_texts)
@@ -27,7 +35,15 @@ def generate_long_text(script_choice):
27
  text = random.choice(long_texts)
28
  return ug_arab_to_latn(text) if script_choice == "Uyghur Latin" else text
29
 
30
- # ASR Utils
 
 
 
 
 
 
 
 
31
  # def load_and_resample_audio(audio_data, target_rate):
32
  # """Load audio and resample based on target sample rate"""
33
  # if isinstance(audio_data, tuple):
@@ -46,7 +62,7 @@ def generate_long_text(script_choice):
46
 
47
  # return audio_input, target_rate
48
 
49
- def calculate_pronunciation_accuracy(reference_text, output_text, language_code='uig-Arab'):
50
  """
51
  Calculate pronunciation accuracy between reference and ASR output text using Epitran.
52
 
@@ -60,8 +76,10 @@ def calculate_pronunciation_accuracy(reference_text, output_text, language_code=
60
  str: IPA transliteration of the reference text.
61
  str: IPA transliteration of the output text.
62
  """
63
- # Initialize Epitran for Uyghur (Arabic script)
64
- ipa_converter = epitran.Epitran(language_code)
 
 
65
 
66
  # Remove punctuation from both texts
67
  reference_text_clean = remove_punctuation(reference_text)
@@ -93,9 +111,3 @@ def calculate_pronunciation_accuracy(reference_text, output_text, language_code=
93
 
94
  return reference_ipa, output_ipa, comparison_md, pronunciation_accuracy
95
 
96
- def remove_punctuation(text):
97
- """Helper function to remove punctuation from text."""
98
- extra_punctuation = "–؛;،؟?«»‹›−—¬”“" # Add your additional custom punctuation from the training set here
99
- all_punctuation = string.punctuation + extra_punctuation
100
-
101
- return text.translate(str.maketrans('', '', all_punctuation))
 
4
  import epitran
5
  from difflib import SequenceMatcher
6
 
7
+ ## Global Vars
8
  # Lists of Uyghur short and long texts
9
  short_texts = [
10
  "سالام", "رەھمەت", "ياخشىمۇسىز", "خۇش كېپسىز", "خەيرلىك كۈن", "خەير خوش"
 
16
  "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
17
  ]
18
 
19
+ # Initialize Uyghur script converters (Arabic <-> Latin)
20
  ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
21
+ ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
22
+
23
+ # Initialize Epitran for Uyghur (Arabic script)
24
+ ipa_converter = epitran.Epitran(language_code='uig-Arab')
25
+
26
+
27
+ ## Front-End Utils
28
  def generate_short_text(script_choice):
29
  """Generate a random Uyghur short text based on the type."""
30
  text = random.choice(short_texts)
 
35
  text = random.choice(long_texts)
36
  return ug_arab_to_latn(text) if script_choice == "Uyghur Latin" else text
37
 
38
+
39
+ ## ASR Utils
40
+ def remove_punctuation(text):
41
+ """Helper function to remove punctuation from text."""
42
+ extra_punctuation = "–؛;،؟?«»‹›−—¬”“" # Add your additional custom punctuation from the training set here
43
+ all_punctuation = string.punctuation + extra_punctuation
44
+
45
+ return text.translate(str.maketrans('', '', all_punctuation))
46
+
47
  # def load_and_resample_audio(audio_data, target_rate):
48
  # """Load audio and resample based on target sample rate"""
49
  # if isinstance(audio_data, tuple):
 
62
 
63
  # return audio_input, target_rate
64
 
65
+ def calculate_pronunciation_accuracy(reference_text, output_text, script):
66
  """
67
  Calculate pronunciation accuracy between reference and ASR output text using Epitran.
68
 
 
76
  str: IPA transliteration of the reference text.
77
  str: IPA transliteration of the output text.
78
  """
79
+
80
+
81
+ if script == 'Uyghur Latin':
82
+ reference_text = ug_latn_to_arab(reference_text) # ensure the reference text is in Arabic script before IPA conversion
83
 
84
  # Remove punctuation from both texts
85
  reference_text_clean = remove_punctuation(reference_text)
 
111
 
112
  return reference_ipa, output_ipa, comparison_md, pronunciation_accuracy
113