Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
·
5abbb8c
1
Parent(s):
bfec55c
refactor: remove use of utilsFileIO.py, update bootstrap@5.3.3 and jquery@3.7.1
Browse files- aip_trainer/lambdas/lambdaSpeechToScore.py +24 -17
- aip_trainer/utilsFileIO.py +0 -9
- {templates → static}/main.html +14 -15
- webApp.py +7 -6
aip_trainer/lambdas/lambdaSpeechToScore.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
-
|
2 |
import base64
|
3 |
import json
|
4 |
import os
|
|
|
5 |
import time
|
6 |
|
7 |
import audioread
|
@@ -11,7 +11,6 @@ from torchaudio.transforms import Resample
|
|
11 |
|
12 |
from aip_trainer import WordMatching as wm, app_logger
|
13 |
from aip_trainer import pronunciationTrainer
|
14 |
-
from aip_trainer import utilsFileIO
|
15 |
|
16 |
|
17 |
trainer_SST_lambda = {
|
@@ -23,7 +22,6 @@ transform = Resample(orig_freq=48000, new_freq=16000)
|
|
23 |
|
24 |
|
25 |
def lambda_handler(event, context):
|
26 |
-
|
27 |
data = json.loads(event['body'])
|
28 |
|
29 |
real_text = data['title']
|
@@ -43,21 +41,24 @@ def lambda_handler(event, context):
|
|
43 |
'body': ''
|
44 |
}
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
app_logger.info(f'Time for saving binary in file: {duration}.')
|
53 |
|
54 |
start = time.time()
|
|
|
55 |
signal, fs = audioread_load(random_file_name)
|
56 |
|
|
|
|
|
|
|
57 |
signal = transform(torch.Tensor(signal)).unsqueeze(0)
|
58 |
|
59 |
duration = time.time() - start
|
60 |
-
app_logger.info(f'
|
61 |
|
62 |
result = trainer_SST_lambda[language].processAudioForGivenText(
|
63 |
signal, real_text)
|
@@ -65,7 +66,7 @@ def lambda_handler(event, context):
|
|
65 |
start = time.time()
|
66 |
os.remove(random_file_name)
|
67 |
duration = time.time() - start
|
68 |
-
app_logger.info(f'
|
69 |
|
70 |
start = time.time()
|
71 |
real_transcripts_ipa = ' '.join(
|
@@ -83,7 +84,6 @@ def lambda_handler(event, context):
|
|
83 |
|
84 |
is_letter_correct_all_words = ''
|
85 |
for idx, word_real in enumerate(words_real):
|
86 |
-
|
87 |
mapped_letters, mapped_letters_indices = wm.get_best_mapped_words(
|
88 |
mapped_words[idx], word_real)
|
89 |
|
@@ -96,7 +96,8 @@ def lambda_handler(event, context):
|
|
96 |
pair_accuracy_category = ' '.join(
|
97 |
[str(category) for category in result['pronunciation_categories']])
|
98 |
duration = time.time() - start
|
99 |
-
|
|
|
100 |
|
101 |
res = {'real_transcript': result['recording_transcript'],
|
102 |
'ipa_transcript': result['recording_ipa'],
|
@@ -110,8 +111,12 @@ def lambda_handler(event, context):
|
|
110 |
|
111 |
return json.dumps(res)
|
112 |
|
|
|
113 |
# From Librosa
|
114 |
|
|
|
|
|
|
|
115 |
|
116 |
def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
|
117 |
"""Load an audio buffer using audioread.
|
@@ -120,17 +125,18 @@ def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
|
|
120 |
"""
|
121 |
|
122 |
y = []
|
|
|
123 |
with audioread.audio_open(path) as input_file:
|
124 |
sr_native = input_file.samplerate
|
125 |
n_channels = input_file.channels
|
126 |
|
127 |
-
s_start =
|
128 |
|
129 |
if duration is None:
|
130 |
s_end = np.inf
|
131 |
else:
|
132 |
-
|
133 |
-
|
134 |
|
135 |
n = 0
|
136 |
|
@@ -168,6 +174,7 @@ def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
|
|
168 |
|
169 |
return y, sr_native
|
170 |
|
|
|
171 |
# From Librosa
|
172 |
|
173 |
|
|
|
|
|
1 |
import base64
|
2 |
import json
|
3 |
import os
|
4 |
+
import tempfile
|
5 |
import time
|
6 |
|
7 |
import audioread
|
|
|
11 |
|
12 |
from aip_trainer import WordMatching as wm, app_logger
|
13 |
from aip_trainer import pronunciationTrainer
|
|
|
14 |
|
15 |
|
16 |
trainer_SST_lambda = {
|
|
|
22 |
|
23 |
|
24 |
def lambda_handler(event, context):
|
|
|
25 |
data = json.loads(event['body'])
|
26 |
|
27 |
real_text = data['title']
|
|
|
41 |
'body': ''
|
42 |
}
|
43 |
|
44 |
+
start0 = time.time()
|
45 |
+
with tempfile.NamedTemporaryFile(prefix="temp_sound_speech_score_", suffix=".ogg", delete=False) as f1:
|
46 |
+
f1.write(file_bytes)
|
47 |
+
duration = time.time() - start0
|
48 |
+
app_logger.info(f'Saved binary in file in {duration}s.')
|
49 |
+
random_file_name = f1.name
|
|
|
50 |
|
51 |
start = time.time()
|
52 |
+
app_logger.info(f'Loading .ogg file file {random_file_name} ...')
|
53 |
signal, fs = audioread_load(random_file_name)
|
54 |
|
55 |
+
duration = time.time() - start
|
56 |
+
app_logger.info(f'Read .ogg file {random_file_name} in {duration}s.')
|
57 |
+
|
58 |
signal = transform(torch.Tensor(signal)).unsqueeze(0)
|
59 |
|
60 |
duration = time.time() - start
|
61 |
+
app_logger.info(f'Loaded .ogg file {random_file_name} in {duration}s.')
|
62 |
|
63 |
result = trainer_SST_lambda[language].processAudioForGivenText(
|
64 |
signal, real_text)
|
|
|
66 |
start = time.time()
|
67 |
os.remove(random_file_name)
|
68 |
duration = time.time() - start
|
69 |
+
app_logger.info(f'Deleted file {random_file_name} in {duration}s.')
|
70 |
|
71 |
start = time.time()
|
72 |
real_transcripts_ipa = ' '.join(
|
|
|
84 |
|
85 |
is_letter_correct_all_words = ''
|
86 |
for idx, word_real in enumerate(words_real):
|
|
|
87 |
mapped_letters, mapped_letters_indices = wm.get_best_mapped_words(
|
88 |
mapped_words[idx], word_real)
|
89 |
|
|
|
96 |
pair_accuracy_category = ' '.join(
|
97 |
[str(category) for category in result['pronunciation_categories']])
|
98 |
duration = time.time() - start
|
99 |
+
duration_tot = time.time() - start0
|
100 |
+
app_logger.info(f'Time to post-process results: {duration}, tot_duration:{duration_tot}.')
|
101 |
|
102 |
res = {'real_transcript': result['recording_transcript'],
|
103 |
'ipa_transcript': result['recording_ipa'],
|
|
|
111 |
|
112 |
return json.dumps(res)
|
113 |
|
114 |
+
|
115 |
# From Librosa
|
116 |
|
117 |
+
def calc_start_end(sr_native, time_position, n_channels):
|
118 |
+
return int(np.round(sr_native * time_position)) * n_channels
|
119 |
+
|
120 |
|
121 |
def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
|
122 |
"""Load an audio buffer using audioread.
|
|
|
125 |
"""
|
126 |
|
127 |
y = []
|
128 |
+
app_logger.debug(f"reading audio file at path:{path} ...")
|
129 |
with audioread.audio_open(path) as input_file:
|
130 |
sr_native = input_file.samplerate
|
131 |
n_channels = input_file.channels
|
132 |
|
133 |
+
s_start = calc_start_end(sr_native, offset, n_channels)
|
134 |
|
135 |
if duration is None:
|
136 |
s_end = np.inf
|
137 |
else:
|
138 |
+
duration = calc_start_end(sr_native, duration, n_channels)
|
139 |
+
s_end = duration + s_start
|
140 |
|
141 |
n = 0
|
142 |
|
|
|
174 |
|
175 |
return y, sr_native
|
176 |
|
177 |
+
|
178 |
# From Librosa
|
179 |
|
180 |
|
aip_trainer/utilsFileIO.py
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
import string
|
2 |
-
import random
|
3 |
-
|
4 |
-
|
5 |
-
def generateRandomString(str_length: int = 20):
|
6 |
-
|
7 |
-
# printing lowercase
|
8 |
-
letters = string.ascii_lowercase
|
9 |
-
return ''.join(random.choice(letters) for i in range(str_length))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{templates → static}/main.html
RENAMED
@@ -4,23 +4,22 @@
|
|
4 |
<head>
|
5 |
<meta charset="UTF-8">
|
6 |
<meta name="viewport" content="width=device-width, initial-scale=1">
|
7 |
-
|
8 |
-
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"
|
9 |
-
integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3" crossorigin="anonymous">
|
10 |
-
|
11 |
-
</script>
|
12 |
-
<link rel="stylesheet" href="../static/css/style-new.css">
|
13 |
-
<script src="../static/javascript/callbacks.js"></script>
|
14 |
-
|
15 |
-
|
16 |
<title>AI pronunciation trainer</title>
|
17 |
|
18 |
-
<
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
</head>
|
25 |
|
26 |
<body style="height: 100%; width: 100%; background-color: white; max-width: 90%;">
|
|
|
4 |
<head>
|
5 |
<meta charset="UTF-8">
|
6 |
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
<title>AI pronunciation trainer</title>
|
8 |
|
9 |
+
<link href="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.3.3/css/bootstrap.min.css"
|
10 |
+
crossorigin="anonymous"
|
11 |
+
integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH"
|
12 |
+
rel="stylesheet"
|
13 |
+
/>
|
14 |
+
<script src="https://code.jquery.com/jquery-3.7.1.js"
|
15 |
+
crossorigin="anonymous"
|
16 |
+
integrity="sha384-wsqsSADZR1YRBEZ4/kKHNSmU+aX8ojbnKUMN4RyD3jDkxw5mHtoe2z/T/n4l56U/"
|
17 |
+
type="text/javascript"
|
18 |
+
></script>
|
19 |
+
|
20 |
+
<script src="static/javascript/callbacks.js"></script>
|
21 |
+
<link rel="stylesheet" href="static/css/style-new.css" />
|
22 |
+
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet" />
|
23 |
</head>
|
24 |
|
25 |
<body style="height: 100%; width: 100%; background-color: white; max-width: 90%;">
|
webApp.py
CHANGED
@@ -1,15 +1,16 @@
|
|
1 |
-
|
2 |
-
import webbrowser
|
3 |
import os
|
|
|
|
|
|
|
4 |
from flask_cors import CORS
|
5 |
-
import json
|
6 |
|
7 |
-
from aip_trainer.lambdas import lambdaTTS
|
8 |
-
from aip_trainer.lambdas import lambdaSpeechToScore
|
9 |
from aip_trainer.lambdas import lambdaGetSample
|
|
|
|
|
10 |
|
11 |
|
12 |
-
app = Flask(__name__)
|
13 |
cors = CORS(app)
|
14 |
app.config['CORS_HEADERS'] = '*'
|
15 |
|
|
|
1 |
+
import json
|
|
|
2 |
import os
|
3 |
+
import webbrowser
|
4 |
+
|
5 |
+
from flask import Flask, render_template, request
|
6 |
from flask_cors import CORS
|
|
|
7 |
|
|
|
|
|
8 |
from aip_trainer.lambdas import lambdaGetSample
|
9 |
+
from aip_trainer.lambdas import lambdaSpeechToScore
|
10 |
+
from aip_trainer.lambdas import lambdaTTS
|
11 |
|
12 |
|
13 |
+
app = Flask(__name__, template_folder="static")
|
14 |
cors = CORS(app)
|
15 |
app.config['CORS_HEADERS'] = '*'
|
16 |
|