alessandro trinca tornidor committed on
Commit
5abbb8c
·
1 Parent(s): bfec55c

refactor: remove use of utilsFileIO.py, update bootstrap@5.3.3 and jquery@3.7.1

Browse files
aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED
@@ -1,7 +1,7 @@
1
-
2
  import base64
3
  import json
4
  import os
 
5
  import time
6
 
7
  import audioread
@@ -11,7 +11,6 @@ from torchaudio.transforms import Resample
11
 
12
  from aip_trainer import WordMatching as wm, app_logger
13
  from aip_trainer import pronunciationTrainer
14
- from aip_trainer import utilsFileIO
15
 
16
 
17
  trainer_SST_lambda = {
@@ -23,7 +22,6 @@ transform = Resample(orig_freq=48000, new_freq=16000)
23
 
24
 
25
  def lambda_handler(event, context):
26
-
27
  data = json.loads(event['body'])
28
 
29
  real_text = data['title']
@@ -43,21 +41,24 @@ def lambda_handler(event, context):
43
  'body': ''
44
  }
45
 
46
- start = time.time()
47
- random_file_name = './' + utilsFileIO.generateRandomString() + '.ogg'
48
- f = open(random_file_name, 'wb')
49
- f.write(file_bytes)
50
- f.close()
51
- duration = time.time() - start
52
- app_logger.info(f'Time for saving binary in file: {duration}.')
53
 
54
  start = time.time()
 
55
  signal, fs = audioread_load(random_file_name)
56
 
 
 
 
57
  signal = transform(torch.Tensor(signal)).unsqueeze(0)
58
 
59
  duration = time.time() - start
60
- app_logger.info(f'Time for loading .ogg file file: {duration}.')
61
 
62
  result = trainer_SST_lambda[language].processAudioForGivenText(
63
  signal, real_text)
@@ -65,7 +66,7 @@ def lambda_handler(event, context):
65
  start = time.time()
66
  os.remove(random_file_name)
67
  duration = time.time() - start
68
- app_logger.info(f'Time for deleting file: {duration}')
69
 
70
  start = time.time()
71
  real_transcripts_ipa = ' '.join(
@@ -83,7 +84,6 @@ def lambda_handler(event, context):
83
 
84
  is_letter_correct_all_words = ''
85
  for idx, word_real in enumerate(words_real):
86
-
87
  mapped_letters, mapped_letters_indices = wm.get_best_mapped_words(
88
  mapped_words[idx], word_real)
89
 
@@ -96,7 +96,8 @@ def lambda_handler(event, context):
96
  pair_accuracy_category = ' '.join(
97
  [str(category) for category in result['pronunciation_categories']])
98
  duration = time.time() - start
99
- app_logger.info(f'Time to post-process results: {duration}')
 
100
 
101
  res = {'real_transcript': result['recording_transcript'],
102
  'ipa_transcript': result['recording_ipa'],
@@ -110,8 +111,12 @@ def lambda_handler(event, context):
110
 
111
  return json.dumps(res)
112
 
 
113
  # From Librosa
114
 
 
 
 
115
 
116
  def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
117
  """Load an audio buffer using audioread.
@@ -120,17 +125,18 @@ def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
120
  """
121
 
122
  y = []
 
123
  with audioread.audio_open(path) as input_file:
124
  sr_native = input_file.samplerate
125
  n_channels = input_file.channels
126
 
127
- s_start = int(np.round(sr_native * offset)) * n_channels
128
 
129
  if duration is None:
130
  s_end = np.inf
131
  else:
132
- s_end = s_start + \
133
- (int(np.round(sr_native * duration)) * n_channels)
134
 
135
  n = 0
136
 
@@ -168,6 +174,7 @@ def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
168
 
169
  return y, sr_native
170
 
 
171
  # From Librosa
172
 
173
 
 
 
1
  import base64
2
  import json
3
  import os
4
+ import tempfile
5
  import time
6
 
7
  import audioread
 
11
 
12
  from aip_trainer import WordMatching as wm, app_logger
13
  from aip_trainer import pronunciationTrainer
 
14
 
15
 
16
  trainer_SST_lambda = {
 
22
 
23
 
24
  def lambda_handler(event, context):
 
25
  data = json.loads(event['body'])
26
 
27
  real_text = data['title']
 
41
  'body': ''
42
  }
43
 
44
+ start0 = time.time()
45
+ with tempfile.NamedTemporaryFile(prefix="temp_sound_speech_score_", suffix=".ogg", delete=False) as f1:
46
+ f1.write(file_bytes)
47
+ duration = time.time() - start0
48
+ app_logger.info(f'Saved binary in file in {duration}s.')
49
+ random_file_name = f1.name
 
50
 
51
  start = time.time()
52
+ app_logger.info(f'Loading .ogg file file {random_file_name} ...')
53
  signal, fs = audioread_load(random_file_name)
54
 
55
+ duration = time.time() - start
56
+ app_logger.info(f'Read .ogg file {random_file_name} in {duration}s.')
57
+
58
  signal = transform(torch.Tensor(signal)).unsqueeze(0)
59
 
60
  duration = time.time() - start
61
+ app_logger.info(f'Loaded .ogg file {random_file_name} in {duration}s.')
62
 
63
  result = trainer_SST_lambda[language].processAudioForGivenText(
64
  signal, real_text)
 
66
  start = time.time()
67
  os.remove(random_file_name)
68
  duration = time.time() - start
69
+ app_logger.info(f'Deleted file {random_file_name} in {duration}s.')
70
 
71
  start = time.time()
72
  real_transcripts_ipa = ' '.join(
 
84
 
85
  is_letter_correct_all_words = ''
86
  for idx, word_real in enumerate(words_real):
 
87
  mapped_letters, mapped_letters_indices = wm.get_best_mapped_words(
88
  mapped_words[idx], word_real)
89
 
 
96
  pair_accuracy_category = ' '.join(
97
  [str(category) for category in result['pronunciation_categories']])
98
  duration = time.time() - start
99
+ duration_tot = time.time() - start0
100
+ app_logger.info(f'Time to post-process results: {duration}, tot_duration:{duration_tot}.')
101
 
102
  res = {'real_transcript': result['recording_transcript'],
103
  'ipa_transcript': result['recording_ipa'],
 
111
 
112
  return json.dumps(res)
113
 
114
+
115
  # From Librosa
116
 
117
+ def calc_start_end(sr_native, time_position, n_channels):
118
+ return int(np.round(sr_native * time_position)) * n_channels
119
+
120
 
121
  def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
122
  """Load an audio buffer using audioread.
 
125
  """
126
 
127
  y = []
128
+ app_logger.debug(f"reading audio file at path:{path} ...")
129
  with audioread.audio_open(path) as input_file:
130
  sr_native = input_file.samplerate
131
  n_channels = input_file.channels
132
 
133
+ s_start = calc_start_end(sr_native, offset, n_channels)
134
 
135
  if duration is None:
136
  s_end = np.inf
137
  else:
138
+ duration = calc_start_end(sr_native, duration, n_channels)
139
+ s_end = duration + s_start
140
 
141
  n = 0
142
 
 
174
 
175
  return y, sr_native
176
 
177
+
178
  # From Librosa
179
 
180
 
aip_trainer/utilsFileIO.py DELETED
@@ -1,9 +0,0 @@
1
- import string
2
- import random
3
-
4
-
5
- def generateRandomString(str_length: int = 20):
6
-
7
- # printing lowercase
8
- letters = string.ascii_lowercase
9
- return ''.join(random.choice(letters) for i in range(str_length))
 
 
 
 
 
 
 
 
 
 
{templates → static}/main.html RENAMED
@@ -4,23 +4,22 @@
4
  <head>
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1">
7
-
8
- <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet"
9
- integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3" crossorigin="anonymous">
10
-
11
- </script>
12
- <link rel="stylesheet" href="../static/css/style-new.css">
13
- <script src="../static/javascript/callbacks.js"></script>
14
-
15
-
16
  <title>AI pronunciation trainer</title>
17
 
18
- <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js" type="text/javascript"></script>
19
-
20
- <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
21
-
22
-
23
-
 
 
 
 
 
 
 
 
24
  </head>
25
 
26
  <body style="height: 100%; width: 100%; background-color: white; max-width: 90%;">
 
4
  <head>
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1">
 
 
 
 
 
 
 
 
 
7
  <title>AI pronunciation trainer</title>
8
 
9
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.3.3/css/bootstrap.min.css"
10
+ crossorigin="anonymous"
11
+ integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH"
12
+ rel="stylesheet"
13
+ />
14
+ <script src="https://code.jquery.com/jquery-3.7.1.js"
15
+ crossorigin="anonymous"
16
+ integrity="sha384-wsqsSADZR1YRBEZ4/kKHNSmU+aX8ojbnKUMN4RyD3jDkxw5mHtoe2z/T/n4l56U/"
17
+ type="text/javascript"
18
+ ></script>
19
+
20
+ <script src="static/javascript/callbacks.js"></script>
21
+ <link rel="stylesheet" href="static/css/style-new.css" />
22
+ <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet" />
23
  </head>
24
 
25
  <body style="height: 100%; width: 100%; background-color: white; max-width: 90%;">
webApp.py CHANGED
@@ -1,15 +1,16 @@
1
- from flask import Flask, render_template, request
2
- import webbrowser
3
  import os
 
 
 
4
  from flask_cors import CORS
5
- import json
6
 
7
- from aip_trainer.lambdas import lambdaTTS
8
- from aip_trainer.lambdas import lambdaSpeechToScore
9
  from aip_trainer.lambdas import lambdaGetSample
 
 
10
 
11
 
12
- app = Flask(__name__)
13
  cors = CORS(app)
14
  app.config['CORS_HEADERS'] = '*'
15
 
 
1
+ import json
 
2
  import os
3
+ import webbrowser
4
+
5
+ from flask import Flask, render_template, request
6
  from flask_cors import CORS
 
7
 
 
 
8
  from aip_trainer.lambdas import lambdaGetSample
9
+ from aip_trainer.lambdas import lambdaSpeechToScore
10
+ from aip_trainer.lambdas import lambdaTTS
11
 
12
 
13
+ app = Flask(__name__, template_folder="static")
14
  cors = CORS(app)
15
  app.config['CORS_HEADERS'] = '*'
16