LAURENT Valentin committed on
Commit
f6be1a2
·
1 Parent(s): 370db2b

pca version

Browse files
Files changed (1) hide show
  1. tasks/audio.py +22 -15
tasks/audio.py CHANGED
@@ -18,23 +18,25 @@ router = APIRouter()
18
 
19
  DESCRIPTION = "Random Baseline"
20
  ROUTE = "/audio"
21
- def create_spec(dataset):
 
22
  spectograms = []
23
-
24
  for d in dataset:
25
- audio_sample = d["audio"]["array"] if d["audio"]["sampling_rate"] == 12000 else librosa.resample(
26
- d["audio"]["array"],
27
- orig_sr= d["audio"]["sampling_rate"],
28
- target_sr=12000
29
- )
 
30
  if len(audio_sample) == 0:
31
  continue
32
- if len(audio_sample) < 36000:
33
- padding_needed = 36000 - len(audio_sample)
34
  repeats = (padding_needed // len(audio_sample)) + 1
35
- audio_sample = np.concatenate([audio_sample] + [audio_sample[:padding_needed]] * repeats)[:36000]
36
- elif len(audio_sample) > 36000:
37
- audio_sample = audio_sample[:36000]
38
 
39
  rms = np.sqrt(np.mean(np.square(audio_sample)))
40
  scalar = 10 ** (-20 / 20) / (rms + 1e-8)
@@ -43,13 +45,13 @@ def create_spec(dataset):
43
  y=audio_sample*scalar,
44
  sr=12000,
45
  n_fft=2048,
46
- hop_length=512,
47
- n_mels=32,
48
  power=2.0,
49
  )
50
  mel_db = librosa.power_to_db(mel, ref=np.max)
51
  mel_db_normalized = (mel_db - mel_db.mean()) / (mel_db.std() + 1e-8)
52
- spectograms.append(np.float16(mel_db_normalized).T.flatten())
53
 
54
  return np.stack(spectograms)
55
 
@@ -88,6 +90,11 @@ async def evaluate_audio(request: AudioEvaluationRequest):
88
  H = np.load("H.npy")
89
  W_test = np.dot(test_spec, H)
90
  model = joblib.load('model.joblib')
 
 
 
 
 
91
  #--------------------------------------------------------------------------------------------
92
  # YOUR MODEL INFERENCE CODE HERE
93
  # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
 
18
 
19
  DESCRIPTION = "Random Baseline"
20
  ROUTE = "/audio"
21
+
22
+ def create_spec(dataset, target_sampling_rate=3000):
23
  spectograms = []
24
+ audio_length = int(36000/(12000/target_sampling_rate))
25
  for d in dataset:
26
+ audio_sample = librosa.resample(
27
+ d["audio"]["array"],
28
+ orig_sr= d["audio"]["sampling_rate"],
29
+ target_sr=target_sampling_rate
30
+ )
31
+
32
  if len(audio_sample) == 0:
33
  continue
34
+ if len(audio_sample) < audio_length:
35
+ padding_needed = audio_length - len(audio_sample)
36
  repeats = (padding_needed // len(audio_sample)) + 1
37
+ audio_sample = np.concatenate([audio_sample] + [audio_sample[:padding_needed]] * repeats)[:audio_length]
38
+ elif len(audio_sample) > audio_length:
39
+ audio_sample = audio_sample[:audio_length]
40
 
41
  rms = np.sqrt(np.mean(np.square(audio_sample)))
42
  scalar = 10 ** (-20 / 20) / (rms + 1e-8)
 
45
  y=audio_sample*scalar,
46
  sr=12000,
47
  n_fft=2048,
48
+ hop_length=1024,
49
+ n_mels=12,
50
  power=2.0,
51
  )
52
  mel_db = librosa.power_to_db(mel, ref=np.max)
53
  mel_db_normalized = (mel_db - mel_db.mean()) / (mel_db.std() + 1e-8)
54
+ spectograms.append(mel_db_normalized.T.flatten())
55
 
56
  return np.stack(spectograms)
57
 
 
90
  H = np.load("H.npy")
91
  W_test = np.dot(test_spec, H)
92
  model = joblib.load('model.joblib')
93
+
94
+ test_spec = create_spec(test_dataset)
95
+ model = joblib.load("3000_40_100/model.joblib")
96
+ H = np.load("3000_40_100/H.npy")
97
+ W_test = np.dot(test_spec, H)
98
  #--------------------------------------------------------------------------------------------
99
  # YOUR MODEL INFERENCE CODE HERE
100
  # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.