Update app.py
app.py CHANGED
@@ -68,12 +68,10 @@ os.system("gsutil -q -m cp gs://magentadata/soundfonts/SGM-v2.01-Sal-Guit-Bass-V
 
 
 
-
 import functools
 import os
 
 import numpy as np
-
 import tensorflow.compat.v2 as tf
 
 import functools
@@ -85,24 +83,28 @@ import seqio
 import t5
 import t5x
 
-
-from mt3 import spectrograms
-from mt3 import vocabularies
 from mt3 import metrics_utils
 from mt3 import models
 from mt3 import network
 from mt3 import note_sequences
 from mt3 import preprocessors
+from mt3 import spectrograms
+from mt3 import vocabularies
+
+
 
 import nest_asyncio
 nest_asyncio.apply()
 
+SAMPLE_RATE = 16000
 SF2_PATH = 'SGM-v2.01-Sal-Guit-Bass-V1.3.sf2'
 
-def upload_audio(
+def upload_audio(sample_rate):
+  data = list(files.upload().values())
+  if len(data) > 1:
+    print('Multiple files uploaded; using only one.')
   return note_seq.audio_io.wav_data_to_samples_librosa(
-
-
+      data[0], sample_rate=sample_rate)
 
 
 
@@ -123,16 +125,16 @@ class InferenceModel(object):
     else:
       raise ValueError('unknown model_type: %s' % model_type)
 
-    gin_files = ['/
-                 '/
+    gin_files = ['/content/mt3/gin/model.gin',
+                 f'/content/mt3/gin/{model_type}.gin']
 
     self.batch_size = 8
     self.outputs_length = 1024
-    self.sequence_length = {'inputs': self.inputs_length,
+    self.sequence_length = {'inputs': self.inputs_length,
                             'targets': self.outputs_length}
 
     self.partitioner = t5x.partitioning.PjitPartitioner(
-
+        num_partitions=1)
 
     # Build Codecs and Vocabularies.
     self.spectrogram_config = spectrograms.SpectrogramConfig()
@@ -221,9 +223,10 @@ class InferenceModel(object):
 
   def __call__(self, audio):
     """Infer note sequence from audio samples.
-
+
     Args:
      audio: 1-d numpy array of audio samples (16kHz) for a single example.
+
     Returns:
      A note_sequence of the transcribed audio.
     """
@@ -312,6 +315,7 @@ class InferenceModel(object):
 
 
 
+
 inference_model = InferenceModel('/home/user/app/checkpoints/mt3/', 'mt3')
 
 
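For reference, a minimal usage sketch of how the globals touched by this diff (SAMPLE_RATE, note_seq's audio helpers, and inference_model) fit together. It follows the original MT3 colab flow rather than anything added in this commit, and the WAV/MIDI paths are placeholders:

import note_seq

# Read raw WAV bytes from a placeholder path and decode them to 16 kHz samples,
# mirroring the wav_data_to_samples_librosa call used in upload_audio above.
with open('example.wav', 'rb') as f:
  wav_data = f.read()
audio = note_seq.audio_io.wav_data_to_samples_librosa(
    wav_data, sample_rate=SAMPLE_RATE)

# InferenceModel.__call__ returns a NoteSequence proto for the whole clip.
est_ns = inference_model(audio)

# Serialize the transcription to a MIDI file.
note_seq.sequence_proto_to_midi_file(est_ns, 'transcribed.mid')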