oniati committed
Commit f88fd30 · verified · 1 Parent(s): 23bd4c2

Update app.py

Files changed (1)
  1. app.py +17 -13
app.py CHANGED
@@ -68,12 +68,10 @@ os.system("gsutil -q -m cp gs://magentadata/soundfonts/SGM-v2.01-Sal-Guit-Bass-V
 
 
 
-
 import functools
 import os
 
 import numpy as np
-
 import tensorflow.compat.v2 as tf
 
 import functools
@@ -85,24 +83,28 @@ import seqio
 import t5
 import t5x
 
-
-from mt3 import spectrograms
-from mt3 import vocabularies
 from mt3 import metrics_utils
 from mt3 import models
 from mt3 import network
 from mt3 import note_sequences
 from mt3 import preprocessors
+from mt3 import spectrograms
+from mt3 import vocabularies
+
+
 
 import nest_asyncio
 nest_asyncio.apply()
 
+SAMPLE_RATE = 16000
 SF2_PATH = 'SGM-v2.01-Sal-Guit-Bass-V1.3.sf2'
 
-def upload_audio(audio, sample_rate):
+def upload_audio(sample_rate):
+  data = list(files.upload().values())
+  if len(data) > 1:
+    print('Multiple files uploaded; using only one.')
   return note_seq.audio_io.wav_data_to_samples_librosa(
-      audio, sample_rate=sample_rate)
-
+      data[0], sample_rate=sample_rate)
 
 
 
@@ -123,16 +125,16 @@ class InferenceModel(object):
     else:
      raise ValueError('unknown model_type: %s' % model_type)
 
-    gin_files = ['/home/user/app/mt3/gin/model.gin',
-                 '/home/user/app/mt3/gin/mt3.gin']
+    gin_files = ['/content/mt3/gin/model.gin',
+                 f'/content/mt3/gin/{model_type}.gin']
 
    self.batch_size = 8
    self.outputs_length = 1024
-    self.sequence_length = {'inputs': self.inputs_length,
+    self.sequence_length = {'inputs': self.inputs_length,
                            'targets': self.outputs_length}
 
    self.partitioner = t5x.partitioning.PjitPartitioner(
-        model_parallel_submesh=None, num_partitions=1)
+        num_partitions=1)
 
    # Build Codecs and Vocabularies.
    self.spectrogram_config = spectrograms.SpectrogramConfig()
@@ -221,9 +223,10 @@ class InferenceModel(object):
 
  def __call__(self, audio):
    """Infer note sequence from audio samples.
-
+
    Args:
      audio: 1-d numpy array of audio samples (16kHz) for a single example.
+
    Returns:
      A note_sequence of the transcribed audio.
    """
@@ -312,6 +315,7 @@ class InferenceModel(object):
 
 
 
+
 inference_model = InferenceModel('/home/user/app/checkpoints/mt3/', 'mt3')
 
 
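
For context, a minimal usage sketch of the revised helper, not part of this commit: it assumes files here is google.colab.files (as in the upstream MT3 Colab this code is adapted from), that note_seq is imported earlier in app.py, and that 'transcribed.mid' is an arbitrary output name.

# Hypothetical usage sketch only; names other than upload_audio, SAMPLE_RATE and
# inference_model come from the surrounding app.py, not from this diff.
audio = upload_audio(sample_rate=SAMPLE_RATE)  # 1-d float samples at 16 kHz
est_ns = inference_model(audio)  # transcribe the samples to a NoteSequence proto
note_seq.sequence_proto_to_midi_file(est_ns, 'transcribed.mid')  # write the result as MIDI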