TobDeBer committed
Commit 493ca62 · Parent: e0a1eb7

remove mic

Files changed (1): app.py (+13 −53)
app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-#
+
 from transformers import Wav2Vec2FeatureExtractor
 from transformers import AutoModel
 import torch
@@ -18,7 +18,6 @@ import importlib
 modeling_MERT = importlib.import_module("MERT-v1-95M.modeling_MERT")
 
 from Prediction_Head.MTGGenre_head import MLPProberBase
-# input cr: https://huggingface.co/spaces/thealphhamerc/audio-to-text/blob/main/app.py
 
 
 logger = logging.getLogger("MERT-v1-95M-app")
@@ -34,13 +33,9 @@ inputs = [
     gr.components.Audio(type="filepath", label="Add music audio file"),
     gr.inputs.Audio(source="microphone", type="filepath"),
 ]
-live_inputs = [
-    gr.Audio(source="microphone", streaming=True, type="filepath"),
-]
 
 title = "One Model for All Music Understanding Tasks"
 description = "An example of using the [MERT-v1-95M](https://huggingface.co/m-a-p/MERT-v1-95M) model as backbone to conduct multiple music understanding tasks with the universal representation. \n Due the hardware limitation of the machine hosting this demo (2 CPU and 16GB RAM) only the first 4 seconds of audio are used!"
-# article = "The tasks include EMO, GS, MTGInstrument, MTGGenre, MTGTop50, MTGMood, NSynthI, NSynthP, VocalSetS, VocalSetT. \n\n More models can be referred at the [map organization page](https://huggingface.co/m-a-p)."
 with open('./README.md', 'r') as f:
     # skip the header
     header_count = 0
@@ -52,11 +47,6 @@ with open('./README.md', 'r') as f:
     # read the rest conent
     article = f.read()
 
-audio_examples = [
-    # ["input/example-1.wav"],
-    # ["input/example-2.wav"],
-]
-
 df_init = pd.DataFrame(columns=['Task', 'Top 1', 'Top 2', 'Top 3', 'Top 4', 'Top 5'])
 transcription_df = gr.DataFrame(value=df_init, label="Output Dataframe", row_count=(
     0, "dynamic"), max_rows=30, wrap=True, overflow_row_behaviour='paginate')
@@ -128,7 +118,7 @@ for task in TASKS:
     model.to(device)
 
 
-def model_infernce(inputs):
+def model_inference(inputs):
     waveform, sample_rate = torchaudio.load(inputs)
 
     resample_rate = processor.sampling_rate
@@ -194,50 +184,20 @@ def model_infernce(inputs):
 def convert_audio(inputs, microphone):
     if (microphone is not None):
         inputs = microphone
-    df = model_infernce(inputs)
+    df = model_inference(inputs)
     return df
 
-def live_convert_audio(microphone):
-    if (microphone is not None):
-        inputs = microphone
-    df = model_infernce(inputs)
-    return df
-
-audio_chunked = gr.Interface(
-    fn=convert_audio,
-    inputs=inputs,
-    outputs=outputs,
-    allow_flagging="never",
-    title=title,
-    description=description,
-    article=article,
-    examples=audio_examples,
-)
-
-live_audio_chunked = gr.Interface(
-    fn=live_convert_audio,
-    inputs=live_inputs,
-    outputs=outputs_live,
-    allow_flagging="never",
-    title=title,
-    description=description,
-    article=article,
-    # examples=audio_examples,
-    live=True,
-)
-
-
 demo = gr.Blocks()
 with demo:
-    gr.TabbedInterface(
-        [
-            audio_chunked,
-            live_audio_chunked,
-        ],
-        [
-            "Audio File or Recording",
-            "Live Streaming Music"
-        ]
+    gr.Interface(
+        fn=convert_audio,
+        inputs=inputs,
+        outputs=outputs,
+        allow_flagging="never",
+        title=title,
+        description=description,
+        article=article,
     )
+
 # demo.queue(concurrency_count=1, max_size=5)
-demo.launch(show_api=False)
+demo.launch()
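The `model_inference` hunk above breaks off right after `resample_rate = processor.sampling_rate`. For readers reconstructing the flow, here is a minimal sketch of how such a load-and-resample step is typically completed with `torchaudio.transforms.Resample`; the helper name, the mono mixdown, and the standalone function are illustrative assumptions, not lines from this commit.

```python
# Sketch only: load a clip and resample it to the feature extractor's rate.
import torch
import torchaudio
import torchaudio.transforms as T

def load_and_resample(path: str, target_rate: int) -> torch.Tensor:
    waveform, sample_rate = torchaudio.load(path)  # (channels, samples)
    if sample_rate != target_rate:
        waveform = T.Resample(orig_freq=sample_rate, new_freq=target_rate)(waveform)
    # Mix down to mono (assumption: the model consumes a single channel).
    return waveform.mean(dim=0)
```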
 
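For context on what `model_inference` feeds the probe heads: the [MERT-v1-95M](https://huggingface.co/m-a-p/MERT-v1-95M) model card documents the extraction pattern sketched below. Note this Space loads the modeling code via `importlib` rather than `trust_remote_code`, so treat this as the equivalent model-card route, not this app's exact code; `mono_waveform` is a placeholder for a resampled 1-D array.

```python
import torch
from transformers import Wav2Vec2FeatureExtractor, AutoModel

# trust_remote_code is needed because MERT ships custom modeling code.
processor = Wav2Vec2FeatureExtractor.from_pretrained("m-a-p/MERT-v1-95M", trust_remote_code=True)
model = AutoModel.from_pretrained("m-a-p/MERT-v1-95M", trust_remote_code=True)

features = processor(mono_waveform, sampling_rate=processor.sampling_rate, return_tensors="pt")
with torch.no_grad():
    outputs = model(**features, output_hidden_states=True)

# Stack per-layer hidden states -> (layers, time, hidden); per-task heads
# such as MLPProberBase read from these layer representations.
all_layer_states = torch.stack(outputs.hidden_states).squeeze()
```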