remove mic
app.py
CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-
+
 from transformers import Wav2Vec2FeatureExtractor
 from transformers import AutoModel
 import torch
@@ -18,7 +18,6 @@ import importlib
 modeling_MERT = importlib.import_module("MERT-v1-95M.modeling_MERT")
 
 from Prediction_Head.MTGGenre_head import MLPProberBase
-# input cr: https://huggingface.co/spaces/thealphhamerc/audio-to-text/blob/main/app.py
 
 
 logger = logging.getLogger("MERT-v1-95M-app")
@@ -34,13 +33,9 @@ inputs = [
     gr.components.Audio(type="filepath", label="Add music audio file"),
     gr.inputs.Audio(source="microphone", type="filepath"),
 ]
-live_inputs = [
-    gr.Audio(source="microphone",streaming=True, type="filepath"),
-]
 
 title = "One Model for All Music Understanding Tasks"
 description = "An example of using the [MERT-v1-95M](https://huggingface.co/m-a-p/MERT-v1-95M) model as backbone to conduct multiple music understanding tasks with the universal representation. \n Due the hardware limitation of the machine hosting this demo (2 CPU and 16GB RAM) only the first 4 seconds of audio are used!"
-# article = "The tasks include EMO, GS, MTGInstrument, MTGGenre, MTGTop50, MTGMood, NSynthI, NSynthP, VocalSetS, VocalSetT. \n\n More models can be referred at the [map organization page](https://huggingface.co/m-a-p)."
 with open('./README.md', 'r') as f:
     # skip the header
     header_count = 0
@@ -52,11 +47,6 @@ with open('./README.md', 'r') as f:
     # read the rest conent
     article = f.read()
 
-audio_examples = [
-    # ["input/example-1.wav"],
-    # ["input/example-2.wav"],
-]
-
 df_init = pd.DataFrame(columns=['Task', 'Top 1', 'Top 2', 'Top 3', 'Top 4', 'Top 5'])
 transcription_df = gr.DataFrame(value=df_init, label="Output Dataframe", row_count=(
     0, "dynamic"), max_rows=30, wrap=True, overflow_row_behaviour='paginate')
@@ -128,7 +118,7 @@ for task in TASKS:
     model.to(device)
 
 
-def model_infernce(inputs):
+def model_inference(inputs):
     waveform, sample_rate = torchaudio.load(inputs)
 
     resample_rate = processor.sampling_rate
@@ -194,50 +184,20 @@ def model_infernce(inputs):
 def convert_audio(inputs, microphone):
     if (microphone is not None):
         inputs = microphone
-    df = model_infernce(inputs)
+    df = model_inference(inputs)
     return df
 
-def live_convert_audio(microphone):
-    if (microphone is not None):
-        inputs = microphone
-    df = model_infernce(inputs)
-    return df
-
-audio_chunked = gr.Interface(
-    fn=convert_audio,
-    inputs=inputs,
-    outputs=outputs,
-    allow_flagging="never",
-    title=title,
-    description=description,
-    article=article,
-    examples=audio_examples,
-)
-
-live_audio_chunked = gr.Interface(
-    fn=live_convert_audio,
-    inputs=live_inputs,
-    outputs=outputs_live,
-    allow_flagging="never",
-    title=title,
-    description=description,
-    article=article,
-    # examples=audio_examples,
-    live=True,
-)
-
-
 demo = gr.Blocks()
 with demo:
-    gr.
-
-
-
-
-
-
-
-        ]
+    gr.Interface(
+        fn=convert_audio,
+        inputs=inputs,
+        outputs=outputs,
+        allow_flagging="never",
+        title=title,
+        description=description,
+        article=article,
     )
+
 # demo.queue(concurrency_count=1, max_size=5)
-demo.launch(
+demo.launch()
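
For reference, after this commit the Space's UI reduces to a single gr.Interface rendered inside a gr.Blocks container, with the live/streaming microphone tab removed and the model_infernce typo fixed to model_inference. Below is a minimal runnable sketch of that wiring, assuming the Gradio 3.x API this file targets; the stub convert_audio stands in for the real function, which runs MERT feature extraction and the per-task probe heads.

import gradio as gr
import pandas as pd

# Stub standing in for the Space's model_inference(): the real function loads
# the audio, resamples it to the MERT processor's rate, and fills one row of
# top-5 predictions per task.
def convert_audio(audio_path, microphone=None):
    if microphone is not None:
        audio_path = microphone
    return pd.DataFrame(
        [["MTGGenre", "-", "-", "-", "-", "-"]],
        columns=["Task", "Top 1", "Top 2", "Top 3", "Top 4", "Top 5"],
    )

demo = gr.Blocks()
with demo:
    # A single Interface now handles both file upload and microphone input;
    # the separate live/streaming interface no longer exists.
    gr.Interface(
        fn=convert_audio,
        inputs=[
            gr.Audio(type="filepath", label="Add music audio file"),
            gr.Audio(source="microphone", type="filepath"),
        ],
        outputs=gr.DataFrame(label="Output Dataframe"),
        allow_flagging="never",
    )

if __name__ == "__main__":
    demo.launch()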