Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -123,7 +123,12 @@ def play_video(youtube_url):
|
|
123 |
|
124 |
|
125 |
AUDIO_EXAMPLES = glob.glob('examples/*.*', recursive=True)
|
126 |
-
YOUTUBE_EXAMPLES = ["https://www.youtube.com/watch?v=vMboypSkj3c"
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
# theme = 'gradio/dracula_revamped' #'Insuz/Mocha' #gr.themes.Soft()
|
129 |
# with gr.Blocks(theme=theme) as demo:
|
@@ -157,9 +162,18 @@ with gr.Blocks(theme=theme, css=css) as demo:
|
|
157 |
gr.Markdown(
|
158 |
"""
|
159 |
## 🎶YourMT3+: Multi-instrument Music Transcription with Enhanced Transformer Architectures and Cross-dataset Stem Augmentation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
#### Caution:
|
161 |
- Currently running on CPU, and it takes longer than 3 minutes for a 30-second input.
|
162 |
-
- For acadmic reproduction purpose, we strongly recommend to use
|
163 |
### [Paper](https://arxiv.org/abs/2407.04822) [Code](https://github.com/mimbres/YourMT3)
|
164 |
""")
|
165 |
|
|
|
123 |
|
124 |
|
125 |
AUDIO_EXAMPLES = glob.glob('examples/*.*', recursive=True)
|
126 |
+
YOUTUBE_EXAMPLES = ["https://www.youtube.com/watch?v=vMboypSkj3c",
|
127 |
+
"https://youtu.be/OXXRoa1U6xU?si=nhJ6lzGenCmk4P7R",
|
128 |
+
"https://youtu.be/EOJ0wH6h3rE?si=a99k6BnSajvNmXcn",
|
129 |
+
"https://youtu.be/7mjQooXt28o?si=qqmMxCxwqBlLPDI2",
|
130 |
+
"https://youtu.be/bnS-HK_lTHA?si=PQLVAab3QHMbv0S3", "https://youtu.be/zJB0nnOc7bM?si=EA1DN8nHWJcpQWp_",
|
131 |
+
"https://youtu.be/mIWYTg55h10?si=WkbtKfL6NlNquvT8"]
|
132 |
|
133 |
# theme = 'gradio/dracula_revamped' #'Insuz/Mocha' #gr.themes.Soft()
|
134 |
# with gr.Blocks(theme=theme) as demo:
|
|
|
162 |
gr.Markdown(
|
163 |
"""
|
164 |
## 🎶YourMT3+: Multi-instrument Music Transcription with Enhanced Transformer Architectures and Cross-dataset Stem Augmentation
|
165 |
+
### Model card:
|
166 |
+
- Model name: `YPTF.MoE+Multi`
|
167 |
+
- Encoder backbone: Perceiver-TF + Mixture of Experts (2/8)
|
168 |
+
- Decoder backbone: Multi-channel T5-small
|
169 |
+
- Tokenizer: MT3 tokens with Singing extension
|
170 |
+
- Dataset: YourMT3 dataset
|
171 |
+
- Augmentation strategy: Intra-/Cross dataset stem augment, No Pitch-shifting
|
172 |
+
- FP Precision: BF16-mixed for training, FP16 for inference
|
173 |
+
|
174 |
#### Caution:
|
175 |
- Currently running on CPU, and it takes longer than 3 minutes for a 30-second input.
|
176 |
+
- For academic reproduction purposes, we strongly recommend using the [Colab Demo](https://colab.research.google.com/drive/1AgOVEBfZknDkjmSRA7leoa81a2vrnhBG?usp=sharing) with multiple checkpoints.
|
177 |
### [Paper](https://arxiv.org/abs/2407.04822) [Code](https://github.com/mimbres/YourMT3)
|
178 |
""")
|
179 |
|