Spaces:

soiz
/

voice-to-pth

Running

soiz committed on Nov 11, 2024

Commit

447cda5

verified ·

1 Parent(s): ea1c27c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,14 +10,17 @@ def audio_to_pth(audio):
     # メルスペクトログラムに変換
     mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
-    # メルスペクトログラムを対数スケールに変換（TTSモデルに適した形式）
     mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
     # メルスペクトログラムをテンソルに変換
     tensor = torch.tensor(mel_spectrogram_db)
     # テンソルを .pth ファイルに保存
-    output_path = "audio_features.pth"
     torch.save(tensor, output_path)
     return output_path
@@ -27,8 +30,8 @@ iface = gr.Interface(
     fn=audio_to_pth,
     inputs=gr.Audio(type="filepath"),
     outputs="file",
-    title="Audio to .PTH Converter",
-    description="Upload an audio file to convert it to a .pth file containing audio features in mel spectrogram format."
 )
 iface.launch()

     # メルスペクトログラムに変換
     mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
+    # メルスペクトログラムを対数スケールに変換
     mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
     # メルスペクトログラムをテンソルに変換
     tensor = torch.tensor(mel_spectrogram_db)
+    # テンソルを5次元に変換
+    tensor = tensor.unsqueeze(0).unsqueeze(0).unsqueeze(0)  # 5次元に拡張
     # テンソルを .pth ファイルに保存
+    output_path = "audio_features_5d.pth"
     torch.save(tensor, output_path)
     return output_path
     fn=audio_to_pth,
     inputs=gr.Audio(type="filepath"),
     outputs="file",
+    title="Audio to 5D Tensor .PTH Converter",
+    description="Upload an audio file to convert it to a .pth file containing a 5D tensor with audio features in mel spectrogram format."
 )
 iface.launch()