admin committed on
Commit 6e103ce · 1 Parent(s): 47a1023
Files changed (1)
  1. app.py +40 -53
app.py CHANGED
@@ -44,63 +44,48 @@ def circular_padding(y: np.ndarray, sr: int, dur=3):
 
 
 def wav2mel(audio_path: str):
-    os.makedirs(TEMP_DIR, exist_ok=True)
-    try:
-        y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
-        y = circular_padding(y, sr)
-        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
-        log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
-        librosa.display.specshow(log_mel_spec)
-        plt.axis("off")
-        plt.savefig(
-            f"{TEMP_DIR}/output.jpg",
-            bbox_inches="tight",
-            pad_inches=0.0,
-        )
-        plt.close()
-
-    except Exception as e:
-        print(f"Error converting {audio_path} : {e}")
+    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
+    y = circular_padding(y, sr)
+    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
+    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
+    librosa.display.specshow(log_mel_spec)
+    plt.axis("off")
+    plt.savefig(
+        f"{TEMP_DIR}/output.jpg",
+        bbox_inches="tight",
+        pad_inches=0.0,
+    )
+    plt.close()
 
 
 def wav2cqt(audio_path: str):
-    os.makedirs(TEMP_DIR, exist_ok=True)
-    try:
-        y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
-        y = circular_padding(y, sr)
-        cqt_spec = librosa.cqt(y=y, sr=sr)
-        log_cqt_spec = librosa.power_to_db(np.abs(cqt_spec) ** 2, ref=np.max)
-        librosa.display.specshow(log_cqt_spec)
-        plt.axis("off")
-        plt.savefig(
-            f"{TEMP_DIR}/output.jpg",
-            bbox_inches="tight",
-            pad_inches=0.0,
-        )
-        plt.close()
-
-    except Exception as e:
-        print(f"Error converting {audio_path} : {e}")
+    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
+    y = circular_padding(y, sr)
+    cqt_spec = librosa.cqt(y=y, sr=sr)
+    log_cqt_spec = librosa.power_to_db(np.abs(cqt_spec) ** 2, ref=np.max)
+    librosa.display.specshow(log_cqt_spec)
+    plt.axis("off")
+    plt.savefig(
+        f"{TEMP_DIR}/output.jpg",
+        bbox_inches="tight",
+        pad_inches=0.0,
+    )
+    plt.close()
 
 
 def wav2chroma(audio_path: str):
-    os.makedirs(TEMP_DIR, exist_ok=True)
-    try:
-        y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
-        y = circular_padding(y, sr)
-        chroma_spec = librosa.feature.chroma_stft(y=y, sr=sr)
-        log_chroma_spec = librosa.power_to_db(np.abs(chroma_spec) ** 2, ref=np.max)
-        librosa.display.specshow(log_chroma_spec)
-        plt.axis("off")
-        plt.savefig(
-            f"{TEMP_DIR}/output.jpg",
-            bbox_inches="tight",
-            pad_inches=0.0,
-        )
-        plt.close()
-
-    except Exception as e:
-        print(f"Error converting {audio_path} : {e}")
+    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
+    y = circular_padding(y, sr)
+    chroma_spec = librosa.feature.chroma_stft(y=y, sr=sr)
+    log_chroma_spec = librosa.power_to_db(np.abs(chroma_spec) ** 2, ref=np.max)
+    librosa.display.specshow(log_chroma_spec)
+    plt.axis("off")
+    plt.savefig(
+        f"{TEMP_DIR}/output.jpg",
+        bbox_inches="tight",
+        pad_inches=0.0,
+    )
+    plt.close()
 
 
 def infer(wav_path: str, log_name: str, folder_path=TEMP_DIR):
@@ -110,13 +95,15 @@ def infer(wav_path: str, log_name: str, folder_path=TEMP_DIR):
     if not wav_path:
         return None, "Please input an audio!"
 
+    spec = log_name.split("_")[-3]
+    os.makedirs(folder_path, exist_ok=True)
     try:
         model = EvalNet(log_name, len(TRANSLATE)).model
+        eval("wav2%s" % spec)(wav_path)
+
    except Exception as e:
         return None, f"{e}"
 
-    spec = log_name.split("_")[-3]
-    eval("wav2%s" % spec)(wav_path)
    input = embed_img(f"{folder_path}/output.jpg")
     output: torch.Tensor = model(input)
     pred_id = torch.max(output.data, 1)[1]
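
For context on the dispatch in infer: the log name encodes the spectrogram type as its third-from-last underscore-separated token, and eval("wav2%s" % spec) resolves that token to one of the wav2mel / wav2cqt / wav2chroma converters above. A minimal sketch of the same selection using an explicit mapping instead of eval; the CONVERTERS table and render_spectrogram helper are illustrative and not part of app.py:

# Illustrative sketch only -- not part of this commit. Assumes the
# wav2mel / wav2cqt / wav2chroma functions defined in the diff above.
CONVERTERS = {
    "mel": wav2mel,        # mel spectrogram
    "cqt": wav2cqt,        # constant-Q transform
    "chroma": wav2chroma,  # chromagram
}

def render_spectrogram(wav_path: str, log_name: str):
    # Same token the diff extracts: "xxx_mel_yyy_zzz".split("_")[-3] -> "mel"
    spec = log_name.split("_")[-3]
    converter = CONVERTERS.get(spec)
    if converter is None:
        raise ValueError(f"Unknown spectrogram type: {spec!r}")
    converter(wav_path)  # writes {TEMP_DIR}/output.jpg as in the diff

An explicit table fails loudly on an unrecognized token, whereas the eval form would raise a NameError for any log name whose token does not match a defined wav2* function.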