admin committed on
Commit c5069aa · 1 Parent(s): 54b4b45
Files changed (1)
  1. app.py +73 -86
app.py CHANGED
@@ -40,96 +40,81 @@ def circular_padding(spec: np.ndarray, end: int):


def wav2mel(audio_path: str, width=3):
-    os.makedirs(TEMP_DIR, exist_ok=True)
-    try:
-        y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
-        total_frames = len(y)
-        if total_frames % (width * sr) != 0:
-            count = total_frames // (width * sr) + 1
-            y = circular_padding(y, count * width * sr)
-
-        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
-        log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
-        dur = librosa.get_duration(y=y, sr=sr)
-        total_frames = log_mel_spec.shape[1]
-        step = int(width * total_frames / dur)
-        count = int(total_frames / step)
-        begin = int(0.5 * (total_frames - count * step))
-        end = begin + step * count
-        for i in range(begin, end, step):
-            librosa.display.specshow(log_mel_spec[:, i : i + step])
-            plt.axis("off")
-            plt.savefig(
-                f"{TEMP_DIR}/{i}.jpg",
-                bbox_inches="tight",
-                pad_inches=0.0,
-            )
-            plt.close()
-
-    except Exception as e:
-        print(f"Error converting {audio_path} : {e}")
+    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
+    total_frames = len(y)
+    if total_frames % (width * sr) != 0:
+        count = total_frames // (width * sr) + 1
+        y = circular_padding(y, count * width * sr)
+
+    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
+    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
+    dur = librosa.get_duration(y=y, sr=sr)
+    total_frames = log_mel_spec.shape[1]
+    step = int(width * total_frames / dur)
+    count = int(total_frames / step)
+    begin = int(0.5 * (total_frames - count * step))
+    end = begin + step * count
+    for i in range(begin, end, step):
+        librosa.display.specshow(log_mel_spec[:, i : i + step])
+        plt.axis("off")
+        plt.savefig(
+            f"{TEMP_DIR}/{i}.jpg",
+            bbox_inches="tight",
+            pad_inches=0.0,
+        )
+        plt.close()


def wav2cqt(audio_path: str, width=3):
-    os.makedirs(TEMP_DIR, exist_ok=True)
-    try:
-        y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
-        total_frames = len(y)
-        if total_frames % (width * sr) != 0:
-            count = total_frames // (width * sr) + 1
-            y = circular_padding(y, count * width * sr)
-
-        cqt_spec = librosa.cqt(y=y, sr=sr)
-        log_cqt_spec = librosa.power_to_db(np.abs(cqt_spec) ** 2, ref=np.max)
-        dur = librosa.get_duration(y=y, sr=sr)
-        total_frames = log_cqt_spec.shape[1]
-        step = int(width * total_frames / dur)
-        count = int(total_frames / step)
-        begin = int(0.5 * (total_frames - count * step))
-        end = begin + step * count
-        for i in range(begin, end, step):
-            librosa.display.specshow(log_cqt_spec[:, i : i + step])
-            plt.axis("off")
-            plt.savefig(
-                f"{TEMP_DIR}/{i}.jpg",
-                bbox_inches="tight",
-                pad_inches=0.0,
-            )
-            plt.close()
-
-    except Exception as e:
-        print(f"Error converting {audio_path} : {e}")
+    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
+    total_frames = len(y)
+    if total_frames % (width * sr) != 0:
+        count = total_frames // (width * sr) + 1
+        y = circular_padding(y, count * width * sr)
+
+    cqt_spec = librosa.cqt(y=y, sr=sr)
+    log_cqt_spec = librosa.power_to_db(np.abs(cqt_spec) ** 2, ref=np.max)
+    dur = librosa.get_duration(y=y, sr=sr)
+    total_frames = log_cqt_spec.shape[1]
+    step = int(width * total_frames / dur)
+    count = int(total_frames / step)
+    begin = int(0.5 * (total_frames - count * step))
+    end = begin + step * count
+    for i in range(begin, end, step):
+        librosa.display.specshow(log_cqt_spec[:, i : i + step])
+        plt.axis("off")
+        plt.savefig(
+            f"{TEMP_DIR}/{i}.jpg",
+            bbox_inches="tight",
+            pad_inches=0.0,
+        )
+        plt.close()


def wav2chroma(audio_path: str, width=3):
-    os.makedirs(TEMP_DIR, exist_ok=True)
-    try:
-        y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
-        total_frames = len(y)
-        if total_frames % (width * sr) != 0:
-            count = total_frames // (width * sr) + 1
-            y = circular_padding(y, count * width * sr)
-
-        chroma_spec = librosa.feature.chroma_stft(y=y, sr=sr)
-        log_chroma_spec = librosa.power_to_db(np.abs(chroma_spec) ** 2, ref=np.max)
-        dur = librosa.get_duration(y=y, sr=sr)
-        total_frames = log_chroma_spec.shape[1]
-        step = int(width * total_frames / dur)
-        count = int(total_frames / step)
-        begin = int(0.5 * (total_frames - count * step))
-        end = begin + step * count
-        for i in range(begin, end, step):
-            librosa.display.specshow(log_chroma_spec[:, i : i + step])
-            plt.axis("off")
-            plt.savefig(
-                f"{TEMP_DIR}/{i}.jpg",
-                bbox_inches="tight",
-                pad_inches=0.0,
-            )
-            plt.close()
-
-    except Exception as e:
-        print(f"Error converting {audio_path} : {e}")
+    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
+    total_frames = len(y)
+    if total_frames % (width * sr) != 0:
+        count = total_frames // (width * sr) + 1
+        y = circular_padding(y, count * width * sr)
+
+    chroma_spec = librosa.feature.chroma_stft(y=y, sr=sr)
+    log_chroma_spec = librosa.power_to_db(np.abs(chroma_spec) ** 2, ref=np.max)
+    dur = librosa.get_duration(y=y, sr=sr)
+    total_frames = log_chroma_spec.shape[1]
+    step = int(width * total_frames / dur)
+    count = int(total_frames / step)
+    begin = int(0.5 * (total_frames - count * step))
+    end = begin + step * count
+    for i in range(begin, end, step):
+        librosa.display.specshow(log_chroma_spec[:, i : i + step])
+        plt.axis("off")
+        plt.savefig(
+            f"{TEMP_DIR}/{i}.jpg",
+            bbox_inches="tight",
+            pad_inches=0.0,
+        )
+        plt.close()


def most_frequent_value(lst: list):
@@ -149,13 +134,15 @@ def infer(wav_path: str, log_name: str, folder_path=TEMP_DIR):
    if not wav_path:
        return None, "Please input an audio!"

+    spec = log_name.split("_")[-3]
+    os.makedirs(folder_path, exist_ok=True)
    try:
        model = EvalNet(log_name, len(TRANSLATE)).model
+        eval("wav2%s" % spec)(wav_path)
+
    except Exception as e:
        return None, f"{e}"

-    spec = log_name.split("_")[-3]
-    eval("wav2%s" % spec)(wav_path)
    jpgs = find_files(folder_path, ".jpg")
    preds = []
    for jpg in jpgs:
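
Note on the relocated dispatch: after this commit, infer() derives the spectrogram type from the checkpoint name and calls the matching converter via eval("wav2%s" % spec) inside the try block, so a conversion failure is reported the same way as a model-loading failure. The sketch below is illustrative only and is not code from this commit: the stub converters and the sample log name are hypothetical, and it shows the same name-based selection done through an explicit lookup table, which rejects an unexpected spec value instead of evaluating it.

    # Illustrative sketch only -- stubs stand in for the real wav2mel /
    # wav2cqt / wav2chroma in app.py, which render spectrogram tiles to disk.
    def wav2mel(audio_path: str, width=3):
        print(f"mel    <- {audio_path}")

    def wav2cqt(audio_path: str, width=3):
        print(f"cqt    <- {audio_path}")

    def wav2chroma(audio_path: str, width=3):
        print(f"chroma <- {audio_path}")

    # Covers the same three cases as eval("wav2%s" % spec)(wav_path),
    # but only names listed in this table can ever be called.
    CONVERTERS = {"mel": wav2mel, "cqt": wav2cqt, "chroma": wav2chroma}

    def convert(log_name: str, wav_path: str) -> None:
        spec = log_name.split("_")[-3]  # same parsing as in infer()
        try:
            converter = CONVERTERS[spec]
        except KeyError:
            raise ValueError(f"Unknown spectrogram type: {spec!r}") from None
        converter(wav_path)

    if __name__ == "__main__":
        # Hypothetical log name whose third-from-last "_" field is the spec type.
        convert("vgg19_bn_mel_2024-06-01_00-00-00", "example.wav")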