admin committed on
Commit
981a678
·
1 Parent(s): fe46849
Files changed (1) hide show
  1. app.py +63 -77
app.py CHANGED
@@ -25,87 +25,72 @@ SAMPLE_RATE = 22050
25
 
26
 
27
  def wav2mel(audio_path: str, width=1.6, topdb=40):
28
- os.makedirs(TEMP_DIR, exist_ok=True)
29
- try:
30
- y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
31
- non_silents = librosa.effects.split(y, top_db=topdb)
32
- non_silent = np.concatenate([y[start:end] for start, end in non_silents])
33
- mel_spec = librosa.feature.melspectrogram(y=non_silent, sr=sr)
34
- log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
35
- dur = librosa.get_duration(y=non_silent, sr=sr)
36
- total_frames = log_mel_spec.shape[1]
37
- step = int(width * total_frames / dur)
38
- count = int(total_frames / step)
39
- begin = int(0.5 * (total_frames - count * step))
40
- end = begin + step * count
41
- for i in range(begin, end, step):
42
- librosa.display.specshow(log_mel_spec[:, i : i + step])
43
- plt.axis("off")
44
- plt.savefig(
45
- f"{TEMP_DIR}/mel_{round(dur, 2)}_{i}.jpg",
46
- bbox_inches="tight",
47
- pad_inches=0.0,
48
- )
49
- plt.close()
50
-
51
- except Exception as e:
52
- print(f"Error converting {audio_path} : {e}")
53
 
54
 
55
  def wav2cqt(audio_path: str, width=1.6, topdb=40):
56
- os.makedirs(TEMP_DIR, exist_ok=True)
57
- try:
58
- y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
59
- non_silents = librosa.effects.split(y, top_db=topdb)
60
- non_silent = np.concatenate([y[start:end] for start, end in non_silents])
61
- cqt_spec = librosa.cqt(y=non_silent, sr=sr)
62
- log_cqt_spec = librosa.power_to_db(np.abs(cqt_spec) ** 2, ref=np.max)
63
- dur = librosa.get_duration(y=non_silent, sr=sr)
64
- total_frames = log_cqt_spec.shape[1]
65
- step = int(width * total_frames / dur)
66
- count = int(total_frames / step)
67
- begin = int(0.5 * (total_frames - count * step))
68
- end = begin + step * count
69
- for i in range(begin, end, step):
70
- librosa.display.specshow(log_cqt_spec[:, i : i + step])
71
- plt.axis("off")
72
- plt.savefig(
73
- f"{TEMP_DIR}/cqt_{round(dur, 2)}_{i}.jpg",
74
- bbox_inches="tight",
75
- pad_inches=0.0,
76
- )
77
- plt.close()
78
-
79
- except Exception as e:
80
- print(f"Error converting {audio_path} : {e}")
81
 
82
 
83
  def wav2chroma(audio_path: str, width=1.6, topdb=40):
84
- os.makedirs(TEMP_DIR, exist_ok=True)
85
- try:
86
- y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
87
- non_silents = librosa.effects.split(y, top_db=topdb)
88
- non_silent = np.concatenate([y[start:end] for start, end in non_silents])
89
- chroma_spec = librosa.feature.chroma_stft(y=non_silent, sr=sr)
90
- log_chroma_spec = librosa.power_to_db(np.abs(chroma_spec) ** 2, ref=np.max)
91
- dur = librosa.get_duration(y=non_silent, sr=sr)
92
- total_frames = log_chroma_spec.shape[1]
93
- step = int(width * total_frames / dur)
94
- count = int(total_frames / step)
95
- begin = int(0.5 * (total_frames - count * step))
96
- end = begin + step * count
97
- for i in range(begin, end, step):
98
- librosa.display.specshow(log_chroma_spec[:, i : i + step])
99
- plt.axis("off")
100
- plt.savefig(
101
- f"{TEMP_DIR}/chroma_{round(dur, 2)}_{i}.jpg",
102
- bbox_inches="tight",
103
- pad_inches=0.0,
104
- )
105
- plt.close()
106
-
107
- except Exception as e:
108
- print(f"Error converting {audio_path} : {e}")
109
 
110
 
111
  def most_common_element(input_list: list):
@@ -121,14 +106,15 @@ def infer(wav_path: str, log_name: str, folder_path=TEMP_DIR):
121
  if not wav_path:
122
  return None, "Please input an audio!"
123
 
 
 
124
  try:
125
  model = EvalNet(log_name, len(TRANSLATE)).model
 
126
 
127
  except Exception as e:
128
  return None, f"{e}"
129
 
130
- spec = log_name.split("_")[-3]
131
- eval("wav2%s" % spec)(wav_path)
132
  outputs = []
133
  all_files = os.listdir(folder_path)
134
  for file_name in all_files:
 
25
 
26
 
27
def wav2mel(audio_path: str, width=1.6, topdb=40):
    """Render the log-mel spectrogram of an audio file as fixed-width JPEG tiles.

    Loads the audio at SAMPLE_RATE, removes silent regions (split at `topdb` dB
    below peak), computes a log-scaled mel spectrogram, and writes consecutive
    windows of roughly `width` seconds each to TEMP_DIR as mel_<dur>_<i>.jpg.

    Args:
        audio_path: path of the audio file to convert.
        width: approximate duration in seconds covered by each saved tile.
        topdb: threshold (dB below peak) used to detect and drop silence.

    Raises:
        ValueError: if the audio contains no non-silent frames.
    """
    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
    non_silents = librosa.effects.split(y, top_db=topdb)
    if len(non_silents) == 0:
        # np.concatenate on an empty list raises a cryptic error; fail clearly.
        raise ValueError(f"No non-silent audio found in {audio_path}")
    non_silent = np.concatenate([y[start:end] for start, end in non_silents])
    mel_spec = librosa.feature.melspectrogram(y=non_silent, sr=sr)
    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
    dur = librosa.get_duration(y=non_silent, sr=sr)
    total_frames = log_mel_spec.shape[1]
    # Frames per `width`-second window; clamp to >= 1 so range() below never
    # receives a zero step (possible for very short clips).
    step = max(1, int(width * total_frames / dur))
    count = total_frames // step
    # Center the covered region so leftover frames are trimmed evenly.
    begin = int(0.5 * (total_frames - count * step))
    end = begin + step * count
    for i in range(begin, end, step):
        librosa.display.specshow(log_mel_spec[:, i : i + step])
        plt.axis("off")
        plt.savefig(
            f"{TEMP_DIR}/mel_{round(dur, 2)}_{i}.jpg",
            bbox_inches="tight",
            pad_inches=0.0,
        )
        plt.close()
 
 
 
 
 
48
 
49
 
50
def wav2cqt(audio_path: str, width=1.6, topdb=40):
    """Render the log-power CQT spectrogram of an audio file as JPEG tiles.

    Loads the audio at SAMPLE_RATE, removes silent regions (split at `topdb` dB
    below peak), computes a log-scaled constant-Q transform, and writes
    consecutive windows of roughly `width` seconds each to TEMP_DIR as
    cqt_<dur>_<i>.jpg.

    Args:
        audio_path: path of the audio file to convert.
        width: approximate duration in seconds covered by each saved tile.
        topdb: threshold (dB below peak) used to detect and drop silence.

    Raises:
        ValueError: if the audio contains no non-silent frames.
    """
    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
    non_silents = librosa.effects.split(y, top_db=topdb)
    if len(non_silents) == 0:
        # np.concatenate on an empty list raises a cryptic error; fail clearly.
        raise ValueError(f"No non-silent audio found in {audio_path}")
    non_silent = np.concatenate([y[start:end] for start, end in non_silents])
    cqt_spec = librosa.cqt(y=non_silent, sr=sr)
    # CQT is complex-valued: convert to power (|X|^2) before the dB scale.
    log_cqt_spec = librosa.power_to_db(np.abs(cqt_spec) ** 2, ref=np.max)
    dur = librosa.get_duration(y=non_silent, sr=sr)
    total_frames = log_cqt_spec.shape[1]
    # Frames per `width`-second window; clamp to >= 1 so range() below never
    # receives a zero step (possible for very short clips).
    step = max(1, int(width * total_frames / dur))
    count = total_frames // step
    # Center the covered region so leftover frames are trimmed evenly.
    begin = int(0.5 * (total_frames - count * step))
    end = begin + step * count
    for i in range(begin, end, step):
        librosa.display.specshow(log_cqt_spec[:, i : i + step])
        plt.axis("off")
        plt.savefig(
            f"{TEMP_DIR}/cqt_{round(dur, 2)}_{i}.jpg",
            bbox_inches="tight",
            pad_inches=0.0,
        )
        plt.close()
 
 
 
 
 
71
 
72
 
73
def wav2chroma(audio_path: str, width=1.6, topdb=40):
    """Render the log-power chromagram of an audio file as JPEG tiles.

    Loads the audio at SAMPLE_RATE, removes silent regions (split at `topdb` dB
    below peak), computes a log-scaled chroma-STFT representation, and writes
    consecutive windows of roughly `width` seconds each to TEMP_DIR as
    chroma_<dur>_<i>.jpg.

    Args:
        audio_path: path of the audio file to convert.
        width: approximate duration in seconds covered by each saved tile.
        topdb: threshold (dB below peak) used to detect and drop silence.

    Raises:
        ValueError: if the audio contains no non-silent frames.
    """
    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
    non_silents = librosa.effects.split(y, top_db=topdb)
    if len(non_silents) == 0:
        # np.concatenate on an empty list raises a cryptic error; fail clearly.
        raise ValueError(f"No non-silent audio found in {audio_path}")
    non_silent = np.concatenate([y[start:end] for start, end in non_silents])
    chroma_spec = librosa.feature.chroma_stft(y=non_silent, sr=sr)
    log_chroma_spec = librosa.power_to_db(np.abs(chroma_spec) ** 2, ref=np.max)
    dur = librosa.get_duration(y=non_silent, sr=sr)
    total_frames = log_chroma_spec.shape[1]
    # Frames per `width`-second window; clamp to >= 1 so range() below never
    # receives a zero step (possible for very short clips).
    step = max(1, int(width * total_frames / dur))
    count = total_frames // step
    # Center the covered region so leftover frames are trimmed evenly.
    begin = int(0.5 * (total_frames - count * step))
    end = begin + step * count
    for i in range(begin, end, step):
        librosa.display.specshow(log_chroma_spec[:, i : i + step])
        plt.axis("off")
        plt.savefig(
            f"{TEMP_DIR}/chroma_{round(dur, 2)}_{i}.jpg",
            bbox_inches="tight",
            pad_inches=0.0,
        )
        plt.close()
 
 
 
 
 
94
 
95
 
96
  def most_common_element(input_list: list):
 
106
  if not wav_path:
107
  return None, "Please input an audio!"
108
 
109
+ spec = log_name.split("_")[-3]
110
+ os.makedirs(folder_path, exist_ok=True)
111
  try:
112
  model = EvalNet(log_name, len(TRANSLATE)).model
113
+ eval("wav2%s" % spec)(wav_path)
114
 
115
  except Exception as e:
116
  return None, f"{e}"
117
 
 
 
118
  outputs = []
119
  all_files = os.listdir(folder_path)
120
  for file_name in all_files: