File size: 1,147 Bytes
88afac1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import numpy as np
import torch
def play(audio: torch.Tensor | np.ndarray | str, sr=16000, autoplay=True):
import torchaudio
from IPython.display import Audio, display
if isinstance(audio, str):
audio = torchaudio.load(audio)
if isinstance(audio, np.ndarray):
audio = torch.from_numpy(audio)
assert audio.numel() > 100, "play() needs a non empty audio array"
audio = audio.flatten()
if audio.dim() < 2:
audio = audio[None]
# Sum Channels
if audio.shape[0] > 1:
audio = audio.sum(dim=0)
display(Audio(audio.cpu().detach(), rate=sr, autoplay=autoplay, normalize=True))
def plot_mel_spec(mel_spec: torch.Tensor | np.ndarray, title: str = None):
import matplotlib.pyplot as plt
mel_spec = mel_spec.squeeze()
if isinstance(mel_spec, torch.Tensor):
mel_spec = mel_spec.cpu().numpy()
fig, ax = plt.subplots(figsize=(16, 4))
im = ax.imshow(mel_spec, aspect="auto", origin="lower", interpolation="none")
fig.colorbar(im, ax=ax)
ax.set_xlabel("frames")
ax.set_ylabel("channels")
if title is not None:
ax.set_title(title)
|