{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from math import floor, gcd\n",
    "from os import listdir, makedirs\n",
    "from os.path import isfile, join\n",
    "from random import randint, sample, uniform\n",
    "\n",
    "import IPython.display as ipd\n",
    "import librosa\n",
    "import numpy as np\n",
    "import soundfile as sf\n",
    "from scipy.signal import butter, lfilter, resample_poly, sosfilt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Audio dataset preparation\n",
    "\n",
    "Low-pass filters and downsamples the recordings to 4 kHz, cuts them into short clips, and mixes chainsaw clips with environment background.\n",
    "\n",
    "Data from [ESC-50](https://github.com/karolpiczak/ESC-50)  \n",
    "and [freesound.org](https://freesound.org)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def apply_lowpass_filter(x, sr):\n",
    "    \"\"\"Low-pass filter `x` (sampled at `sr` Hz) with a 10th-order Butterworth at 2 kHz.\n",
    "\n",
    "    Returns the filtered signal, one output sample per input sample.\n",
    "    \"\"\"\n",
    "    order = 10\n",
    "    cutoff = 2000\n",
    "    # Second-order sections instead of (b, a) coefficients: SciPy recommends SOS\n",
    "    # because the polynomial form loses precision for higher-order filters.\n",
    "    sos = butter(order, cutoff, fs=sr, btype='low', analog=False, output='sos')\n",
    "    return sosfilt(sos, x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def downsample(x, sr, newsr):\n",
    "    \"\"\"Resample `x` from `sr` Hz to `newsr` Hz.\n",
    "\n",
    "    When `sr` is an exact multiple of `newsr` the signal is decimated by slicing\n",
    "    (the caller is expected to have low-pass filtered it already). For any other\n",
    "    ratio, e.g. librosa's default 22050 Hz -> 4000 Hz, polyphase resampling is\n",
    "    used; slicing with a truncated step there would really produce 4410 Hz audio.\n",
    "    \"\"\"\n",
    "    sr, newsr = int(round(sr)), int(round(newsr))\n",
    "    if sr % newsr == 0:\n",
    "        return x[::sr // newsr]\n",
    "    divisor = gcd(sr, newsr)\n",
    "    return resample_poly(x, newsr // divisor, sr // divisor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def play(x, sr):\n",
    "    \"\"\"Display an inline audio player for signal `x` at sample rate `sr`.\"\"\"\n",
    "    ipd.display(ipd.Audio(data=x, rate=sr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# crop a single file (if it ends with silence)\n",
    "# file=\"751913__spaudiobooks__chainsaw-in-a-forest\"\n",
    "# ext=\".wav\"\n",
    "# data, sr = librosa.load(dirpath+file+ext)\n",
    "# data = data[:sr*(60+31)]\n",
    "# sf.write(dirpath+file+\".wav\", data, sr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# read, filter, downsample, chunk by 3s length, write wav\n",
    "newsr = 4000\n",
    "c = 2550\n",
    "dirpath = \"../datasets/freesound/chainsaw/audio/long\"\n",
    "# NOTE(review): output is assumed to belong in <dirpath>/curated - confirm.\n",
    "outdir = join(dirpath, \"curated\")\n",
    "makedirs(outdir, exist_ok=True)\n",
    "for file in listdir(dirpath):\n",
    "    if isfile(join(dirpath, file)):\n",
    "        print(file)\n",
    "        # join() instead of string concatenation: dirpath has no trailing slash.\n",
    "        data, sr = librosa.load(join(dirpath, file))\n",
    "        #play(data, sr)\n",
    "        data = apply_lowpass_filter(data, sr)\n",
    "        data = downsample(data, sr, newsr)\n",
    "        cutpoints = list(range(3 * newsr, len(data), 3 * newsr))\n",
    "        all_data = np.split(data, cutpoints)\n",
    "        for d in all_data:\n",
    "            # skip leftover chunks of 1024 samples or fewer\n",
    "            if len(d) > 1024:\n",
    "                sf.write(join(outdir, f'{c}.wav'), d, newsr)\n",
    "                c += 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# detect too short files\n",
    "# dirpath = \"../datasets/freesound/environment/audio/curated/\"\n",
    "# for file in listdir(dirpath):\n",
    "#     if isfile(join(dirpath, file)):\n",
    "#         data, sr = librosa.load(dirpath+file)\n",
    "#         if (len(data)<=1024):\n",
    "#             print(file, len(data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ESC-50\n",
    "# attenuate, mix with background\n",
    "c = 0\n",
    "newsr = 4000\n",
    "dirpath = \"../datasets/freesound/chainsaw/audio/\"\n",
    "envdir = \"../datasets/freesound/environment/audio/\"\n",
    "outdir = join(dirpath, \"test\")\n",
    "makedirs(outdir, exist_ok=True)\n",
    "envfiles = [file for file in listdir(envdir) if isfile(join(envdir, file))]\n",
    "for file in listdir(dirpath):\n",
    "    if isfile(join(dirpath, file)):\n",
    "        print(file)\n",
    "        data, sr = librosa.load(join(dirpath, file))\n",
    "        #play(data, sr)\n",
    "        # The foreground clip is the same for every mix: filter and resample it once.\n",
    "        foreground = downsample(apply_lowpass_filter(data, sr), sr, newsr)\n",
    "        # sample() picks distinct background files and cannot loop forever\n",
    "        # when fewer than three are available.\n",
    "        for addfile in sample(envfiles, min(3, len(envfiles))):\n",
    "            data2, sr2 = librosa.load(join(envdir, addfile))\n",
    "            background = downsample(apply_lowpass_filter(data2, sr2), sr2, newsr)\n",
    "            # Trim to the common length so clips of different duration can be summed.\n",
    "            n = min(len(foreground), len(background))\n",
    "            attenuation = round(uniform(0.2, 0.5), 2)\n",
    "            mixed = (foreground[:n] * attenuation + background[:n] * (1 - attenuation)) / 2\n",
    "            # each mix is written as two halves\n",
    "            for d in np.split(mixed, [round(len(mixed) / 2)]):\n",
    "                sf.write(join(outdir, f'mix-{c}.wav'), d, newsr)\n",
    "                c += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# environment audio from ESC-50, filter, downsample and halve the files (they are 5 sec long)\n",
    "newsr = 4000\n",
    "c = 2649\n",
    "dirpath = \"../datasets/freesound/environment/audio/\"\n",
    "outdir = join(dirpath, \"curated\")\n",
    "makedirs(outdir, exist_ok=True)\n",
    "for file in listdir(dirpath):\n",
    "    if isfile(join(dirpath, file)):\n",
    "        data, sr = librosa.load(join(dirpath, file))\n",
    "        data = apply_lowpass_filter(data, sr)\n",
    "        data = downsample(data, sr, newsr)\n",
    "        all_data = np.split(data, [round(len(data) / 2)])\n",
    "        for d in all_data:\n",
    "            # random time shift\n",
    "            rand_zeros = np.zeros(randint(0, 1900))\n",
    "            d = np.append(rand_zeros, d)\n",
    "            sf.write(join(outdir, f'e-{c}.wav'), d, newsr)\n",
    "            c += 1"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "audio-processing",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}