frugal-ai-challenge-submission

Sleeping

File size: 6,063 Bytes

5ad4868

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from math import floor, gcd\n",
    "from os import listdir\n",
    "from os.path import isfile, join\n",
    "from random import randint\n",
    "\n",
    "import IPython.display as ipd\n",
    "import librosa\n",
    "import numpy as np\n",
    "import soundfile as sf\n",
    "from scipy.signal import resample_poly"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Data from [ESC-50](https://github.com/karolpiczak/ESC-50)  \n",
    "And [freesound.org](https://freesound.org)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.signal import butter, lfilter, sosfilt\n",
    "\n",
    "def apply_lowpass_filter(x, sr, cutoff=2000, order=10):\n",
    "    \"\"\"Low-pass filter a signal with a digital Butterworth filter.\n",
    "\n",
    "    Args:\n",
    "        x: array of audio samples.\n",
    "        sr: sampling rate of x in Hz; must be greater than 2 * cutoff.\n",
    "        cutoff: cutoff frequency in Hz (default 2000).\n",
    "        order: filter order (default 10).\n",
    "\n",
    "    Returns:\n",
    "        The filtered signal, with the same shape as x.\n",
    "    \"\"\"\n",
    "    # Second-order sections avoid the numerical problems that the (b, a)\n",
    "    # transfer-function form shows for high filter orders.\n",
    "    sos = butter(order, cutoff, fs=sr, btype='low', analog=False, output='sos')\n",
    "    return sosfilt(sos, x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def downsample(x, sr, newsr):\n",
    "    \"\"\"Resample x from sr Hz to newsr Hz.\n",
    "\n",
    "    When sr is an exact multiple of newsr the signal is decimated by plain\n",
    "    striding (no filtering is done here). Otherwise a polyphase resampler is\n",
    "    used so the output really is at newsr: striding by floor(sr / newsr)\n",
    "    would yield sr / floor(sr / newsr) Hz (e.g. 22050 -> 4410, not 4000).\n",
    "\n",
    "    Args:\n",
    "        x: 1-D array of samples.\n",
    "        sr: current sampling rate in Hz.\n",
    "        newsr: target sampling rate in Hz.\n",
    "\n",
    "    Returns:\n",
    "        The resampled signal.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if sr or newsr is not positive.\n",
    "    \"\"\"\n",
    "    sr, newsr = int(sr), int(newsr)\n",
    "    if sr <= 0 or newsr <= 0:\n",
    "        raise ValueError('sampling rates must be positive')\n",
    "    if sr % newsr == 0:\n",
    "        # exact integer ratio: keep every (sr // newsr)-th sample\n",
    "        return x[::sr // newsr]\n",
    "    x = np.asarray(x)\n",
    "    if x.size == 0:\n",
    "        return x\n",
    "    # reduce the ratio so the polyphase filter uses the smallest up/down factors\n",
    "    common = gcd(sr, newsr)\n",
    "    return resample_poly(x, newsr // common, sr // common)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def play(x, sr):\n",
    "    \"\"\"Show an inline audio player for the samples x at sampling rate sr.\"\"\"\n",
    "    player = ipd.Audio(data=x, rate=sr)\n",
    "    ipd.display(player)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# crop a single file (if it ends with silence)\n",
    "# file=\"751913__spaudiobooks__chainsaw-in-a-forest\"\n",
    "# ext=\".wav\"\n",
    "# data, sr = librosa.load(dirpath+file+ext)\n",
    "# data = data[:sr*(60+31)]\n",
    "# sf.write(dirpath+file+\".wav\", data, sr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# read, filter, downsample, chunk into 3 s pieces, write wav\n",
    "newsr = 4000\n",
    "c = 2550\n",
    "chunk_seconds = 3\n",
    "min_samples = 1024  # chunks of this many samples or fewer are skipped\n",
    "dirpath = \"../datasets/freesound/chainsaw/audio/long\"\n",
    "# sorted() keeps the output numbering reproducible; listdir order is arbitrary\n",
    "for file in sorted(listdir(dirpath)):\n",
    "    filepath = join(dirpath, file)\n",
    "    if not isfile(filepath):\n",
    "        continue\n",
    "    print(file)\n",
    "    # join() supplies the path separator that dirpath does not end with\n",
    "    data, sr = librosa.load(filepath)\n",
    "    #play(data, sr)\n",
    "    data = apply_lowpass_filter(data, sr)\n",
    "    data = downsample(data, sr, newsr)\n",
    "    chunk_len = chunk_seconds * newsr\n",
    "    cutpoints = list(range(chunk_len, len(data), chunk_len))\n",
    "    for d in np.split(data, cutpoints):\n",
    "        if len(d) > min_samples:\n",
    "            # write with the rate the data was actually resampled to\n",
    "            sf.write(join(dirpath, 'curated', f'{c}.wav'), d, newsr)\n",
    "            c += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# detect too short files\n",
    "# dirpath = \"../datasets/freesound/environment/audio/curated/\"\n",
    "# for file in listdir(dirpath):\n",
    "#     if isfile(join(dirpath, file)):\n",
    "#         data, sr = librosa.load(dirpath+file)\n",
    "#         if (len(data)<=1024):\n",
    "#             print(file, len(data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ESC-50\n",
    "# attenuate, mix with background\n",
    "from random import randint, sample, uniform\n",
    "\n",
    "c = 0\n",
    "newsr = 4000\n",
    "mixes_per_file = 3\n",
    "dirpath = \"../datasets/freesound/chainsaw/audio/\"\n",
    "envdir = \"../datasets/freesound/environment/audio/\"\n",
    "envfiles = sorted(file for file in listdir(envdir) if isfile(join(envdir, file)))\n",
    "# sorted() keeps the output numbering reproducible; listdir order is arbitrary\n",
    "for file in sorted(listdir(dirpath)):\n",
    "    if not isfile(join(dirpath, file)):\n",
    "        continue\n",
    "    print(file)\n",
    "    data, sr = librosa.load(join(dirpath, file))\n",
    "    #play(data, sr)\n",
    "    # the foreground does not depend on the background: filter and downsample it once\n",
    "    foreground = downsample(apply_lowpass_filter(data, sr), sr, newsr)\n",
    "    # sample() draws distinct backgrounds and, unlike a retry loop, terminates\n",
    "    # even when fewer than mixes_per_file background files exist\n",
    "    for addfile in sample(envfiles, min(mixes_per_file, len(envfiles))):\n",
    "        data2, sr2 = librosa.load(join(envdir, addfile))\n",
    "        background = downsample(apply_lowpass_filter(data2, sr2), sr2, newsr)\n",
    "        # clips of different length cannot be added sample-wise: trim to the shorter one\n",
    "        n = min(len(foreground), len(background))\n",
    "        attenuation = round(uniform(0.2, 0.5), 2)\n",
    "        mix = (foreground[:n] * attenuation + background[:n] * (1 - attenuation)) / 2\n",
    "        # each mix is written as two halves\n",
    "        for d in np.split(mix, [round(len(mix) / 2)]):\n",
    "            sf.write(join(dirpath, 'test', f'mix-{c}.wav'), d, newsr)\n",
    "            c += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "# environment audio from ESC-50: filter, downsample and halve the files (they are 5 s long)\n",
    "newsr = 4000\n",
    "c = 2649\n",
    "max_shift = 1900  # upper bound, in samples, of the random leading silence\n",
    "dirpath = \"../datasets/freesound/environment/audio/\"\n",
    "# sorted() keeps the output numbering reproducible; listdir order is arbitrary\n",
    "for file in sorted(listdir(dirpath)):\n",
    "    if not isfile(join(dirpath, file)):\n",
    "        continue\n",
    "    data, sr = librosa.load(join(dirpath, file))\n",
    "    data = apply_lowpass_filter(data, sr)\n",
    "    data = downsample(data, sr, newsr)\n",
    "    for d in np.split(data, [round(len(data) / 2)]):\n",
    "        # random time shift: prepend between 0 and max_shift zero samples\n",
    "        rand_zeros = np.zeros(randint(0, max_shift))\n",
    "        d = np.append(rand_zeros, d)\n",
    "        # write with the rate the data was actually resampled to\n",
    "        sf.write(join(dirpath, 'curated', f'e-{c}.wav'), d, newsr)\n",
    "        c += 1"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "audio-processing",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}