# Preprocessing notebook, cell 1: imports and MIDI <-> TSV helper functions.
import multiprocessing
import os
import sys
from glob import glob

# libraries and functions for midi2tsv
import mido
import numpy as np
from joblib import Parallel, delayed
from mido import Message, MidiFile, MidiTrack
from mir_eval.util import hz_to_midi
from pydub import AudioSegment
from scipy.io import wavfile
from tqdm import tqdm


def parse_midi(path):
    """Open a MIDI file and return its notes as an array.

    Parameters
    ----------
    path: path of the MIDI file, passed straight to ``mido.MidiFile``.

    Returns
    -------
    np.ndarray of shape (n_notes, 4) whose rows are
    (onset, offset, note, velocity). Onset/offset are the running sum of
    ``message.time`` over the file (seconds, per mido's documentation for
    iterating a ``MidiFile``). A file without notes yields shape (0, 4).

    A note ends at the next event carrying the same pitch. If the sustain
    pedal is down at that moment, the note is extended until the pedal is
    released or the same pitch occurs again, whichever comes first. Notes
    that are never closed end at the last event of the file.
    """
    midi = mido.MidiFile(path)

    time = 0
    sustain = False
    events = []
    for message in midi:
        time += message.time

        if message.type == 'control_change' and message.control == 64 and (message.value >= 64) != sustain:
            # sustain pedal state has just changed
            sustain = message.value >= 64
            event_type = 'sustain_on' if sustain else 'sustain_off'
            events.append(dict(index=len(events), time=time, type=event_type, note=None, velocity=0))

        if 'note' in message.type:
            # MIDI offsets can be either 'note_off' events or 'note_on' with zero velocity
            velocity = message.velocity if message.type == 'note_on' else 0
            events.append(dict(index=len(events), time=time, type='note', note=message.note,
                               velocity=velocity, sustain=sustain))

    last_event = events[-1] if events else None

    notes = []
    for i, onset in enumerate(events):
        if onset['velocity'] == 0:
            # note-offs and pedal events never start a note
            continue

        # Next event with the same pitch; fall back to the last event of the
        # file. The explicit default also covers a note-on that is itself the
        # last event, where the search range is empty.
        pitch = onset['note']
        offset = next((n for n in events[i + 1:] if n['note'] == pitch), last_event)

        # Test identity first: the fallback event may be a pedal event, and
        # pedal events carry no 'sustain' key.
        if offset is not last_event and offset['sustain']:
            # Pedal is down at the note-off: the note keeps sounding until the
            # pedal is released or the same pitch shows up again.
            offset = next(
                (n for n in events[offset['index'] + 1:]
                 if n['type'] == 'sustain_off' or n['note'] == pitch),
                last_event,
            )

        notes.append((onset['time'], offset['time'], onset['note'], onset['velocity']))

    # reshape keeps the four-column layout even when no note was found
    return np.array(notes, dtype=float).reshape(-1, 4)


def save_midi(path, pitches, intervals, velocities):
    """Save extracted notes as a single-track MIDI file.

    Parameters
    ----------
    path: the path to save the MIDI file
    pitches: sequence of note frequencies in Hz (converted with ``hz_to_midi``)
    intervals: sequence of (onset, offset) pairs in seconds
    velocities: sequence of velocities as fractions of full scale; each is
        multiplied by 127 and clamped to the MIDI range 0..127
    """
    file = MidiFile()
    track = MidiTrack()
    file.tracks.append(track)
    # No tempo message is written, so the MIDI default of 120 BPM applies:
    # two beats per second.
    ticks_per_second = file.ticks_per_beat * 2.0

    events = []
    for i in range(len(pitches)):
        events.append(dict(type='on', pitch=pitches[i], time=intervals[i][0], velocity=velocities[i]))
        events.append(dict(type='off', pitch=pitches[i], time=intervals[i][1], velocity=velocities[i]))
    # list.sort is stable: events sharing a timestamp keep insertion order
    events.sort(key=lambda row: row['time'])

    last_tick = 0
    for event in events:
        current_tick = int(event['time'] * ticks_per_second)
        # clamp on both sides; out-of-range velocities are invalid in MIDI
        velocity = max(0, min(127, int(event['velocity'] * 127)))
        pitch = int(round(hz_to_midi(event['pitch'])))
        # message time is the delta in ticks since the previous message
        track.append(Message('note_' + event['type'], note=pitch, velocity=velocity,
                             time=current_tick - last_tick))
        last_tick = current_tick

    file.save(path)


def process(input_file, output_file):
    """Parse ``input_file`` with ``parse_midi`` and write its notes to ``output_file`` as TSV."""
    midi_data = parse_midi(input_file)
    np.savetxt(output_file, midi_data, fmt='%.6f', delimiter='\t',
               header='onset\toffset\tnote\tvelocity')


def files(file_list, output_dir=False):
    """Yield (input_file, output_file) pairs for every MIDI path in ``file_list``.

    Parameters
    ----------
    file_list: iterable of file paths; wrapped in ``tqdm`` for a progress bar
    output_dir: directory for the ``.tsv`` files. ``False`` (the default) or
        ``None`` puts each TSV next to its MIDI file.

    Paths that do not end in ``.mid`` / ``.midi`` (any letter case) are
    reported on stderr and skipped.
    """
    for input_file in tqdm(file_list):
        stem, extension = os.path.splitext(input_file)
        if extension.lower() not in ('.mid', '.midi'):
            print('ignoring non-MIDI file %s' % input_file, file=sys.stderr)
            continue

        if output_dir is None or output_dir is False:
            output_file = stem + '.tsv'
        else:
            output_file = os.path.join(output_dir, os.path.basename(stem) + '.tsv')

        yield (input_file, output_file)
# ---------------------------------------------------------------------------
# Midi to tsvs MAPS
# ---------------------------------------------------------------------------
# NOTE(review): the MIDI files are globbed below './MAPS' while the TSVs are
# written below '../MAPS' -- confirm that both locations are intended.
midis = glob('./MAPS/*/MUS/*.mid')  # loading lists of midi files
output_dir = '../MAPS/tsvs'  # prepare a dir for the tsv output
os.makedirs(output_dir, exist_ok=True)

# Bind the result to a name: `process` returns None, and a bare expression
# here would make the notebook display one `None` per converted file.
tsv_job_results = Parallel(n_jobs=multiprocessing.cpu_count())(
    delayed(process)(in_file, out_file)
    for in_file, out_file in files(midis, output_dir=output_dir)
)

# ---------------------------------------------------------------------------
# Downsample 44100Hz wav files to 16000Hz flac files
# ---------------------------------------------------------------------------
# `wav_path` rather than `wavfile`, so the `scipy.io.wavfile` import from the
# first cell is not shadowed.
for wav_path in tqdm(glob('.././*/*.wav')):
    sound = AudioSegment.from_wav(wav_path)
    sound = sound.set_frame_rate(16000)  # downsample it to 16000
    sound = sound.set_channels(1)  # Convert Stereo to Mono

    sound.export(os.path.splitext(wav_path)[0] + '.flac', format='flac')

# ---------------------------------------------------------------------------
# Generate pseudo TSV files
# ---------------------------------------------------------------------------
# The PyTorch Dataset code requires both the audio files and the tsv files.
# Rather than writing separate Dataset code for the unsupervised dataset, the
# supervised Dataset code is reused, and it requires the tsv files.
# Therefore pseudo tsv files are created for the unlabelled data.

# Creating dummy tsv for the VAT
for wav_path in tqdm(glob('.././*/*.wav')):
    # swap only the extension; str.replace would also rewrite a '.wav' that
    # appears earlier in the path
    tsv_path = os.path.splitext(wav_path)[0] + '.tsv'

    # five identical placeholder rows of (onset, offset, note, velocity)
    notes = [(60, 60, 60, 60)] * 5

    np.savetxt(tsv_path, notes, fmt='%.6f', delimiter='\t',
               header='onset\toffset\tnote\tvelocity')