{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "MFA LJSpeech.ipynb",
      "private_outputs": true,
      "provenance": [],
      "collapsed_sections": [],
      "machine_shape": "hm",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "\"Open"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "bhYTF6XCOYAh"
      },
      "source": [
        "### Forced align LJSpeech dataset using Montreal Forced Aligner (MFA)\n",
        "\n",
        "\n",
        "**Note**: The notebook takes 20 minutes to finish.\n",
        "\n",
        "Expected results:\n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_twSwprjG_M9"
      },
      "source": [
        "%%writefile install_mfa.sh\n",
        "#!/bin/bash\n",
        "\n",
        "## a script to install Montreal Forced Aligner (MFA)\n",
        "## usage: bash install_mfa.sh [root_dir]   (root_dir defaults to /tmp/mfa)\n",
        "\n",
        "# stop at the first failing step so a broken download or install is not\n",
        "# followed by the DONE banner below\n",
        "set -euo pipefail\n",
        "\n",
        "root_dir=${1:-/tmp/mfa}\n",
        "mkdir -p \"$root_dir\"\n",
        "cd \"$root_dir\"\n",
        "\n",
        "# download miniconda3 (-O overwrites the installer on re-runs instead of\n",
        "# saving a numbered copy and executing the stale one)\n",
        "wget -q --show-progress -O Miniconda3-latest-Linux-x86_64.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh\n",
        "bash Miniconda3-latest-Linux-x86_64.sh -b -p \"$root_dir/miniconda3\" -f\n",
        "\n",
        "# install MFA\n",
        "\"$root_dir/miniconda3/bin/conda\" create -n aligner -c conda-forge montreal-forced-aligner -y\n",
        "\n",
        "echo -e \"\\n======== DONE ==========\"\n",
        "echo -e \"\\nTo activate MFA, run: source $root_dir/miniconda3/bin/activate aligner\"\n",
        "echo -e \"\\nTo delete MFA, run: rm -rf $root_dir\"\n",
        "echo -e \"\\nSee: https://montreal-forced-aligner.readthedocs.io/en/latest/aligning.html to know how to use MFA\""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "molbePbO8mlv"
      },
      "source": [
        "# download and install mfa\n",
        "INSTALL_DIR = \"/tmp/mfa\"  # path to install directory\n",
        "\n",
        "!bash ./install_mfa.sh {INSTALL_DIR}\n",
        "# smoke test: run the mfa CLI only if the aligner environment activated\n",
        "!source {INSTALL_DIR}/miniconda3/bin/activate aligner && mfa align --help"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ppEcCzZ2MZSp"
      },
      "source": [
        "# download and unpack ljs dataset\n",
        "!echo \"download and unpack ljs dataset\"\n",
        "# -c resumes a partial download and transfers nothing when the archive is\n",
        "# already complete, so re-running the cell does not fetch a second copy\n",
        "!mkdir -p ./ljs && cd ./ljs && wget -c -q --show-progress https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2\n",
        "!cd ./ljs && tar xjf LJSpeech-1.1.tar.bz2"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "j6XLgf6aMbgo"
      },
      "source": [
        "# install sox tool\n",
        "!sudo apt-get install -q -y sox\n",
        "# convert to 16k audio clips\n",
        "# (-p keeps the cell re-runnable when ./wav already exists)\n",
        "!mkdir -p ./wav\n",
        "!echo \"normalize audio clips to sample rate of 16k\"\n",
        "!find ./ljs -name \"*.wav\" -type f -execdir sox --norm=-3 {} -r 16k -c 1 `pwd`/wav/{} \\;\n",
        "# count only the audio clips; ./wav also holds the .txt transcripts once\n",
        "# the transcript cell has been run\n",
        "!echo \"Number of clips\" $(find ./wav -name \"*.wav\" -type f | wc -l)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZbNfEn5pMdOf"
      },
      "source": [
        "# create transcript files from metadata.csv\n",
        "from tqdm.auto import tqdm\n",
        "\n",
        "# every row has three '|'-separated fields: the clip id comes first and the\n",
        "# last field is the text written out as the transcript\n",
        "with open('./ljs/LJSpeech-1.1/metadata.csv', 'r', encoding='utf-8') as metadata_file:\n",
        "    lines = metadata_file.readlines()\n",
        "\n",
        "for line in tqdm(lines):\n",
        "    ident, _, transcript = line.strip().split('|')\n",
        "    # the transcript is stored next to the clip under the same basename\n",
        "    with open(f'./wav/{ident}.txt', 'w', encoding='utf-8') as transcript_file:\n",
        "        transcript_file.write(transcript)\n",
        "\n",
        "# this is an example transcript for LJ001-0001.wav\n",
        "!cat ./wav/LJ001-0001.txt"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "obtWj9_wMghH"
      },
      "source": [
        "# download a pretrained english acoustic model, and english lexicon\n",
        "# (-nc skips files that are already present instead of saving numbered duplicates)\n",
        "!wget -nc -q --show-progress https://github.com/MontrealCorpusTools/mfa-models/raw/main/acoustic/english.zip\n",
        "!wget -nc -q --show-progress http://www.openslr.org/resources/11/librispeech-lexicon.txt"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# see: https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/pull/480\n",
        "import re\n",
        "\n",
        "# rewrite every entry as: word, one TAB, then the phonemes joined by single spaces\n",
        "whitespace = re.compile(r\"\\s+\")  # raw string: \"\\s\" is not a valid str escape\n",
        "with open(\"librispeech-lexicon.txt\", encoding=\"utf-8\") as src:\n",
        "    with open(\"modified_librispeech-lexicon.txt\", \"w\", encoding=\"utf-8\") as dst:\n",
        "        for line in src:\n",
        "            line = line.strip()\n",
        "            if not line:\n",
        "                continue  # a blank line would otherwise become an empty-word entry\n",
        "            word, *phonemes = whitespace.split(line)\n",
        "            dst.write(f\"{word}\\t{' '.join(phonemes)}\\n\")"
      ],
      "metadata": {
        "id": "zf_ssMA8cbHw"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HcZE4uxyMhXg"
      },
      "source": [
        "# FINALLY, align phonemes and speech\n",
        "# (&& so that mfa only runs inside a successfully activated environment)\n",
        "!source {INSTALL_DIR}/miniconda3/bin/activate aligner && \\\n",
        "mfa align -t ./temp -j 4 ./wav modified_librispeech-lexicon.txt ./english.zip ./ljs_aligned\n",
        "# output files are at ./ljs_aligned\n",
        "!echo \"See output files at ./ljs_aligned\""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "0bWYNXRlLZ84"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}