Created
August 6, 2025 04:05
-
-
Save huseinzol05/94f263b26f01fddf4e0c1f25e9dc4cdc to your computer and use it in GitHub Desktop.
Accurate force alignment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/home/darshan.r/synthetic-dia/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", | |
| " from .autonotebook import tqdm as notebook_tqdm\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import torchaudio\n", | |
| "import torch\n", | |
| "import soundfile as sf\n", | |
| "from ctc_forced_aligner import (\n", | |
| " load_audio,\n", | |
| " load_alignment_model,\n", | |
| " generate_emissions,\n", | |
| " preprocess_text,\n", | |
| " get_alignments,\n", | |
| " get_spans,\n", | |
| " postprocess_results,\n", | |
| ")\n", | |
| "device = 'cuda'\n", | |
| "alignment_model, alignment_tokenizer = load_alignment_model(\n", | |
| " device,\n", | |
| " dtype=torch.float16 if device == \"cuda\" else torch.float32,\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# wget https://github.com/mesolitica/malaya-speech/raw/refs/heads/master/speech/example-speaker/93113.mp3\n", | |
| "gen_text = \"The tomato turned red because it was ripe! It's a natural process as the tomato matures and becomes ready to eat.\"\n", | |
| "language = 'ms'\n", | |
| "y, sr = sf.read('93113.mp3')\n", | |
| "new_wav = torch.from_numpy(y)\n", | |
| "audio_waveform = torchaudio.functional.resample(\n", | |
| " new_wav, orig_freq=44100, new_freq=16000\n", | |
| ").type(torch.float16).cuda()\n", | |
| "emissions, stride = generate_emissions(\n", | |
| " alignment_model, audio_waveform, batch_size=1\n", | |
| ")\n", | |
| "tokens_starred, text_starred = preprocess_text(\n", | |
| " gen_text,\n", | |
| " romanize=True,\n", | |
| " language=language,\n", | |
| ")\n", | |
| "tokens_starred.append('<star>')\n", | |
| "text_starred.append('<star>')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "segments, scores, blank_token = get_alignments(\n", | |
| " emissions,\n", | |
| " tokens_starred,\n", | |
| " alignment_tokenizer,\n", | |
| ")\n", | |
| "spans = get_spans(tokens_starred, segments, blank_token)\n", | |
| "word_timestamps = postprocess_results(text_starred, spans, stride, scores)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "tokenizer = alignment_tokenizer\n", | |
| "tokens = tokens_starred\n", | |
| "dictionary = tokenizer.get_vocab()\n", | |
| "dictionary = {k: v for k, v in dictionary.items()}\n", | |
| "dictionary_rev = {v: k for k, v in dictionary.items()}\n", | |
| "dictionary[\"<star>\"] = len(dictionary)\n", | |
| "blank_id = dictionary.get(\"<blank>\", tokenizer.pad_token_id)\n", | |
| "\n", | |
| "token_indices = [\n", | |
| " dictionary[c] for c in \" \".join(tokens).split(\" \") if c in dictionary\n", | |
| "]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "alignments, scores = torchaudio.functional.forced_align(emissions[None].cpu(), torch.tensor([token_indices]))\n", | |
| "alignments, scores = alignments[0], scores[0]\n", | |
| "token_spans = torchaudio.functional.merge_tokens(alignments, scores)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "groups = []\n", | |
| "current_group = []\n", | |
| "\n", | |
| "for span in token_spans:\n", | |
| " current_group.append(span)\n", | |
| " if span.token == 31:\n", | |
| " groups.append(current_group)\n", | |
| " current_group = []\n", | |
| "\n", | |
| "if len(current_group):\n", | |
| " groups.append(current_group)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[{'text': 'the',\n", | |
| " 'start': 23,\n", | |
| " 'end': 27,\n", | |
| " 'score': np.float64(-0.6402994791666666)},\n", | |
| " {'text': 'tomato',\n", | |
| " 'start': 28,\n", | |
| " 'end': 47,\n", | |
| " 'score': np.float64(-0.4833577473958333)},\n", | |
| " {'text': 'turned',\n", | |
| " 'start': 50,\n", | |
| " 'end': 65,\n", | |
| " 'score': np.float64(-0.6833902994791666)},\n", | |
| " {'text': 'red', 'start': 66, 'end': 79, 'score': np.float64(-0.312255859375)},\n", | |
| " {'text': 'because',\n", | |
| " 'start': 83,\n", | |
| " 'end': 98,\n", | |
| " 'score': np.float64(-0.23644147600446427)},\n", | |
| " {'text': 'it',\n", | |
| " 'start': 99,\n", | |
| " 'end': 101,\n", | |
| " 'score': np.float64(-2.0826568603515625)},\n", | |
| " {'text': 'was',\n", | |
| " 'start': 102,\n", | |
| " 'end': 111,\n", | |
| " 'score': np.float64(-0.04644775390625)},\n", | |
| " {'text': 'ripe',\n", | |
| " 'start': 116,\n", | |
| " 'end': 147,\n", | |
| " 'score': np.float64(-0.10152626037597656)},\n", | |
| " {'text': \"it's\",\n", | |
| " 'start': 163,\n", | |
| " 'end': 168,\n", | |
| " 'score': np.float64(-0.2649574279785156)},\n", | |
| " {'text': 'a',\n", | |
| " 'start': 169,\n", | |
| " 'end': 172,\n", | |
| " 'score': np.float64(-0.0223541259765625)},\n", | |
| " {'text': 'natural',\n", | |
| " 'start': 174,\n", | |
| " 'end': 190,\n", | |
| " 'score': np.float64(-0.03738205773489816)},\n", | |
| " {'text': 'process',\n", | |
| " 'start': 191,\n", | |
| " 'end': 213,\n", | |
| " 'score': np.float64(-0.4380640302385603)},\n", | |
| " {'text': 'as',\n", | |
| " 'start': 216,\n", | |
| " 'end': 221,\n", | |
| " 'score': np.float64(-0.01033782958984375)},\n", | |
| " {'text': 'the',\n", | |
| " 'start': 222,\n", | |
| " 'end': 227,\n", | |
| " 'score': np.float64(-0.08664449055989583)},\n", | |
| " {'text': 'tomato',\n", | |
| " 'start': 228,\n", | |
| " 'end': 245,\n", | |
| " 'score': np.float64(-0.16687647501627603)},\n", | |
| " {'text': 'matures',\n", | |
| " 'start': 247,\n", | |
| " 'end': 277,\n", | |
| " 'score': np.float64(-0.30379159109933035)},\n", | |
| " {'text': 'and',\n", | |
| " 'start': 279,\n", | |
| " 'end': 284,\n", | |
| " 'score': np.float64(-0.009943644205729166)},\n", | |
| " {'text': 'becomes',\n", | |
| " 'start': 285,\n", | |
| " 'end': 301,\n", | |
| " 'score': np.float64(-0.005782740456717355)},\n", | |
| " {'text': 'ready',\n", | |
| " 'start': 302,\n", | |
| " 'end': 312,\n", | |
| " 'score': np.float64(-0.00582733154296875)},\n", | |
| " {'text': 'to',\n", | |
| " 'start': 314,\n", | |
| " 'end': 319,\n", | |
| " 'score': np.float64(-0.0202484130859375)},\n", | |
| " {'text': 'eat',\n", | |
| " 'start': 322,\n", | |
| " 'end': 329,\n", | |
| " 'score': np.float64(-0.00466156005859375)}]" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import numpy as np\n", | |
| "\n", | |
| "merged = []\n", | |
| "for g in groups:\n", | |
| " temp, score = [], []\n", | |
| " if not isinstance(g, list):\n", | |
| " g = [g]\n", | |
| " if len(g) == 1 and g[0].token == 31:\n", | |
| " continue\n", | |
| " for g_ in g:\n", | |
| " if g_.token == 31:\n", | |
| " continue\n", | |
| " score.append(g_.score)\n", | |
| " temp.append(dictionary_rev[g_.token])\n", | |
| " if len(temp):\n", | |
| " merged.append({\n", | |
| " 'text': ''.join(temp),\n", | |
| " 'start': g[0].start,\n", | |
| " 'end': g[-1].start + (g[-1].end - g[-1].start) // 2,\n", | |
| " 'score': np.mean(score)\n", | |
| " })\n", | |
| "merged" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(21, 21)" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "text_nonstar = [w for w in text_starred if w != '<star>']\n", | |
| "len(merged), len(text_nonstar)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[{'text': 'The',\n", | |
| " 'start': 0.45880519480519477,\n", | |
| " 'end': 0.5385974025974026,\n", | |
| " 'score': -0.6402994791666666},\n", | |
| " {'text': 'tomato',\n", | |
| " 'start': 0.5585454545454546,\n", | |
| " 'end': 0.9375584415584415,\n", | |
| " 'score': -0.4833577473958333},\n", | |
| " {'text': 'turned',\n", | |
| " 'start': 0.9974025974025974,\n", | |
| " 'end': 1.2966233766233768,\n", | |
| " 'score': -0.6833902994791666},\n", | |
| " {'text': 'red',\n", | |
| " 'start': 1.3165714285714287,\n", | |
| " 'end': 1.5758961038961041,\n", | |
| " 'score': -0.312255859375},\n", | |
| " {'text': 'because',\n", | |
| " 'start': 1.6556883116883114,\n", | |
| " 'end': 1.954909090909091,\n", | |
| " 'score': -0.23644147600446427},\n", | |
| " {'text': 'it',\n", | |
| " 'start': 1.9748571428571429,\n", | |
| " 'end': 2.0147532467532465,\n", | |
| " 'score': -2.0826568603515625},\n", | |
| " {'text': 'was',\n", | |
| " 'start': 2.0347012987012985,\n", | |
| " 'end': 2.2142337662337663,\n", | |
| " 'score': -0.04644775390625},\n", | |
| " {'text': 'ripe!',\n", | |
| " 'start': 2.313974025974026,\n", | |
| " 'end': 2.932363636363636,\n", | |
| " 'score': -0.10152626037597656},\n", | |
| " {'text': \"It's\",\n", | |
| " 'start': 3.2515324675324675,\n", | |
| " 'end': 3.351272727272727,\n", | |
| " 'score': -0.2649574279785156},\n", | |
| " {'text': 'a',\n", | |
| " 'start': 3.371220779220779,\n", | |
| " 'end': 3.431064935064935,\n", | |
| " 'score': -0.0223541259765625},\n", | |
| " {'text': 'natural',\n", | |
| " 'start': 3.470961038961039,\n", | |
| " 'end': 3.79012987012987,\n", | |
| " 'score': -0.03738205773489816},\n", | |
| " {'text': 'process',\n", | |
| " 'start': 3.810077922077922,\n", | |
| " 'end': 4.248935064935065,\n", | |
| " 'score': -0.4380640302385603},\n", | |
| " {'text': 'as',\n", | |
| " 'start': 4.308779220779221,\n", | |
| " 'end': 4.40851948051948,\n", | |
| " 'score': -0.01033782958984375},\n", | |
| " {'text': 'the',\n", | |
| " 'start': 4.428467532467533,\n", | |
| " 'end': 4.528207792207792,\n", | |
| " 'score': -0.08664449055989583},\n", | |
| " {'text': 'tomato',\n", | |
| " 'start': 4.548155844155844,\n", | |
| " 'end': 4.887272727272727,\n", | |
| " 'score': -0.16687647501627603},\n", | |
| " {'text': 'matures',\n", | |
| " 'start': 4.927168831168831,\n", | |
| " 'end': 5.52561038961039,\n", | |
| " 'score': -0.30379159109933035},\n", | |
| " {'text': 'and',\n", | |
| " 'start': 5.5655064935064935,\n", | |
| " 'end': 5.665246753246753,\n", | |
| " 'score': -0.009943644205729166},\n", | |
| " {'text': 'becomes',\n", | |
| " 'start': 5.685194805194805,\n", | |
| " 'end': 6.004363636363636,\n", | |
| " 'score': -0.005782740456717355},\n", | |
| " {'text': 'ready',\n", | |
| " 'start': 6.0243116883116885,\n", | |
| " 'end': 6.223792207792209,\n", | |
| " 'score': -0.00582733154296875},\n", | |
| " {'text': 'to',\n", | |
| " 'start': 6.263688311688312,\n", | |
| " 'end': 6.363428571428572,\n", | |
| " 'score': -0.0202484130859375},\n", | |
| " {'text': 'eat.',\n", | |
| " 'start': 6.423272727272727,\n", | |
| " 'end': 6.56290909090909,\n", | |
| " 'score': -0.00466156005859375}]" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "alignment = []\n", | |
| "for i in range(len(merged)):\n", | |
| " alignment.append({\n", | |
| " 'text': text_nonstar[i],\n", | |
| " 'start': (merged[i]['start'] * len(y) / emissions.shape[0]) / sr,\n", | |
| " 'end': (merged[i]['end'] * len(y) / emissions.shape[0]) / sr,\n", | |
| " 'score': float(merged[i]['score'])\n", | |
| " })\n", | |
| "alignment" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "\n", | |
| " <audio controls=\"controls\" >\n", | |
| " <source src=\"data:audio/wav;base64,UklGRoIiAABXQVZFZm10IBAAAAABAAEARKwAAIhYAQACABAAZGF0YV4iAAAL5CrhRdza1krTCtKE0QLQKs3WyRDHYcV0xGPD8MH6wBHBncEDwifChsGyv4S9ELwku3S67ro2vXDAmsPIxorKJs9I1LzYZ9vW3KbeduGI5BHn1uho6iDtyPEz95/7Hf8vA5kI5w5VFSUbzx+ZIzMnWSocLLMszS1WMCUzCDVUN488c0RYS7tOoFCOVDhatV3/XKVaullpWRVXT1NhUalSyVWeWcpdZWEMZAxnMGu/bpZuP2g0XBxPt0VbP144ozAEK8YotyklLU8wUzB/LYYoPyAxFiwPEgzFCE4DpP2O+cr3o/fc9EHrZN1/0sfNtssNx+G+FLiJtx67N7woueu2RLnavfW/Sr6Tu7q5ara/rpCkeZwdmOmVGJXSlhOcU6TZrYm2vrxtwFjD18ZoyZfIXcXAw/PFYsofzmjPPc9a0WXY+uE86dPs5+/49ST/7AdPDBYMlQspD1sWUByBHWkcqR4mJSsqISkFJW4kESm7LdcsICi/JyYwLztUP6Y7yzc2OTI9mj1MOAsxyiy5K5Ep/iMKHnYbcRsyGmUW3RLsEvUVgRfuE2EMXAWrAQX/k/iv7YPkLONl57Do2uIK2xnZbt3P313ZiM26xbTFachLx13Bebsfux3ALMTQwrm+yryNvXm+Cr6VvKi7xLwev3bA+sBPwzHILc0b0KTRWdTB2RzgB+QZ5abmwOpo7yryBvTR90T+lgVrDO4S7Bg7HQIgCCOyJn8pgCtZL4I24j5BRbBI0EofTT9PY1ACUaNRNFKdVDxc82f1cDFzq3JXdWl7/3+mfkN3M26JaHFnKGh2Z3xjKlyCVB5RY1NlWKNcqV0vWWxPgkXAQJs/BTuLLnIdgxDdDpoWNx1oGgARNgm0BHT+avQg6tjh1tld0TfL3snmyyjNk8qnxC2+xLhFtNWwfK4zqzSlpJ4Im+qZ6Jc9lISRlZGqk6qW8ZpzoImkMKShoHKeUp9WoHKfZZ5an8yiQam+slO8HcLbwwbF7MgC0HDXLdvl2sXbR+LQ6zHyy/Nl9F73yfw3A2oJdQ5UEkgWTRtpIIIjFyQ3JEAmIirZLWYwDDPeNnM6qDvIOvE5pDkiOJY0BDEFMH4x1zI8Mmcw2S74LWktUSzxKJYi+xuSGKgXRxWRD7oI/wNLAmsBn/4T+oD2EPXn8xHxnOyX57TiFN6y2b3VC9IFznrKw8mmyw3M5siYxXrFuMbPxaXCwr//vj3AKMKZw3bEPMUwxnHHD8n3yl/N2dBM1X7ZXNx23pbhsOZw7LvwRPOQ9RH5Qf6TBHAKrQ50Er0XKx7QI4MosS1ZM7Y3OTrhPH5BrkadSadKOU1QUq9WdFiKWt5fvmYBbMVv8HN/eKx7f3y/ewF7Y3oJdzFuUGLCWWNXrVeIVWpPnEjuRtJMHlXvV4ZT3EyaSOJGWURePfsxfiYFHtsWUg+0CQgHuAIF+Ufu7OkM7Ors4uYL3XbWMdXE1PjQyso8xY7Ag7vItmWzy69GqoikJ6FTn2GcU5j+lfaW9JixmAiXfJhangekcqUCpOOi+qIlpDym/adeqJ6otaper3q2j75ExB/G0MdvzRXVktnu2Q7aR91/4z7qgu5i7xfwAfWy/fUEiAdKCIoMMRUHHcgePBwJHEQg3SPbIjcg8CDpJIwnQCb3IxklAim1KgQoLiTRIusjgiUOJvMkiiLaH6Yd+BsUGtkWOBK9DZEK8AfCBFcBq/7Y/Kz64/Zd8u3vXPC18GXurer554rmVOWM42bg5ds22HbXYdgN2F/WrtWP1i7XiNae1bbVDtfj2D3aMtvK3ELf6uHL5CHo3+pq7HXuaPK/9oX5hPul/qsCqgU2B2YJ1A0RE9sWoBmdHWYjTynDLeUw6DNVN7Y6Fj7hQTNFEEcLSQxNdVFsU2ZTUVQFWIJdAWGUXxtcdVy+YBpjLmDUWb5SE03pSrFLk0vAR4FBzzzWPFRAy0HjPfM39TTiNIc0rTKuL5wqkSK0GZATSxALDCAEpPpj8zPvLOzO6NvkWODt22jZl9kC2nfX2NJW0E3RodHJzPnDXL1ovMW8Abglr3Cpx6mnq/6qEqmSqIeprqoMrLaum7LCtam2ZLYqt965vb0IwSjCc8H6wGrCVsbWy5/POc8VztPRgNkv3wzhOOLj5ADpAu6G8lv1jfeu+kr+SAGZA6EFFAiQC9wO4Q8zD7EPTBL5FMYV+xRhFCYVlBZeF7EXZhjSGPsXKRdSGF4aMBp7F4QVOBZQF6cVWxFCDXILQQsxCsAGSQJM/2L+5f3s+/D3ffNU8Vfy2/Pa8kDws+8E8mbz1/AZ7arsc+5y7fXog+XA5S/nZOaW43XipOVY6nXrUelR6XftQ/Le9JH1QvVm9Q/4pPym/5n/5P44ALIDUAcwCdYJkwvsDvQRRhQCGEUdnSBjIIogvCUNLo4yyTCPLroxDTg6Oyw6mDlzPClAKEFAQAFBvkO1RPlBfT5UPVo9/ztrOZE3VTc3OGs5LTrgOZ84ijeyN/g3QTXZLjUprihSKu4mhR1bFeITsxU+FGoONQh8BLECZgE6AND+x/uQ9rXxnfAk8rzw++nL4h7h4eOr5A7gNdkD1UTUBNT70XfO+MoPyY/JOsucy27KF8q8y2PNMc0VzAXMFM2ezcrM6Mu6zMTOps+AzqPNps/R0/bWv9dQ2OvavN5P4e7h7OGz4tjkLOhu6/Hsxex27WbxX/cG+9H56fb597z9bAJKASr9tPx8ASYG2gXDArQC6AaWChsK7gdyCIEL0Qz8CeoFTgV/CLwKdQgwBDYDjAbGCcoIxwQ8AgYDmQQhBOIBuP98/tr9Y/0p/SP9J/xV+an23PaT+OP3Z/SF8YjxxvMC9u31f/Oq8XvyV/QI9bD0KPR58wfzz/O19fr2KvY19M7zI/Y++Un6z/hL95j4fvxq//P+QP0J/tYBKgbKCE0JvwjlCOsKIw5dEP4P7A0BDSkPQBP6FTwVBBPFE5kYAB3nHG0aeRr4HZMhbSIaIVggkSFtI1gkdCTYIwoi3h9+HnwdnByMHD0cShrwGNEazx15Hg8dqhuoG2Yd4R5OHSUaRBn0GV0YZhQbEWwPmg1aCioHGQf/CWsLMQnCBkoHewmGCuoIQQVZAsoBMAHJ/hj9pf2N/QD7d/gz+Eb5yvnW+If20/P/8dXxA/O78/LxUe6G7NDuivI5847wwO558CvzGPOc8A/vt+9f8ADvruyo6yXs/eze7ajukO7x7aXuCPEQ8zHzE/Je8RzyyfOf9LvzOPJ28bHxxfKA9Nv1uPWW9C/0WvUH92D3xfWt8xXzJfRJ9W71C/Ur9U32DviP+VX6YPqI+ST4yfcZ+eD5NvjK9Rv1xfX59Uz1VfSu8+jzC/Un9iz2JfWR9Mr1nfdX9/r0kPNs9IX1H/X886Xzd/RY9QL17fPB82r0HvTR8pvyLfST9Uv1f/Qr9Tz3z/jh+Cz4sff29xz5Mvrc+bj45fh1+lb74vp7+i379vwP/wcAdf8W/zMAkAG4ATYBRQE0AkcDoQO9A/YE0gZ7B1gHVQgPCrwKfgqsCoALKAz0C0sLawsjDNQL9wq9C5QNBQ4xDfsMyQ0lD3gQtxBGEP4QdBJsEkoRIxG7EagRERG5ENIQYBH1EeERYhEvET4RHhHHEMkQ1BFCEwgT5RCHD3YQ1RHIEcAQzg9TD1MPjg+/D9IPjw8EDwAPkw9VD8INWgwbDPAL0gqoCbkJMgpLCbcH4wdWCR8JZQcNB+oHVAfBBdkFOgcxB1EFuQPDA24EDASfApQBYAE4AckAZwAMAH//r/7H/Zf9dv7N/oT9j/ya/aL+jP3e+5v77/ti+0/6o/lm+VL5VPkc+W/49/dG+F74OPf09Qz2y/a+9vD1QfVx9Zb2N/fe9fHzufN29Cn0RvMx82Tzt/Kl8ZTxhPK18lrxRvAF8TbyO/LP8ffxXfJq8qrxCPDq7pvvofDi70juNO7v77Lx2fFb8Ajvpu9c8cPxi/DB70/w8PDh8MbwufAu8I/vf++q7w7wH/EE8vHxX/JI9Kb17/TE8+fz+PTI9Zr1u/R59C31xPUk9gH3hPfn9pr2mvfC+Hb5VvpQ+y/8Vf0z/gH+vP1Z/vH++/4o/4n/+v/2AOIBogFpAbAC9gOkA+QC1AI/A7MEJwdFCBgHDQbrBq0IwwmDCXUIOAhGCTEKegrnCgULYwpUCigLUQvACu4KDAxZDUQOIA5ADSAN2g0YDqQNHQ2zDNQMxQ2kDskOjw4tDvQNVQ59DqkNLw0hDjMPJA9SDpINpQ1TDgYOrQyUDCoOCA9TDqAN1A1mDmoOUg3RCykLcQsMDF8MmwtoCiALcQ3+DSYMxwpCC5EMYA1lDAMK/whgCqILEAuqCYIIAwiBCFkJgAn5CFwIJAh6CKwIHwiHBzQHYgaeBf8FSgZIBXQEqgRoBG4DPQO5A3oDYQJAAboADQFvAZcA2P7l/Tf+eP6q/WD8wvsI/FL8zfuz+uj5uPlg+VT4WfdM95H3FPcR9oL1jPV69QD1hPQV9FXzyvJJ85rz9vHQ79nvKvEd8Q/wyO8G8ADwNvCi8DzwHe+J7tzuNu/87lXuoe1U7dftru6+7hTuDe4J77/vSO+f7kjv4PA98dzvB+/x7+zw+/AR8U7xRPGu8Z/y7fLX8nDzMfRg9Hr0vvQw9VH2ZvcH9yP2rvY9+C35S/lb+cH5K/oj+k36b/tw/Cr87fvt/Of9z/1q/X/9Sv6f/00Arf8p//z/ZQFBAjcCmgFcAckBBgIRArkCSwPoArMCkQNnBGgEGQTsAx0EngTLBJ8E0gQhBRsFhwVfBkAGawV1BSIGVgZiBrEGsQZQBjIGewbXBv4G8gYfB7EHCwjOB2gHTweoB0sIcgjLB3EHJAjOCHQI2QfxB5YINgmdCdoJ4wmdCUIJQgl3CWoJJQnzCP0IgwlkCt0K2Ar2ChUL2ArvCo0LhwvDCpYKAwvzCqwK8QodC4wK9wkTCpMK1AqWClQKwwppCwsLBwrUCUUKKAqaCS8JpggyCIUIBwm5CAwIpAcaB18GDgYUBvMFlwUIBXMETgRvBB4EWQOhAgcCigFQASYB0AB4ACAAiv/K/kv+Sf5O/mT9xvs5+wj8A/xG+tb4Dfnd+Qv6hPmo+N73afdQ9173BvcV9oP19/VZ9t31SvUD9YL0AvT28/jz1vO882rzAPMy86fzhPMh8x7zQPNY83DzXvNu8/rzUfT0853zwvP/8zn0Z/RT9Fn0uPTY9Mb0TvUH9hv2Pfb29lD3CvdA9wH4NfjQ99n3n/hO+Sr5mvih+HD5UPqp+mf6BfpU+kv72vu++8H7EPxl/PT8fv1O/dn8CP1z/Xr9jv3t/R3++P3X/R3+5v5s/+D+J/6M/nH/uv+x/7f/lv+0/1IAjQASANz/SwC3ANYAzQCuAKgA6QBDAYMBlwGKAaAB+QFNAoICugLNAsYC7QLuApQCxQK2AxAEhANZA9YDNgR6BNUE9AQYBZcFzAVdBSIFpwWLBicH8QY7BjAG1gYXBxYHoQcVCMAHjQcTCJQItgjACKQIdAiNCNwIJwlZCTcJ+whOCcwJigkVCVkJzgmwCT0J/Ag4CbsJownjCL8IYQluCacIEwj9Bx0IZghvCNQHOwdMB58HsAdwB/sGkgY4BpYFCwU2BVUFmwQEBEwEeATlAw8DZgI4Am0CJAJhAV0B+AG6AY0Anv92/8//1v/o/gH+Uf6w/sn9z/zr/Dn9GP3V/Dn8j/vL+0L8s/vW+sf6+vrL+nb6L/oJ+hb6IPoE+sb5Xvk5+br5+fk2+bP4avkD+nf53vgQ+WP5TfkS+S/5qvmp+cn4Yvg8+dz5avkc+Yj58vkO+uP5fvl/+f35+vl++af5OPok+tn5PPrW+s76Q/rR+eH5Y/rV+un64vrV+pP6aPrC+k/7ePs7+w/7UfvQ++77n/uD+5v7c/ts++T7JPzI+4j73Ptk/JP8P/wE/I/8Sv0+/cf8vfwd/X39kP1L/T390/1Z/h7+wP0N/qn+AP8S/+f+u/4m/8r/rf9L/6L/PwCVANQA0QCrAAABagFrAacBKgJaApwCAQOiAuwBOwJZAwIE0QNpA5cDTQSVBC4ECQSVBBgFPQVaBUUF7AQWBcIF1wV3BZwF6QWrBWIFnAUSBh0GkwVBBZMFsAWIBQoGjQbuBUMFswU6Bu8FmgW4BZIF8wS8BEYFyQWbBcUE4QPTA40EwQQJBIMD4wOXBKoExgO3AokCHANpA/sCfwKJAnoC4AGmAQoCCQKyAbIBjAESAQ4BSwH0AGoAWgCHAJAAbQD3/17/kv93AGwAWP8e/9r/7P9y/2//bv/1/q/+/v5g/0z/1f6Q/rn+2v6b/lH+gv71/uT+af5O/nr+fP6O/pr+Z/6C/vv+9f48/nf9VP0Q/tr+tv4f/gX+Jv75/cj9Af5o/oP+Rf7Q/Vj9g/1b/pn+sv3g/AL9eP1t/R/9df33/Ub9X/wW/QD+Kf0O/G38c/3Q/Sz9JfwO/Av9mP3p/Oz70fug/BT9ffz2+2H86PzM/H78e/ym/Mb8yPyx/Jz8ovyd/Hj8rPw5/Sj9fPyU/Ib9A/7I/Xb9K/3t/CX9D/4G/8H+Sv3m/JX+0P+1/m/9dP5YADwAgv4a/or/SwCQ/23/owBdAZQAef9+/7cAyQFpAVIAawDJAXIClAHFACcB0gHzAdkB4AE/AsgChQKYAbsB/AJcA2kCiwG0Ae0CFQQ9A+4AjQANA+kEnQNSAR8B6QI1BEcDQAETARkDKATQAmABoAGkArkCdAHVAIYC1gPbAa7/MAHBAyoDqwD9/40BvwLOAUUAuQCJAsECAQHI/2AAuQEoAvUAoP8jAIQBjwGoABoAIADzAKoBowBj/zcAagHrAPv/w/8HAKwA5wBOAEYA+ACuAK//kf8HAHYAIgE1AQYAY/83AKYAGwAgAHUACACo//n/dgATASEByf/W/gwAeQHsAIP/M/8ZANYAbQCL/z3/lv9GAHwAT/8n/ir/AwH/AJ3/6P4f/5L/2P94/9f+Pv8eAIj/M/5p/mH/Gv9q/rL+Pv8h/3b+8f0v/oz+X/7e/h4AcP/a/HP89v4BAAD+zfyK/vD/dP6P/BP93v5s/23+hf3C/SD+iv0X/Rb+2P+vAGP/pPzF+0D+TgDh/s38oP0XANEABP8n/Xb9w/4s/47/awD3/5P+0v6u/6n+0v2h/3UB4gDN/4n/Yf+y/zQAov91/+MAEQFm/4n/gQHiAV8A8f4U/yQBVQJ9AD//YQENAyoBcP6Z/qsBiQM1AYb+FADvAjgCOP83/pQAbQOvAg3/pf0DAIoC7QLQAcz/af4DAK8CsQF2/t/+YAL7ApH/Ff02/yYDygKf/tL9XAGiAvD/nv0s/v0AJQNeAev9Jv5dAVMC2//A/fX+ogH0AbT/Gv7l/q0ACwGT/6T+x/8GAW4AVv+z/50AHwAF/17/tgBgAQcBAAAK/xP/sf8+ALIAZQCv/ysAIwHVAMn/xf6O/lsAJwIQAWz/+P+lAB4ADwCZAIYAEgD9/0kAmgCPAB4A1/8iADIAWP8h/5QAWAH9/9j+oP/4APAAnf8z/0sAfQB9/0X/Rf9K//AAtAHf/tD87f50AXEBAACV/o7+of+O/93+gf/x//L+h/5M/+H/3P8P/xv+C//XAOH/Xf2l/QAAtwCX/2z+M/4+/yEAVv9k/rz+Kf+c/3EAtv+7/ef94v90AJP/GP+P/xkAUv8u/oP/CwKyAQP/r/16/s//hABeAPH/xP/5/4UAnwDS/0T/sv+VAHYBcwEwACb/H//X/3EBFwIcAAb/KQFLAokAjP9jANMAXgA0ADgBFAKYAMv+1f9NAbUAegADARMAt/8kAlgDPwBp/Sz//QFXAcX/rQCRATMADv+g/3QA8wApAZIAyf8KAO4AJQFZAEL/8/7M/30ABwDn/+kA7QCY/0j/t/9U/4f/MgFFAisBYP6r/O/+lQKjAv3/V/5a/mD/bACVAMIAzwC7/4v/jwBs/239vP7IAekCUQFn/g7+QgFJAkz/Kv6UAB8CNQHi/6D/DgDO/9f+o/4xANUCVQP1/3v9x/+PAugBr/9K/nf/iAKgAmj/Sv5B/1T/iAABA9QCkwB2/2L/b/+s/9f/0v/k/+wA4gJ/Ajr+WftE/hAD4gPwALv+UP8+AFT/6P0i/jUASgLkAWD/CP4u/34AWP/0/Fj+RgMRA238qPrOACEEgwDB/Mv8c/9IAgkCJ/8G/p7+tf1b/YMAOgOBAXb+nf2x/ZL9Vv4AABIBiwBP/1j/FABH/yX+GP/pAHUBTAAg/lb9dv/qAYYBVv4S/CT/hwQoA3b7EfnBALwI7AXR+zP42v1zAloC1AErAT4ARADt/t38S/6EAb8CqQHI/vX9wQEmA+H+W/0pAXIDgAHf/cv8rQDoA0QByv2C/skBNASLAUf76PrqAP4C+//U/ncAdAJJA0gB4f1x/L/9mgAPAnUA1f/CAfsAL/0u/KX+UgHgAuACwAGZ/yf8d/vg/6MCVwBq/40BRwEN/nz8Qv9NA9MBb/zN/cUEvQPR+hL4Q/7FA8IEzwPqAJL8ZPut/gwCFwP8AdL+qv2vAakDYv73+lL/yQILAfwAHAN4AG/73PwKA4sEhQBR/kkAqwHP/z3+wv8DAeD/kQDlAykEQwDW/Az9yQA/AzQAo/yA/U4AIwSkBscBXPrP+bP+RgWFCGIBpfk//p8DQACQ/uX/CP9JAV0DKP0K+fz+VQTAA2kC8gCf/b36tvyGAqQC/vuB+6wClwWBAgr+IPol/NUCWAM//xsAbwHq/Av6Rv6rA0wEiv7J9pr5JQgvDVz/CvQT+k8E+gM8/PX5owFRB9EBSPvQ/QkCt/6o+YT+OQlqCHP7W/VE+0YAtADEAhUGEwUF/nD4Pf6rB8gCk/VU92YILhEYBfry0PBl/i4JkweQ/oT5/QDFCcoBL/N59OoDfRDHDJv4P+yW+yQQ5wsI+S/yS/wACpoMHwEY9Qj2wgAGCaoHnf679rH3z/+jB64KHAdW+yPvsPQPC2kTfP9I7Gn2khGmF2P5Pd2T7xkZkx46+wHiJfOEEyIUK/cz6nr9dBJbDQn2D+12/10PFgYa+Xb7VQE2Ae3+zP2U/zkAIPwD/msI8Qd++Sj0HABdDIsJbfoY8Zb4hwUNC1oJ+v8q86X0nQVSDcECgvdP+I0EKBCJBgju4+tpBmAX4ghh8E/uJQa5F6oKYvDq5xT5WBKIGA8CAOke7mcILxH5AnT5/fr//2UJXQlr+azzgv0fAxYG+AXf+9v5dwNuAxP9/vjn9d0DChoZDsjqwOQc/4cTUwya9O3tpAWZGBMJQu7C6dn65AzFDqQBc/np/N/9CfuM/c0CgAYbBeL7EPeH/oYFDwQ7/Nrzdvm5C44StQQK7+TmivzPGFISDfRo61kBEhdHDt/sxd349r0XahscBp7xNe5/+XkH+A6ACM302uyXA6Qa0gtk6QjiUv9SHFsULva77ioBsQq8AVf5qfos/pL/lwQTCvwDDPlW+If+uQNCBDX8XfbGAJYOcwqA+G7v6PmvCSENFwOA9VX0KgbXEqoCWOoF7qEKJBscC57vFunM+08OeAmA92z0wAK9DgEMbvro6kz0Bg1nFSkFIe+m7ZgFqRI5/jbtX/whD+YJrvhN864A0wyHA1T2Wf4HCywBFO4a8swLKheCA5LuIfd3DZoLbvGz6EcCvRimDYH0Xe63/zoPMweo9ELyPQPVE1MNdvSC7Er/RA6fCX388fPP+NkEpQc1ABX6PP2ABVoFTf+mABQCS/zg+ov+//5KAWAJLw26ACTsYu15CRgY5wFS6RH37hktGlfwC9g88x4YqBeb/YLxhv2FCzIHevVu8EEC8w+TBX31qfQ+ACYGNfxZ94EIuhQrB7zxw+Zm8nwR0xtNAbzrnPK2BOcShg2t8l/mJvrADQwMtAF7+9j9twLt/KH1wf+fDksLQPlY64/yvQobFHkDF/Ul+LgCPQz5B37zwOvL//ISwAzQ+Z3yf/vIA/ICSAISA+X/XABmBen/fvIH85wFoRQIDPfzUum49v8JCBJhDVQBPvPo65P40xE+ExT0wOEJ/Z8nVSc38NrESeS9JBAthvjs02zurSB7JTr2VNLQ6ZIfsC5rAwzXX+GTDGAdyQfQ7uDtMwRLGDYORPL+6IL12QR0Dm0KsPtu9Zn6dQIfCosFrvKo7vQGEhw9DzHureHC+DAVqBKC9hXrSAOeGxgQJu352ZXuCRhuJ0QMXuV03H4BISsgGS3boMuGCPY/WCCSz6G9lQVzRKokENUlx/oMzEAkG1XQ28GfAkk+ECpu6cDK5+TnGkwyfwi01xnmaRNbHUkBoeI535z+sCX+KW/+HdGN3vgSsSKzBd3xCPnGBLYBPPLa8WwJ/RbnCQH4EvFT+CgGcQUy+Vf53wP4DOwRTQOT5FjijQaRHBkIKe108v4O4xrvAk3jRePiBDol6R018vHY1fCYEA4QHvxC8Ov5FQ6BEIQBWvgc9pj29f0YBSoJAQsv/v3rk/PsDnAY4gJd5w/nLgMMHIcZ8P7+5AvpsggzHYEOa+9E4bDydxMxHdr+UeN9+nIgYQ7T2XrbphsZPAIGlMFX0aAhFkFqA1XDQuA5MghBy/CNrSXWpzaET38BTbJBxhUflEaNEVvO9s77EPBGdSQCz2G6//WYKe0mVv8V3u3vohl+ExPrDuSNAYwatxhPABbunfA48fnxqgzjI5wO/uk14MXzbxX7Htf8XOZ8/mgRWgJV8tzz8QKFELsEBvHs+r8N+AQr8YDuuP12EIsQJfuO7838AQv5CPH/S/vJ+ij7YwGwDe4MJPaW4eLpig2KLqwmM/Ixwr/PFRfMSLAk8NTDturpVTaSQF725bs038oorz3vDsDVGdOt/W8UoAoNAgEBdgC7AEf+mP5DAkj5ge9R/8sUwA8++tbtiveoD4AU1/qZ67r+aBLBBJLm/uleGWwwjf5zxHHVdhj+NHwPYOJW5EYEoBQUC9f7D/eL+Dj5kQIgEmEMofAZ5XH7LxijGED8yOW+71UKMhOFAs/vY+/L/uoQiRXx//PhIuVkCpYcAwbg9IcFJBStAkbnRObRA6YcUBBQ8OLnz/3cDZMCXfnWCcwRJfqa7KECORK2+hneUuzWGV4tIQ2H3mrZRQVzKIQU/uvA6PgEJBEQ/5rvovlXBXT7A/NMB8ghrhaF6anPu+k2HMYxbBJo4n3bdv3PF0EVOgFP7ervygfzE7AH+vIc5PDsOg8AIR4JgOyG75sGUw/G+XzlnflGHO0YAfdc5tf5FxJqBoXo1e+kGLgnpQVF2s3ZOQOQHu8MmuxP5aT/OR/xGi70qNos730eaTDuBXLRZ9nFDNIgxP9S3bHwMSojOOH4+rwa1RccBjXFCbDcqOi8D+gRgfSs7KoGTB2fEgn3M+/K/CgG5wAM9OLwEgTqFkgLUu8V5Uz26hMbILEHAOVY5TAG8RhjCevxleww97wDVA7cFRUJI+Z02d/8riZvKmEJ0OCo2L/6SxqLEtf0heSx81IXuSkYEmLkn80O6qIguDAXCIvjq+9iDYwSjP3052buPAvrE7j6CuZm9vgY8Rt99Q7bf+8+FfQkUg/c45fV9PwOKWojqvQu0oTlzh7BPQIY4tEQvqb2nTQsMK/7q9n25R4DoQq3/t77rQaoClD+RfHs+UIQYxBx+S3z0gRvEF0IMvI64WbucQuXFAUMFwHB9A/xofqRBxQUdBI8+Z/nBPScB7sMDQET8ETznAsTGSQNevbV6OfxKAsFGw==\" type=\"audio/wav\" />\n", | |
| " Your browser does not support the audio element.\n", | |
| " </audio>\n", | |
| " " | |
| ], | |
| "text/plain": [ | |
| "<IPython.lib.display.Audio object>" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import IPython.display as ipd\n", | |
| "ipd.Audio(y[int(alignment[8]['start'] * sr): int(alignment[8]['end'] * sr)], rate = sr)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "0 {'text': 'The', 'start': 0.45880519480519477, 'end': 0.5385974025974026, 'score': -0.6402994791666666}\n", | |
| "1 {'text': 'tomato', 'start': 0.5585454545454546, 'end': 0.9375584415584415, 'score': -0.4833577473958333}\n", | |
| "2 {'text': 'turned', 'start': 0.9974025974025974, 'end': 1.2966233766233768, 'score': -0.6833902994791666}\n", | |
| "3 {'text': 'red', 'start': 1.3165714285714287, 'end': 1.5758961038961041, 'score': -0.312255859375}\n", | |
| "4 {'text': 'because', 'start': 1.6556883116883114, 'end': 1.954909090909091, 'score': -0.23644147600446427}\n", | |
| "5 {'text': 'it', 'start': 1.9748571428571429, 'end': 2.0147532467532465, 'score': -2.0826568603515625}\n", | |
| "6 {'text': 'was', 'start': 2.0347012987012985, 'end': 2.2142337662337663, 'score': -0.04644775390625}\n", | |
| "7 {'text': 'ripe!', 'start': 2.313974025974026, 'end': 2.932363636363636, 'score': -0.10152626037597656}\n", | |
| "8 {'text': \"It's\", 'start': 3.2515324675324675, 'end': 3.351272727272727, 'score': -0.2649574279785156}\n", | |
| "9 {'text': 'a', 'start': 3.371220779220779, 'end': 3.431064935064935, 'score': -0.0223541259765625}\n", | |
| "10 {'text': 'natural', 'start': 3.470961038961039, 'end': 3.79012987012987, 'score': -0.03738205773489816}\n", | |
| "11 {'text': 'process', 'start': 3.810077922077922, 'end': 4.248935064935065, 'score': -0.4380640302385603}\n", | |
| "12 {'text': 'as', 'start': 4.308779220779221, 'end': 4.40851948051948, 'score': -0.01033782958984375}\n", | |
| "13 {'text': 'the', 'start': 4.428467532467533, 'end': 4.528207792207792, 'score': -0.08664449055989583}\n", | |
| "14 {'text': 'tomato', 'start': 4.548155844155844, 'end': 4.887272727272727, 'score': -0.16687647501627603}\n", | |
| "15 {'text': 'matures', 'start': 4.927168831168831, 'end': 5.52561038961039, 'score': -0.30379159109933035}\n", | |
| "16 {'text': 'and', 'start': 5.5655064935064935, 'end': 5.665246753246753, 'score': -0.009943644205729166}\n", | |
| "17 {'text': 'becomes', 'start': 5.685194805194805, 'end': 6.004363636363636, 'score': -0.005782740456717355}\n", | |
| "18 {'text': 'ready', 'start': 6.0243116883116885, 'end': 6.223792207792209, 'score': -0.00582733154296875}\n", | |
| "19 {'text': 'to', 'start': 6.263688311688312, 'end': 6.363428571428572, 'score': -0.0202484130859375}\n", | |
| "20 {'text': 'eat.', 'start': 6.423272727272727, 'end': 6.56290909090909, 'score': -0.00466156005859375}\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for i in range(len(alignment)):\n", | |
| " print(i, alignment[i])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "dia", | |
| "language": "python", | |
| "name": "dia" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.10.12" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment