Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save amoux/57f9e4bcb0e807300357ed96b3e42da4 to your computer and use it in GitHub Desktop.
Save amoux/57f9e4bcb0e807300357ed96b3e42da4 to your computer and use it in GitHub Desktop.

Revisions

  1. amoux revised this gist Mar 28, 2020. 1 changed file with 12 additions and 1 deletion.
    13 changes: 12 additions & 1 deletion simple-node-graph-dawg-for-dna-sequences.ipynb
    Original file line number Diff line number Diff line change
    @@ -8,14 +8,25 @@
    "collapsed_sections": [],
    "machine_shape": "hm",
    "mount_file_id": "1V0A6wSvlKTa5zayGpr_H_OOT5fSV4hyq",
    "authorship_tag": "ABX9TyM9718wavG3XZbmCQVjyL/b"
    "authorship_tag": "ABX9TyM9718wavG3XZbmCQVjyL/b",
    "include_colab_link": true
    },
    "kernelspec": {
    "name": "python3",
    "display_name": "Python 3"
    }
    },
    "cells": [
    {
    "cell_type": "markdown",
    "metadata": {
    "id": "view-in-github",
    "colab_type": "text"
    },
    "source": [
    "<a href=\"https://colab.research.google.com/gist/amoux/57f9e4bcb0e807300357ed96b3e42da4/simple-node-graph-dawg-for-dna-sequences.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {
  2. amoux created this gist Mar 28, 2020.
    674 changes: 674 additions & 0 deletions simple-node-graph-dawg-for-dna-sequences.ipynb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,674 @@
    {
    "nbformat": 4,
    "nbformat_minor": 0,
    "metadata": {
    "colab": {
    "name": "Simple Node Graph (DAWG) for DNA Sequences.ipynb",
    "provenance": [],
    "collapsed_sections": [],
    "machine_shape": "hm",
    "mount_file_id": "1V0A6wSvlKTa5zayGpr_H_OOT5fSV4hyq",
    "authorship_tag": "ABX9TyM9718wavG3XZbmCQVjyL/b"
    },
    "kernelspec": {
    "name": "python3",
    "display_name": "Python 3"
    }
    },
    "cells": [
    {
    "cell_type": "markdown",
    "metadata": {
    "id": "fgdJfi5G2YCf",
    "colab_type": "text"
    },
    "source": [
    "# Node Graph\n",
    "\n",
    "> Simple implementation of a ***Directed Acyclic Word Graph (`DAWG`)*** for dna sequence autocompletion.\n",
    "\n"
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "AmbZDFFk6a3E",
    "colab_type": "code",
    "colab": {}
    },
    "source": [
    "from typing import List\n",
    "from pathlib import Path\n",
    "import json\n",
    "import collections"
    ],
    "execution_count": 0,
    "outputs": []
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "oDqJITx36g3e",
    "colab_type": "code",
    "colab": {}
    },
    "source": [
    "class Graph(object):\n",
    " def __init__(self):\n",
    " self.children = {}\n",
    " self.token = None\n",
    "\n",
    " def insert(self, token: str, add_token=True):\n",
    " node = self\n",
    " for char in token:\n",
    " if not char in node.children:\n",
    " node.children[char] = Graph()\n",
    " node = node.children[char]\n",
    " if add_token:\n",
    " node.token = token\n",
    " return node\n",
    "\n",
    " def transverse(self, query: str):\n",
    " node = self\n",
    " for char in query:\n",
    " child = node.children.get(char)\n",
    " if child:\n",
    " node = child\n",
    " else:\n",
    " break\n",
    " return node\n",
    "\n",
    " def descendant_nodes(self):\n",
    " deque = collections.deque()\n",
    " for char, child_node in self.children.items():\n",
    " deque.append((char, child_node))\n",
    "\n",
    " while deque:\n",
    " char, child_node = deque.popleft()\n",
    " if child_node.token:\n",
    " yield child_node\n",
    "\n",
    " for char, grand_child_node in child_node.children.items():\n",
    " deque.append((char, grand_child_node))\n",
    " \n",
    " def __repr__(self):\n",
    " return f'<Graph({self.token}, window={list(self.children.keys())})>'"
    ],
    "execution_count": 0,
    "outputs": []
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "LGaQEoA88MKq",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 34
    },
    "outputId": "0840b3e2-7055-4078-b9e4-387e40057030"
    },
    "source": [
    "covid_seqs_dir = Path('/content/drive/My Drive/covid-19/covid-sequences/data/')\n",
    "\n",
    "covid_seqs = {}\n",
    "for jsonfile in covid_seqs_dir.glob(\"*.json\"):\n",
    " name = jsonfile.name.replace(jsonfile.suffix, \"\")\n",
    " with jsonfile.open(\"r\") as file:\n",
    " covid_seqs[name] = json.load(file)\n",
    "\n",
    "covid_seqs.keys()"
    ],
    "execution_count": 3,
    "outputs": [
    {
    "output_type": "execute_result",
    "data": {
    "text/plain": [
    "dict_keys(['fluseq', 'sarseq', 'covseq'])"
    ]
    },
    "metadata": {
    "tags": []
    },
    "execution_count": 3
    }
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "ctvIPfSQ-Anr",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 54
    },
    "outputId": "ed5fcba7-a98b-4e17-dc32-b3b5a33d4382"
    },
    "source": [
    "sequences_index = {}\n",
    "for key in covid_seqs.keys():\n",
    " for seqid, sequence in covid_seqs[key].items():\n",
    " if seqid not in sequences_index:\n",
    " sequences_index[seqid] = sequence\n",
    "\n",
    "sequences_index['AF389116']"
    ],
    "execution_count": 4,
    "outputs": [
    {
    "output_type": "execute_result",
    "data": {
    "text/plain": [
    "'AGCGAAAGCAGGCAAACCATTTGAATGGATGTCAATCCGACCTTACTTTTCTTAAAAGTGCCAGCACAAAATGCTATAAGCACAACTTTCCCTTATACTGGAGACCCTCCTTACAGCCATGGGACAGGAACAGGATACACCATGGATACTGTCAACAGGACACATCAGTACTCAGAAAAGGGAAGATGGACAACAAACACCGAAACTGGAGCACCGCAACTCAACCCGATTGATGGGCCACTGCCAGAAGACAATGAACCAAGTGGTTATGCCCAAACAGATTGTGTATTGGAAGCAATGGCTTTCCTTGAGGAATCCCATCCTGGTATTTTTGAAAACTCGTGTATTGAAACGATGGAGGTTGTTCAGCAAACACGAGTAGACAAGCTGACACAAGGCCGACAGACCTATGACTGGACTCTAAATAGAAACCAACCTGCTGCAACAGCATTGGCCAACACAATAGAAGTGTTCAGATCAAATGGCCTCACGGCCAATGAGTCTGGAAGGCTCATAGACTTCCTTAAGGATGTAATGGAGTCAATGAAAAAAGAAGAAATGGGGATCACAACTCATTTTCAGAGAAAGAGACGGGTGAGAGACAATATGACTAAGAAAATGATAACACAGAGAACAATAGGTAAAAAGAAGCAGAGATTGAACAAAAGGAGTTATCTAATTAGAGCATTGACCCTGAACACAATGACCAAAGATGCTGAGAGAGGGAAGCTAAAACGGAGAGCAATTGCAACCCCAGGGATGCAAATAAGGGGGTTTGTATACTTTGTTGAGACACTGGCAAGGAGTATATGTGAGAAACTTGAACAATCAGGGTTGCCAGTTGGAGGCAATGAGAAGAAAGCAAAGTTGGCAAATGTTGTAAGGAAGATGATGACCAATTCTCAGGACACCGAACTTTCTTTCACCATCACTGGAGATAACACCAAATGGAACGAAAATCAGAATCCTCGGATGTTTTTGGCCATGATCACATATATGACAAGAAATCAGCCCGAATGGTTCAGAAATGTTCTAAGTATTGCTCCAATAATGTTCTCAAACAAAATGGCGAGACTGGGAAAAGGGTATATGTTTGAGAGCAAGAGTATGAAACTTAGAACTCAAATACCTGCAGAAATGCTAGCAAGCATCGATTTGAAATATTTCAATGATTCAACAAGAAAGAAGATTGAAAAAATCCGACCGCTCTTAATAGAGGGGACTGCATCATTGAGCCCTGGAATGATGATGGGCATGTTCAATATGTTAAGCACTGTATTAGGCGTCTCCATCCTGAATCTTGGACAAAAGAGATACACCAAGACTACTTACTGGTGGGATGGTCTTCAATCCTCTGACGATTTTGCTCTGATTGTGAATGCACCCAATCATGAAGGGATTCAAGCCGGAGTCGACAGGTTTTATCGAACCTGTAAGCTACTTGGAATCAATATGAGCAAGAAAAAGTCTTACATAAACAGAACAGGTACATTTGAATTCACAAGTTTTTTCTATCGTTATGGGTTTGTTGCCAATTTCAGCATGGAGCTCCCCAGTTTTGGGGTGTCTGGGATCAACGAGTCAGCGGACATGAGTATTGGAGTTACTGTCATCAAAAACAATATGATAAACAATGATCTTGGTCCAGCAACAGCTCAAATGGCCCTTCAGTTGTTCATCAAAGATTACAGGTACACGTACCGATGCCATAGAGGTGACACACAAATACAAACCCGAAGATCATTTGAAATAAAGAAACTGTGGGAGCAAACCCGTTCCAAAGCTGGACTGCTGGTCTCCGACGGAGGCCCAAATTTATACAACATTAGAAATCTCCACATTCCTGAAGTCTGCCTAAAATGGGAATTGATGGATGAGGATTACCAGGGGCGTTTATGCAACCCACTGAACCCATTTGTCAGCCATAAAGAAATTGAATCAATGAACAATGCAGTGATGATGCCAGCACATGGTCCAGCCAAAAACATGGAGTATGATGCTGTTGCAACAACACACTCCTGGATCCCCAAAAGAAATCGATCCATCTTGAATACAAGTCAAAGAGGAGTACTTGAAGATGAACAAATGTACCAAAGGTGCTGCAATTTATTTGAAAAATTCTTCCCCAGCAGTTCATACAGAAGACCAGTCGGGATATCCAGTATGGTGGAGGCTATGGTTTCCAGAGCCCGAATTGATGCACGGATTGATTTCGAATCTGGAAGGATAAAGAAAGAAGAGTTCACTGAGATCATGAAGATCTGTTCCACCATTGAAGAGCTCAGACGGCAAAAATAGTGAATTTAGCTTGTCCTTCATGAAAAAATGCCTTGTTTCTACT'"
    ]
    },
    "metadata": {
    "tags": []
    },
    "execution_count": 4
    }
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "rxAxFDcfDIy-",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 194
    },
    "outputId": "92c269e6-9d75-41cc-d07e-5eab98009ee7"
    },
    "source": [
    "vocab = {}\n",
    "for sequence in sequences_index.values():\n",
    " for char in sequence:\n",
    " if char not in vocab:\n",
    " vocab[char] = 1\n",
    " else:\n",
    " vocab[char] += 1\n",
    "\n",
    "vocab_counts = collections.Counter(vocab)\n",
    "vocab_counts.most_common(10)"
    ],
    "execution_count": 5,
    "outputs": [
    {
    "output_type": "execute_result",
    "data": {
    "text/plain": [
    "[('T', 35674146),\n",
    " ('A', 31873941),\n",
    " ('G', 26194090),\n",
    " ('C', 22849345),\n",
    " ('N', 9662),\n",
    " ('Y', 1547),\n",
    " ('R', 1083),\n",
    " ('K', 333),\n",
    " ('W', 286),\n",
    " ('M', 283)]"
    ]
    },
    "metadata": {
    "tags": []
    },
    "execution_count": 5
    }
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "nCB0KmDdFUkj",
    "colab_type": "code",
    "colab": {}
    },
    "source": [
    "vocab_index = {}\n",
    "index_vocab = {}\n",
    "for index, (key, count) in enumerate(vocab_counts.most_common()):\n",
    " vocab_index[key] = index\n",
    " index_vocab[index] = key\n",
    "\n",
    "\n",
    "def encode(sequence: str) -> List[int]:\n",
    " return [vocab_index[char] for char in sequence if char in vocab_index]\n",
    "\n",
    "def decode(sequence_ids) -> List[str]:\n",
    " return [index_vocab[idx] for idx in sequence_ids if idx in index_vocab]"
    ],
    "execution_count": 0,
    "outputs": []
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "AXmQR_ghGc4J",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 52
    },
    "outputId": "2207577e-3dbe-4fb2-ac4e-5e877beddfb6"
    },
    "source": [
    "sequence_ids = encode(sequences_index[\"AF389116\"])\n",
    "sequence = decode(sequence_ids)\n",
    "\n",
    "print(f\"sequence-ids: {sequence_ids[:20]}\")\n",
    "print(f\"sequence: {sequence[:20]}\")"
    ],
    "execution_count": 7,
    "outputs": [
    {
    "output_type": "stream",
    "text": [
    "sequence-ids: [1, 2, 3, 2, 1, 1, 1, 2, 3, 1, 2, 2, 3, 1, 1, 1, 3, 3, 1, 0]\n",
    "sequence: ['A', 'G', 'C', 'G', 'A', 'A', 'A', 'G', 'C', 'A', 'G', 'G', 'C', 'A', 'A', 'A', 'C', 'C', 'A', 'T']\n"
    ],
    "name": "stdout"
    }
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "oboeSDdCHTLx",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 34
    },
    "outputId": "cd2ab426-a129-474a-fff7-fb2896059bae"
    },
    "source": [
    "print(encode('ATGGAAGTATTTAAAGCGCCACCTATTGGGATATAAG'))"
    ],
    "execution_count": 8,
    "outputs": [
    {
    "output_type": "stream",
    "text": [
    "[1, 0, 2, 2, 1, 1, 2, 0, 1, 0, 0, 0, 1, 1, 1, 2, 3, 2, 3, 3, 1, 3, 3, 0, 1, 0, 0, 2, 2, 2, 1, 0, 1, 0, 1, 1, 2]\n"
    ],
    "name": "stdout"
    }
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "_PUz0utHIbIb",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 34
    },
    "outputId": "939c352c-a188-4be5-8874-93664596c87a"
    },
    "source": [
    "sequences = []\n",
    "for sequence in sequences_index.values():\n",
    " sequence_ids = encode(sequence)\n",
    " sequences.append(sequence_ids)\n",
    "len(sequences)"
    ],
    "execution_count": 9,
    "outputs": [
    {
    "output_type": "execute_result",
    "data": {
    "text/plain": [
    "15177"
    ]
    },
    "metadata": {
    "tags": []
    },
    "execution_count": 9
    }
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "zdBpmf9noHvt",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 194
    },
    "outputId": "05796ef4-3049-428a-dcc3-ea29310a79dd"
    },
    "source": [
    "import random\n",
    "\n",
    "num_steps = 3 # ngram size\n",
    "maxsize = 4000 # maxsize up to 15,177\n",
    "\n",
    "dna_pairs = {}\n",
    "random.shuffle(sequences)\n",
    "for sequence in sequences[:maxsize]:\n",
    " for i in range(0, len(sequence), num_steps):\n",
    " seq_ids = sequence[i: i + num_steps]\n",
    " seq = \"\".join(decode(seq_ids))\n",
    " if seq not in dna_pairs:\n",
    " dna_pairs[seq] = 1\n",
    " else:\n",
    " dna_pairs[seq] += 1\n",
    "\n",
    "common_pairs = collections.Counter(dna_pairs)\n",
    "common_pairs.most_common(10)"
    ],
    "execution_count": 10,
    "outputs": [
    {
    "output_type": "execute_result",
    "data": {
    "text/plain": [
    "[('TTT', 302667),\n",
    " ('TTG', 274921),\n",
    " ('TGT', 267892),\n",
    " ('ATG', 254791),\n",
    " ('ATT', 233247),\n",
    " ('GTT', 232533),\n",
    " ('AAA', 231146),\n",
    " ('TTA', 219465),\n",
    " ('AAT', 217561),\n",
    " ('TGG', 212646)]"
    ]
    },
    "metadata": {
    "tags": []
    },
    "execution_count": 10
    }
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "3Erk5NR-Fr_5",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 34
    },
    "outputId": "d55047e4-0480-4aac-aa1a-2d0571460ac3"
    },
    "source": [
    "len(common_pairs.keys())"
    ],
    "execution_count": 11,
    "outputs": [
    {
    "output_type": "execute_result",
    "data": {
    "text/plain": [
    "426"
    ]
    },
    "metadata": {
    "tags": []
    },
    "execution_count": 11
    }
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {
    "id": "K3F8sUKM5McD",
    "colab_type": "text"
    },
    "source": [
    "## Autocomplete Demo\n",
    "\n"
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "LFwBohU5vz9w",
    "colab_type": "code",
    "colab": {}
    },
    "source": [
    "def iter_dna_pairs(maxsize=5000, window=3):\n",
    " for sequence in sequences[:maxsize]:\n",
    " for i in range(0, len(sequence), window):\n",
    " sequence_ids = sequence[i: i + window]\n",
    " sequence = \"\".join(decode(sequence_ids))\n",
    " yield sequence"
    ],
    "execution_count": 0,
    "outputs": []
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "2jct4paAxqx2",
    "colab_type": "code",
    "colab": {}
    },
    "source": [
    "graph = Graph()\n",
    "for pairs in iter_dna_pairs(window=3):\n",
    " graph.insert(pairs)"
    ],
    "execution_count": 0,
    "outputs": []
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "MhFGyejwx5bz",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 34
    },
    "outputId": "f02db152-0c1d-4b11-b4df-96df874d243d"
    },
    "source": [
    "graph.transverse(query=\"AT\")"
    ],
    "execution_count": 14,
    "outputs": [
    {
    "output_type": "execute_result",
    "data": {
    "text/plain": [
    "<Graph(None, window=['G', 'T', 'C', 'A'])>"
    ]
    },
    "metadata": {
    "tags": []
    },
    "execution_count": 14
    }
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "C7qmtXa6x6kY",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 87
    },
    "outputId": "fd0f4523-01c2-4014-9e74-b2c0677374f7"
    },
    "source": [
    "dna_node = graph.transverse(\"AT\")\n",
    "list(dna_node.descendant_nodes())"
    ],
    "execution_count": 15,
    "outputs": [
    {
    "output_type": "execute_result",
    "data": {
    "text/plain": [
    "[<Graph(ATG, window=[])>,\n",
    " <Graph(ATT, window=[])>,\n",
    " <Graph(ATC, window=[])>,\n",
    " <Graph(ATA, window=[])>]"
    ]
    },
    "metadata": {
    "tags": []
    },
    "execution_count": 15
    }
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "SrlUYT2703g4",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 87
    },
    "outputId": "99364c47-c131-415d-91aa-50db33cbf7ee"
    },
    "source": [
    "dna_node = graph.transverse(\"AT\")\n",
    "list(dna_node.descendant_nodes())"
    ],
    "execution_count": 16,
    "outputs": [
    {
    "output_type": "execute_result",
    "data": {
    "text/plain": [
    "[<Graph(ATG, window=[])>,\n",
    " <Graph(ATT, window=[])>,\n",
    " <Graph(ATC, window=[])>,\n",
    " <Graph(ATA, window=[])>]"
    ]
    },
    "metadata": {
    "tags": []
    },
    "execution_count": 16
    }
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {
    "id": "skiMO7YP32c0",
    "colab_type": "text"
    },
    "source": [
    "> Here the size of the window is increased by `6` steps."
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "ciljMMpx1jog",
    "colab_type": "code",
    "colab": {}
    },
    "source": [
    "graph = Graph()\n",
    "for pairs in iter_dna_pairs(maxsize=6000, window=6):\n",
    " graph.insert(pairs)"
    ],
    "execution_count": 0,
    "outputs": []
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "fiUfHL1F3sKC",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 34
    },
    "outputId": "5a1bf1a5-0a17-4a90-8a99-f4a40cd68711"
    },
    "source": [
    "graph.transverse(\"ATGG\")"
    ],
    "execution_count": 18,
    "outputs": [
    {
    "output_type": "execute_result",
    "data": {
    "text/plain": [
    "<Graph(None, window=['A', 'C', 'G', 'T'])>"
    ]
    },
    "metadata": {
    "tags": []
    },
    "execution_count": 18
    }
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "Qo4Z70CP3vBZ",
    "colab_type": "code",
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 230
    },
    "outputId": "9ccdef41-5b76-4f19-d80c-0b4db20d753c"
    },
    "source": [
    "dna_node = graph.transverse(\"ATGG\")\n",
    "list(dna_node.descendant_nodes())"
    ],
    "execution_count": 19,
    "outputs": [
    {
    "output_type": "execute_result",
    "data": {
    "text/plain": [
    "[<Graph(ATGGAC, window=[])>,\n",
    " <Graph(ATGGAT, window=[])>,\n",
    " <Graph(ATGGAG, window=[])>,\n",
    " <Graph(ATGGAA, window=[])>,\n",
    " <Graph(ATGGCT, window=[])>,\n",
    " <Graph(ATGGCG, window=[])>,\n",
    " <Graph(ATGGCA, window=[])>,\n",
    " <Graph(ATGGCC, window=[])>,\n",
    " <Graph(ATGGGA, window=[])>,\n",
    " <Graph(ATGGGG, window=[])>,\n",
    " <Graph(ATGGGC, window=[])>,\n",
    " <Graph(ATGGTA, window=[])>]"
    ]
    },
    "metadata": {
    "tags": []
    },
    "execution_count": 19
    }
    ]
    },
    {
    "cell_type": "code",
    "metadata": {
    "id": "ly3mK9OCBYHG",
    "colab_type": "code",
    "colab": {}
    },
    "source": [
    ""
    ],
    "execution_count": 0,
    "outputs": []
    }
    ]
    }