Skip to content

Instantly share code, notes, and snippets.

@jfpuget
Last active April 15, 2022 11:55
Show Gist options
  • Select an option

  • Save jfpuget/b53f1e15a37aba5944ad to your computer and use it in GitHub Desktop.

Select an option

Save jfpuget/b53f1e15a37aba5944ad to your computer and use it in GitHub Desktop.

Revisions

  1. jfpuget revised this gist Mar 6, 2016. 1 changed file with 86 additions and 20 deletions.
    106 changes: 86 additions & 20 deletions Julia_Python_perf.ipynb
    Original file line number Diff line number Diff line change
    @@ -1280,7 +1280,7 @@
    },
    {
    "cell_type": "code",
    "execution_count": 47,
    "execution_count": 19,
    "metadata": {
    "collapsed": false
    },
    @@ -1297,21 +1297,60 @@
    " \n",
    " m = int(s,16)\n",
    " assert m == n\n",
    "\n",
    " \n",
    "def parse_int_numpy():\n",
    " for i in range(1,1000):\n",
    " n = np.random.randint(0,2**31-1)\n",
    " s = hex(n)\n",
    " \n",
    " # required for Python 2.7 on 64 bits machine\n",
    " if s[-1]=='L':\n",
    " s = s[0:-1]\n",
    " \n",
    " m = int(s,16)\n",
    " assert m == n\n",
    " \n",
    "def parse_int_opt():\n",
    " for i in range(1,1000):\n",
    " n = random.randint(0,2**32-1)\n",
    " s = hex(n)\n",
    " \n",
    " # required for Python 2.7 on 64 bits machine\n",
    " if s.endswith('L'):\n",
    " s = s[0:-1]\n",
    " \n",
    " m = int(s,16)\n",
    " assert m == n\n",
    " \n",
    "def parse_int1():\n",
    " for i in range(1,1000):\n",
    " n = random.randint(0,2**32-1)\n",
    " s = hex(n)\n",
    " m = int(s,16)\n",
    " assert m == n\n",
    " \n",
    "def parse_int1_numpy():\n",
    " for i in range(1,1000):\n",
    " n = np.random.randint(0,2**31-1)\n",
    " s = hex(n)\n",
    " m = int(s,16)\n",
    " assert m == n\n",
    " \n",
    "@jit\n",
    "def parse_numba():\n",
    " for i in range(1,1000):\n",
    " n = random.randint(0,2**32-1)\n",
    " s = hex(n)\n",
    " m = int(s,16)\n",
    " assert m == n\n"
    " assert m == n\n",
    "\n",
    "@jit\n",
    "def parse_numba_numpy():\n",
    " for i in range(1,1000):\n",
    " n = np.random.randint(0,2**31-1)\n",
    " s = hex(n)\n",
    " m = int(s,16)\n",
    " assert m == n"
    ]
    },
    {
    @@ -1325,7 +1364,7 @@
    },
    {
    "cell_type": "code",
    "execution_count": 59,
    "execution_count": 21,
    "metadata": {
    "collapsed": false
    },
    @@ -1334,19 +1373,34 @@
    "%%cython\n",
    "import random\n",
    "import cython\n",
    "cimport cython\n",
    "import numpy as np\n",
    "cimport numpy as np\n",
    "\n",
    "@cython.boundscheck(False)\n",
    "@cython.wraparound(False)\n",
    "cpdef parse_int_cython():\n",
    " cdef:\n",
    " int i,n,m\n",
    " for i in range(1,1000):\n",
    " n = random.randint(0,2**31-1)\n",
    " m = int(hex(n),16)\n",
    " assert m == n\n",
    " \n",
    "@cython.boundscheck(False)\n",
    "@cython.wraparound(False)\n",
    "cpdef parse_int_cython_numpy():\n",
    " cdef:\n",
    " int i,n,m\n",
    " for i in range(1,1000):\n",
    " n = np.random.randint(0,2**31-1)\n",
    " m = int(hex(n),16)\n",
    " assert m == n"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 60,
    "execution_count": 22,
    "metadata": {
    "collapsed": false
    },
    @@ -1355,18 +1409,28 @@
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "100 loops, best of 3: 3.59 ms per loop\n",
    "100 loops, best of 3: 3.29 ms per loop\n",
    "100 loops, best of 3: 3.64 ms per loop\n",
    "100 loops, best of 3: 3 ms per loop\n"
    "100 loops, best of 3: 3.55 ms per loop\n",
    "1000 loops, best of 3: 1.21 ms per loop\n",
    "100 loops, best of 3: 3.72 ms per loop\n",
    "100 loops, best of 3: 3.32 ms per loop\n",
    "1000 loops, best of 3: 1.05 ms per loop\n",
    "100 loops, best of 3: 3.63 ms per loop\n",
    "1000 loops, best of 3: 1.13 ms per loop\n",
    "100 loops, best of 3: 3.07 ms per loop\n",
    "1000 loops, best of 3: 817 µs per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit parse_int()\n",
    "%timeit parse_int_numpy()\n",
    "%timeit parse_int_opt()\n",
    "%timeit parse_int1()\n",
    "%timeit parse_int1_numpy()\n",
    "%timeit parse_numba()\n",
    "%timeit parse_int_cython()"
    "%timeit parse_numba_numpy()\n",
    "%timeit parse_int_cython()\n",
    "%timeit parse_int_cython_numpy()"
    ]
    },
    {
    @@ -1389,7 +1453,7 @@
    },
    {
    "cell_type": "code",
    "execution_count": 62,
    "execution_count": 25,
    "metadata": {
    "collapsed": true
    },
    @@ -1416,7 +1480,7 @@
    },
    {
    "cell_type": "code",
    "execution_count": 63,
    "execution_count": 26,
    "metadata": {
    "collapsed": false
    },
    @@ -1442,7 +1506,7 @@
    },
    {
    "cell_type": "code",
    "execution_count": 64,
    "execution_count": 27,
    "metadata": {
    "collapsed": false
    },
    @@ -1451,6 +1515,8 @@
    "%%cython\n",
    "import numpy as np\n",
    "import cython\n",
    "cimport numpy\n",
    "cimport cython\n",
    "\n",
    "@cython.boundscheck(False)\n",
    "@cython.wraparound(False)\n",
    @@ -1466,7 +1532,7 @@
    },
    {
    "cell_type": "code",
    "execution_count": 65,
    "execution_count": 28,
    "metadata": {
    "collapsed": false
    },
    @@ -1475,13 +1541,13 @@
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "1000 loops, best of 3: 848 µs per loop\n",
    "The slowest run took 176.95 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "1000 loops, best of 3: 961 µs per loop\n",
    "1000 loops, best of 3: 740 µs per loop\n",
    "The slowest run took 102.93 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "1000 loops, best of 3: 781 µs per loop\n",
    "The slowest run took 195.93 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "1000 loops, best of 3: 824 µs per loop\n",
    "1000 loops, best of 3: 733 µs per loop\n",
    "1000 loops, best of 3: 472 µs per loop\n"
    "The slowest run took 104.18 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "1000 loops, best of 3: 739 µs per loop\n",
    "1000 loops, best of 3: 471 µs per loop\n"
    ]
    }
    ],
  2. jfpuget created this gist Feb 2, 2016.
    1,527 changes: 1,527 additions & 0 deletions Julia_Python_perf.ipynb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,1527 @@
    {
    "cells": [
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "# A Python Performance Optimization Exercise\n",
    "\n",
    "## Author [Jean-François Puget](https://www.ibm.com/developerworks/community/blogs/jfp/?lang=en)\n",
    "\n",
    "A revisit of the Python micro benchmarks available at https://github.com/JuliaLang/julia/blob/master/test/perf/micro/perf.py\n",
    "\n",
    "The code used in this notebook is discussed at length in [Python Meets Julia Micro Performance](https://www.ibm.com/developerworks/community/blogs/jfp/entry/python_meets_julia_micro_performance)\n",
    "\n",
    "Timings below are for Anaconda 64 bits with Python 3.5.1 and Numba 0.23.1 running on a Windows laptop (Lenovo W520). Timings on another machine with another Python version can be quite different."
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "First, let's load useful extensions and import useful modules"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "%load_ext Cython\n",
    "%load_ext line_profiler\n",
    "\n",
    "import random\n",
    "import numpy as np\n",
    "from numba import jit\n",
    "import sys\n",
    "\n",
    "if sys.version_info < (3,):\n",
    " range = xrange"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "# Fibonacci"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Naive code\n",
    "\n",
    "The base line is gthe one used in Julia benchmarks. It is a straightforward, frecursive implementation."
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "def fib(n):\n",
    " if n<2:\n",
    " return n\n",
    " return fib(n-1)+fib(n-2)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "Sanity check."
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 3,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "assert fib(20) == 6765"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Cython\n",
    "\n",
    "Let's try various versions."
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 4,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "%%cython\n",
    "def fib_cython(n):\n",
    " if n<2:\n",
    " return n\n",
    " return fib_cython(n-1)+fib_cython(n-2)\n",
    "\n",
    "cpdef inline fib_cython_inline(n):\n",
    " if n<2:\n",
    " return n\n",
    " return fib_cython_inline(n-1)+fib_cython_inline(n-2)\n",
    "\n",
    "\n",
    "cpdef long fib_cython_type(long n):\n",
    " if n<2:\n",
    " return n\n",
    " return fib_cython_type(n-1)+fib_cython_type(n-2)\n",
    "\n",
    "cpdef long fib_cython_type_inline(long n):\n",
    " if n<2:\n",
    " return n\n",
    " return fib_cython_type_inline(n-1)+fib_cython_type_inline(n-2)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "Sanity checks"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 5,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "assert fib_cython(20) == 6765\n",
    "assert fib_cython_inline(20) == 6765\n",
    "assert fib_cython_type(20) == 6765\n",
    "assert fib_cython_type_inline(20) == 6765"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Numba\n",
    "\n",
    "Let's try three versions, with and without type."
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 6,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "from numba import int32, int64\n",
    "@jit\n",
    "def fib_numba(n):\n",
    " if n<2:\n",
    " return n\n",
    " return fib_numba(n-1)+fib_numba(n-2)\n",
    "\n",
    "@jit(int32(int32))\n",
    "def fib_numba_type32(n):\n",
    " if n<2:\n",
    " return n\n",
    " return fib_numba_type32(n-1)+fib_numba_type32(n-2)\n",
    "\n",
    "\n",
    "@jit(int64(int64))\n",
    "def fib_numba_type64(n):\n",
    " if n<2:\n",
    " return n\n",
    " return fib_numba_type64(n-1)+fib_numba_type64(n-2)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "Sanity check"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 7,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "assert fib_numba(20) == 6765\n",
    "assert fib_numba_type32(20) == 6765\n",
    "assert fib_numba_type64(20) == 6765"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Using floats"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 8,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "def ffib(n):\n",
    " if n<2.0:\n",
    " return n\n",
    " return ffib(n-1)+ffib(n-2)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Timings"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 9,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "100 loops, best of 3: 3.77 ms per loop\n",
    "1000 loops, best of 3: 1.47 ms per loop\n",
    "1000 loops, best of 3: 759 µs per loop\n",
    "10000 loops, best of 3: 24.4 µs per loop\n",
    "10000 loops, best of 3: 24.6 µs per loop\n",
    "100 loops, best of 3: 4.89 ms per loop\n",
    "100 loops, best of 3: 5 ms per loop\n",
    "100 loops, best of 3: 5.01 ms per loop\n",
    "100 loops, best of 3: 5.26 ms per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit fib(20)\n",
    "%timeit fib_cython(20)\n",
    "%timeit fib_cython_inline(20)\n",
    "%timeit fib_cython_type(20)\n",
    "%timeit fib_cython_type_inline(20)\n",
    "%timeit fib_numba(20)\n",
    "%timeit fib_numba_type32(20)\n",
    "%timeit fib_numba_type64(20)\n",
    "%timeit ffib(20.0)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {
    "collapsed": false
    },
    "source": [
    "Let's try a larger one."
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 10,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "1 loops, best of 3: 442 ms per loop\n",
    "10 loops, best of 3: 178 ms per loop\n",
    "10 loops, best of 3: 94.2 ms per loop\n",
    "100 loops, best of 3: 3.05 ms per loop\n",
    "100 loops, best of 3: 3.01 ms per loop\n",
    "1 loops, best of 3: 600 ms per loop\n",
    "1 loops, best of 3: 648 ms per loop\n"
    ]
    }
    ],
    "source": [
    "n = 30\n",
    "%timeit fib(n)\n",
    "%timeit fib_cython(n)\n",
    "%timeit fib_cython_inline(n)\n",
    "%timeit fib_cython_type(n)\n",
    "%timeit fib_cython_type_inline(n)\n",
    "%timeit fib_numba(n)\n",
    "%timeit ffib(n*1.0)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "## Sequential Fibonacci\n",
    "\n",
    "The recursive call is not the best way to compute Fibonacci numbers. It is better to accumulate them as follows."
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Functools"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 51,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "from functools import lru_cache as cache\n",
    "\n",
    "@cache(maxsize=None)\n",
    "def fib_cache(n):\n",
    " if n<2:\n",
    " return n\n",
    " return fib_cache(n-1)+fib_cache(n-2)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Plain Python"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 11,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "def fib_seq(n):\n",
    " if n < 2:\n",
    " return n\n",
    " a,b = 1,0\n",
    " for i in range(n-1):\n",
    " a,b = a+b,a\n",
    " return a "
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Numba"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 12,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "@jit\n",
    "def fib_seq_numba(n):\n",
    " if n < 2:\n",
    " return n\n",
    " a,b = 1,0\n",
    " for i in range(n-1):\n",
    " a,b = a+b,a\n",
    " return a "
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Cython"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 13,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "%%cython\n",
    "\n",
    "def fib_seq_cython(n):\n",
    " if n < 2:\n",
    " return n\n",
    " a,b = 1,0\n",
    " for i in range(n-1):\n",
    " a,b = a+b,a\n",
    " return a \n",
    "\n",
    "cpdef long fib_seq_cython_type(long n):\n",
    " if n < 2:\n",
    " return n\n",
    " cdef long a,b\n",
    " a,b = 1,0\n",
    " for i in range(n-1):\n",
    " a,b = a+b,a\n",
    " return a "
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Sanity checks"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 14,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "assert fib_seq(20) == 6765\n",
    "assert fib_seq_cython(20) == 6765\n",
    "assert fib_seq_cython_type(20) == 6765\n",
    "assert fib_seq_numba(20) == 6765"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Timings"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 52,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "The slowest run took 188.04 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "10000000 loops, best of 3: 127 ns per loop\n",
    "1000000 loops, best of 3: 1.81 µs per loop\n",
    "The slowest run took 4.46 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "1000000 loops, best of 3: 959 ns per loop\n",
    "The slowest run took 15.66 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "10000000 loops, best of 3: 82 ns per loop\n",
    "The slowest run took 31.77 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "1000000 loops, best of 3: 216 ns per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit fib_cache(20)\n",
    "%timeit fib_seq(20)\n",
    "%timeit fib_seq_cython(20)\n",
    "%timeit fib_seq_cython_type(20)\n",
    "%timeit fib_seq_numba(20)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Arbitrary precision check"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 16,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "data": {
    "text/plain": [
    "354224848179261915075"
    ]
    },
    "execution_count": 16,
    "metadata": {},
    "output_type": "execute_result"
    }
    ],
    "source": [
    "fib_seq(100)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "# Quicksort"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Pure Python\n",
    "\n",
    "Code used in Julia benchmarks"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 17,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "def qsort_kernel(a, lo, hi):\n",
    " i = lo\n",
    " j = hi\n",
    " while i < hi:\n",
    " pivot = a[(lo+hi) // 2]\n",
    " while i <= j:\n",
    " while a[i] < pivot:\n",
    " i += 1\n",
    " while a[j] > pivot:\n",
    " j -= 1\n",
    " if i <= j:\n",
    " a[i], a[j] = a[j], a[i]\n",
    " i += 1\n",
    " j -= 1\n",
    " if lo < j:\n",
    " qsort_kernel(a, lo, j)\n",
    " lo = i\n",
    " j = hi\n",
    " return a\n",
    "\n",
    "\n",
    "def benchmark_qsort():\n",
    " lst = [ random.random() for i in range(1,5000) ]\n",
    " qsort_kernel(lst, 0, len(lst)-1)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Numba"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 18,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "@jit\n",
    "def qsort_kernel_numba(a, lo, hi):\n",
    " i = lo\n",
    " j = hi\n",
    " while i < hi:\n",
    " pivot = a[(lo+hi) // 2]\n",
    " while i <= j:\n",
    " while a[i] < pivot:\n",
    " i += 1\n",
    " while a[j] > pivot:\n",
    " j -= 1\n",
    " if i <= j:\n",
    " a[i], a[j] = a[j], a[i]\n",
    " i += 1\n",
    " j -= 1\n",
    " if lo < j:\n",
    " qsort_kernel_numba(a, lo, j)\n",
    " lo = i\n",
    " j = hi\n",
    " return a\n",
    "\n",
    "def benchmark_qsort_numba():\n",
    " lst = [ random.random() for i in range(1,5000) ]\n",
    " qsort_kernel_numba(lst, 0, len(lst)-1)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "With Numpy"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 19,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "@jit\n",
    "def qsort_kernel_numba_numpy(a, lo, hi):\n",
    " i = lo\n",
    " j = hi\n",
    " while i < hi:\n",
    " pivot = a[(lo+hi) // 2]\n",
    " while i <= j:\n",
    " while a[i] < pivot:\n",
    " i += 1\n",
    " while a[j] > pivot:\n",
    " j -= 1\n",
    " if i <= j:\n",
    " a[i], a[j] = a[j], a[i]\n",
    " i += 1\n",
    " j -= 1\n",
    " if lo < j:\n",
    " qsort_kernel_numba_numpy(a, lo, j)\n",
    " lo = i\n",
    " j = hi\n",
    " return a\n",
    "\n",
    "def benchmark_qsort_numba_numpy():\n",
    " lst = np.random.rand(5000)\n",
    " qsort_kernel_numba(lst, 0, len(lst)-1)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Cython"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 20,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "%%cython\n",
    "import numpy as np\n",
    "import cython\n",
    "\n",
    "@cython.boundscheck(False)\n",
    "@cython.wraparound(False)\n",
    "cdef double[:] \\\n",
    "qsort_kernel_cython_numpy_type(double[:] a, \\\n",
    " long lo, \\\n",
    " long hi):\n",
    " cdef: \n",
    " long i, j\n",
    " double pivot\n",
    " i = lo\n",
    " j = hi\n",
    " while i < hi:\n",
    " pivot = a[(lo+hi) // 2]\n",
    " while i <= j:\n",
    " while a[i] < pivot:\n",
    " i += 1\n",
    " while a[j] > pivot:\n",
    " j -= 1\n",
    " if i <= j:\n",
    " a[i], a[j] = a[j], a[i]\n",
    " i += 1\n",
    " j -= 1\n",
    " if lo < j:\n",
    " qsort_kernel_cython_numpy_type(a, lo, j)\n",
    " lo = i\n",
    " j = hi\n",
    " return a\n",
    "\n",
    "def benchmark_qsort_numpy_cython():\n",
    " lst = np.random.rand(5000)\n",
    " qsort_kernel_cython_numpy_type(lst, 0, len(lst)-1)\n",
    " "
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 21,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "10 loops, best of 3: 17.7 ms per loop\n",
    "The slowest run took 4.06 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "1 loops, best of 3: 79.4 ms per loop\n",
    "The slowest run took 12.62 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "1 loops, best of 3: 20.9 ms per loop\n",
    "1000 loops, best of 3: 772 µs per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit benchmark_qsort()\n",
    "%timeit benchmark_qsort_numba()\n",
    "%timeit benchmark_qsort_numba_numpy()\n",
    "%timeit benchmark_qsort_numpy_cython()"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Built-in sort"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 22,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "def benchmark_sort():\n",
    " lst = [ random.random() for i in range(1,5000) ]\n",
    " lst.sort()\n",
    "\n",
    "def benchmark_sort_numpy():\n",
    " lst = np.random.rand(5000)\n",
    " np.sort(lst)"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 23,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "1000 loops, best of 3: 2 ms per loop\n",
    "1000 loops, best of 3: 306 µs per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit benchmark_sort()\n",
    "%timeit benchmark_sort_numpy()"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "# Random mat stat"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Baseline\n",
    "\n",
    "Used in Julia benchmarks"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 24,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "def randmatstat(t):\n",
    " n = 5\n",
    " v = np.zeros(t)\n",
    " w = np.zeros(t)\n",
    " for i in range(1,t):\n",
    " a = np.random.randn(n, n)\n",
    " b = np.random.randn(n, n)\n",
    " c = np.random.randn(n, n)\n",
    " d = np.random.randn(n, n)\n",
    " P = np.matrix(np.hstack((a, b, c, d)))\n",
    " Q = np.matrix(np.vstack((np.hstack((a, b)), np.hstack((c, d)))))\n",
    " v[i] = np.trace(np.linalg.matrix_power(np.transpose(P)*P, 4))\n",
    " w[i] = np.trace(np.linalg.matrix_power(np.transpose(Q)*Q, 4))\n",
    " return (np.std(v)/np.mean(v), np.std(w)/np.mean(w))"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Speeding it up"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 25,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "def my_power(a):\n",
    " a = np.dot(a.T,a)\n",
    " a = np.dot(a,a)\n",
    " a = np.dot(a,a)\n",
    " return a\n",
    " \n",
    "def randmatstat_fast(t):\n",
    " n = 5\n",
    " v = np.zeros(t)\n",
    " w = np.zeros(t)\n",
    " for i in range(1,t):\n",
    " a = np.random.randn(n, n)\n",
    " b = np.random.randn(n, n)\n",
    " c = np.random.randn(n, n)\n",
    " d = np.random.randn(n, n)\n",
    " P = np.hstack((a, b, c, d))\n",
    " Q = np.vstack((np.hstack((a, b)), np.hstack((c, d))))\n",
    " v[i] = np.trace(my_power(P))\n",
    " w[i] = np.trace(my_power(Q))\n",
    " return (np.std(v)/np.mean(v), np.std(w)/np.mean(w))"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Timings"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 26,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "10 loops, best of 3: 160 ms per loop\n",
    "10 loops, best of 3: 83.2 ms per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit randmatstat(1000)\n",
    "%timeit randmatstat_fast(1000)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "# Randmatmul"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 27,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "def randmatmul(n):\n",
    " A = np.random.rand(n,n)\n",
    " B = np.random.rand(n,n)\n",
    " return np.dot(A,B)"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 28,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "10 loops, best of 3: 83.7 ms per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit randmatmul(1000)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "# Mandelbrot"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Baseline\n",
    "The baseline used in Julia benchmarks"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 29,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "def mandel(z):\n",
    " maxiter = 80\n",
    " c = z\n",
    " for n in range(maxiter):\n",
    " if abs(z) > 2:\n",
    " return n\n",
    " z = z*z + c\n",
    " return maxiter\n",
    "\n",
    "def mandelperf():\n",
    " r1 = np.linspace(-2.0, 0.5, 26)\n",
    " r2 = np.linspace(-1.0, 1.0, 21)\n",
    " return [mandel(complex(r, i)) for r in r1 for i in r2]"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 30,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "assert sum(mandelperf()) == 14791"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Numba"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 31,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "@jit\n",
    "def mandel_numba(z):\n",
    " maxiter = 80\n",
    " c = z\n",
    " for n in range(maxiter):\n",
    " if abs(z) > 2:\n",
    " return n\n",
    " z = z*z + c\n",
    " return maxiter\n",
    "\n",
    "def mandelperf_numba():\n",
    " r1 = np.linspace(-2.0, 0.5, 26)\n",
    " r2 = np.linspace(-1.0, 1.0, 21)\n",
    " c3 = [complex(r,i) for r in r1 for i in r2]\n",
    " return [mandel_numba(c) for c in c3]"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 32,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "assert sum(mandelperf_numba()) == 14791"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Cython"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 33,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "%%cython\n",
    "\n",
    "import numpy as np\n",
    "cimport numpy as np\n",
    "\n",
    "cdef long mandel_cython_type(np.complex z):\n",
    " cdef long maxiter, n\n",
    " cdef np.complex c\n",
    " maxiter = 80\n",
    " c = z\n",
    " for n in range(maxiter):\n",
    " if abs(z) > 2:\n",
    " return n\n",
    " z = z*z + c\n",
    " return maxiter\n",
    "\n",
    "cpdef mandelperf_cython_type():\n",
    " cdef double[:] r1,r2\n",
    " r1 = np.linspace(-2.0, 0.5, 26)\n",
    " r2 = np.linspace(-1.0, 1.0, 21)\n",
    " return [mandel_cython_type(complex(r, i)) for r in r1 for i in r2]"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 34,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "assert sum(mandelperf_cython_type()) == 14791"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 35,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "100 loops, best of 3: 6.57 ms per loop\n",
    "100 loops, best of 3: 3.6 ms per loop\n",
    "1000 loops, best of 3: 481 µs per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit mandelperf()\n",
    "%timeit mandelperf_cython_type()\n",
    "%timeit mandelperf_numba()"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Profiling"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 36,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "%lprun -s -f mandelperf_numba mandelperf_numba()"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Numpy"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 53,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "@jit\n",
    "def mandel_numba(z):\n",
    " maxiter = 80\n",
    " c = z\n",
    " for n in range(maxiter):\n",
    " if abs(z) > 2:\n",
    " return n\n",
    " z = z*z + c\n",
    " return maxiter\n",
    "\n",
    "@jit\n",
    "def mandelperf_numba_mesh():\n",
    " width = 26\n",
    " height = 21\n",
    " r1 = np.linspace(-2.0, 0.5, width)\n",
    " r2 = np.linspace(-1.0, 1.0, height)\n",
    " mandel_set = np.empty((width,height), dtype=int)\n",
    " for i in range(width):\n",
    " for j in range(height):\n",
    " mandel_set[i,j] = mandel_numba(r1[i] + 1j*r2[j])\n",
    " return mandel_set"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 54,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "assert np.sum(mandelperf_numba_mesh()) == 14791"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 55,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "10000 loops, best of 3: 126 µs per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit mandelperf_numba_mesh()"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "An even faster code is presented in [How To Quickly Compute The Mandelbrot Set In Python](https://www.ibm.com/developerworks/community/blogs/jfp/entry/How_To_Compute_Mandelbrodt_Set_Quickly?lang=en)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "# Pisum"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 43,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "def pisum(t):\n",
    " sum = 0.0\n",
    " for j in range(1, 501):\n",
    " sum = 0.0\n",
    " for k in range(1, t+1):\n",
    " sum += 1.0/(k*k)\n",
    " return sum\n",
    "\n",
    "@jit\n",
    "def pisum_numba(t):\n",
    " sum = 0.0\n",
    " for j in range(1, 501):\n",
    " sum = 0.0\n",
    " for k in range(1, t+1):\n",
    " sum += 1.0/(k*k)\n",
    " return sum\n"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 44,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "%%cython\n",
    "\n",
    "import cython\n",
    "\n",
    "@cython.boundscheck(False)\n",
    "@cython.wraparound(False)\n",
    "cpdef double pisum_cython(int t):\n",
    " cdef:\n",
    " double sum = 0.0\n",
    " int j,k\n",
    " for j in range(1, 501):\n",
    " sum = 0.0\n",
    " for k in range(1, t+1):\n",
    " sum += 1.0/(k*k)\n",
    " return sum"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 45,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "assert abs(pisum(10000)-1.644834071848065) < 1e-6\n",
    "assert abs(pisum_numba(10000)-1.644834071848065) < 1e-6\n",
    "assert abs(pisum_cython(10000)-1.644834071848065) < 1e-6"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 46,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "1 loops, best of 3: 926 ms per loop\n",
    "100 loops, best of 3: 20.4 ms per loop\n",
    "10 loops, best of 3: 38.2 ms per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit pisum(10000)\n",
    "%timeit pisum_numba(10000)\n",
    "%timeit pisum_cython(10000)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "## Parse int"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 47,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "def parse_int():\n",
    " for i in range(1,1000):\n",
    " n = random.randint(0,2**32-1)\n",
    " s = hex(n)\n",
    " \n",
    " # required for Python 2.7 on 64 bits machine\n",
    " if s[-1]=='L':\n",
    " s = s[0:-1]\n",
    " \n",
    " m = int(s,16)\n",
    " assert m == n\n",
    "\n",
    "def parse_int1():\n",
    " for i in range(1,1000):\n",
    " n = random.randint(0,2**32-1)\n",
    " s = hex(n)\n",
    " m = int(s,16)\n",
    " assert m == n\n",
    " \n",
    "@jit\n",
    "def parse_numba():\n",
    " for i in range(1,1000):\n",
    " n = random.randint(0,2**32-1)\n",
    " s = hex(n)\n",
    " m = int(s,16)\n",
    " assert m == n\n"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {
    "collapsed": true
    },
    "source": [
    "Int in C are up to 2^31-1, hence we must use this as the limit when using Cython or Numpy"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 59,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "%%cython\n",
    "import random\n",
    "import cython\n",
    "\n",
    "cpdef parse_int_cython():\n",
    " cdef:\n",
    " int i,n,m\n",
    " for i in range(1,1000):\n",
    " n = random.randint(0,2**31-1)\n",
    " m = int(hex(n),16)\n",
    " assert m == n"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 60,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "100 loops, best of 3: 3.59 ms per loop\n",
    "100 loops, best of 3: 3.29 ms per loop\n",
    "100 loops, best of 3: 3.64 ms per loop\n",
    "100 loops, best of 3: 3 ms per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit parse_int()\n",
    "%timeit parse_int1()\n",
    "%timeit parse_numba()\n",
    "%timeit parse_int_cython()"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "### Vectorized operation"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 61,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "%lprun -s -f parse_int parse_int()"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 62,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "import numpy as np\n",
    "def parse_int_vec():\n",
    " n = np.random.randint(2^31-1,size=1000)\n",
    " for i in range(1,1000):\n",
    " ni = n[i]\n",
    " s = hex(ni)\n",
    " m = int(s,16)\n",
    " assert m == ni\n",
    " \n",
    "@jit\n",
    "def parse_int_vec_numba():\n",
    " n = np.random.randint(2^31-1,size=1000)\n",
    " for i in range(1,1000):\n",
    " ni = n[i]\n",
    " s = hex(ni)\n",
    " m = int(s,16)\n",
    " assert m == ni"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 63,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "vhex = np.vectorize(hex)\n",
    "vint = np.vectorize(int)\n",
    "\n",
    "def parse_int_numpy():\n",
    " n = np.random.randint(0,2**31-1,1000)\n",
    " s = vhex(n)\n",
    " m = vint(s,16)\n",
    " np.all(m == n)\n",
    " return s\n",
    "\n",
    "@jit\n",
    "def parse_int_numpy_numba():\n",
    " n = np.random.randint(0,2**31-1,1000)\n",
    " s = vhex(n)\n",
    " m = vint(s,16)\n",
    " np.all(m == n)"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 64,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "%%cython\n",
    "import numpy as np\n",
    "import cython\n",
    "\n",
    "@cython.boundscheck(False)\n",
    "@cython.wraparound(False)\n",
    "cpdef parse_int_vec_cython():\n",
    " cdef:\n",
    " int i,m\n",
    " int[:] n\n",
    " n = np.random.randint(0,2**31-1,1000)\n",
    " for i in range(1,1000):\n",
    " m = int(hex(n[i]),16)\n",
    " assert m == n[i]"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 65,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "1000 loops, best of 3: 848 µs per loop\n",
    "The slowest run took 176.95 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "1000 loops, best of 3: 961 µs per loop\n",
    "1000 loops, best of 3: 740 µs per loop\n",
    "The slowest run took 102.93 times longer than the fastest. This could mean that an intermediate result is being cached \n",
    "1000 loops, best of 3: 733 µs per loop\n",
    "1000 loops, best of 3: 472 µs per loop\n"
    ]
    }
    ],
    "source": [
    "%timeit parse_int_vec()\n",
    "%timeit parse_int_vec_numba()\n",
    "%timeit parse_int_numpy()\n",
    "%timeit parse_int_numpy_numba()\n",
    "%timeit parse_int_vec_cython()"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {
    "collapsed": true
    },
    "source": [
    "That's it!"
    ]
    }
    ],
    "metadata": {
    "kernelspec": {
    "display_name": "Python 3",
    "language": "python",
    "name": "python3"
    },
    "language_info": {
    "codemirror_mode": {
    "name": "ipython",
    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.5.1"
    }
    },
    "nbformat": 4,
    "nbformat_minor": 0
    }