{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cost of reading one byte from a memory-mapped file\n",
    "\n",
    "Four ways of reading byte 8 of a memory-mapped file as a signed 8-bit integer are compared for peak memory (`%memit`) and run time (`%timeit`).\n",
    "\n",
    "**Summary of the recorded run** (file size 2086114280 bytes, Python 3.4.3):\n",
    "\n",
    "| approach | memory increment | time per call |\n",
    "|---|---|---|\n",
    "| map only (`lambda x: None`) | 0.30 MiB | 15.4 µs |\n",
    "| `get_value` — index the mmap | 0.01 MiB | 27.9 µs |\n",
    "| `slice_` — slice the mmap, then index | 3825.66 MiB | 1.36 s |\n",
    "| `slice_cast` — slice the mmap, wrap the result in a `memoryview` | 3828.93 MiB | 1.36 s |\n",
    "| `mview_slice` — wrap the mmap in a `memoryview`, then slice | 0.00 MiB | 29.6 µs |\n",
    "\n",
    "Slicing an `mmap` object returns a new `bytes` object, so `m[1:]` copies almost the whole file; slicing a `memoryview` of the mapping does not copy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import mmap\n",
    "import os\n",
    "import struct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%load_ext memory_profiler"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers\n",
    "\n",
    "`mmap_and` maps a file read-only, hands the mapping to a callback and cleans up. The four reader functions that follow are the callbacks being compared."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def mmap_and(f, fn):\n",
    "    \"\"\"Map the file at path ``f`` read-only and return ``fn(mapping)``.\n",
    "\n",
    "    The mapping and the file descriptor are closed before this function\n",
    "    returns, so ``fn`` must not hand back an object that still refers to\n",
    "    the mapping.\n",
    "    \"\"\"\n",
    "    # Open outside the ``try``: if the open fails there is no descriptor to\n",
    "    # close, and the caller should see the original OSError rather than an\n",
    "    # UnboundLocalError raised by ``os.close(fd)``.\n",
    "    fd = os.open(f, os.O_RDONLY)\n",
    "    try:\n",
    "        # ``prot`` takes PROT_* flags, not MAP_SHARED; ``access`` requests the\n",
    "        # same read-only mapping and is also accepted on Windows.\n",
    "        with mmap.mmap(fd, 0, access=mmap.ACCESS_READ) as m:\n",
    "            return fn(m)\n",
    "    finally:\n",
    "        os.close(fd)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_value(m):\n",
    "    \"\"\"Return byte 8 of ``m`` as a signed 8-bit integer by indexing ``m`` directly.\"\"\"\n",
    "    # Packing as unsigned ('B') and unpacking as signed ('b') reinterprets\n",
    "    # the 0..255 value as -128..127.\n",
    "    return struct.unpack('b', struct.pack('B', m[8]))[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def slice_(m):\n",
    "    \"\"\"Return byte 8 of ``m`` as a signed 8-bit integer via ``m[1:][7]``.\n",
    "\n",
    "    When ``m`` is an mmap, ``m[1:]`` builds a ``bytes`` copy of everything\n",
    "    after the first byte before the single byte is read.\n",
    "    \"\"\"\n",
    "    return struct.unpack('b', struct.pack('B', m[1:][7]))[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def slice_cast(m):\n",
    "    \"\"\"Return byte 8 of ``m`` as a signed 8-bit integer, slicing before wrapping.\n",
    "\n",
    "    ``m[1:]`` is evaluated on ``m`` itself, so for an mmap the copy is made\n",
    "    before the memoryview is created; the cast to format 'b' only changes\n",
    "    how the copied bytes are read.\n",
    "    \"\"\"\n",
    "    return memoryview(m[1:]).cast('b')[7]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def mview_slice(m):\n",
    "    \"\"\"Return byte 8 of ``m`` as a signed 8-bit integer by slicing a memoryview of ``m``.\n",
    "\n",
    "    The memoryview is released on leaving the ``with`` block.\n",
    "    \"\"\"\n",
    "    with memoryview(m) as mv:\n",
    "        # Slicing a memoryview gives another view of the same buffer;\n",
    "        # format 'b' makes indexing return a signed int.\n",
    "        return mv[1:].cast('b')[7]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Input file and sanity check\n",
    "\n",
    "The input file is expected in the working directory. All four readers must return the same value before their costs are compared."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "size 2086114280\n"
     ]
    }
   ],
   "source": [
    "f = '20140728.json.xz'\n",
    "print('size', os.stat(f).st_size)\n",
    "assert mmap_and(f, get_value) == mmap_and(f, slice_) == mmap_and(f, slice_cast) == mmap_and(f, mview_slice)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Peak memory\n",
    "\n",
    "Uses the `memory_profiler` extension loaded above. The first measurement is the baseline: map the file and do nothing with it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "peak memory: 35.02 MiB, increment: 0.30 MiB\n",
      "peak memory: 35.04 MiB, increment: 0.01 MiB\n",
      "peak memory: 3860.70 MiB, increment: 3825.66 MiB\n",
      "peak memory: 3863.97 MiB, increment: 3828.93 MiB\n",
      "peak memory: 35.04 MiB, increment: 0.00 MiB\n"
     ]
    }
   ],
   "source": [
    "%memit mmap_and(f, lambda x: None)\n",
    "%memit mmap_and(f, get_value)\n",
    "%memit mmap_and(f, slice_)\n",
    "%memit mmap_and(f, slice_cast)\n",
    "%memit mmap_and(f, mview_slice)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run time\n",
    "\n",
    "Same five calls, one per cell, starting with the map-only baseline."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The slowest run took 18.19 times longer than the fastest. This could mean that an intermediate result is being cached \n",
      "100000 loops, best of 3: 15.4 µs per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit mmap_and(f, lambda x: None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The slowest run took 10.44 times longer than the fastest. This could mean that an intermediate result is being cached \n",
      "10000 loops, best of 3: 27.9 µs per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit mmap_and(f, get_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loops, best of 3: 1.36 s per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit mmap_and(f, slice_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loops, best of 3: 1.36 s per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit mmap_and(f, slice_cast)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The slowest run took 5.40 times longer than the fastest. This could mean that an intermediate result is being cached \n",
      "10000 loops, best of 3: 29.6 µs per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit mmap_and(f, mview_slice)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}