In [1]:
import mmap
import os
import struct

In [2]:
%load_ext memory_profiler

In [3]:
def mmap_and(f, fn):
    """Memory-map file ``f`` read-only and return ``fn(mapping)``.

    Parameters
    ----------
    f : path-like
        File to map. The whole file is mapped (length 0).
    fn : callable
        Called with the open ``mmap.mmap`` object. The mapping is closed
        before the result reaches the caller, so ``fn`` should return a
        plain value rather than a view that still refers to the mapping.

    Returns
    -------
    Whatever ``fn`` returns.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    # Open *before* the try block: if os.open() fails there is no descriptor
    # to close, and the original OSError must propagate instead of being
    # replaced by an UnboundLocalError raised from the finally clause.
    fd = os.open(f, os.O_RDONLY)
    try:
        # ACCESS_READ requests a read-only mapping explicitly; the previous
        # code passed the MAP_SHARED *flags* constant as the *prot* argument.
        with mmap.mmap(fd, 0, access=mmap.ACCESS_READ) as m:
            return fn(m)
    finally:
        os.close(fd)

In [4]:
def get_value(m):
    """Return the byte at index 8 of ``m`` reinterpreted as a signed char.

    The byte is packed as an unsigned char (``'B'``) and unpacked as a
    signed char (``'b'``), so values 128..255 come back as -128..-1.
    """
    raw = struct.pack('B', m[8])
    (signed,) = struct.unpack('b', raw)
    return signed

In [5]:
def slice_(m):
    """Return element 7 of ``m[1:]`` reinterpreted as a signed char.

    The slice ``m[1:]`` is taken first and then indexed, so this reads the
    same position as index 8 of ``m`` but goes through the sliced object.
    The value is converted by packing as ``'B'`` and unpacking as ``'b'``.
    """
    tail = m[1:]
    packed = struct.pack('B', tail[7])
    return struct.unpack('b', packed)[0]

In [6]:
def slice_cast(m):
    """Return element 7 of ``m[1:]`` read through a signed-char memoryview.

    ``m`` is sliced first; the resulting object is then wrapped in a
    ``memoryview`` cast to format ``'b'`` so indexing yields a signed value
    directly, without a ``struct`` round trip.
    """
    tail = m[1:]
    signed_view = memoryview(tail).cast('b')
    return signed_view[7]

In [7]:
def mview_slice(m):
    """Return element 7 of ``m[1:]`` as a signed char, slicing a memoryview.

    ``m`` is wrapped in a ``memoryview`` *before* slicing, so the slice and
    the ``'b'`` cast are views over ``m``'s buffer rather than new objects
    built from ``m[1:]``.

    Every intermediate view (the wrapper, the slice and the cast) is managed
    by the ``with`` statement, so all of them are released when the function
    returns. Releasing them explicitly means the caller can close the
    underlying buffer (e.g. an mmap) without depending on the garbage
    collector having already reclaimed the temporary views.
    """
    with memoryview(m) as whole, whole[1:] as tail, tail.cast('b') as signed:
        return signed[7]

In [8]:
# Data file used by this check and by the profiling/timing cells that follow.
f = '20140728.json.xz'
print('size', os.stat(f).st_size)
# Sanity check: all four accessors read the byte at offset 8 (directly, or as
# element 7 of a slice starting at 1) and must agree on its signed value.
assert mmap_and(f, get_value) == mmap_and(f, slice_) == mmap_and(f, slice_cast) == mmap_and(f, mview_slice)

size 2086114280


In [9]:
# Peak-memory comparison. The first line is the baseline (map the file and
# call a no-op); the rest run each accessor through the same mmap_and wrapper.
%memit mmap_and(f, lambda x: None)
%memit mmap_and(f, get_value)
%memit mmap_and(f, slice_)
%memit mmap_and(f, slice_cast)
%memit mmap_and(f, mview_slice)

peak memory: 35.02 MiB, increment: 0.30 MiB
peak memory: 35.04 MiB, increment: 0.01 MiB
peak memory: 3860.70 MiB, increment: 3825.66 MiB
peak memory: 3863.97 MiB, increment: 3828.93 MiB
peak memory: 35.04 MiB, increment: 0.00 MiB


In [10]:
# Timing baseline: open, map and close the file with a callback that does nothing.
%timeit mmap_and(f, lambda x: None)

The slowest run took 18.19 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 3: 15.4 µs per loop


In [11]:
# get_value indexes a single byte of the mapping directly (m[8]).
%timeit mmap_and(f, get_value)

The slowest run took 10.44 times longer than the fastest. This could mean that an intermediate result is being cached 
10000 loops, best of 3: 27.9 µs per loop


In [12]:
# slice_ takes m[1:] from the mapping first and then indexes the result.
%timeit mmap_and(f, slice_)

1 loops, best of 3: 1.36 s per loop


In [13]:
# slice_cast also takes m[1:] first, then reads through a memoryview cast to 'b'.
%timeit mmap_and(f, slice_cast)

1 loops, best of 3: 1.36 s per loop


In [14]:
# mview_slice wraps the mapping in a memoryview before slicing and casting.
%timeit mmap_and(f, mview_slice)

The slowest run took 5.40 times longer than the fastest. This could mean that an intermediate result is being cached 
10000 loops, best of 3: 29.6 µs per loop
