#!/usr/bin/env python3
import argparse
import json
import os
import subprocess
import tempfile

import numpy
import scipy.stats


def format_s(seconds):
    """Format a time in seconds like Criterion does."""
    if seconds < 0:
        return "-" + format_s(-seconds)

    def format_with_prefix(seconds, prefix):
        """Format to 4 significant digits, even if they are trailing zeros."""
        if seconds >= 1e9:
            return "{:.4g} {}".format(seconds, prefix)
        for exponent in [3, 2, 1]:
            if seconds >= 10 ** exponent:
                return "{1:.{0}f} {2}".format(3 - exponent, seconds, prefix)
        return "{:.3f} {}".format(seconds, prefix)

    PREFIXES = [
        (0, ""),
        (-3, "m"),
        (-6, "μ"),
        (-9, "n"),
        (-12, "p"),
        (-15, "f"),
        (-18, "a")]
    for exponent, prefix in PREFIXES:
        if seconds >= 10 ** exponent:
            return format_with_prefix(seconds * 10 ** (-exponent), prefix + "s")
    return "{:g} s".format(seconds)


def format_row(a, b, c=""):
    """Format a row of output."""
    return "{: <20} {: <10} {}".format(a, b, c)


def criterion_print_extra_stats(benchmark):
    """Print some extra statistics that Criterion doesn't provide.

    `benchmark` should be a parsed JSON object describing a single benchmark
    from Criterion's output (tested on Criterion 1.2.3)."""
    # Extract columns which are interesting and should be non-null.
    keys = benchmark["reportKeys"]
    indices = {name: index for index, name in enumerate(keys)}

    def process(datum):
        return {key: datum[indices[key]]
                for key in ["time", "cpuTime", "iters"]}

    measured = list(map(process, benchmark["reportMeasured"]))

    # Criterion repeatedly executes the benchmarked code in a loop with an
    # increasing number of iterations. `time` and `cpuTime` are totals for the
    # loop and `iters` is the number of iterations.
    mean_times = [datum["time"] / datum["iters"] for datum in measured]
    print(format_row(
        "quartiles of means",
        ", ".join(
            format_s(numpy.percentile(mean_times, p))
            for p in [25, 50, 75])))

    # Theil-Sen regression of total time vs. number of iterations: the slope
    # estimates the per-iteration time and the intercept the fixed overhead of
    # each measurement loop.
    slope, intercept, *_ = scipy.stats.theilslopes(
        [m["time"] for m in measured],
        [m["iters"] for m in measured])
    print(format_row(
        "Theil-Sen",
        format_s(slope),
        "(intercept: {})".format(format_s(intercept))))

    print(format_row("min of means", format_s(numpy.amin(mean_times))))


def criterion_benchmark(command, time_limit_s=None):
    """Benchmark a shell command using Criterion and print the results."""
    with tempfile.TemporaryDirectory(prefix="benchmark-") as dir_name:
        json_file = os.path.join(dir_name, "criterion-out.json")
        bench_command = ["bench"]
        if time_limit_s is not None:
            bench_command += ["--time-limit", str(time_limit_s)]
        bench_command += ["--json", json_file, "--", command]
        process = subprocess.run(bench_command, stdout=subprocess.PIPE)
        print(process.stdout.decode("utf-8").rstrip("\n"))

        with open(json_file, "r") as f:
            data = json.load(f)
        data = data[2]  # Skip the header.
        assert len(data) == 1  # We're always doing a single benchmark.
        criterion_print_extra_stats(data[0])


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("command", help="sh command to benchmark")
    parser.add_argument(
        "--time-limit", type=int, default=60,
        help="time limit in seconds for the whole benchmark")
    args = parser.parse_args()
    criterion_benchmark(args.command, time_limit_s=args.time_limit)
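
# Example invocation (a sketch, not a captured run: the script filename and the
# benchmarked command below are hypothetical, but the flags match the argparse
# definition above; a `bench` executable accepting the flags used in
# `criterion_benchmark` must be on $PATH):
#
#     ./criterion-stats.py --time-limit 30 'sleep 0.1'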