#!/usr/bin/env python3
import argparse
import json
import os
import subprocess
import tempfile

import numpy
import scipy.stats


def format_s(seconds):
    """Format a time in seconds like Criterion does."""
    if seconds < 0:
        return "-" + format_s(-seconds)

    def format_with_prefix(seconds, prefix):
        """Format to 4 significant digits, even if they are trailing zeros."""
        if seconds >= 1e9:
            return "{:.4g} {}".format(seconds, prefix)
        for exponent in [3, 2, 1]:
            if seconds >= 10 ** exponent:
                return "{1:.{0}f} {2}".format(3 - exponent, seconds, prefix)
        return "{:.3f} {}".format(seconds, prefix)

    PREFIXES = [
        (0, ""),
        (-3, "m"),
        (-6, "μ"),
        (-9, "n"),
        (-12, "p"),
        (-15, "f"),
        (-18, "a")]
    for exponent, prefix in PREFIXES:
        if seconds >= 10 ** exponent:
            return format_with_prefix(seconds * 10 ** (-exponent), prefix + "s")
    return "{:g} s".format(seconds)


def format_row(a, b, c=""):
    """Format a row of output."""
    return "{: <20} {: <10} {}".format(a, b, c)


def criterion_print_extra_stats(benchmark):
    """Print some extra statistics that Criterion doesn't provide.

    `benchmark` should be a parsed JSON object describing a single benchmark
    from Criterion's output (tested on Criterion 1.2.3)."""
    # Extract columns which are interesting and should be non-null.
    keys = benchmark["reportKeys"]
    indices = {name: index for index, name in enumerate(keys)}

    def process(datum):
        return {key: datum[indices[key]]
                for key in ["time", "cpuTime", "iters"]}

    measured = list(map(process, benchmark["reportMeasured"]))

    # Criterion repeatedly executes the benchmarked code in a loop with an
    # increasing number of iterations. `time` and `cpuTime` are totals for the
    # loop and `iters` is the number of iterations.
    mean_times = [datum["time"] / datum["iters"] for datum in measured]
    print(format_row(
        "quartiles of means",
        ", ".join(
            format_s(numpy.percentile(mean_times, p))
            for p in [25, 50, 75])))

    # Theil-Sen regression of total time vs. number of iterations: the slope
    # estimates the per-iteration time and the intercept the fixed overhead of
    # each measurement loop.
    slope, intercept, *_ = scipy.stats.theilslopes(
        [m["time"] for m in measured],
        [m["iters"] for m in measured])
    print(format_row(
        "Theil-Sen",
        format_s(slope),
        "(intercept: {})".format(format_s(intercept))))

    print(format_row("min of means", format_s(numpy.amin(mean_times))))


def criterion_benchmark(command, time_limit_s=None):
    """Benchmark a shell command using Criterion and print the results."""
    with tempfile.TemporaryDirectory(prefix="benchmark-") as dir_name:
        json_file = os.path.join(dir_name, "criterion-out.json")
        bench_command = ["bench"]
        if time_limit_s is not None:
            bench_command += ["--time-limit", str(time_limit_s)]
        bench_command += ["--json", json_file, "--", command]
        process = subprocess.run(bench_command, stdout=subprocess.PIPE)
        print(process.stdout.decode("utf-8").rstrip("\n"))

        with open(json_file, "r") as f:
            data = json.load(f)
        data = data[2]  # Skip the header.
        assert len(data) == 1  # We're always doing a single benchmark.
        criterion_print_extra_stats(data[0])


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("command", help="sh command to benchmark")
    parser.add_argument(
        "--time-limit", type=int, default=60,
        help="time limit in seconds for the whole benchmark")
    args = parser.parse_args()
    criterion_benchmark(args.command, time_limit_s=args.time_limit)
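
# Example invocation (a sketch, not a captured run: the script filename and the
# benchmarked command below are hypothetical, but the flags match the argparse
# definition above; a `bench` executable accepting the flags used in
# `criterion_benchmark` must be on $PATH):
#
#     ./criterion-stats.py --time-limit 30 'sleep 0.1'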