pkkm · April 16, 2018 21:37 · Apr 16, 2018
diff --git a/analyze.R b/analyze.R
@@ -0,0 +1,58 @@
+#!/usr/bin/env Rscript
+
+library(ggplot2)
+library(reshape2)
+library(pander)
+
+# Load the per-benchmark statistics. `check.names=FALSE` keeps the CSV header
+# text (which contains spaces and hyphens) as the column names.
+data <- read.csv("results/results.csv", check.names=FALSE)
+
+# Put related statistics next to each other for readability.
+col_order <- c(
+    "Least-squares slope", "Theil-Sen slope",
+    "Mean", "Median of means",
+    "Minimum of means", "Quartile 1 of means", "Quartile 3 of means")
+data <- data[, col_order]
+
+# Long format; the code below relies on the `variable` (statistic name) and
+# `value` columns that `melt` produces.
+molten <- melt(data)
+
+# Tag every statistic with a category. All rows start out with a sentinel
+# level, so a statistic that is missing from the lists below stands out.
+sentinel <- "This should never be visible"
+regression_stats <- c("Least-squares slope", "Theil-Sen slope")
+central_stats <- c("Mean", "Median of means")
+other_stats <- c("Quartile 1 of means", "Quartile 3 of means", "Minimum of means")
+molten$type <- factor(
+    sentinel,
+    levels=c("Central tendency", "Regression", "Other", sentinel))
+molten$type[molten$variable %in% regression_stats] <- "Regression"
+molten$type[molten$variable %in% central_stats] <- "Central tendency"
+molten$type[molten$variable %in% other_stats] <- "Other"
+
+# Density plot: one curve per statistic, one panel per category stacked in a
+# single column, with identical axis scales in every panel.
+density_aes <- aes(x=value, color=variable)
+density_plot <- ggplot(molten, density_aes) +
+    geom_density(adjust=0.5) +
+    facet_wrap("type", scales="fixed", ncol=1) +
+    labs(x="Time [s]", y="Number of benchmarks (smoothed)", color="")
+ggsave(
+    "results/density.pdf", plot=density_plot,
+    device=cairo_pdf, width=8, height=6)
+
+# Box plot: one box per statistic. The x labels are tilted by 25 degrees,
+# presumably to keep the long statistic names from overlapping.
+box_plot <- ggplot(molten, aes(x=variable, y=value)) +
+    geom_boxplot() +
+    theme(axis.text.x=element_text(angle=25, hjust=1)) +
+    labs(x="Statistic", y="Time [s]")
+# Rendered through an explicit PNG device: 1100 x 1100 px at 200 dpi.
+png(
+    filename="results/boxplot.png", type="cairo",
+    width=1100, height=1100, units="px", res=200)
+print(box_plot)
+dev.off()
+
+# Width of the interval spanned by `data` (maximum minus minimum), as a single
+# number rather than the two-element vector that `range` returns.
+range_num <- function(data) {
+    bounds <- range(data)
+    bounds[2] - bounds[1]
+}
+
+# Summarize the spread of the data in a table: one row per statistic, with the
+# interquartile range and the full range both expressed relative to the median.
+# `vapply` walks the columns directly (no data.frame -> matrix coercion as with
+# `apply`) and fails loudly if a summary is not a single number.
+medians <- vapply(data, median, numeric(1))
+iqr_rel <- vapply(data, IQR, numeric(1)) / medians
+range_rel <- vapply(data, range_num, numeric(1)) / medians
+df <- data.frame(
+    `IQR/Median`=sprintf("%.1f%%", iqr_rel * 100),
+    `Range/Median`=sprintf("%.1f%%", range_rel * 100),
+    row.names=names(data),
+    check.names=FALSE,
+    stringsAsFactors=FALSE)
+# `justify` has three entries: the row-name column, then the two data columns.
+table <- pandoc.table.return(
+    df, style="rmarkdown", justify=c("right", "left", "left"), emphasize.rownames=FALSE)
+# Given a path, `writeLines` opens and closes the file itself, so no connection
+# is left open if writing fails.
+writeLines(table, "results/summary.md")
diff --git a/benchmark.py b/benchmark.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import os
+import subprocess
+import tempfile
+import numpy
+import scipy.stats
+
+def format_s(seconds):
+    """Format a time in seconds like Criterion does.
+
+    The value is scaled to the largest unit among s, ms, μs, ns, ps, fs and as
+    in which it is at least 1, and printed with that unit. Negative values are
+    formatted by magnitude with a leading "-". Anything smaller than 1 as
+    (including 0) is printed unscaled, in seconds.
+    """
+
+    if seconds < 0:
+        return "-" + format_s(-seconds)
+
+    def format_with_prefix(scaled, unit):
+        """Format to 4 digits, even if they are trailing zeros."""
+        if scaled >= 1e9:
+            return "{:.4g} {}".format(scaled, unit)
+        # The larger the integer part, the fewer decimals: 1234, 123.4, 12.34.
+        for exponent in [3, 2, 1]:
+            if scaled >= 10 ** exponent:
+                return "{1:.{0}f} {2}".format(3 - exponent, scaled, unit)
+        return "{:.3f} {}".format(scaled, unit)
+
+    # (power of ten, SI prefix) pairs, largest first; each prefix appears once.
+    PREFIXES = [
+        (0, ""),
+        (-3, "m"),
+        (-6, "μ"),
+        (-9, "n"),
+        (-12, "p"),
+        (-15, "f"),
+        (-18, "a")]
+
+    for exponent, prefix in PREFIXES:
+        if seconds >= 10 ** exponent:
+            return format_with_prefix(seconds * 10 ** (-exponent), prefix + "s")
+    return "{:g} s".format(seconds)
+
+def format_row(a, b, c=""):
+    """Format a row of output.
+
+    `a` is left-aligned in a 20-character column, `b` in a 10-character
+    column, and `c` (optional) follows unpadded; the three parts are
+    separated by single spaces.
+    """
+    columns = (format(a, " <20"), format(b, " <10"), format(c, ""))
+    return " ".join(columns)
+
+def criterion_print_extra_stats(benchmark):
+    """Print some extra statistics that Criterion doesn't provide.
+
+    `benchmark` should be a parsed JSON object describing a single benchmark
+    from Criterion's output (tested on Criterion 1.2.3).
+
+    Three rows are printed, in this order: the quartiles of the per-iteration
+    mean times, the Theil-Sen slope (with its intercept), and the minimum of
+    the per-iteration mean times.
+    """
+
+    # `reportKeys` names the positions inside each `reportMeasured` record.
+    # Pull out the columns which are interesting and should be non-null.
+    column_of = {
+        name: index for index, name in enumerate(benchmark["reportKeys"])}
+    wanted = ("time", "cpuTime", "iters")
+    measured = [
+        {key: record[column_of[key]] for key in wanted}
+        for record in benchmark["reportMeasured"]]
+
+    # Criterion repeatedly executes the benchmarked code in a loop with an
+    # increasing number of iterations. `time` and `cpuTime` are totals for the
+    # loop and `iters` is the number of iterations, so time / iters is the
+    # mean duration of one iteration within that loop.
+    total_times = [sample["time"] for sample in measured]
+    iteration_counts = [sample["iters"] for sample in measured]
+    mean_times = [
+        total / count for total, count in zip(total_times, iteration_counts)]
+
+    quartiles = [numpy.percentile(mean_times, p) for p in (25, 50, 75)]
+    print(format_row(
+        "quartiles of means", ", ".join(map(format_s, quartiles))))
+
+    # Theil-Sen regression of time vs. number of iterations.
+    fit = scipy.stats.theilslopes(total_times, iteration_counts)
+    slope, intercept = fit[0], fit[1]
+    intercept_note = "(intercept: {})".format(format_s(intercept))
+    print(format_row("Theil-Sen", format_s(slope), intercept_note))
+
+    fastest = numpy.amin(mean_times)
+    print(format_row("min of means", format_s(fastest)))
+
+def criterion_benchmark(command, time_limit_s=None):
+    """Benchmark a shell command using Criterion and print the results.
+
+    Runs the `bench` tool on `command`, echoes its report to stdout, then
+    prints the extra statistics computed from the JSON file `bench` wrote
+    into a temporary directory.
+
+    `time_limit_s`, if not None, is passed to `bench` as `--time-limit`.
+
+    Raises `subprocess.CalledProcessError` if `bench` exits with a non-zero
+    status.
+    """
+
+    with tempfile.TemporaryDirectory(prefix="benchmark-") as dir_name:
+        json_file = os.path.join(dir_name, "criterion-out.json")
+
+        bench_command = ["bench"]
+        if time_limit_s is not None:
+            bench_command += ["--time-limit", str(time_limit_s)]
+        bench_command += ["--json", json_file, "--", command]
+
+        process = subprocess.run(bench_command, stdout=subprocess.PIPE)
+        print(process.stdout.decode("utf-8").rstrip("\n"))
+        # Stop here, reporting bench's own exit status, instead of failing
+        # later with a confusing error about a missing or malformed JSON file.
+        process.check_returncode()
+
+        # Read the report before the temporary directory is removed.
+        with open(json_file, "r", encoding="utf-8") as f:
+            data = json.load(f)
+
+    data = data[2] # Skip the header.
+    assert len(data) == 1 # We're always doing a single benchmark.
+    criterion_print_extra_stats(data[0])
+
+if __name__ == "__main__":
+    # Command-line entry point: benchmark the single shell command given as
+    # the positional argument, with an optional overall time limit.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("command", help="sh command to benchmark")
+    cli.add_argument(
+        "--time-limit",
+        type=int,
+        default=60,
+        help="time limit in seconds for the whole benchmark")
+    options = cli.parse_args()
+
+    criterion_benchmark(options.command, time_limit_s=options.time_limit)
diff --git a/main.sh b/main.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+
+# Shell command whose run time is measured: a bash loop that adds RANDOM to a
+# counter 500000 times.
+command='bash -c "a=0; for i in {1..500000}; do (( a += RANDOM )); done"'
+# Number of benchmark runs executed first with their output discarded.
+n_warmup_runs=5
+# Number of benchmark runs whose output is recorded in results/results.csv.
+n_benchmarks=60
+# Passed to benchmark.py as --time-limit for every run (seconds, according to
+# that option's help text).
+single_benchmark_time=60
+
+# Check that every command named in the arguments can be found. On the first
+# one that cannot, print an error to stderr and exit the script with status 1.
+require_cmd_present() {
+    for cmd; do
+        command -v -- "$cmd" >/dev/null 2>&1 && continue
+        printf "ERROR: Required command \`%s\` not found.\n" "$cmd" >&2
+        exit 1
+    done
+}
+
+# Fail early if a program needed by this pipeline is missing: python3 (runs
+# benchmark.py), bench (invoked by benchmark.py), Rscript (runs analyze.R) and
+# the text tools used in single_benchmark.
+require_cmd_present python3 grep cut tr sed bench Rscript
+
+# Output directory for results.csv and for the files analyze.R writes.
+mkdir -p "results"
+
+# Run one benchmark and print its statistics as a single comma-separated row.
+#
+# Pipeline stages, in order:
+#   grep    keep only lines containing time, mean, quartile, Theil or min
+#   cut -c  drop the first 21 characters of each line (presumably the row
+#           label; benchmark.py pads its labels to 20 characters plus a space)
+#   cut -d  drop everything from the first "(" onwards
+#   tr -cd  delete every character except digits, ".", "," and newlines
+#   tr      turn each newline into a comma, joining the lines into one
+#   sed     strip the trailing comma
+# The final printf terminates the row with a newline.
+#
+# NOTE(review): `tr -cd` also deletes the unit suffixes (s, ms, ...), so values
+# printed in different units would land in the CSV on different scales.
+# Confirm that every timing of the benchmarked command is reported in seconds.
+single_benchmark() {
+	./benchmark.py "$command" --time-limit "$single_benchmark_time" |
+		grep -E "time|mean|quartile|Theil|min" |
+		cut -c 22- |
+		cut -d\( -f1 |
+		tr -cd ".,\n0-9" |
+		tr "\n" "," |
+		sed "s/,$//"
+	printf "\n"
+}
+
+# Everything printed inside the braces goes to results/results.csv: a header
+# row followed by one row per recorded benchmark. The warm-up runs come first
+# and their output is thrown away.
+{
+    for ((run = 0; run < n_warmup_runs; run++)); do
+        single_benchmark >/dev/null
+    done
+
+    # Column names, in the order single_benchmark emits the values.
+    printf '%s\n' "Least-squares slope,Mean,Quartile 1 of means,Median of means,Quartile 3 of means,Theil-Sen slope,Minimum of means"
+
+    for ((run = 0; run < n_benchmarks; run++)); do
+        single_benchmark
+    done
+} >"results/results.csv"
+
+# Turn the collected CSV into plots and a summary table.
+./analyze.R
No results found