Skip to content

Instantly share code, notes, and snippets.

@divinity76
Created January 25, 2024 05:20
Show Gist options
  • Select an option

  • Save divinity76/5729472dd5d77e94cd0acb245aac2226 to your computer and use it in GitHub Desktop.

Select an option

Save divinity76/5729472dd5d77e94cd0acb245aac2226 to your computer and use it in GitHub Desktop.

Revisions

  1. divinity76 created this gist Jan 25, 2024.
    168 changes: 168 additions & 0 deletions b3instructions.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,168 @@
    <?php

    declare(strict_types=1);

    /**
     * Better version of shell_exec(): supports stdin, stdout, stderr and the
     * OS-level return code of the executed command.
     *
     * The child's stdout and stderr are redirected into two temporary files
     * (not pipes), which are read back once the process has finished or, when
     * $print_std is true, polled while it is still running.
     *
     * @param string $cmd
     * command to execute (handed to proc_open() as a single string)
     * @param string $stdin
     * (optional) data to send to stdin, binary data is supported.
     * @param string|null $stdout
     * (optional) receives the stdout data generated by cmd
     * @param string|null $stderr
     * (optional) receives the stderr data generated by cmd
     * @param bool $print_std
     * (optional, default false) if you want stdout+stderr to be printed while it's running,
     * set this to true. (useful for long-running commands)
     * @return int exit code reported for the process
     * @throws RuntimeException if the temporary files cannot be created or the process cannot be started
     */
    function hhb_exec(string $cmd, string $stdin = "", ?string &$stdout = null, ?string &$stderr = null, bool $print_std = false): int
    {
        $stdouth = tmpfile();
        $stderrh = tmpfile();
        if ($stdouth === false || $stderrh === false) {
            throw new RuntimeException("hhb_exec: unable to create temporary files for stdout/stderr");
        }
        try {
            $descriptorspec = [
                0 => ["pipe", "rb"], // stdin
                1 => ["file", stream_get_meta_data($stdouth)['uri'], 'ab'],
                2 => ["file", stream_get_meta_data($stderrh)['uri'], 'ab'],
            ];
            $pipes = [];
            $proc = proc_open($cmd, $descriptorspec, $pipes);
            if (!is_resource($proc)) {
                throw new RuntimeException("hhb_exec: proc_open() failed for command: $cmd");
            }
            // Feed stdin in as many chunks as the pipe accepts; fwrite() may write less than requested.
            while (strlen($stdin) > 0) {
                $written_now = fwrite($pipes[0], $stdin);
                if ($written_now === false || $written_now < 1 || $written_now === strlen($stdin)) {
                    // Either everything has been written, or the child stopped reading:
                    // deliberately best-effort, the remaining input is dropped.
                    break;
                }
                $stdin = substr($stdin, $written_now);
            }
            // Closing stdin signals EOF to the child.
            fclose($pipes[0]);
            unset($stdin, $pipes[0]);
            if (!$print_std) {
                $proc_ret = proc_close($proc); // this line will stall until the process has exited.
                $captured_out = stream_get_contents($stdouth);
                $captured_err = stream_get_contents($stderrh);
                // stream_get_contents() returns false on failure; never leak that into the outputs.
                $stdout = is_string($captured_out) ? $captured_out : "";
                $stderr = is_string($captured_err) ? $captured_err : "";
            } else {
                $stdout = "";
                $stderr = "";
                stream_set_blocking($stdouth, false);
                stream_set_blocking($stderrh, false);
                // Drains whatever the child has appended so far, mirrors it to our own
                // STDOUT/STDERR and reports whether anything new was read.
                $fetchstd = function () use (&$stdout, &$stderr, $stdouth, $stderrh): bool {
                    $ret = false;
                    $tmp = stream_get_contents($stdouth);
                    if (is_string($tmp) && strlen($tmp) > 0) {
                        $ret = true;
                        $stdout .= $tmp;
                        fwrite(STDOUT, $tmp);
                    }
                    $tmp = stream_get_contents($stderrh);
                    if (is_string($tmp) && strlen($tmp) > 0) {
                        $ret = true;
                        $stderr .= $tmp;
                        fwrite(STDERR, $tmp);
                    }
                    return $ret;
                };
                while (($status = proc_get_status($proc))["running"]) {
                    if (!$fetchstd()) {
                        // nothing new: back off for 100 ms before polling again
                        usleep(100 * 1000);
                    }
                }
                // Take the exit code from the status that first reported "not running".
                $proc_ret = $status["exitcode"];
                proc_close($proc);
                // Pick up anything written between the last poll and process exit.
                $fetchstd();
            }
            return $proc_ret;
        } finally {
            fclose($stdouth);
            fclose($stderrh);
        }
    }
    /**
     * Print a command, run it through hhb_exec() with live output enabled,
     * and raise an exception when it exits with a non-zero status.
     *
     * @param string $cmd
     * command to execute
     * @param string|null $stdout
     * (optional) receives the stdout data generated by cmd
     * @param string|null $stderr
     * (optional) receives the stderr data generated by cmd
     * @throws Exception when the command's exit code is not 0
     */
    function exec2(string $cmd, ?string &$stdout = null, ?string &$stderr = null): void
    {
        echo $cmd, "\n";
        $exitCode = hhb_exec($cmd, "", $stdout, $stderr, true);
        if ($exitCode === 0) {
            return;
        }
        throw new Exception("Error: {$exitCode}: {$cmd}");
    }
    /**
     * Run a (possibly multi-line) shell script through /bin/bash via exec2().
     *
     * @param string $cmds
     * script text, passed to bash as a single escaped -c argument
     */
    function exec3(string $cmds): void
    {
        $quotedScript = escapeshellarg($cmds);
        exec2("/bin/bash -c " . $quotedScript);
    }
    /**
     * Convert the time needed to process a payload into a throughput figure.
     *
     * The payload size is converted with a factor of 1024, so the result is
     * strictly speaking MiB per second.
     *
     * @param int $timeInNanoseconds
     * time taken to process the payload, in nanoseconds (must be > 0)
     * @param int $sizeInKB
     * (optional, default 16) size of the processed payload in KB
     * @return float throughput in MB/s
     * @throws InvalidArgumentException if $timeInNanoseconds is not positive or $sizeInKB is negative
     */
    function calculateThroughputMBPS(int $timeInNanoseconds, int $sizeInKB = 16): float
    {
        if ($timeInNanoseconds <= 0) {
            // Guards the division below and rejects meaningless negative durations.
            throw new InvalidArgumentException("timeInNanoseconds must be positive, got $timeInNanoseconds");
        }
        if ($sizeInKB < 0) {
            throw new InvalidArgumentException("sizeInKB must not be negative, got $sizeInKB");
        }
        $sizeInMB = $sizeInKB / 1024; // Convert KB to MB (16KB = 0.015625MB)
        $timeInSeconds = $timeInNanoseconds / 1000000000; // Convert nanoseconds to seconds

        return $sizeInMB / $timeInSeconds; // Throughput in MB/s
    }
    // Report every class of error, show it on screen, and promote anything
    // that the current error_reporting() mask covers to an ErrorException.
    error_reporting(E_ALL);
    ini_set("display_errors", "1");
    set_error_handler(static function ($severity, $message, $file, $line) {
        if ((error_reporting() & $severity) === 0) {
            // Masked out by the active error_reporting() level: do not throw.
            return;
        }
        throw new ErrorException($message, 0, $severity, $file, $line);
    });
    // Fetch the upstream BLAKE3 C sources once; skipped when the directory already exists.
    if (!is_dir("upstream_blake3")) {
        exec3(
            <<<'CMD'
            # abort on the first failing command: without this, a failed clone or cd
            # would let the rm -rf lines below run in the caller's working directory
            set -euo pipefail
            # fancy way of just fetching the "c" folder (the only thing we want)
            git clone --branch '1.5.0' -n --depth=1 --filter=tree:0 'https://github.com/BLAKE3-team/BLAKE3.git' 'upstream_blake3'
            cd upstream_blake3
            git sparse-checkout set --no-cone c
            git checkout
            rm -rf .git
            cd c
            # some stuff we don't need
            rm -rf blake3_c_rust_bindings test.py example.c main.c Makefile.testing CMakeLists.txt blake3-config.cmake.in README.md .gitignore
            CMD
        );
    }
    // Number of hash runs performed by ./test; it reports the best (lowest) time of all runs.
    $iterations = 999;
    // EXT_HASH_BLAKE3_SOURCES="hash_blake3.c blake3/upstream_blake3/c/blake3.c blake3/upstream_blake3/c/blake3_dispatch.c blake3/upstream_blake3/c/blake3_portable.c"
    // One compile command per instruction-set variant; each builds the same ./test binary.
    $tests = [
        "O2-portable" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
        "O2-portable-march" => "gcc -O2 -march=native -mtune=native -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
        "O2-sse2" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
        "O2-sse41" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
        "O2-avx2" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S -DBLAKE3_NO_AVX512",
        "O2-avx512" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S upstream_blake3/c/blake3_avx512_x86-64_unix.S",
    ];
    $results = [];
    foreach ($tests as $test_name => $cmd) {
        exec2($cmd);
        exec2("./test $iterations", $stdout, $stderr);
        $stdout = trim((string) $stdout);
        // ./test prints its best time as a whole number of nanoseconds; a value below 1
        // cannot be a real measurement and would break the throughput division.
        $nanoseconds_for_16_kib = filter_var($stdout, FILTER_VALIDATE_INT, ["options" => ["min_range" => 1]]);
        if ($nanoseconds_for_16_kib === false) {
            throw new Exception("Error: could not parse '$stdout' as a positive integer");
        }
        $results[$test_name] = [
            // The unit is nanoseconds: it is what ./test reports and what calculateThroughputMBPS() expects.
            "nanoseconds_for_16_kib" => $nanoseconds_for_16_kib,
            "mb_per_second" => calculateThroughputMBPS($nanoseconds_for_16_kib),
        ];
    }
    // Slowest first, fastest last; keys (test names) are preserved.
    uasort($results, static function (array $a, array $b): int {
        return $a["mb_per_second"] <=> $b["mb_per_second"];
    });
    var_dump($results);
    73 changes: 73 additions & 0 deletions sample_output.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,73 @@
    $ time php b3instructions.php
    gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2
    ./test 999
    13876

    gcc -O2 -march=native -mtune=native -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2
    ./test 999
    29295

    gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2
    ./test 999
    4969

    gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2
    ./test 999
    4688

    gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S -DBLAKE3_NO_AVX512
    ./test 999
    2384

    gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S upstream_blake3/c/blake3_avx512_x86-64_unix.S
    ./test 999
    1753

    array(6) {
    ["O2-portable-march"]=>
    array(2) {
    ["microseconds_for_16_kib"]=>
    int(29295)
    ["mb_per_second"]=>
    float(533.3674688513398)
    }
    ["O2-portable"]=>
    array(2) {
    ["microseconds_for_16_kib"]=>
    int(13876)
    ["mb_per_second"]=>
    float(1126.0449697319111)
    }
    ["O2-sse2"]=>
    array(2) {
    ["microseconds_for_16_kib"]=>
    int(4969)
    ["mb_per_second"]=>
    float(3144.4958744214127)
    }
    ["O2-sse41"]=>
    array(2) {
    ["microseconds_for_16_kib"]=>
    int(4688)
    ["mb_per_second"]=>
    float(3332.977815699659)
    }
    ["O2-avx2"]=>
    array(2) {
    ["microseconds_for_16_kib"]=>
    int(2384)
    ["mb_per_second"]=>
    float(6554.1107382550335)
    }
    ["O2-avx512"]=>
    array(2) {
    ["microseconds_for_16_kib"]=>
    int(1753)
    ["mb_per_second"]=>
    float(8913.291500285226)
    }
    }

    real 0m3.017s
    user 0m1.913s
    sys 0m0.105s
    48 changes: 48 additions & 0 deletions test.c
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,48 @@
    // compile: gcc -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_AVX2 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_NEON -DBLAKE3_NO_SSE41
    // gcc -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S upstream_blake3/c/blake3_avx512_x86-64_unix.S
    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include "upstream_blake3/c/blake3.h"
    #include <sys/time.h>
    #include <stdlib.h>
    #include <math.h>
    #include <time.h>

    int64_t nanoseconds()
    {
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
    return (int64_t)ts.tv_sec * 1000000000 + (int64_t)ts.tv_nsec;
    }

    // Hashes a 16 KiB buffer with BLAKE3 <iterations> times and prints the
    // fastest single run, in nanoseconds, as a bare integer on stdout
    // (followed by an empty line). Diagnostics go to stderr so stdout
    // only ever carries the measurement.
    // Exit status: 0 on success, 1 on bad usage or an invalid iteration count.
    int main(int argc, char *argv[])
    {
        if (argc != 2)
        {
            fprintf(stderr, "Usage: %s <iterations>\n", argv[0]);
            return 1;
        }
        // strtol + end-pointer check rejects non-numeric input, which atoi would
        // silently turn into 0 and make the loop below never run.
        char *end = NULL;
        long iterations = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || iterations <= 0)
        {
            fprintf(stderr, "Error: <iterations> must be a positive integer, got '%s'\n", argv[1]);
            return 1;
        }
        char teststr[16 * 1024] = "Hello World!"; // 16kb is an important size: it's the size of the TLS record buffer.
        int64_t best = INT64_MAX;
        for (long i = 0; i < iterations; i++)
        {
            int64_t start = nanoseconds();
            blake3_hasher hasher;
            blake3_hasher_init(&hasher);
            blake3_hasher_update(&hasher, teststr, sizeof(teststr));
            uint8_t output[BLAKE3_OUT_LEN];
            blake3_hasher_finalize(&hasher, output, BLAKE3_OUT_LEN);
            int64_t end_time = nanoseconds();
            int64_t elapsed = end_time - start;
            // keep the minimum: the run least disturbed by scheduling noise
            if (elapsed < best)
            {
                best = elapsed;
            }
        }
        // int64_t is not guaranteed to be long; cast so the format specifier always matches.
        printf("%lld\n", (long long)best);
        printf("\n");
        return 0;
    }