Skip to content

Instantly share code, notes, and snippets.

@clausecker
Created October 17, 2019 14:41
Show Gist options
  • Save clausecker/a1fd7df7bfac39d6da4d2710551c6712 to your computer and use it in GitHub Desktop.
Save clausecker/a1fd7df7bfac39d6da4d2710551c6712 to your computer and use it in GitHub Desktop.

Revisions

  1. clausecker created this gist Oct 17, 2019.
    132 changes: 132 additions & 0 deletions wc.c
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,132 @@
    /* parallel wc(1) demo program */
    /* cc -O3 -fopenmp -o wc wc.c */
    #include <ctype.h>
    #include <unistd.h>
    #include <omp.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <stdbool.h>
    #include <sys/stat.h>

    #define BUFSIZE (128 * 1024)

    /*
    * Count chars, words, and lines for one segment of the input file.
    * Track if the first and last characters of the segment were
    * spaces. We count words on a blank-nonblank transition. If the
    * first character of a segment is not a blank, it does not count
    * as a word.
    */
    struct counter {
    unsigned long chars, words, lines;
    bool firstblank, lastblank;
    };

    static void
    reduce(struct counter *restrict totals, const struct counter *restrict segments, size_t nthreads)
    {
    size_t i;

    totals->chars = 0;
    totals->words = 0;
    totals->lines = 0;
    totals->lastblank = true;

    for (i = 0; i < nthreads; i++) {
    if (segments[i].chars == 0)
    continue;

    totals->chars += segments[i].chars;
    totals->words += segments[i].words + (totals->lastblank & !segments[i].firstblank);
    totals->lines += segments[i].lines;
    totals->lastblank = segments[i].lastblank;
    }
    }

    static void
    dosegment(struct counter *segments, int fd, int tid, int nthreads, off_t filesize)
    {
    struct counter *seg = segments + tid;
    off_t start, end;
    size_t want, i;
    ssize_t got;
    unsigned long chars = 0, words = 0, lines = 0;
    char *buf;
    bool first = true, blank = false, nblank;

    buf = malloc(BUFSIZE);
    if (buf == NULL) {
    perror("malloc");
    exit(EXIT_FAILURE);
    }

    start = filesize * ((double)tid / nthreads);
    end = filesize * ((tid + 1.0) / nthreads);

    while (start < end) {
    want = end - start < BUFSIZE ? end - start : BUFSIZE;
    got = pread(fd, buf, want, start);
    if (got < 0) {
    perror("pread");
    exit(EXIT_FAILURE);
    }

    if (got == 0) {
    fprintf(stderr, "file truncated during reading\n");
    exit(EXIT_FAILURE);
    }

    if (first) {
    seg->firstblank = isspace(buf[0]);
    first = 0;
    }

    chars += got;
    start += got;
    for (i = 0; i < got; i++) {
    lines += buf[i] == '\n';
    nblank = isspace(buf[i]);
    words += blank & !nblank;
    blank = nblank;
    }
    }

    seg->chars = chars;
    seg->lines = lines;
    seg->words = words;
    seg->lastblank = blank;
    }

    extern int
    main(void)
    {
    struct stat st;
    struct counter totals, *segments;
    size_t nthreads;

    /* check if we can seek the input */
    if (fstat(STDIN_FILENO, &st) != 0) {
    perror("fstat");
    return (EXIT_FAILURE);
    }

    if (!S_ISREG(st.st_mode)) {
    fprintf(stderr, "not a regular file\n");
    return (EXIT_FAILURE);
    }

    nthreads = omp_get_max_threads();
    segments = calloc(nthreads, sizeof *segments);
    if (segments == NULL) {
    perror("calloc");
    return (EXIT_FAILURE);
    }

    # pragma omp parallel
    dosegment(segments, STDIN_FILENO, omp_get_thread_num(), omp_get_num_threads(), st.st_size);

    reduce(&totals, segments, nthreads);
    printf("%lu %lu %lu\n", totals.lines, totals.words, totals.chars);

    return (EXIT_SUCCESS);
    }