Skip to content

Instantly share code, notes, and snippets.

@fireblue
Last active December 17, 2015 19:29
Show Gist options
  • Select an option

  • Save fireblue/5660870 to your computer and use it in GitHub Desktop.

Select an option

Save fireblue/5660870 to your computer and use it in GitHub Desktop.

Revisions

  1. fireblue renamed this gist May 28, 2013. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. fireblue created this gist May 28, 2013.
    76 changes: 76 additions & 0 deletions gistfile1.m
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,76 @@
    - (BOOL)isUTF8
    {
    const char *string = (const char *)self.bytes;
    const unsigned char * bytes = (const unsigned char *)string;
    while(*bytes)
    {
    if( (// ASCII
    // use bytes[0] <= 0x7F to allow ASCII control characters
    bytes[0] == 0x09 ||
    bytes[0] == 0x0A ||
    bytes[0] == 0x0D ||
    (0x20 <= bytes[0] && bytes[0] <= 0x7E)
    )
    ) {
    bytes += 1;
    continue;
    }

    if( (// non-overlong 2-byte
    (0xC2 <= bytes[0] && bytes[0] <= 0xDF) &&
    (0x80 <= bytes[1] && bytes[1] <= 0xBF)
    )
    ) {
    bytes += 2;
    continue;
    }

    if( (// excluding overlongs
    bytes[0] == 0xE0 &&
    (0xA0 <= bytes[1] && bytes[1] <= 0xBF) &&
    (0x80 <= bytes[2] && bytes[2] <= 0xBF)
    ) ||
    (// straight 3-byte
    ((0xE1 <= bytes[0] && bytes[0] <= 0xEC) ||
    bytes[0] == 0xEE ||
    bytes[0] == 0xEF) &&
    (0x80 <= bytes[1] && bytes[1] <= 0xBF) &&
    (0x80 <= bytes[2] && bytes[2] <= 0xBF)
    ) ||
    (// excluding surrogates
    bytes[0] == 0xED &&
    (0x80 <= bytes[1] && bytes[1] <= 0x9F) &&
    (0x80 <= bytes[2] && bytes[2] <= 0xBF)
    )
    ) {
    bytes += 3;
    continue;
    }

    if( (// planes 1-3
    bytes[0] == 0xF0 &&
    (0x90 <= bytes[1] && bytes[1] <= 0xBF) &&
    (0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
    (0x80 <= bytes[3] && bytes[3] <= 0xBF)
    ) ||
    (// planes 4-15
    (0xF1 <= bytes[0] && bytes[0] <= 0xF3) &&
    (0x80 <= bytes[1] && bytes[1] <= 0xBF) &&
    (0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
    (0x80 <= bytes[3] && bytes[3] <= 0xBF)
    ) ||
    (// plane 16
    bytes[0] == 0xF4 &&
    (0x80 <= bytes[1] && bytes[1] <= 0x8F) &&
    (0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
    (0x80 <= bytes[3] && bytes[3] <= 0xBF)
    )
    ) {
    bytes += 4;
    continue;
    }

    return 0;
    }
    return 1;
    }