Skip to content

Instantly share code, notes, and snippets.

@junrillg
Forked from oleganza/NSData+OADataHelpers.m
Created April 6, 2016 05:38
Show Gist options
  • Save junrillg/6e827371e7b0f508c558287f0576ab05 to your computer and use it in GitHub Desktop.
Save junrillg/6e827371e7b0f508c558287f0576ab05 to your computer and use it in GitHub Desktop.

Revisions

  1. @oleganza oleganza created this gist Jan 16, 2011.
    199 changes: 199 additions & 0 deletions NSData+OADataHelpers.m
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,199 @@
    #import "NSData+OADataHelpers.h"

    @implementation NSData (OADataHelpers)

    // Classifies the UTF-8 sequence that starts at bytes[0], reading at most `available`
    // bytes (`available` must be at least 1).
    //
    // Returns the length (1..4) of the well-formed sequence found there, or 0 when the
    // bytes are malformed. In the malformed case *malformedLength receives the number of
    // bytes (always >= 1) that the caller should replace with a single U+FFFD: the lead
    // byte plus any continuation bytes that were still acceptable. The first offending
    // byte is never included, so it gets re-examined as the start of the next sequence.
    //
    // Accepted forms (well-formed UTF-8 only; no overlong encodings, no surrogates,
    // nothing above U+10FFFF, no 5- or 6-byte forms):
    //   00..7F
    //   C2..DF 80..BF
    //   E0     A0..BF 80..BF
    //   E1..EC 80..BF 80..BF
    //   ED     80..9F 80..BF
    //   EE..EF 80..BF 80..BF
    //   F0     90..BF 80..BF 80..BF
    //   F1..F3 80..BF 80..BF 80..BF
    //   F4     80..8F 80..BF 80..BF
    static NSUInteger OAUTF8SequenceLength(const unsigned char *bytes,
                                           NSUInteger available,
                                           NSUInteger *malformedLength)
    {
        const unsigned char lead = bytes[0];

        if (lead < 0x80)
        {
            return 1; // plain ASCII
        }

        NSUInteger expectedLength;
        // Allowed range for the byte right after the lead byte. Some lead bytes narrow it
        // to exclude overlong encodings, surrogates and code points above U+10FFFF.
        unsigned char lowest  = 0x80;
        unsigned char highest = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF)
        {
            expectedLength = 2;
        }
        else if (lead >= 0xE0 && lead <= 0xEF)
        {
            expectedLength = 3;
            if (lead == 0xE0) lowest  = 0xA0; // below that the encoding is overlong
            if (lead == 0xED) highest = 0x9F; // above that lie the UTF-16 surrogates
        }
        else if (lead >= 0xF0 && lead <= 0xF4)
        {
            expectedLength = 4;
            if (lead == 0xF0) lowest  = 0x90; // below that the encoding is overlong
            if (lead == 0xF4) highest = 0x8F; // above that is beyond U+10FFFF
        }
        else
        {
            // Stray continuation byte (80..BF), overlong lead (C0, C1) or a lead byte
            // that can never appear in UTF-8 (F5..FF).
            *malformedLength = 1;
            return 0;
        }

        NSUInteger index = 1;
        while (index < expectedLength && index < available)
        {
            const unsigned char continuation = bytes[index];
            if (continuation < lowest || continuation > highest)
            {
                break;
            }
            // Only the first continuation byte has a narrowed range.
            lowest  = 0x80;
            highest = 0xBF;
            index++;
        }

        if (index == expectedLength)
        {
            return expectedLength;
        }

        // Either an unacceptable byte was hit or the data ended mid-sequence.
        *malformedLength = index;
        return 0;
    }

    // Decodes the receiver as UTF-8 text. The bytes are healed with
    // -dataByHealingUTF8Stream first, so malformed input shows up as U+FFFD characters
    // in the resulting string instead of making the decoding fail.
    // The returned string is autoreleased (this file uses manual reference counting).
    - (NSString*) UTF8String
    {
        NSData* healedData = [self dataByHealingUTF8Stream];
        NSString* string = [[NSString alloc] initWithData:healedData encoding:NSUTF8StringEncoding];
        return [string autorelease];
    }

    // Replaces all broken sequences by the U+FFFD replacement character (�) and returns
    // NSData with valid UTF-8 bytes.
    //
    // Well-formed sequences are copied through untouched. Every malformed sequence,
    // including one cut short by the end of the data, becomes exactly one U+FFFD.
    // The byte that breaks a sequence is not consumed along with it, so valid text that
    // directly follows a broken lead byte is preserved.
    - (NSData*) dataByHealingUTF8Stream
    {
        // U+FFFD encoded as UTF-8. Spelled out as bytes so that the source file's own
        // encoding cannot damage it.
        static const unsigned char replacementBytes[] = { 0xEF, 0xBF, 0xBD };

        const unsigned char* bytes = [self bytes];
        const NSUInteger length = [self length];
        NSMutableData* resultData = [NSMutableData dataWithCapacity:length];

        // [validRunStart, byteIndex) is the run of valid bytes not yet copied to resultData.
        NSUInteger validRunStart = 0;
        NSUInteger byteIndex = 0;

        while (byteIndex < length)
        {
            NSUInteger malformedLength = 0;
            NSUInteger sequenceLength = OAUTF8SequenceLength(bytes + byteIndex,
                                                             length - byteIndex,
                                                             &malformedLength);
            if (sequenceLength > 0)
            {
                byteIndex += sequenceLength;
                continue;
            }

            // Copy the valid run collected so far, then stand in for the broken bytes.
            if (byteIndex > validRunStart)
            {
                [resultData appendBytes:bytes + validRunStart length:byteIndex - validRunStart];
            }
            [resultData appendBytes:replacementBytes length:sizeof(replacementBytes)];

            byteIndex += malformedLength;
            validRunStart = byteIndex;
        }

        if (length > validRunStart)
        {
            [resultData appendBytes:bytes + validRunStart length:length - validRunStart];
        }
        return resultData;
    }

    @end