Last active
January 6, 2022 12:44
-
-
Save czlc/d55f80508749e745feba8c9a3c796fdc to your computer and use it in GitHub Desktop.
Revisions
-
czlc revised this gist
Jan 6, 2022 . 1 changed file with 2 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -22,7 +22,7 @@ static const unsigned long offsetsFromUTF8[6] = { #define UNI_SUR_HIGH_START 0xD800 #define UNI_SUR_LOW_START 0xDC00 void utf8to16(const uint8_t utf8[4], uint8_t utf16[2]) { uint32_t ch = 0; int extra = trailingBytesForUTF8[*utf8]; @@ -35,6 +35,7 @@ void utf8to16(const uint8_t utf8[4], uint16_t utf16[2]) { ch -= offsetsFromUTF8[extra]; if (ch <= 0xffff) { utf16[0] = ch; utf16[1] = 0; } else { ch -= UNI_BASE; utf16[0] = (ch >> UNI_SHIFT) + UNI_SUR_HIGH_START; -
czlc revised this gist
Jan 23, 2018 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -58,7 +58,7 @@ void utf8to32(const uint8_t utf8[4], uint32_t utf32[1]) { void utf16to8(const uint16_t utf16[2], uint8_t utf8[4]) { uint32_t ch = utf16[0]; if ((utf16[0] & UNI_SUR_HIGH_START) == UNI_SUR_HIGH_START) { ch = (utf16[0] & UNI_MASK) << UNI_SHIFT; ch += utf16[1] & UNI_MASK; ch += UNI_BASE; @@ -83,7 +83,7 @@ void utf16to8(const uint16_t utf16[2], uint8_t utf8[4]) { void utf16to32(const uint16_t utf16[2], uint32_t utf32[1]) { uint32_t ch = utf16[0]; if ((utf16[0] & UNI_SUR_HIGH_START) == UNI_SUR_HIGH_START) { ch = (utf16[0] & UNI_MASK) << UNI_SHIFT; ch += utf16[1] & UNI_MASK; ch += UNI_BASE; -
czlc created this gist
Jan 23, 2018 .There are no files selected for viewing
#include <stdint.h>

/*
 * Single-code-point converters between UTF-8, UTF-16 and UTF-32.
 *
 * Every function converts exactly one code point. Output buffers must have
 * room for the longest encoding of one code point (4 UTF-8 bytes or
 * 2 UTF-16 code units). None of the functions reports errors: input that
 * cannot be represented is replaced by U+FFFD (REPLACEMENT CHARACTER).
 */

/*
 * Number of continuation bytes that follow a UTF-8 lead byte, indexed by the
 * lead byte. Bytes 0x80-0xBF (continuation bytes) map to 0, so a stray
 * continuation byte is decoded as if it were a one-byte sequence.
 */
static const uint8_t trailingBytesForUTF8[256] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

/*
 * Value to subtract after accumulating the raw bytes of a sequence with
 * "shift left 6, add next byte", indexed by the number of trailing bytes.
 * It removes the marker bits contributed by the lead byte and by each
 * continuation byte, e.g. for two bytes: (0xC0 << 6) + 0x80 == 0x3080.
 */
static const uint32_t offsetsFromUTF8[6] = {
    0x00000000UL, 0x00003080UL, 0x000E2080UL,
    0x03C82080UL, 0xFA082080UL, 0x82082080UL
};

#define UNI_SHIFT             10        /* payload bits per surrogate       */
#define UNI_BASE              0x0010000 /* first supplementary code point   */
#define UNI_MASK              0x3FF     /* payload mask of a surrogate      */
#define UNI_SUR_HIGH_START    0xD800    /* first high (lead) surrogate      */
#define UNI_SUR_LOW_START     0xDC00    /* first low (trail) surrogate      */
#define UNI_SUR_KIND_MASK     0xFC00    /* bits that identify a surrogate   */
#define UNI_MAX_LEGAL         0x10FFFF  /* last valid Unicode code point    */
#define UNI_REPLACEMENT_CHAR  0xFFFD    /* substituted for unusable input   */

/* Largest number of trailing bytes that fits in the 4-byte input buffers. */
#define UTF8_MAX_TRAILING     3

/*
 * Decode the single UTF-8 sequence starting at utf8[0].
 *
 * Lead bytes that announce more than 3 trailing bytes (0xF8-0xFF) would
 * require reading past the 4-byte buffer, so they yield U+FFFD instead.
 * Continuation bytes are not validated.
 */
static uint32_t decode_utf8(const uint8_t utf8[4])
{
    int extra = trailingBytesForUTF8[utf8[0]];
    uint32_t ch;
    int i;

    if (extra > UTF8_MAX_TRAILING) {
        return UNI_REPLACEMENT_CHAR;
    }

    ch = utf8[0];
    for (i = 1; i <= extra; i++) {
        /* Shift first, then add: the last byte must not be shifted. */
        ch <<= 6;
        ch += utf8[i];
    }
    return ch - offsetsFromUTF8[extra];
}

/*
 * Decode one code point from utf16. utf16[1] is only read when utf16[0] is
 * a high surrogate (0xD800-0xDBFF); it is not checked to be a low surrogate.
 * Any other value of utf16[0] is returned unchanged.
 */
static uint32_t decode_utf16(const uint16_t utf16[2])
{
    uint32_t ch = utf16[0];

    if ((ch & UNI_SUR_KIND_MASK) == UNI_SUR_HIGH_START) {
        ch = ((ch & UNI_MASK) << UNI_SHIFT)
           + ((uint32_t)utf16[1] & UNI_MASK)
           + UNI_BASE;
    }
    return ch;
}

/*
 * Encode ch as UTF-8 into utf8, writing 1 to 4 bytes. Bytes after the
 * encoded sequence are left untouched; the lead byte tells the caller how
 * long the sequence is. Values above U+10FFFF are encoded as U+FFFD.
 */
static void encode_utf8(uint32_t ch, uint8_t utf8[4])
{
    if (ch > UNI_MAX_LEGAL) {
        ch = UNI_REPLACEMENT_CHAR;
    }

    if (ch <= 0x7F) {
        utf8[0] = (uint8_t)ch;
    } else if (ch <= 0x7FF) {
        utf8[0] = (uint8_t)((ch >> 6) | 0xC0);
        utf8[1] = (uint8_t)((ch & 0x3F) | 0x80);
    } else if (ch <= 0xFFFF) {
        utf8[0] = (uint8_t)((ch >> 12) | 0xE0);
        utf8[1] = (uint8_t)(((ch >> 6) & 0x3F) | 0x80);
        utf8[2] = (uint8_t)((ch & 0x3F) | 0x80);
    } else {
        utf8[0] = (uint8_t)((ch >> 18) | 0xF0);
        utf8[1] = (uint8_t)(((ch >> 12) & 0x3F) | 0x80);
        utf8[2] = (uint8_t)(((ch >> 6) & 0x3F) | 0x80);
        utf8[3] = (uint8_t)((ch & 0x3F) | 0x80);
    }
}

/*
 * Encode ch as UTF-16 into utf16. Both elements are always written: a code
 * point up to 0xFFFF is stored in utf16[0] with utf16[1] set to 0, anything
 * larger becomes a surrogate pair. Values above U+10FFFF are encoded as
 * U+FFFD.
 */
static void encode_utf16(uint32_t ch, uint16_t utf16[2])
{
    if (ch > UNI_MAX_LEGAL) {
        ch = UNI_REPLACEMENT_CHAR;
    }

    if (ch <= 0xFFFF) {
        utf16[0] = (uint16_t)ch;
        utf16[1] = 0;
    } else {
        ch -= UNI_BASE;
        utf16[0] = (uint16_t)((ch >> UNI_SHIFT) + UNI_SUR_HIGH_START);
        utf16[1] = (uint16_t)((ch & UNI_MASK) + UNI_SUR_LOW_START);
    }
}

/* Convert the UTF-8 sequence at utf8 to one or two UTF-16 code units. */
void utf8to16(const uint8_t utf8[4], uint16_t utf16[2])
{
    encode_utf16(decode_utf8(utf8), utf16);
}

/* Convert the UTF-8 sequence at utf8 to a code point stored in utf32[0]. */
void utf8to32(const uint8_t utf8[4], uint32_t utf32[1])
{
    utf32[0] = decode_utf8(utf8);
}

/* Convert one UTF-16 code unit or surrogate pair to a UTF-8 sequence. */
void utf16to8(const uint16_t utf16[2], uint8_t utf8[4])
{
    encode_utf8(decode_utf16(utf16), utf8);
}

/* Convert one UTF-16 code unit or surrogate pair to a code point. */
void utf16to32(const uint16_t utf16[2], uint32_t utf32[1])
{
    utf32[0] = decode_utf16(utf16);
}

/* Convert the code point utf32 to a UTF-8 sequence of 1 to 4 bytes. */
void utf32to8(uint32_t utf32, uint8_t utf8[4])
{
    encode_utf8(utf32, utf8);
}

/* Convert the code point utf32 to one or two UTF-16 code units. */
void utf32to16(uint32_t utf32, uint16_t utf16[2])
{
    encode_utf16(utf32, utf16);
}