Skip to content

Instantly share code, notes, and snippets.

@yuraxdrumz
Forked from joni/toUTF8Array.js
Created October 22, 2019 14:14
Show Gist options
  • Save yuraxdrumz/74008ceb2c8861784164cf82f58783f2 to your computer and use it in GitHub Desktop.
Save yuraxdrumz/74008ceb2c8861784164cf82f58783f2 to your computer and use it in GitHub Desktop.

Revisions

  1. @joni joni revised this gist Aug 6, 2013. 1 changed file with 2 additions and 3 deletions.
    5 changes: 2 additions & 3 deletions toUTF8Array.js
    Original file line number Diff line number Diff line change
    @@ -18,9 +18,8 @@ function toUTF8Array(str) {
    // UTF-16 encodes 0x10000-0x10FFFF by
    // subtracting 0x10000 and splitting the
    // 20 bits of 0x0-0xFFFFF into two halves
    charcode = ((charcode & 0x3ff)<<10)
    | (str.charCodeAt(i) & 0x3ff)
    + 0x10000;
    charcode = 0x10000 + (((charcode & 0x3ff)<<10)
    | (str.charCodeAt(i) & 0x3ff))
    utf8.push(0xf0 | (charcode >>18),
    0x80 | ((charcode>>12) & 0x3f),
    0x80 | ((charcode>>6) & 0x3f),
  2. @joni joni revised this gist Mar 6, 2013. 1 changed file with 6 additions and 1 deletion.
    7 changes: 6 additions & 1 deletion toUTF8Array.js
    Original file line number Diff line number Diff line change
    @@ -15,7 +15,12 @@ function toUTF8Array(str) {
    // surrogate pair
    else {
    i++;
    charcode = ((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff)+0x10000;
    // UTF-16 encodes 0x10000-0x10FFFF by
    // subtracting 0x10000 and splitting the
    // 20 bits of 0x0-0xFFFFF into two halves
    charcode = ((charcode & 0x3ff)<<10)
    | (str.charCodeAt(i) & 0x3ff)
    + 0x10000;
    utf8.push(0xf0 | (charcode >>18),
    0x80 | ((charcode>>12) & 0x3f),
    0x80 | ((charcode>>6) & 0x3f),
  3. @joni joni revised this gist Mar 6, 2013. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion toUTF8Array.js
    Original file line number Diff line number Diff line change
    @@ -15,7 +15,7 @@ function toUTF8Array(str) {
    // surrogate pair
    else {
    i++;
    charcode = 0x10000+((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff);
    charcode = ((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff)+0x10000;
    utf8.push(0xf0 | (charcode >>18),
    0x80 | ((charcode>>12) & 0x3f),
    0x80 | ((charcode>>6) & 0x3f),
  4. @joni joni renamed this gist Mar 6, 2013. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion gistfile1.js → toUTF8Array.js
    Original file line number Diff line number Diff line change
    @@ -15,7 +15,7 @@ function toUTF8Array(str) {
    // surrogate pair
    else {
    i++;
    charcode = ((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff)
    charcode = 0x10000+((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff);
    utf8.push(0xf0 | (charcode >>18),
    0x80 | ((charcode>>12) & 0x3f),
    0x80 | ((charcode>>6) & 0x3f),
  5. @joni joni revised this gist Sep 21, 2012. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion gistfile1.js
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,3 @@
    /etc/apache2/conf.d/
    function toUTF8Array(str) {
    var utf8 = [];
    for (var i=0; i < str.length; i++) {
  6. @joni joni revised this gist Sep 21, 2012. 1 changed file with 6 additions and 7 deletions.
    13 changes: 6 additions & 7 deletions gistfile1.js
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,4 @@
    /etc/apache2/conf.d/
    function toUTF8Array(str) {
    var utf8 = [];
    for (var i=0; i < str.length; i++) {
    @@ -7,22 +8,20 @@ function toUTF8Array(str) {
    utf8.push(0xc0 | (charcode >> 6),
    0x80 | (charcode & 0x3f));
    }
    else if (charcode < 0x10000) {
    else if (charcode < 0xd800 || charcode >= 0xe000) {
    utf8.push(0xe0 | (charcode >> 12),
    0x80 | ((charcode>>6) & 0x3f),
    0x80 | (charcode & 0x3f));
    }
    else if (charcode < 0x200000) {
    // surrogate pair
    else {
    i++;
    charcode = ((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff)
    utf8.push(0xf0 | (charcode >>18),
    0x80 | ((charcode>>12) & 0x3f),
    0x80 | ((charcode>>6) & 0x3f),
    0x80 | (charcode & 0x3f));
    }
    else {
    // Unicode only goes up to 0x1fffff
    // append U+FFFD, replacement character
    utf8.push(0xef, 0xbf, 0xbd);
    }
    }
    return utf8;
    }
  7. @joni joni revised this gist Sep 21, 2012. 1 changed file with 10 additions and 2 deletions.
    12 changes: 10 additions & 2 deletions gistfile1.js
    Original file line number Diff line number Diff line change
    @@ -12,9 +12,17 @@ function toUTF8Array(str) {
    0x80 | ((charcode>>6) & 0x3f),
    0x80 | (charcode & 0x3f));
    }
    else if (charcode < 0x200000) {
    utf8.push(0xf0 | (charcode >>18),
    0x80 | ((charcode>>12) & 0x3f),
    0x80 | ((charcode>>6) & 0x3f),
    0x80 | (charcode & 0x3f));
    }
    else {
    utf8.push(63);
    // Unicode only goes up to 0x1fffff
    // append U+FFFD, replacement character
    utf8.push(0xef, 0xbf, 0xbd);
    }
    }
    return utf8;
    }
    }
  8. @joni joni created this gist Sep 21, 2012.
    20 changes: 20 additions & 0 deletions gistfile1.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,20 @@
    /**
     * Encode a JavaScript string as an array of UTF-8 byte values.
     *
     * JavaScript strings are sequences of UTF-16 code units, so characters
     * above U+FFFF arrive as a surrogate pair (a high unit 0xD800-0xDBFF
     * followed by a low unit 0xDC00-0xDFFF). A valid pair is combined into
     * one code point and written as a single 4-byte sequence. A surrogate
     * without its partner cannot be represented in UTF-8 and is written as
     * U+FFFD (bytes EF BF BD), the same result TextEncoder produces.
     *
     * @param {string} str - The string to encode.
     * @returns {number[]} The UTF-8 bytes, each in the range 0-255.
     */
    function toUTF8Array(str) {
      var utf8 = [];
      for (var i = 0; i < str.length; i++) {
        var charcode = str.charCodeAt(i);
        if (charcode < 0x80) {
          // ASCII: one byte.
          utf8.push(charcode);
        } else if (charcode < 0x800) {
          // Two bytes: 110xxxxx 10xxxxxx.
          utf8.push(0xc0 | (charcode >> 6),
                    0x80 | (charcode & 0x3f));
        } else if (charcode < 0xd800 || charcode >= 0xe000) {
          // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx (non-surrogate BMP).
          utf8.push(0xe0 | (charcode >> 12),
                    0x80 | ((charcode >> 6) & 0x3f),
                    0x80 | (charcode & 0x3f));
        } else {
          // charcode is a surrogate; it is only valid as a high unit
          // immediately followed by a low unit.
          var next = i + 1 < str.length ? str.charCodeAt(i + 1) : 0;
          if (charcode < 0xdc00 && next >= 0xdc00 && next < 0xe000) {
            // Consume the low surrogate as well.
            i++;
            // UTF-16 stores (codePoint - 0x10000) as two 10-bit halves.
            charcode = 0x10000 + (((charcode & 0x3ff) << 10) | (next & 0x3ff));
            // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
            utf8.push(0xf0 | (charcode >> 18),
                      0x80 | ((charcode >> 12) & 0x3f),
                      0x80 | ((charcode >> 6) & 0x3f),
                      0x80 | (charcode & 0x3f));
          } else {
            // Unpaired surrogate: emit U+FFFD, the replacement character.
            utf8.push(0xef, 0xbf, 0xbd);
          }
        }
      }
      return utf8;
    }