yuraxdrumz · October 22, 2019 14:14 · Aug 6, 2013 · Mar 6, 2013 · Mar 6, 2013 · Mar 6, 2013
diff --git a/toUTF8Array.js b/toUTF8Array.js
@@ -18,9 +18,8 @@ function toUTF8Array(str) {
             // UTF-16 encodes 0x10000-0x10FFFF by
             // subtracting 0x10000 and splitting the
             // 20 bits of 0x0-0xFFFFF into two halves
-            charcode = ((charcode & 0x3ff)<<10)
-                      | (str.charCodeAt(i) & 0x3ff)
-                      + 0x10000;
+            charcode = 0x10000 + (((charcode & 0x3ff)<<10)
+                      | (str.charCodeAt(i) & 0x3ff))
             utf8.push(0xf0 | (charcode >>18), 
                       0x80 | ((charcode>>12) & 0x3f), 
                       0x80 | ((charcode>>6) & 0x3f), 

diff --git a/toUTF8Array.js b/toUTF8Array.js
@@ -15,7 +15,12 @@ function toUTF8Array(str) {
         // surrogate pair
         else {
             i++;
-            charcode = ((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff)+0x10000;
+            // UTF-16 encodes 0x10000-0x10FFFF by
+            // subtracting 0x10000 and splitting the
+            // 20 bits of 0x0-0xFFFFF into two halves
+            charcode = ((charcode & 0x3ff)<<10)
+                      | (str.charCodeAt(i) & 0x3ff)
+                      + 0x10000;
             utf8.push(0xf0 | (charcode >>18), 
                       0x80 | ((charcode>>12) & 0x3f), 
                       0x80 | ((charcode>>6) & 0x3f), 

diff --git a/toUTF8Array.js b/toUTF8Array.js
@@ -15,7 +15,7 @@ function toUTF8Array(str) {
         // surrogate pair
         else {
             i++;
-            charcode = 0x10000+((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff);
+            charcode = ((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff)+0x10000;
             utf8.push(0xf0 | (charcode >>18), 
                       0x80 | ((charcode>>12) & 0x3f), 
                       0x80 | ((charcode>>6) & 0x3f), 

diff --git a/gistfile1.js → toUTF8Array.js b/gistfile1.js → toUTF8Array.js
@@ -15,7 +15,7 @@ function toUTF8Array(str) {
         // surrogate pair
         else {
             i++;
-            charcode = ((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff)
+            charcode = 0x10000+((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff);
             utf8.push(0xf0 | (charcode >>18), 
                       0x80 | ((charcode>>12) & 0x3f), 
                       0x80 | ((charcode>>6) & 0x3f), 

diff --git a/gistfile1.js b/gistfile1.js
@@ -1,4 +1,3 @@
-/etc/apache2/conf.d/
 function toUTF8Array(str) {
     var utf8 = [];
     for (var i=0; i < str.length; i++) {

diff --git a/gistfile1.js b/gistfile1.js
@@ -1,3 +1,4 @@
+/etc/apache2/conf.d/
 function toUTF8Array(str) {
     var utf8 = [];
     for (var i=0; i < str.length; i++) {
@@ -7,22 +8,20 @@ function toUTF8Array(str) {
             utf8.push(0xc0 | (charcode >> 6), 
                       0x80 | (charcode & 0x3f));
         }
-        else if (charcode < 0x10000) {
+        else if (charcode < 0xd800 || charcode >= 0xe000) {
             utf8.push(0xe0 | (charcode >> 12), 
                       0x80 | ((charcode>>6) & 0x3f), 
                       0x80 | (charcode & 0x3f));
         }
-        else if (charcode < 0x200000) {
+        // surrogate pair
+        else {
+            i++;
+            charcode = ((charcode&0x3ff)<<10)|(str.charCodeAt(i)&0x3ff)
             utf8.push(0xf0 | (charcode >>18), 
                       0x80 | ((charcode>>12) & 0x3f), 
                       0x80 | ((charcode>>6) & 0x3f), 
                       0x80 | (charcode & 0x3f));
         }
-        else {
-            // Unicode only goes up to 0x1fffff
-            // append U+FFFD, replacement character
-            utf8.push(0xef, 0xbf, 0xbd);
-        }
     }
     return utf8;
 }
diff --git a/gistfile1.js b/gistfile1.js
@@ -12,9 +12,17 @@ function toUTF8Array(str) {
                       0x80 | ((charcode>>6) & 0x3f), 
                       0x80 | (charcode & 0x3f));
         }
+        else if (charcode < 0x200000) {
+            utf8.push(0xf0 | (charcode >>18), 
+                      0x80 | ((charcode>>12) & 0x3f), 
+                      0x80 | ((charcode>>6) & 0x3f), 
+                      0x80 | (charcode & 0x3f));
+        }
         else {
-            utf8.push(63);
+            // Unicode only goes up to 0x1fffff
+            // append U+FFFD, replacement character
+            utf8.push(0xef, 0xbf, 0xbd);
         }
     }
     return utf8;
-}
+}
diff --git a/gistfile1.js b/gistfile1.js
@@ -0,0 +1,20 @@
+function toUTF8Array(str) {
+    var utf8 = [];
+    for (var i=0; i < str.length; i++) {
+        var charcode = str.charCodeAt(i);
+        if (charcode < 0x80) utf8.push(charcode);
+        else if (charcode < 0x800) {
+            utf8.push(0xc0 | (charcode >> 6), 
+                      0x80 | (charcode & 0x3f));
+        }
+        else if (charcode < 0x10000) {
+            utf8.push(0xe0 | (charcode >> 12), 
+                      0x80 | ((charcode>>6) & 0x3f), 
+                      0x80 | (charcode & 0x3f));
+        }
+        else {
+            utf8.push(63);
+        }
+    }
+    return utf8;
+}