Skip to content

Instantly share code, notes, and snippets.

@ik5
Forked from bradleypeabody/gist:185b1d7ed6c0c2ab6cec
Last active August 5, 2020 11:58
Show Gist options
  • Select an option

  • Save ik5/65de721ca495fa1bf451 to your computer and use it in GitHub Desktop.

Select an option

Save ik5/65de721ca495fa1bf451 to your computer and use it in GitHub Desktop.

Revisions

  1. ik5 revised this gist Sep 2, 2015. 1 changed file with 2 additions and 4 deletions.
    6 changes: 2 additions & 4 deletions gistfile1.go
    Original file line number Diff line number Diff line change
    @@ -1,7 +1,5 @@
    package main

    // http://play.golang.org/p/fVf7duRtdH

    import "fmt"
    import "unicode/utf16"
    import "unicode/utf8"
    @@ -33,7 +31,7 @@ func main() {
    fmt.Println(s)
    }

    // EncodeUTF16 get a utf8 string and translate it into array of bytes of ucs2
    // EncodeUTF16 get a utf8 string and translate it into a slice of bytes of ucs2
    func EncodeUTF16(s string, add_bom bool) []byte {
    r := []rune(s)
    iresult := utf16.Encode(r)
    @@ -50,7 +48,7 @@ func EncodeUTF16(s string, add_bom bool) []byte {
    return bytes
    }

    // DecodeUTF16 get an array of bytes and decode it to UTF-8
    // DecodeUTF16 get a slice of bytes and decode it to UTF-8
    func DecodeUTF16(b []byte) (string, error) {

    if len(b)%2 != 0 {
  2. ik5 revised this gist Aug 19, 2015. 1 changed file with 60 additions and 15 deletions.
    75 changes: 60 additions & 15 deletions gistfile1.go
    Original file line number Diff line number Diff line change
    @@ -33,25 +33,70 @@ func main() {
    fmt.Println(s)
    }

    func DecodeUTF16(b []byte) (string, error) {
    // EncodeUTF16 converts the UTF-8 string s into big-endian UTF-16 bytes.
    //
    // Runes outside the Basic Multilingual Plane are written as surrogate pairs,
    // so the output is UTF-16 rather than plain UCS-2. When add_bom is true the
    // result starts with the big-endian byte order mark 0xFE 0xFF. A nil slice is
    // returned when there is nothing to emit (empty s and add_bom false).
    func EncodeUTF16(s string, add_bom bool) []byte {
        units := utf16.Encode([]rune(s))

        size := 2 * len(units)
        if add_bom {
            size += 2
        }
        if size == 0 {
            return nil
        }

        // The final length is known up front, so allocate exactly once.
        out := make([]byte, 0, size)
        if add_bom {
            out = append(out, 0xFE, 0xFF)
        }
        for _, u := range units {
            // Big endian: high byte first, then low byte.
            out = append(out, byte(u>>8), byte(u))
        }
        return out
    }

    if len(b)%2 != 0 {
    return "", fmt.Errorf("Must have even length byte slice")
    }
    // DecodeUTF16 get an array of bytes and decode it to UTF-8
    func DecodeUTF16(b []byte) (string, error) {

    u16s := make([]uint16, 1)
    if len(b)%2 != 0 {
    return "", fmt.Errorf("Must have even length byte slice")
    }

    ret := &bytes.Buffer{}
    bom := UTF16Bom(b)
    if bom < 0 {
    return "", fmt.Errorf("Buffer is too small")
    }

    b8buf := make([]byte, 4)
    u16s := make([]uint16, 1)
    ret := &bytes.Buffer{}
    b8buf := make([]byte, 4)
    lb := len(b)

    lb := len(b)
    for i := 0; i < lb; i += 2 {
    u16s[0] = uint16(b[i]) + (uint16(b[i+1]) << 8)
    r := utf16.Decode(u16s)
    n := utf8.EncodeRune(b8buf, r[0])
    ret.Write(b8buf[:n])
    }
    for i := 0; i < lb; i += 2 {
    //assuming bom is big endian if 0 returned
    if bom == 0 || bom == 1 {
    u16s[0] = uint16(b[i+1]) + (uint16(b[i]) << 8)
    }
    if bom == 2 {
    u16s[0] = uint16(b[i]) + (uint16(b[i+1]) << 8)
    }
    r := utf16.Decode(u16s)
    n := utf8.EncodeRune(b8buf, r[0])
    ret.Write([]byte(string(b8buf[:n])))
    }

    return ret.String(), nil
    return ret.String(), nil
    }

    // UTF16Bom inspects the first two bytes of b for a UTF-16 byte order mark.
    //
    // It returns 1 for the big-endian mark (0xFE 0xFF), 2 for the little-endian
    // mark (0xFF 0xFE), 0 when neither mark is present, and -1 when b holds fewer
    // than two bytes and therefore cannot contain a mark.
    func UTF16Bom(b []byte) int8 {
        if len(b) < 2 {
            return -1
        }

        first, second := b[0], b[1]
        switch {
        case first == 0xFE && second == 0xFF:
            return 1
        case first == 0xFF && second == 0xFE:
            return 2
        default:
            return 0
        }
    }
  3. @bradleypeabody bradleypeabody revised this gist Jun 13, 2014. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions gistfile1.go
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,7 @@
    package main

    // http://play.golang.org/p/fVf7duRtdH

    import "fmt"
    import "unicode/utf16"
    import "unicode/utf8"
  4. @bradleypeabody bradleypeabody created this gist Jun 13, 2014.
    55 changes: 55 additions & 0 deletions gistfile1.go
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,55 @@
    package main

    import "fmt"
    import "unicode/utf16"
    import "unicode/utf8"
    import "bytes"

    // main decodes a fixed UTF-16 sample with DecodeUTF16 and prints the result,
    // panicking if the decoder reports an error.
    func main() {
        // Sample input, two bytes per code unit with the low byte first.
        sample := []byte{
            0xff, 0xfe, // BOM
            'T', 0x00,
            'E', 0x00,
            'S', 0x00,
            'T', 0x00,
            0x6C, 0x34,
            '\n', 0x00,
        }

        decoded, err := DecodeUTF16(sample)
        if err != nil {
            panic(err)
        }

        fmt.Println(decoded)
    }

    // DecodeUTF16 converts little-endian UTF-16 bytes into a UTF-8 string.
    //
    // b must contain an even number of bytes; otherwise an error is returned.
    // All code units are decoded in a single pass, so a surrogate pair yields the
    // one rune it encodes instead of two U+FFFD replacement characters. A leading
    // byte order mark is neither interpreted nor stripped: it is decoded like any
    // other code unit.
    func DecodeUTF16(b []byte) (string, error) {
        if len(b)%2 != 0 {
            return "", fmt.Errorf("Must have even length byte slice")
        }

        // Assemble the 16-bit code units, low byte first.
        units := make([]uint16, len(b)/2)
        for i := range units {
            units[i] = uint16(b[2*i]) | uint16(b[2*i+1])<<8
        }

        ret := &bytes.Buffer{}
        b8buf := make([]byte, utf8.UTFMax)

        // Decoding the whole sequence at once lets utf16.Decode see both halves
        // of every surrogate pair.
        for _, r := range utf16.Decode(units) {
            n := utf8.EncodeRune(b8buf, r)
            ret.Write(b8buf[:n])
        }

        return ret.String(), nil
    }