Skip to content

Instantly share code, notes, and snippets.

@stevewithington
Forked from sevaa/ToUTF8.sql
Created December 23, 2023 21:04
Show Gist options
  • Select an option

  • Save stevewithington/1d131bd1dc8571f79f3eaede234edae5 to your computer and use it in GitHub Desktop.

Select an option

Save stevewithington/1d131bd1dc8571f79f3eaede234edae5 to your computer and use it in GitHub Desktop.

Revisions

  1. @sevaa sevaa revised this gist Oct 11, 2021. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion ToUTF8.sql
    Original file line number Diff line number Diff line change
    @@ -13,7 +13,7 @@ begin
    return cast(cast('Malformed UTF-16 - two nchar sequence cut short' as int) as varbinary)
    set @c2 = unicode(substring(@s, @i, 1))
    if (@c2 & 0xFC00) <> 0xDC00
    return cast(cast('Malformed UTF-16 - continuation missin in a two nchar sequence' as int) as varbinary)
    return cast(cast('Malformed UTF-16 - continuation missing in a two nchar sequence' as int) as varbinary)
    set @c = (((@c & 0x3FF) * 0x400) | (@c2 & 0x3FF)) + 0x10000
    end

  2. @sevaa sevaa revised this gist Oct 11, 2021. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion ToUTF8.sql
    Original file line number Diff line number Diff line change
    @@ -14,7 +14,7 @@ begin
    set @c2 = unicode(substring(@s, @i, 1))
    if (@c2 & 0xFC00) <> 0xDC00
    return cast(cast('Malformed UTF-16 - continuation missin in a two nchar sequence' as int) as varbinary)
    set @c = ((@c & 0x3FF) * 0x400) | 0x10000 | (@c2 & 0x3FF)
    set @c = (((@c & 0x3FF) * 0x400) | (@c2 & 0x3FF)) + 0x10000
    end

    if @c < 0x80
  3. @sevaa sevaa revised this gist Oct 11, 2021. 1 changed file with 7 additions and 2 deletions.
    9 changes: 7 additions & 2 deletions ToUTF8.sql
    Original file line number Diff line number Diff line change
    @@ -2,14 +2,19 @@ create function [dbo].[ToUTF8](@s nvarchar(max))
    returns varbinary(max)
    as
    begin
    declare @i int = 1, @n int = datalength(@s)/2, @r varbinary(max) = 0x, @c int, @d varbinary(4)
    declare @i int = 1, @n int = datalength(@s)/2, @r varbinary(max) = 0x, @c int, @c2 int, @d varbinary(4)
    while @i <= @n
    begin
    set @c = unicode(substring(@s, @i, 1))
    if (@c & 0xFC00) = 0xD800
    begin
    set @i += 1
    set @c = ((@c & 0x3FF) * 0x400) | 0x10000 | (unicode(substring(@s, @i, 1)) & 0x3FF)
    if @i > @n
    return cast(cast('Malformed UTF-16 - two nchar sequence cut short' as int) as varbinary)
    set @c2 = unicode(substring(@s, @i, 1))
    if (@c2 & 0xFC00) <> 0xDC00
    return cast(cast('Malformed UTF-16 - continuation missin in a two nchar sequence' as int) as varbinary)
    set @c = ((@c & 0x3FF) * 0x400) | 0x10000 | (@c2 & 0x3FF)
    end

    if @c < 0x80
  4. @sevaa sevaa revised this gist Dec 11, 2018. 1 changed file with 6 additions and 6 deletions.
    12 changes: 6 additions & 6 deletions ToUTF8.sql
    Original file line number Diff line number Diff line change
    @@ -2,24 +2,24 @@ create function [dbo].[ToUTF8](@s nvarchar(max))
    returns varbinary(max)
    as
    begin
    declare @i int = 1, @n int = len(@s), @r varbinary(max) = 0x, @c int, @d varbinary(4)
    declare @i int = 1, @n int = datalength(@s)/2, @r varbinary(max) = 0x, @c int, @d varbinary(4)
    while @i <= @n
    begin
    set @c = unicode(substring(@s, @i, 1))
    if (@c & 0xfc00) = 0xd800
    if (@c & 0xFC00) = 0xD800
    begin
    set @i += 1
    set @c = ((@c & 0x3ff) * 0x400) | 0x10000 | (unicode(substring(@s, @i, 1)) & 0x3ff)
    set @c = ((@c & 0x3FF) * 0x400) | 0x10000 | (unicode(substring(@s, @i, 1)) & 0x3FF)
    end

    if @c < 0x80
    set @d = cast(@c as binary(1))
    if @c >= 0x80 and @c < 0x800
    set @d = cast(((@c * 4) & 0xFF00) | (@c & 0x3f) | 0xc080 as binary(2))
    set @d = cast(((@c * 4) & 0xFF00) | (@c & 0x3F) | 0xC080 as binary(2))
    if @c >= 0x800 and @c < 0x10000
    set @d = cast(((@c * 0x10) & 0xFF0000) | ((@c * 4) & 0x3F00) | (@c & 0x3f) | 0xe08080 as binary(3))
    set @d = cast(((@c * 0x10) & 0xFF0000) | ((@c * 4) & 0x3F00) | (@c & 0x3F) | 0xe08080 as binary(3))
    if @c >= 0x10000
    set @d = cast(((@c * 0x40) & 0xFF000000) | ((@c * 0x10) & 0x3F0000) | ((@c * 4) & 0x3F00) | (@c & 0x3f) | 0xf0808080 as binary(4))
    set @d = cast(((@c * 0x40) & 0xFF000000) | ((@c * 0x10) & 0x3F0000) | ((@c * 4) & 0x3F00) | (@c & 0x3F) | 0xf0808080 as binary(4))

    set @r += @d
    set @i += 1
  5. @sevaa sevaa revised this gist Dec 11, 2018. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions ToUTF8.sql
    Original file line number Diff line number Diff line change
    @@ -17,9 +17,9 @@ begin
    if @c >= 0x80 and @c < 0x800
    set @d = cast(((@c * 4) & 0xFF00) | (@c & 0x3f) | 0xc080 as binary(2))
    if @c >= 0x800 and @c < 0x10000
    set @d = cast(((@c * 0x10) & 0xFF0000) | ((@c * 4) & 0xFF00) | (@c & 0x3f) | 0xe08080 as binary(3))
    set @d = cast(((@c * 0x10) & 0xFF0000) | ((@c * 4) & 0x3F00) | (@c & 0x3f) | 0xe08080 as binary(3))
    if @c >= 0x10000
    set @d = cast(((@c * 0x40) & 0xFF000000) | ((@c * 0x10) & 0xFF0000) | ((@c * 4) & 0xFF00) | (@c & 0x3f) | 0xf0808080 as binary(4))
    set @d = cast(((@c * 0x40) & 0xFF000000) | ((@c * 0x10) & 0x3F0000) | ((@c * 4) & 0x3F00) | (@c & 0x3f) | 0xf0808080 as binary(4))

    set @r += @d
    set @i += 1
  6. @sevaa sevaa created this gist Nov 7, 2016.
    28 changes: 28 additions & 0 deletions ToUTF8.sql
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,28 @@
    -- Encodes an nvarchar string (a sequence of UTF-16 code units) as UTF-8 bytes.
    --
    -- Parameters:
    --   @s  the text to encode.
    -- Returns:
    --   varbinary(max) holding the UTF-8 encoding of @s. An empty or NULL @s
    --   yields 0x, because the loop below never runs in either case.
    -- Errors:
    --   A high surrogate that is the last nchar of the string, or that is not
    --   followed by a low surrogate, aborts the call with a conversion error whose
    --   text starts with 'Malformed UTF-16'. Scalar functions cannot use
    --   raiserror/throw, so the error is forced by casting the message to int.
    --
    -- NOTE(review): assumes substring()/unicode() operate on single UTF-16 code
    -- units (i.e. a non-_SC collation) - confirm for the target database.
    create function [dbo].[ToUTF8](@s nvarchar(max))
    returns varbinary(max)
    as
    begin
        -- datalength()/2 counts nchars; len() would ignore trailing spaces and
        -- silently drop them from the output.
        declare @i int = 1, @n int = datalength(@s)/2, @r varbinary(max) = 0x, @c int, @c2 int, @d varbinary(4)
        while @i <= @n
        begin
            set @c = unicode(substring(@s, @i, 1))

            -- High surrogate (0xD800-0xDBFF): combine with the following low
            -- surrogate (0xDC00-0xDFFF) into one code point above 0xFFFF.
            if (@c & 0xFC00) = 0xD800
            begin
                set @i += 1
                if @i > @n
                    return cast(cast('Malformed UTF-16 - two nchar sequence cut short' as int) as varbinary)
                set @c2 = unicode(substring(@s, @i, 1))
                if (@c2 & 0xFC00) <> 0xDC00
                    return cast(cast('Malformed UTF-16 - continuation missing in a two nchar sequence' as int) as varbinary)
                -- The 0x10000 offset must be added, not OR-ed: the 20-bit payload
                -- can already have bit 16 set (e.g. U+20000).
                set @c = (((@c & 0x3FF) * 0x400) | (@c2 & 0x3FF)) + 0x10000
            end

            -- Spread the code point's bits over 1-4 bytes. Each multiplication
            -- shifts a 6-bit group into its byte; continuation groups are masked
            -- with 0x3F so they cannot overwrite the 10xxxxxx marker bits.
            if @c < 0x80
                set @d = cast(@c as binary(1))
            else if @c < 0x800
                set @d = cast(((@c * 4) & 0xFF00) | (@c & 0x3F) | 0xC080 as binary(2))
            else if @c < 0x10000
                set @d = cast(((@c * 0x10) & 0xFF0000) | ((@c * 4) & 0x3F00) | (@c & 0x3F) | 0xE08080 as binary(3))
            else
                set @d = cast(((@c * 0x40) & 0xFF000000) | ((@c * 0x10) & 0x3F0000) | ((@c * 4) & 0x3F00) | (@c & 0x3F) | 0xF0808080 as binary(4))

            set @r += @d
            set @i += 1
        end
        return @r
    end