Skip to content

Instantly share code, notes, and snippets.

@ScriptingPro
Forked from jpoehls/encoding-helpers.ps1
Created January 26, 2018 22:57
Show Gist options
  • Select an option

  • Save ScriptingPro/ac04acd60eefdd404d1c4a208b160e91 to your computer and use it in GitHub Desktop.

Select an option

Save ScriptingPro/ac04acd60eefdd404d1c4a208b160e91 to your computer and use it in GitHub Desktop.

Revisions

  1. @jpoehls jpoehls created this gist Apr 17, 2012.
    103 changes: 103 additions & 0 deletions encoding-helpers.ps1
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,103 @@
    <#
    .SYNOPSIS
    Converts files to the given encoding.
    Matches the include pattern recursively under the given path.
    .DESCRIPTION
    Every file under Path whose name matches Include is probed with
    Get-FileEncoding. Files whose reported encoding differs from Encoding are
    read completely and then rewritten in place with Out-File -Encoding.
    A summary line with the number of rewritten files is printed with Write-Host.
    .PARAMETER Include
    Wildcard pattern of the file names to convert (for example *.js).
    .PARAMETER Path
    Directory that is searched recursively.
    .PARAMETER Encoding
    Target encoding name passed to Out-File -Encoding. Defaults to UTF8.
    .EXAMPLE
    Convert-FileEncoding -Include *.js -Path scripts -Encoding UTF8
    #>
    function Convert-FileEncoding([string]$Include, [string]$Path, [string]$Encoding='UTF8') {
        $count = 0

        # NOTE: the filter must use the $Include parameter. The previous code
        # referenced an undefined $Pattern variable, so every file was matched.
        Get-ChildItem -Include $Include -Recurse -Path $Path |
            # Directories cannot be read as files; skip them.
            Where-Object { -not $_.PSIsContainer } |
            Select-Object FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} |
            Where-Object { $_.Encoding -ne $Encoding } |
            ForEach-Object {
                # The parentheses force the whole file to be read (and closed)
                # before Out-File reopens the same path for writing.
                (Get-Content $_.FullName) | Out-File $_.FullName -Encoding $Encoding
                $count++
            }

        Write-Host "$count $Include file(s) converted to $Encoding in $Path."
    }

    # http://franckrichard.blogspot.com/2010/08/powershell-get-encoding-file-type.html
    <#
    .SYNOPSIS
    Gets file encoding.
    .DESCRIPTION
    The Get-FileEncoding function determines encoding by looking at Byte Order Mark (BOM).
    At most the first four bytes of the file are read. When no known BOM is
    found (including for empty files) the function reports 'ASCII'.
    Based on port of C# code from http://www.west-wind.com/Weblog/posts/197245.aspx
    .PARAMETER Path
    Path of the file to inspect. Can be bound from a pipeline object's Path property.
    .OUTPUTS
    System.String. One of: 'UTF8', 'Unicode UTF-16 Big-Endian',
    'Unicode UTF-16 Little-Endian', 'UTF32 Big-Endian', 'UTF32 Little-Endian',
    'UTF7', 'UTF-1', 'UTF-EBCDIC', 'SCSU', 'BOCU-1', 'GB-18030', 'ASCII'.
    .EXAMPLE
    Get-ChildItem *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'}
    This command gets ps1 files in current directory where encoding is not ASCII
    .EXAMPLE
    Get-ChildItem *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'} | foreach {(get-content $_.FullName) | set-content $_.FullName -Encoding ASCII}
    Same as previous example but fixes encoding using set-content
    .NOTES
    Modified by F.RICHARD August 2010
    add comment + more BOM
    http://unicode.org/faq/utf_bom.html
    http://en.wikipedia.org/wiki/Byte_order_mark

    Do this next line before or add function in Profile.ps1
    Import-Module .\Get-FileEncoding.ps1
    #>
    function Get-FileEncoding
    {
        [CmdletBinding()]
        Param (
            [Parameter(Mandatory = $True, ValueFromPipelineByPropertyName = $True)]
            [string]$Path
        )

        # A process block is required so that every pipeline item is examined,
        # not only the last one bound to -Path.
        process
        {
            # Read at most the first four bytes. PowerShell 6+ replaced
            # '-Encoding Byte' with the '-AsByteStream' switch.
            if ($PSVersionTable.PSVersion.Major -ge 6)
            { [byte[]]$byte = Get-Content -AsByteStream -ReadCount 4 -TotalCount 4 -Path $Path }
            else
            { [byte[]]$byte = Get-Content -Encoding Byte -ReadCount 4 -TotalCount 4 -Path $Path }

            # Render the leading bytes as an upper-case hex string (e.g. 'EFBBBF2F').
            # Prefix matching on this string is safe for empty and short files,
            # where indexing $byte[n] would fail or read past the end.
            $bom = ''
            if ($null -ne $byte -and $byte.Length -gt 0)
            { $bom = -join ($byte | ForEach-Object { $_.ToString('X2') }) }

            switch -Regex ($bom)
            {
                # EF BB BF (UTF8)
                '^EFBBBF'             { Write-Output 'UTF8'; break }

                # The UTF-32 marks must be tested before UTF-16 because
                # FF FE 00 00 also starts with the UTF-16 Little-Endian mark FF FE.

                # 00 00 FE FF (UTF32 Big-Endian)
                '^0000FEFF'           { Write-Output 'UTF32 Big-Endian'; break }

                # FF FE 00 00 (UTF32 Little-Endian)
                '^FFFE0000'           { Write-Output 'UTF32 Little-Endian'; break }

                # FE FF (UTF-16 Big-Endian)
                '^FEFF'               { Write-Output 'Unicode UTF-16 Big-Endian'; break }

                # FF FE (UTF-16 Little-Endian)
                '^FFFE'               { Write-Output 'Unicode UTF-16 Little-Endian'; break }

                # 2B 2F 76 (38 | 39 | 2B | 2F)
                '^2B2F76(38|39|2B|2F)' { Write-Output 'UTF7'; break }

                # F7 64 4C (UTF-1)
                '^F7644C'             { Write-Output 'UTF-1'; break }

                # DD 73 66 73 (UTF-EBCDIC)
                '^DD736673'           { Write-Output 'UTF-EBCDIC'; break }

                # 0E FE FF (SCSU)
                '^0EFEFF'             { Write-Output 'SCSU'; break }

                # FB EE 28 (BOCU-1)
                '^FBEE28'             { Write-Output 'BOCU-1'; break }

                # 84 31 95 33 (GB-18030)
                '^84319533'           { Write-Output 'GB-18030'; break }

                # No recognised BOM (also covers empty files).
                default               { Write-Output 'ASCII' }
            }
        }
    }