Forked from indented-automation/Get-FileEncoding.ps1
Created
November 1, 2022 14:18
-
-
Save santisq/b10a90b6e2298150cdefd54b39c643db to your computer and use it in GitHub Desktop.
Revisions
-
indented-automation revised this gist
Nov 1, 2022. 1 changed file with 1 addition and 0 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -55,6 +55,7 @@ function Get-FileEncoding { 'RTF' = '7B-5C-72-74-66-31' 'GIF' = '47-49-46-38' 'REGPOL' = '50-52-65-67' 'GZIP' = '1F-8B' 'JPEG' = 'FF-D8' 'MSEXE' = '4D-5A' 'ZIP' = '50-4B' -
indented-automation revised this gist
May 20, 2020. 1 changed file with 6 additions and 5 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -100,11 +100,12 @@ function Get-FileEncoding { } [PSCustomObject]@{ Name = Split-Path $Path -Leaf Extension = [System.IO.Path]::GetExtension($Path) Encoding = $encoding Path = $Path PSTypeName = 'EncodingInfo' } } catch { $pscmdlet.WriteError($_) } -
indented-automation created this gist
May 1, 2018. There are no files selected for viewing.
using namespace System.Collections.Generic
using namespace System.Linq

function Get-FileEncoding {
    <#
    .SYNOPSIS
        Attempt to determine a file type based on a BOM or file header.
    .DESCRIPTION
        This script attempts to determine file types based on a byte sequence at the beginning of the file.

        If an identifiable byte sequence is not present the file type cannot be determined using this method,
        and the Encoding property of the result is left empty.

        The order signatures appear in is critical where signatures overlap. For example, UTF32-LE must be
        evaluated before UTF16-LE. The first matching signature wins.

        Only bytes actually read from the file take part in the comparison. A file that is shorter than a
        signature is never matched against that signature, so a file containing just FF-FE is reported as
        UTF16-LE rather than UTF32-LE.
    .EXAMPLE
        Get-ChildItem -File | Get-FileEncoding

        Report the detected encoding of every file in the current directory.
    .EXAMPLE
        Get-FileEncoding -Path .\unknown.bin -IncludeBinary

        Test a file against the text signatures and the binary signatures.
    .OUTPUTS
        EncodingInfo. A custom object with Name, Extension, Encoding, and Path properties.
    .LINK
        https://en.wikipedia.org/wiki/Byte_order_mark#cite_note-b-15
        https://filesignatures.net
    #>

    [CmdletBinding()]
    [OutputType('EncodingInfo')]
    param (
        # The path to a file to analyze.
        [Parameter(Mandatory, Position = 1, ValueFromPipeline, ValueFromPipelineByPropertyName)]
        [ValidateScript( { Test-Path $_ -PathType Leaf } )]
        [Alias('FullName')]
        [String]$Path,

        # Test the file against a small set of signature definitions for binary file types.
        #
        # Identification should be treated as tentative. Several file formats cannot be identified using the sequence at the start alone.
        [Switch]$IncludeBinary
    )

    begin {
        # Order matters: longer signatures must precede shorter signatures that share a prefix.
        $signatures = [Ordered]@{
            'UTF32-LE'   = 'FF-FE-00-00'
            'UTF32-BE'   = '00-00-FE-FF'
            'UTF8'       = 'EF-BB-BF'
            'UTF16-LE'   = 'FF-FE'
            'UTF16-BE'   = 'FE-FF'
            'UTF7'       = '2B-2F-76-38', '2B-2F-76-39', '2B-2F-76-2B', '2B-2F-76-2F'
            'UTF1'       = 'F7-64-4C'
            'UTF-EBCDIC' = 'DD-73-66-73'
            'SCSU'       = '0E-FE-FF'
            'BOCU-1'     = 'FB-EE-28'
            'GB-18030'   = '84-31-95-33'
        }

        if ($IncludeBinary) {
            $signatures += [Ordered]@{
                'LNK'      = '4C-00-00-00-01-14-02-00'
                'MSEXCEL'  = '50-4B-03-04-14-00-06-00'
                'PNG'      = '89-50-4E-47-0D-0A-1A-0A'
                'MSOFFICE' = 'D0-CF-11-E0-A1-B1-1A-E1'
                '7ZIP'     = '37-7A-BC-AF-27-1C'
                'RTF'      = '7B-5C-72-74-66-31'
                'GIF'      = '47-49-46-38'
                'REGPOL'   = '50-52-65-67'
                'JPEG'     = 'FF-D8'
                'MSEXE'    = '4D-5A'
                'ZIP'      = '50-4B'
            }
        }

        # Convert sequence strings to byte arrays. Intended to simplify signature maintenance.
        # The keys are copied first because the dictionary values are replaced while iterating.
        [String[]]$keys = $signatures.Keys
        # Length of the longest signature; this is how many bytes need to be read from each file.
        $headerLength = 0
        foreach ($name in $keys) {
            $sequences = [List[Byte[]]]::new()
            foreach ($value in $signatures[$name]) {
                [Byte[]]$signatureBytes = foreach ($byte in $value.Split('-')) {
                    [Convert]::ToByte($byte, 16)
                }
                $sequences.Add($signatureBytes)
                if ($signatureBytes.Length -gt $headerLength) {
                    $headerLength = $signatureBytes.Length
                }
            }
            $signatures[$name] = $sequences
        }
    }

    process {
        try {
            # A separate local is used so the ValidateScript attribute on $Path is not re-run on assignment.
            $resolvedPath = $pscmdlet.GetUnresolvedProviderPathFromPSPath($Path)

            $header = [Byte[]]::new($headerLength)
            $bytesRead = 0
            $stream = $null
            try {
                $stream = [System.IO.File]::OpenRead($resolvedPath)
                # Stream.Read may return fewer bytes than requested, keep reading until the
                # buffer is full or the end of the file is reached.
                while ($bytesRead -lt $header.Length) {
                    $count = $stream.Read($header, $bytesRead, $header.Length - $bytesRead)
                    if ($count -le 0) {
                        break
                    }
                    $bytesRead += $count
                }
            } finally {
                # Release the file handle even when opening or reading fails part way.
                if ($null -ne $stream) {
                    $stream.Dispose()
                }
            }

            $encoding = $null
            :search foreach ($name in $signatures.Keys) {
                foreach ($sequence in $signatures[$name]) {
                    # Never compare against the zero padding beyond the bytes actually read.
                    if ($sequence.Length -gt $bytesRead) {
                        continue
                    }

                    $isMatch = $true
                    for ($i = 0; $i -lt $sequence.Length; $i++) {
                        if ($header[$i] -ne $sequence[$i]) {
                            $isMatch = $false
                            break
                        }
                    }

                    if ($isMatch) {
                        # First match wins; leave both loops.
                        $encoding = $name
                        break search
                    }
                }
            }

            [PSCustomObject]@{
                Name       = Split-Path $resolvedPath -Leaf
                Extension  = [System.IO.Path]::GetExtension($resolvedPath)
                Encoding   = $encoding
                Path       = $resolvedPath
                PSTypeName = 'EncodingInfo'
            }
        } catch {
            $pscmdlet.WriteError($_)
        }
    }
}