> 7) === 0) { return 1; } if (($ord >> 5) === 6) { return 2; } if (($ord >> 4) === 14) { return 3; } if (($ord >> 3) === 30) { return 4; } return false; }

    /**
     * Translates a character position into a byte offset inside $string.
     *
     * Negative $position: the string is scanned backwards from its last byte
     * and every byte that is not a continuation byte (10xxxxxx) counts as one
     * character; $after is ignored in this mode. Returns the byte offset of
     * the matching lead byte, or null when the string holds fewer characters.
     *
     * Non-negative $position: starting at byte offset $after, skips $position
     * characters forward using the lead-byte length. Returns the byte offset
     * reached, the byte length of the string when the walk stops at its end,
     * or false when a byte that cannot start a sequence is met or the string
     * runs out before $position characters were skipped.
     *
     * @param string $string   Raw UTF-8 bytes.
     * @param int    $position Number of characters to skip (negative = from the end).
     * @param int    $after    Byte offset to start from (forward mode only).
     * @return int|false|null
     */
    private static function CharacterIndex($string, $position, $after)
    {
        $strlen = strlen($string);

        if ($position < 0) {
            for ($index = $strlen - 1; $index >= 0; $index--) {
                $ord = ord($string[$index]);
                // Anything but 10xxxxxx starts a new character.
                if (($ord >> 6) !== 2) {
                    $position++;
                }
                if ($position === 0) {
                    return $index;
                }
            }
            return null;
        }

        $count = 0;
        for ($index = $after; $index < $strlen; $count++) {
            if ($count === $position) {
                return $index;
            }
            $add = self::CodePointLength(ord($string[$index]));
            if ($add === false) {
                return false;
            }
            // Continuation bytes are not validated here; Length() does that.
            $index += $add;
        }

        if ($count < $position) {
            return false;
        }
        return $strlen;
    }

    //------------------------------------------------------------
    // Public (Class)
    //------------------------------------------------------------

    /**
     * UTF-8 aware replacement of chr().
     *
     * Encodes a code point in the shortest form: 1 byte up to 0x7F, 2 bytes
     * up to 0x7FF, 3 bytes up to 0xFFFF and 4 bytes up to 0x10FFFF (all bounds
     * inclusive). Surrogate code points are not rejected.
     *
     * @param int $codepoint Code point to encode (converted with intval()).
     * @return string|null The encoded bytes, or null when the code point is
     *                     negative or greater than 0x10FFFF.
     */
    public static function Character(/*int*/ $codepoint)
    {
        $codepoint = intval($codepoint);

        if ($codepoint < 0 || $codepoint > 0x10FFFF) {
            return null;
        }
        if ($codepoint <= 0x7F) {
            return chr($codepoint);
        }
        if ($codepoint <= 0x7FF) {
            return chr(0xC0 | (($codepoint >> 6) & 0x1F))
                . chr(0x80 | ($codepoint & 0x3F));
        }
        if ($codepoint <= 0xFFFF) {
            return chr(0xE0 | (($codepoint >> 12) & 0x0F))
                . chr(0x80 | (($codepoint >> 6) & 0x3F))
                . chr(0x80 | ($codepoint & 0x3F));
        }
        return chr(0xF0 | (($codepoint >> 18) & 0x07))
            . chr(0x80 | (($codepoint >> 12) & 0x3F))
            . chr(0x80 | (($codepoint >> 6) & 0x3F))
            . chr(0x80 | ($codepoint & 0x3F));
    }

    /**
     * Returns the bytes of the single character that starts at byte offset $index.
     *
     * @param string $string Raw UTF-8 bytes.
     * @param int    $index  Byte offset of the character's lead byte.
     * @return string The character, or '' when $index is at/after the end of
     *                the string or does not point at a valid lead byte.
     */
    public static function CharacterAt($string, $index)
    {
        $nextIndex = self::CharacterIndex($string, 1, $index);
        if ($nextIndex === false) {
            return '';
        }
        return substr($string, $index, $nextIndex - $index);
    }

    /**
     * Returns up to $length characters starting at byte offset $index.
     *
     * When the end offset cannot be determined (fewer than $length characters
     * remain, or an invalid lead byte is met) everything from $index to the
     * end of the string is returned.
     *
     * @param string $string Raw UTF-8 bytes.
     * @param int    $index  Byte offset to start at.
     * @param int    $length Number of characters wanted.
     * @return string
     */
    public static function CharactersAt($string, $index, $length)
    {
        $nextIndex = self::CharacterIndex($string, $length, $index);
        if ($nextIndex === false) {
            return substr($string, $index);
        }
        return substr($string, $index, $nextIndex - $index);
    }

    /**
     * UTF-8 aware replacement of ord().
     *
     * Decodes the first character of $character. Continuation bytes are not
     * validated; only their presence is checked.
     *
     * @param string $character Bytes of (at least) one encoded character.
     * @return int|false The code point, or false when the input is empty, the
     *                   first byte cannot start a sequence, or the sequence is
     *                   cut short.
     */
    public static function CodePoint(/*string*/ $character)
    {
        $strlen = strlen($character);
        if ($strlen === 0) {
            return false;
        }

        $ord0 = ord($character[0]);
        $length = self::CodePointLength($ord0);
        // Guard against reading offsets that do not exist in a truncated sequence.
        if ($length === false || $strlen < $length) {
            return false;
        }

        switch ($length) {
            case 1:
                return $ord0;
            case 2:
                return ($ord0 - 192) * 64
                    + (ord($character[1]) - 128);
            case 3:
                return ($ord0 - 224) * 4096
                    + (ord($character[1]) - 128) * 64
                    + (ord($character[2]) - 128);
            case 4:
                return ($ord0 - 240) * 262144
                    + (ord($character[1]) - 128) * 4096
                    + (ord($character[2]) - 128) * 64
                    + (ord($character[3]) - 128);
            default:
                return false;
        }
    }

    /**
     * Generator that yields the characters of $string one by one.
     *
     * Iteration ends at the end of the string, or silently at the first byte
     * for which CharacterAt() returns ''.
     *
     * @param string $string Raw UTF-8 bytes.
     * @return \Generator Yields one string per character.
     */
    public static function Enumerate($string)
    {
        $strlen = strlen($string);
        for ($index = 0; $index < $strlen; ) {
            $chr = self::CharacterAt($string, $index);
            if ($chr === '') {
                return;
            }
            $index += strlen($chr);
            yield $chr;
        }
    }

    /**
     * Checks that every byte of $string is a single-byte character.
     *
     * Note the return type: on success the number of characters is returned,
     * which is 0 (falsy) for an empty string, so compare with `!== false`.
     *
     * @param string $string Bytes to inspect.
     * @return int|false Character count, or false at the first byte that is
     *                   not a one-byte character.
     */
    public static function IsASCII($string)
    {
        $strlen = strlen($string);
        $count = 0;
        for ($index = 0; $index < $strlen; $count++) {
            $add = self::CodePointLength(ord($string[$index]));
            // false (invalid lead byte) is also !== 1.
            if ($add !== 1) {
                return false;
            }
            $index += $add;
        }
        return $count;
    }

    /**
     * Tells whether Length() accepts $string.
     *
     * @param string $string Bytes to inspect.
     * @return bool
     */
    public static function IsUTF8($string)
    {
        return self::Length($string) !== false;
    }

    /**
     * Counts the characters in $string while checking its byte structure.
     *
     * Each lead byte must be followed by the announced number of bytes in the
     * range 0x80-0xBF.
     *
     * @param string $string Raw UTF-8 bytes.
     * @return int|false Number of characters, or false when a lead byte is
     *                   invalid, a continuation byte is out of range, or the
     *                   last sequence is cut short.
     */
    public static function Length($string)
    {
        $strlen = strlen($string);
        $count = 0;
        for ($index = 0; $index < $strlen; $count++) {
            $add = self::CodePointLength(ord($string[$index]));
            if ($add === false) {
                return false;
            }
            // A sequence running past the end of the string is invalid; bail
            // out before touching offsets that do not exist.
            if ($index + $add > $strlen) {
                return false;
            }
            for ($check = $index + 1; $check < $index + $add; $check++) {
                $ord = ord($string[$check]);
                if ($ord < 0x80 || $ord > 0xbf) {
                    return false;
                }
            }
            $index += $add;
        }
        return $count;
    }

    /**
     * Splits $string into consecutive segments of $length characters.
     *
     * The last segment may be shorter. Segments are produced by
     * CharactersAt(), so an invalid lead byte makes the remainder of the
     * string one final segment.
     *
     * @param string $string Raw UTF-8 bytes.
     * @param int    $length Characters per segment; must be an integer >= 1.
     * @return array|false List of segments, or false (after an E_USER_WARNING)
     *                     when $length is not a positive integer.
     */
    public static function Split(/*string*/ $string, /*int*/ $length = 1)
    {
        if (intval($length) !== $length || $length < 1) {
            trigger_error('The length of each segment must be greater than zero', E_USER_WARNING);
            return false;
        }

        $strlen = strlen($string);
        $result = [];
        $index = 0;
        while ($index < $strlen) {
            $block = self::CharactersAt($string, $index, $length);
            $result[] = $block;
            $index += strlen($block);
        }
        return $result;
    }

    /**
     * Character-based counterpart of substr().
     *
     * @param string   $string Raw UTF-8 bytes.
     * @param int      $start  Character position to start at; a negative value
     *                         counts from the end and is clamped to the start
     *                         of the string when it reaches too far back.
     * @param int|null $length Number of characters to return; null returns
     *                         everything from $start. A negative value places
     *                         the end that many characters before the end of
     *                         the string.
     * @return string|false The selected bytes. false is returned when $start
     *                      cannot be resolved, when a negative $length reaches
     *                      past the beginning of the string, or when the
     *                      resolved end lies before a non-negative $start
     *                      ('' is returned in that case for a negative $start).
     */
    public static function Substr($string, $start, $length = null)
    {
        $startIndex = self::CharacterIndex($string, $start, 0);
        if ($startIndex === null) {
            // Negative start beyond the first character: begin at offset 0.
            $startIndex = 0;
        }
        if ($startIndex === false) {
            return false;
        }

        if ($length === null) {
            return substr($string, $startIndex);
        }

        $endIndex = self::CharacterIndex($string, $length, $startIndex);
        if ($endIndex === null) {
            return false;
        }
        if ($endIndex === false) {
            // Fewer than $length characters left: take the remainder.
            return substr($string, $startIndex);
        }
        if ($endIndex < $startIndex) {
            return $start < 0 ? '' : false;
        }
        return substr($string, $startIndex, $endIndex - $startIndex);
    }

    //------------------------------------------------------------
    // Public (Constructor)
    //------------------------------------------------------------

    /**
     * Creating instances of this class is not allowed.
     *
     * Raises a user-level error (default level of trigger_error()) whenever
     * an instance is constructed.
     */
    public function __construct()
    {
        trigger_error('Creating instances of '.__CLASS__.' is forbidden');
    }
}