Last active
February 5, 2019 13:35
-
-
Save Ldes/5fc130512d57baa3bd512aa7af2dbdf6 to your computer and use it in GitHub Desktop.
The following snippet attempts to recursively read and parse a damaged serialized string (blob data).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| /* | |
| * Regex/code to fix corrupt serialized PHP data | |
| * | |
| * The following snippet attempts to recursively read and parse a damaged serialized string (blob data). | |
| * For example, a string that was too long for its database column and got cut off when it was stored. | |
| * Numeric primitives and booleans are guaranteed to be valid; strings may be cut off and/or array keys may be missing. | |
| * The routine may be useful e.g. if recovering a significant part (not all) of the data is a sufficient solution for you. | |
| * Via: https://stackoverflow.com/a/38708463/1723496 | |
| */ | |
class Unserializer {
    /**
     * Parse a serialize() blob while tolerating truncated strings and arrays.
     *
     * The parser consumes one value from the front of $str and returns it.
     * Because $str is taken by reference, the consumed prefix is removed from
     * the caller's variable; this is how the recursive calls walk the blob.
     *
     * Supported tokens:
     *   - array:   a:236:{...}
     *   - integer: i:123;  i:-123;
     *   - double:  d:329.0001122;  d:-1.5;  d:1.0E+25;  d:INF;  d:-INF;  d:NAN;
     *   - boolean: b:1; or b:0;
     *   - string:  s:14:"date_departure";
     *   - null:    N;
     * Not supported: objects (O:...) and references (R:...). An unsupported or
     * unrecognised token yields null and leaves $str untouched.
     *
     * Notes:
     *   - Primitive types (bool, int, float) are only accepted when complete,
     *     so they are never returned corrupted; strings may come back cut off.
     *   - Arrays keep every key/value pair that could be read before the data
     *     ran out. Only int keys and non-blank string keys are stored.
     *   - A string whose payload itself starts with an array header is parsed
     *     immediately as a nested serialized array.
     *   - String lengths are byte lengths (strlen/substr, not mb_*), which is
     *     what serialize() writes.
     *
     * Usage:
     *   $unserialized = Unserializer::parseCorruptedBlob($serializedString);
     *
     * @param string $str Corrupted blob string; the parsed prefix is removed from it
     * @return array|string|int|float|bool|null
     */
    public static function parseCorruptedBlob(&$str) {
        // Normalise null/false (e.g. from an earlier failed substr()) to an empty string.
        $str = (string) $str;

        if (preg_match('/^a:(\d+):\{/', $str, $match)) {
            list($pattern, $cntItems) = $match;
            $cntItems = (int) $cntItems;
            $str = (string) substr($str, strlen($pattern));
            $array = [];
            for ($i = 0; $i < $cntItems; ++$i) {
                $remaining = strlen($str);
                $key = self::parseCorruptedBlob($str);
                // Always consume the value, even when the key is rejected below,
                // otherwise the value would be misread as the next key.
                $value = self::parseCorruptedBlob($str);
                if (strlen($str) === $remaining) {
                    // Nothing was consumed: the data is exhausted or unparseable.
                    // Stop instead of spinning up to the declared item count.
                    break;
                }
                // Null, blank strings and non int/string values are not used as keys.
                if (is_int($key) || (is_string($key) && trim($key) !== '')) {
                    $array[$key] = $value;
                }
            }
            // Consume exactly one closing bracket; it is absent when the blob was cut off.
            if (isset($str[0]) && $str[0] === '}') {
                $str = (string) substr($str, 1);
            }
            return $array;
        }

        if (preg_match('/^s:(\d+):/', $str, $match)) {
            list($pattern, $length) = $match;
            $length = (int) $length;
            $str = (string) substr($str, strlen($pattern));
            // Opening double quote.
            if (isset($str[0]) && $str[0] === '"') {
                $str = (string) substr($str, 1);
            }
            // Take the declared number of bytes (fewer if the blob was cut off).
            // Quotes inside the payload are part of the value and are kept.
            $val = (string) substr($str, 0, $length);
            $str = (string) substr($str, strlen($val));
            // Closing double quote and semicolon; either may be missing after truncation.
            if (isset($str[0]) && $str[0] === '"') {
                $str = (string) substr($str, 1);
            }
            if (isset($str[0]) && $str[0] === ';') {
                $str = (string) substr($str, 1);
            }
            if (preg_match('/^a:(\d+):\{/', $val)) {
                // parse instantly another serialized array
                return (array) self::parseCorruptedBlob($val);
            }
            return $val;
        }

        if (preg_match('/^i:([+-]?\d+);/', $str, $match)) {
            list($pattern, $val) = $match;
            $str = (string) substr($str, strlen($pattern));
            return (int) $val;
        }

        if (preg_match('/^d:(-?INF|NAN|[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?);/', $str, $match)) {
            list($pattern, $val) = $match;
            $str = (string) substr($str, strlen($pattern));
            // A (float) cast does not understand these spellings, so map them by hand.
            if ($val === 'INF') {
                return INF;
            }
            if ($val === '-INF') {
                return -INF;
            }
            if ($val === 'NAN') {
                return NAN;
            }
            return (float) $val;
        }

        if (preg_match('/^b:([01]);/', $str, $match)) {
            list($pattern, $val) = $match;
            $str = (string) substr($str, strlen($pattern));
            return $val === '1';
        }

        if (strncmp($str, 'N;', 2) === 0) {
            $str = (string) substr($str, 2);
            return null;
        }

        // Unsupported or corrupted token: nothing consumed.
        return null;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment