Skip to content

Instantly share code, notes, and snippets.

@Ldes
Last active February 5, 2019 13:35
Show Gist options
  • Save Ldes/5fc130512d57baa3bd512aa7af2dbdf6 to your computer and use it in GitHub Desktop.
Save Ldes/5fc130512d57baa3bd512aa7af2dbdf6 to your computer and use it in GitHub Desktop.
The following snippet attempts to recursively read and parse a damaged serialized string (blob data).
<?php
/*
* Regex/code to fix corrupt serialized PHP data
*
* The following snippet attempts to recursively read and parse a damaged serialized string (blob data),
* for example one that was cut off because it was too long for the database column it was stored in.
* Numeric primitives and booleans are guaranteed to be valid; strings may be cut off and/or array keys may be missing.
* The routine may be useful, e.g., when recovering a significant part (not all) of the data is a sufficient solution for you.
* Via: https://stackoverflow.com/a/38708463/1723496
*/
class Unserializer {
    /**
     * Parse one serialized value from the front of a possibly damaged blob.
     *
     * The recognised token is removed from the beginning of $str (which is
     * passed by reference), so after the call $str holds whatever was not
     * consumed. When the beginning of $str is not a recognised token, null is
     * returned and $str is left untouched.
     *
     * Supported patterns:
     *   - array:   a:236:{...;}
     *   - integer: i:123;  i:-123;
     *   - double:  d:329.0001122;  d:-1.5;  d:1.0E+25;  d:INF;  d:-INF;  d:NAN;
     *   - boolean: b:1; or b:0;
     *   - string:  s:14:"date_departure";
     *   - null:    N;
     *   - not supported: objects (O:...) and references (R:...)
     *
     * Notes:
     *   - bool, int and float tokens are only accepted when complete
     *     (terminated by ";"), so those values are never partial.
     *   - strings may be cut off; whatever bytes are available are returned.
     *   - arrays tolerate missing/corrupted keys and values: parsing stops at
     *     the first unrecognisable token and the items read so far are kept.
     *   - empty or whitespace-only keys, null keys and non-scalar keys are not
     *     stored; their value is still consumed so the following items stay
     *     aligned.
     *   - a string whose content itself starts like a serialized array
     *     (a:N:{) is parsed in place and returned as an array.
     *   - lengths are byte counts (strlen/substr, not mb_*), which is how
     *     serialize() records string lengths.
     *
     * Usage:
     *   $unserialized = Unserializer::parseCorruptedBlob($serializedString);
     *
     * @param string $str Corrupted blob string; consumed from the front
     * @return array|string|int|float|bool|null
     */
    public static function parseCorruptedBlob(&$str) {
        // Nothing to parse; leave the caller's variable exactly as it was.
        if (!is_string($str) || $str === '') {
            return null;
        }

        if (preg_match('/^a:(\d+):\{/', $str, $match)) {
            $str = (string) substr($str, strlen($match[0]));
            return self::parseArrayItems($str, (int) $match[1]);
        }

        if (preg_match('/^s:(\d+):/', $str, $match)) {
            $str = (string) substr($str, strlen($match[0]));
            return self::parseStringPayload($str, (int) $match[1]);
        }

        // Optional sign: serialize() writes negative integers as i:-5;
        if (preg_match('/^i:([+-]?\d+);/', $str, $match)) {
            $str = (string) substr($str, strlen($match[0]));
            return (int) $match[1];
        }

        // Sign, exponent and the INF / -INF / NAN literals are all valid doubles.
        if (preg_match('/^d:([+-]?[\d.]+(?:[eE][+-]?\d+)?|-?INF|NAN);/', $str, $match)) {
            $str = (string) substr($str, strlen($match[0]));
            return self::toFloat($match[1]);
        }

        if (preg_match('/^b:(0|1);/', $str, $match)) {
            $str = (string) substr($str, strlen($match[0]));
            return $match[1] === '1';
        }

        if (strncmp($str, 'N;', 2) === 0) {
            $str = (string) substr($str, 2);
            return null;
        }

        // Unsupported or damaged token: report "nothing" without consuming input.
        return null;
    }

    /**
     * Read up to $declaredCount key/value pairs that follow an "a:N:{" header.
     *
     * @param string $str           Remaining blob, positioned right after "{"
     * @param int    $declaredCount Item count taken from the array header
     * @return array Items that could be recovered
     */
    private static function parseArrayItems(&$str, $declaredCount) {
        $array = [];

        for ($i = 0; $i < $declaredCount; ++$i) {
            $remaining = strlen($str);
            if ($remaining === 0) {
                break; // blob was cut off inside this array
            }

            $key = self::parseCorruptedBlob($str);
            if (strlen($str) === $remaining) {
                // No token was recognised, so further iterations cannot make
                // progress either (this also guards against a corrupted,
                // absurdly large item count).
                break;
            }

            // Always consume the value, even when the key is rejected below,
            // otherwise the value would be mistaken for the next key.
            $value = self::parseCorruptedBlob($str);

            if (is_scalar($key) && trim((string) $key) !== '') {
                $array[is_string($key) ? $key : (int) $key] = $value;
            }
        }

        // Remove only this array's own closing bracket.
        if ($str !== '' && $str[0] === '}') {
            $str = (string) substr($str, 1);
        }

        return $array;
    }

    /**
     * Read the quoted payload that follows an "s:N:" header.
     *
     * @param string $str            Remaining blob, positioned at the opening quote
     * @param int    $declaredLength Byte length taken from the string header
     * @return array|string The payload, or an array when the payload is itself
     *                      a serialized array
     */
    private static function parseStringPayload(&$str, $declaredLength) {
        // Clamp so a corrupted length can neither overflow nor exceed the data.
        $length = min($declaredLength, strlen($str));

        $quoted = (string) substr($str, 0, $length + 2); // payload plus surrounding double quotes
        $str = (string) substr($str, strlen($quoted) + 1); // skip the terminating semicolon too

        // Drop exactly the delimiting quotes; quote characters that are part of
        // the payload must survive.
        if ($quoted !== '' && $quoted[0] === '"') {
            $val = (string) substr($quoted, 1, $length);
        } else {
            $val = (string) substr($quoted, 0, $length);
        }

        if (preg_match('/^a:(\d+):\{/', $val)) {
            // parse instantly another serialized array
            return (array) self::parseCorruptedBlob($val);
        }

        return $val;
    }

    /**
     * Convert the textual form of a serialized double into a float.
     *
     * @param string $literal Text between "d:" and ";"
     * @return float
     */
    private static function toFloat($literal) {
        if ($literal === 'NAN') {
            return NAN;
        }
        if ($literal === 'INF') {
            return INF;
        }
        if ($literal === '-INF') {
            return -INF;
        }
        return (float) $literal;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment