-
-
Save lmcro/be575a2322b9f06fafdc to your computer and use it in GitHub Desktop.
Revisions
-
Sean Murphy revised this gist
Jul 17, 2012 . 2 changed files with 6 additions and 8 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,8 +1,6 @@ <?php /** * Create a web friendly URL slug from a string. * * Although supported, transliteration is discouraged because * 1) most web browsers support UTF-8 characters in URLs @@ -17,7 +15,8 @@ * @return string */ function url_slug($str, $options = array()) { // Make sure string is in UTF-8 and strip invalid UTF-8 characters $str = mb_convert_encoding((string)$str, 'UTF-8', mb_list_encodings()); $defaults = array( 'delimiter' => '-', @@ -102,14 +101,14 @@ function url_slug($str, $options = array()) { // Transliterate characters to ASCII if ($options['transliterate']) { $str = str_replace(array_keys($char_map), $char_map, $str); } // Replace non-alphanumeric characters with our delimiter $str = preg_replace('/[^\p{L}\p{Nd}]+/u', $options['delimiter'], $str); // Remove duplicate delimiters $str = preg_replace('/(' . preg_quote($options['delimiter'], '/') . '){2,}/', '$1', $str); // Truncate slug to max. characters $str = mb_substr($str, 0, ($options['limit'] ? $options['limit'] : mb_strlen($str, 'UTF-8')), 'UTF-8'); This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,5 @@ <?php include('url_slug.php'); header('Content-type: text/plain; charset=utf-8'); // Basic usage -
sgmurphy revised this gist
Jul 13, 2012 . 1 changed file with 55 additions and 0 deletions. There are no files selected for viewing
<?php
/**
 * Usage examples for url_slug().
 *
 * Prints each sample input followed by the slug generated from it, as
 * plain UTF-8 text. The output the author recorded for these calls is
 * reproduced in the comment at the bottom of the script.
 */

// Load the function under demonstration. Without this every url_slug() call
// below is a fatal "undefined function" error.
// NOTE(review): assumes url_slug.php sits in the same directory as this
// script - confirm against the actual layout.
require_once __DIR__ . '/url_slug.php';

// Must be sent before any output is echoed.
header('Content-type: text/plain; charset=utf-8');

// Basic usage
echo "This is an example string. Nothing fancy." . "\n";
echo url_slug("This is an example string. Nothing fancy.") . "\n\n";

// Example using French with unwanted characters ('?)
echo "Qu'en est-il français? Ça marche alors?" . "\n";
echo url_slug("Qu'en est-il français? Ça marche alors?") . "\n\n";

// Example using transliteration
echo "Что делать, если я не хочу, UTF-8?" . "\n";
echo url_slug("Что делать, если я не хочу, UTF-8?", array('transliterate' => true)) . "\n\n";

// Example using transliteration on an unsupported language
echo "מה אם אני לא רוצה UTF-8 תווים?" . "\n";
echo url_slug("מה אם אני לא רוצה UTF-8 תווים?", array('transliterate' => true)) . "\n\n";

// Some other options: custom delimiter, length limit, preserved case and
// regex-based custom replacements applied before slugging.
echo "This is an Example String. What's Going to Happen to Me?" . "\n";
echo url_slug(
    "This is an Example String. What's Going to Happen to Me?",
    array(
        'delimiter' => '_',
        'limit' => 40,
        'lowercase' => false,
        'replacements' => array(
            '/\b(an)\b/i' => 'a',
            '/\b(example)\b/i' => 'Test'
        )
    )
);

/*
Output:

This is an example string. Nothing fancy.
this-is-an-example-string-nothing-fancy

Qu'en est-il français? Ça marche alors?
qu-en-est-il-français-ça-marche-alors

Что делать, если я не хочу, UTF-8?
chto-delat-esli-ya-ne-hochu-utf-8

מה אם אני לא רוצה UTF-8 תווים?
מה-אם-אני-לא-רוצה-utf-8-תווים

This is an Example String. What's Going to Happen to Me?
This_is_a_Test_String_What_s_Going_to_Ha
*/
sgmurphy revised this gist
Jul 12, 2012 . 1 changed file with 3 additions and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,3 +1,4 @@ <?php /** * Create a web friendly URL slug from a string. * @@ -117,4 +118,5 @@ function url_slug($str, $options = array()) { $str = trim($str, $options['delimiter']); return $options['lowercase'] ? mb_strtolower($str, 'UTF-8') : $str; } ?> -
sgmurphy revised this gist
Jul 12, 2012 . 1 changed file with 79 additions and 9 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,7 +1,12 @@ /** * Create a web friendly URL slug from a string. * * The mb_str_replace function can be found here: <https://gist.github.com/3098836> * * Although supported, transliteration is discouraged because * 1) most web browsers support UTF-8 characters in URLs * 2) transliteration causes a loss of information * * @author Sean Murphy <[email protected]> * @copyright Copyright 2012 Sean Murphy. All rights reserved. * @license http://creativecommons.org/publicdomain/zero/1.0/ @@ -24,27 +29,92 @@ function url_slug($str, $options = array()) { // Merge options $options = array_merge($defaults, $options); $char_map = array( // Latin 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE', 'Ç' => 'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ő' => 'O', 'Ø' => 'O', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ű' => 'U', 'Ý' => 'Y', 'Þ' => 'TH', 'ß' => 'ss', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'd', 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ő' => 'o', 'ø' => 'o', 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ű' => 'u', 'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y', // Latin symbols '©' => '(c)', // Greek 'Α' => 'A', 'Β' => 'B', 'Γ' => 'G', 'Δ' => 'D', 'Ε' => 'E', 'Ζ' => 'Z', 'Η' => 'H', 'Θ' => '8', 'Ι' => 'I', 'Κ' => 'K', 'Λ' => 'L', 'Μ' => 'M', 'Ν' => 'N', 'Ξ' => '3', 'Ο' => 'O', 'Π' => 'P', 'Ρ' => 
'R', 'Σ' => 'S', 'Τ' => 'T', 'Υ' => 'Y', 'Φ' => 'F', 'Χ' => 'X', 'Ψ' => 'PS', 'Ω' => 'W', 'Ά' => 'A', 'Έ' => 'E', 'Ί' => 'I', 'Ό' => 'O', 'Ύ' => 'Y', 'Ή' => 'H', 'Ώ' => 'W', 'Ϊ' => 'I', 'Ϋ' => 'Y', 'α' => 'a', 'β' => 'b', 'γ' => 'g', 'δ' => 'd', 'ε' => 'e', 'ζ' => 'z', 'η' => 'h', 'θ' => '8', 'ι' => 'i', 'κ' => 'k', 'λ' => 'l', 'μ' => 'm', 'ν' => 'n', 'ξ' => '3', 'ο' => 'o', 'π' => 'p', 'ρ' => 'r', 'σ' => 's', 'τ' => 't', 'υ' => 'y', 'φ' => 'f', 'χ' => 'x', 'ψ' => 'ps', 'ω' => 'w', 'ά' => 'a', 'έ' => 'e', 'ί' => 'i', 'ό' => 'o', 'ύ' => 'y', 'ή' => 'h', 'ώ' => 'w', 'ς' => 's', 'ϊ' => 'i', 'ΰ' => 'y', 'ϋ' => 'y', 'ΐ' => 'i', // Turkish 'Ş' => 'S', 'İ' => 'I', 'Ç' => 'C', 'Ü' => 'U', 'Ö' => 'O', 'Ğ' => 'G', 'ş' => 's', 'ı' => 'i', 'ç' => 'c', 'ü' => 'u', 'ö' => 'o', 'ğ' => 'g', // Russian 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D', 'Е' => 'E', 'Ё' => 'Yo', 'Ж' => 'Zh', 'З' => 'Z', 'И' => 'I', 'Й' => 'J', 'К' => 'K', 'Л' => 'L', 'М' => 'M', 'Н' => 'N', 'О' => 'O', 'П' => 'P', 'Р' => 'R', 'С' => 'S', 'Т' => 'T', 'У' => 'U', 'Ф' => 'F', 'Х' => 'H', 'Ц' => 'C', 'Ч' => 'Ch', 'Ш' => 'Sh', 'Щ' => 'Sh', 'Ъ' => '', 'Ы' => 'Y', 'Ь' => '', 'Э' => 'E', 'Ю' => 'Yu', 'Я' => 'Ya', 'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', 'е' => 'e', 'ё' => 'yo', 'ж' => 'zh', 'з' => 'z', 'и' => 'i', 'й' => 'j', 'к' => 'k', 'л' => 'l', 'м' => 'm', 'н' => 'n', 'о' => 'o', 'п' => 'p', 'р' => 'r', 'с' => 's', 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'h', 'ц' => 'c', 'ч' => 'ch', 'ш' => 'sh', 'щ' => 'sh', 'ъ' => '', 'ы' => 'y', 'ь' => '', 'э' => 'e', 'ю' => 'yu', 'я' => 'ya', // Ukrainian 'Є' => 'Ye', 'І' => 'I', 'Ї' => 'Yi', 'Ґ' => 'G', 'є' => 'ye', 'і' => 'i', 'ї' => 'yi', 'ґ' => 'g', // Czech 'Č' => 'C', 'Ď' => 'D', 'Ě' => 'E', 'Ň' => 'N', 'Ř' => 'R', 'Š' => 'S', 'Ť' => 'T', 'Ů' => 'U', 'Ž' => 'Z', 'č' => 'c', 'ď' => 'd', 'ě' => 'e', 'ň' => 'n', 'ř' => 'r', 'š' => 's', 'ť' => 't', 'ů' => 'u', 'ž' => 'z', // Polish 'Ą' => 'A', 'Ć' => 'C', 'Ę' => 'e', 'Ł' => 'L', 'Ń' 
=> 'N', 'Ó' => 'o', 'Ś' => 'S', 'Ź' => 'Z', 'Ż' => 'Z', 'ą' => 'a', 'ć' => 'c', 'ę' => 'e', 'ł' => 'l', 'ń' => 'n', 'ó' => 'o', 'ś' => 's', 'ź' => 'z', 'ż' => 'z', // Latvian 'Ā' => 'A', 'Č' => 'C', 'Ē' => 'E', 'Ģ' => 'G', 'Ī' => 'i', 'Ķ' => 'k', 'Ļ' => 'L', 'Ņ' => 'N', 'Š' => 'S', 'Ū' => 'u', 'Ž' => 'Z', 'ā' => 'a', 'č' => 'c', 'ē' => 'e', 'ģ' => 'g', 'ī' => 'i', 'ķ' => 'k', 'ļ' => 'l', 'ņ' => 'n', 'š' => 's', 'ū' => 'u', 'ž' => 'z' ); // Make custom replacements $str = preg_replace(array_keys($options['replacements']), $options['replacements'], $str); // Transliterate characters to ASCII if ($options['transliterate']) { $str = mb_str_replace(array_keys($char_map), $char_map, $str); } // Replace non-alphanumeric characters with our delimiter $str = preg_replace('/[^\p{L}\p{N}]+/u', $options['delimiter'], $str); // Remove duplicate delimiters $str = preg_replace('/[' . preg_quote($options['delimiter'], '/') . ']{2,}/', $options['delimiter'], $str); // Truncate slug to max. characters $str = mb_substr($str, 0, ($options['limit'] ? $options['limit'] : mb_strlen($str, 'UTF-8')), 'UTF-8'); // Remove delimiter from ends $str = trim($str, $options['delimiter']); return $options['lowercase'] ? mb_strtolower($str, 'UTF-8') : $str; } -
sgmurphy created this gist
Jul 12, 2012 . There are no files selected for viewing
<?php
/**
 * Create a web friendly URL slug from a string.
 *
 * The input is cut into "words" (runs of Unicode letters and digits) and the
 * words are joined with the delimiter. Because the slug is assembled from
 * words rather than patched up afterwards, it can never start or end with a
 * delimiter or contain two delimiters in a row, whatever the delimiter is
 * (including an empty or multi-character one).
 *
 * Supported options:
 *  - delimiter     (string) text placed between words. Default '-'.
 *  - limit         (int|null) maximum slug length in characters; null, 0 or
 *                  a negative value means no limit. Default null.
 *  - lowercase     (bool) lowercase the result. Default true.
 *  - replacements  (array) regex pattern => replacement pairs applied to the
 *                  raw input before it is split into words. Default array().
 *  - transliterate (bool) convert each word to ASCII with iconv's //TRANSLIT.
 *                  The result depends on the platform's iconv implementation
 *                  and locale. Default false.
 *
 * @author Sean Murphy
 * @copyright Copyright 2012 Sean Murphy. All rights reserved.
 * @license http://creativecommons.org/publicdomain/zero/1.0/
 *
 * @param string $str     Text to turn into a slug.
 * @param array  $options Overrides for the defaults listed above.
 * @return string The slug; an empty string when the input has no letters or digits.
 */
function url_slug($str, $options = array()) {
    $defaults = array(
        'delimiter' => '-',
        'limit' => null,
        'lowercase' => true,
        'replacements' => array(),
        'transliterate' => false,
    );

    // Merge options
    $options = array_merge($defaults, $options);

    $delimiter = (string) $options['delimiter'];
    $limit = (int) $options['limit'];

    // Replace invalid UTF-8 sequences with the mbstring substitute character.
    // The /u patterns below return null on malformed input, which used to
    // turn the whole slug into an empty string.
    $str = mb_convert_encoding((string) $str, 'UTF-8', 'UTF-8');

    // Make custom replacements. A broken user pattern makes preg_replace()
    // return null (PHP raises its own warning); keep the text in that case.
    if (!empty($options['replacements'])) {
        $replaced = preg_replace(
            array_keys($options['replacements']),
            array_values($options['replacements']),
            $str
        );
        if ($replaced !== null) {
            $str = $replaced;
        }
    }

    // Split on everything that is not a letter or a digit, dropping empty pieces.
    $words = preg_split('/[^\p{L}\p{N}]+/u', $str, -1, PREG_SPLIT_NO_EMPTY);
    if ($words === false) {
        $words = array();
    }

    // Transliterate word by word so that a word which turns into nothing
    // simply disappears instead of leaving a doubled delimiter behind.
    if ($options['transliterate']) {
        $ascii_words = array();
        foreach ($words as $word) {
            $ascii = iconv('UTF-8', 'ASCII//TRANSLIT', $word);
            if ($ascii === false) {
                // Conversion failed: keep the UTF-8 word rather than losing it.
                $ascii = $word;
            }
            // Drop placeholders and punctuation introduced by the conversion.
            $ascii = preg_replace('/[^\p{L}\p{N}]+/u', '', $ascii);
            if ($ascii !== null && $ascii !== '') {
                $ascii_words[] = $ascii;
            }
        }
        $words = $ascii_words;
    }

    if ($limit > 0) {
        // Append words until the limit is reached. A delimiter is only added
        // when at least one character of the following word still fits, so
        // truncation never leaves a dangling (or half) delimiter at the end.
        $slug = '';
        $used = 0;
        $delimiter_length = mb_strlen($delimiter, 'UTF-8');
        foreach ($words as $word) {
            $separator_length = ($slug === '') ? 0 : $delimiter_length;
            $room = $limit - $used - $separator_length;
            if ($room <= 0) {
                break;
            }
            $piece = mb_substr($word, 0, $room, 'UTF-8');
            $slug .= ($slug === '' ? '' : $delimiter) . $piece;
            $used += $separator_length + mb_strlen($piece, 'UTF-8');
        }
    } else {
        $slug = implode($delimiter, $words);
    }

    return $options['lowercase'] ? mb_strtolower($slug, 'UTF-8') : $slug;
}