Skip to content

Instantly share code, notes, and snippets.

@animatedcreativity
Forked from mbijon/xss_clean.php
Created September 6, 2016 11:23
Show Gist options
  • Save animatedcreativity/dbba54cfe30c4a2c51ad3de669536d19 to your computer and use it in GitHub Desktop.
Save animatedcreativity/dbba54cfe30c4a2c51ad3de669536d19 to your computer and use it in GitHub Desktop.

Revisions

  1. @mbijon mbijon revised this gist Mar 9, 2016. 1 changed file with 6 additions and 2 deletions.
    8 changes: 6 additions & 2 deletions xss_clean.php
    Original file line number Diff line number Diff line change
    @@ -26,17 +26,21 @@ class xssClean {
    */
    public function clean_input( $input, $safe_level = 0 ) {

    $output = $input;
    do {
    // Treat $input as buffer on each loop, faster than new var
    $input = $output;

    // Remove unwanted tags
    $old_input = $input;
    $output = $this->strip_tags( $input );
    $output = $this->strip_encoded_entities( $output );

    // Use 2nd input param if not empty or '0'
    if ( $safe_level !== 0 ) {
    $output = $this->strip_base64( $output );
    }

    } while ( $output !== $old_input );
    } while ( $output !== $input );

    return $output;

  2. @mbijon mbijon revised this gist Mar 8, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion xss_clean.php
    Original file line number Diff line number Diff line change
    @@ -58,7 +58,7 @@ private function strip_encoded_entities( $input ) {
    $input = html_entity_decode($input, ENT_COMPAT, 'UTF-8');

    // Remove any attribute starting with "on" or xmlns
    $input = preg_replace('#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+[>\b]#iu', '$1>', $input);
    $input = preg_replace('#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+[>\b]?#iu', '$1>', $input);

    // Remove javascript: and vbscript: protocols
    $input = preg_replace('#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...', $input);
  3. @mbijon mbijon revised this gist Mar 8, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion xss_clean.php
    Original file line number Diff line number Diff line change
    @@ -58,7 +58,7 @@ private function strip_encoded_entities( $input ) {
    $input = html_entity_decode($input, ENT_COMPAT, 'UTF-8');

    // Remove any attribute starting with "on" or xmlns
    $input = preg_replace('#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+>#iu', '$1>', $input);
    $input = preg_replace('#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+[>\b]#iu', '$1>', $input);

    // Remove javascript: and vbscript: protocols
    $input = preg_replace('#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...', $input);
  4. @mbijon mbijon revised this gist Mar 8, 2016. 1 changed file with 4 additions and 0 deletions.
    4 changes: 4 additions & 0 deletions xss_clean.php
    Original file line number Diff line number Diff line change
    @@ -94,6 +94,10 @@ private function strip_tags( $input ) {
    /*
    * Focuses on stripping entities from Base64 encoded strings
    *
    * NOT ENABLED by default!
    * To enable it, pass anything other than 0 or '0' as the 2nd param of clean_input(),
    * e.g.: xssClean->clean_input( $input_string, 1 )
    *
    * @param string $input Maybe Base64 encoded string
    * @return string $output Modified & re-encoded $input string
    */
  5. @mbijon mbijon revised this gist Mar 8, 2016. No changes.
  6. @mbijon mbijon revised this gist Mar 8, 2016. 1 changed file with 96 additions and 34 deletions.
    130 changes: 96 additions & 34 deletions xss_clean.php
    Original file line number Diff line number Diff line change
    @@ -1,51 +1,113 @@
    <?php
    /*
    * XSS filter
    * XSS filter, recursively handles HTML tags & UTF encoding
    * Optionally handles base64 encoding
    *
    * ***DEPRECATION RECOMMENDED*** Not updated or maintained since 2011
    * A MAINTAINED & BETTER ALTERNATIVE => kses
    * https://github.com/RichardVasquez/kses/
    *
    * This was built from numerous sources
    * (thanks all, sorry I didn't track to credit you)
    *
    *
    * It was tested against *most* exploits here: http://ha.ckers.org/xss.html
    * WARNING: Some weren't tested!!!
    * Those include the Actionscript and SSI samples, or any newer than Jan 2011
    *
    *
    * TO-DO: compare to SymphonyCMS filter:
    * https://github.com/symphonycms/xssfilter/blob/master/extension.driver.php
    * (Symphony's is probably faster than my hack)
    */

    function xss_clean($data)
    {
    class xssClean {

    /*
    * Recursive worker to strip risky elements
    *
    * @param string $input Content to be cleaned. It MAY be modified in output
    * @return string $output Modified $input string
    */
    public function clean_input( $input, $safe_level = 0 ) {

        // Base64 handling is opt-in: it runs for any $safe_level other than 0 or '0'.
        $check_base64 = ( $safe_level !== 0 && $safe_level !== '0' );

        $output = $input;
        do {
            // Feed the previous pass's result back in. Re-cleaning a never-updated
            // $input would yield the same changed $output on every pass, so the
            // loop condition could never become false once anything was stripped.
            $input = $output;

            // Remove unwanted tags, then encoded entities and risky attributes
            $output = $this->strip_tags( $input );
            $output = $this->strip_encoded_entities( $output );

            if ( $check_base64 ) {
                $output = $this->strip_base64( $output );
            }

        // Stop once a full pass leaves the string untouched
        } while ( $output !== $input );

        return $output;

    }

    /*
    * Focuses on stripping encoded entities
    * *** This appears to be why people use this sample code. Unclear how well Kses does this ***
    *
    * @param string $input Content to be cleaned. It MAY be modified in output
    * @return string $input Modified $input string
    */
    private function strip_encoded_entities( $input ) {

    // Fix &entity\n;
    $data = str_replace(array('&amp;','&lt;','&gt;'), array('&amp;amp;','&amp;lt;','&amp;gt;'), $data);
    $data = preg_replace('/(&#*\w+)[\x00-\x20]+;/u', '$1;', $data);
    $data = preg_replace('/(&#x*[0-9A-F]+);*/iu', '$1;', $data);
    $data = html_entity_decode($data, ENT_COMPAT, 'UTF-8');
    $input = str_replace(array('&amp;','&lt;','&gt;'), array('&amp;amp;','&amp;lt;','&amp;gt;'), $input);
    $input = preg_replace('/(&#*\w+)[\x00-\x20]+;/u', '$1;', $input);
    $input = preg_replace('/(&#x*[0-9A-F]+);*/iu', '$1;', $input);
    $input = html_entity_decode($input, ENT_COMPAT, 'UTF-8');

    // Remove any attribute starting with "on" or xmlns
    $data = preg_replace('#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+>#iu', '$1>', $data);
    $input = preg_replace('#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+>#iu', '$1>', $input);

    // Remove javascript: and vbscript: protocols
    $data = preg_replace('#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...', $data);
    $data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2novbscript...', $data);
    $data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*-moz-binding[\x00-\x20]*:#u', '$1=$2nomozbinding...', $data);
    $input = preg_replace('#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...', $input);
    $input = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2novbscript...', $input);
    $input = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*-moz-binding[\x00-\x20]*:#u', '$1=$2nomozbinding...', $input);

    // Only works in IE: <span style="width: expression(alert('Ping!'));"></span>
    $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?expression[\x00-\x20]*\([^>]*+>#i', '$1>', $data);
    $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?behaviour[\x00-\x20]*\([^>]*+>#i', '$1>', $data);
    $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:*[^>]*+>#iu', '$1>', $data);

    // Remove namespaced elements (we do not need them)
    $data = preg_replace('#</*\w+:\w[^>]*+>#i', '', $data);

    do
    {
    // Remove really unwanted tags
    $old_data = $data;
    $data = preg_replace('#</*(?:applet|b(?:ase|gsound|link)|embed|frame(?:set)?|i(?:frame|layer)|l(?:ayer|ink)|meta|object|s(?:cript|tyle)|title|xml)[^>]*+>#i', '', $data);
    }
    while ($old_data !== $data);

    // we are done...
    return $data;
    }
    $input = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?expression[\x00-\x20]*\([^>]*+>#i', '$1>', $input);
    $input = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?behaviour[\x00-\x20]*\([^>]*+>#i', '$1>', $input);
    $input = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:*[^>]*+>#iu', '$1>', $input);

    return $input;

    }

    /*
    * Focuses on stripping unencoded HTML tags & namespaces
    *
    * @param string $input Content to be cleaned. It MAY be modified in output
    * @return string $input Modified $input string
    */
    private function strip_tags( $input ) {

        // Applied in order: blacklisted tags first, then namespaced elements.
        $unwanted = array(
            '#</*(?:applet|b(?:ase|gsound|link)|embed|frame(?:set)?|i(?:frame|layer)|l(?:ayer|ink)|meta|object|s(?:cript|tyle)|title|xml)[^>]*+>#i',
            '#</*\w+:\w[^>]*+>#i',
        );

        foreach ( $unwanted as $pattern ) {
            // Each match (opening or closing tag) is dropped entirely
            $input = preg_replace( $pattern, '', $input );
        }

        return $input;

    }

    /*
    * Focuses on stripping entities from Base64 encoded strings
    *
    * @param string $input Maybe Base64 encoded string
    * @return string $output Modified & re-encoded $input string
    */
    private function strip_base64( $input ) {

        // Strict mode returns false when $input contains characters outside the
        // Base64 alphabet. Non-strict decoding silently drops those characters,
        // which turned ordinary text into garbage that was then re-encoded.
        // NOTE(review): strictness also means a payload padded with junk
        // characters is not decoded here - confirm that is acceptable.
        $decoded = base64_decode( $input, true );
        if ( $decoded === false ) {
            // Not Base64: leave the caller's string untouched
            return $input;
        }

        $cleaned = $this->strip_tags( $decoded );
        $cleaned = $this->strip_encoded_entities( $cleaned );

        // Nothing was stripped: return the exact original rather than a
        // re-encoded copy, which could differ in whitespace or padding.
        if ( $cleaned === $decoded ) {
            return $input;
        }

        return base64_encode( $cleaned );

    }

    }
  7. @mbijon mbijon revised this gist Jul 21, 2011. 1 changed file with 5 additions and 0 deletions.
    5 changes: 5 additions & 0 deletions xss_clean.php
    Original file line number Diff line number Diff line change
    @@ -7,6 +7,11 @@
    * It was tested against *most* exploits here: http://ha.ckers.org/xss.html
    * WARNING: Some weren't tested!!!
    * Those include the Actionscript and SSI samples, or any newer than Jan 2011
    *
    *
    * TO-DO: compare to SymphonyCMS filter:
    * https://github.com/symphonycms/xssfilter/blob/master/extension.driver.php
    * (Symphony's is probably faster than my hack)
    */

    function xss_clean($data)
  8. @mbijon mbijon created this gist Jul 21, 2011.
    46 changes: 46 additions & 0 deletions xss_clean.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,46 @@
    /*
    * XSS filter
    *
    * This was built from numerous sources
    * (thanks all, sorry I didn't track to credit you)
    *
    * It was tested against *most* exploits here: http://ha.ckers.org/xss.html
    * WARNING: Some weren't tested!!!
    * Those include the Actionscript and SSI samples, or any newer than Jan 2011
    */

    /**
     * Best-effort XSS filter for a chunk of markup.
     *
     * Normalises and decodes HTML entities, then runs a fixed sequence of regex
     * passes (attributes starting with "on"/xmlns, javascript:/vbscript:/-moz-binding
     * values, risky style attributes, namespaced elements) and finally strips a
     * blacklist of tags until no more are found.
     *
     * @param string $data Markup to filter
     * @return string Filtered markup
     */
    function xss_clean($data)
    {
        // Double-encode the three basic entities so the decode below restores
        // them as entities instead of turning them into live markup.
        $data = str_replace(
            array('&amp;', '&lt;', '&gt;'),
            array('&amp;amp;', '&amp;lt;', '&amp;gt;'),
            $data
        );

        // Fix &entity\n; and make sure numeric entities end with a single ';'
        $entity_fixes = array(
            '/(&#*\w+)[\x00-\x20]+;/u',
            '/(&#x*[0-9A-F]+);*/iu',
        );
        foreach ($entity_fixes as $pattern) {
            $data = preg_replace($pattern, '$1;', $data);
        }
        $data = html_entity_decode($data, ENT_COMPAT, 'UTF-8');

        // pattern => replacement, applied strictly in this order
        $rules = array(
            // Cut the tag short at the first attribute starting with "on" or xmlns
            '#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+>#iu' => '$1>',

            // Neutralise javascript:, vbscript: and -moz-binding: values
            '#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu' => '$1=$2nojavascript...',
            '#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu' => '$1=$2novbscript...',
            '#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*-moz-binding[\x00-\x20]*:#u' => '$1=$2nomozbinding...',

            // Only works in IE: <span style="width: expression(alert('Ping!'));"></span>
            '#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?expression[\x00-\x20]*\([^>]*+>#i' => '$1>',
            '#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?behaviour[\x00-\x20]*\([^>]*+>#i' => '$1>',
            '#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:*[^>]*+>#iu' => '$1>',

            // Remove namespaced elements (we do not need them)
            '#</*\w+:\w[^>]*+>#i' => '',
        );
        foreach ($rules as $pattern => $replacement) {
            $data = preg_replace($pattern, $replacement, $data);
        }

        // Remove really unwanted tags; repeat because removing one tag can
        // splice the surrounding text into another (e.g. <scr<script>ipt>).
        $blocked_tags = '#</*(?:applet|b(?:ase|gsound|link)|embed|frame(?:set)?|i(?:frame|layer)|l(?:ayer|ink)|meta|object|s(?:cript|tyle)|title|xml)[^>]*+>#i';
        while (true) {
            $stripped = preg_replace($blocked_tags, '', $data);
            if ($stripped === $data) {
                // A pass that changes nothing means we are done
                return $stripped;
            }
            $data = $stripped;
        }
    }