Revisions

  1. @iAugur revised this gist Oct 13, 2014. 1 changed file with 1 addition and 1 deletion.

    Apache Better Blocking with common rules.txt:
      Following on from other Gists I have posted, this one shows a neat way of using Includes to centralise general blocking rules for Bad Bots, creepy crawlers and irritating IPs
    - see the full post at http://www.blue-bag.com/
    + see the full post at http://www.blue-bag.com/blog/apache-better-blocking-common-rules
  2. @iAugur revised this gist Oct 13, 2014. 1 changed file with 2 additions and 2 deletions.

    Apache Better Blocking with common rules.txt:
    - Following on from other Gists I have posted, this one shows a neat way of using Includes to centralise general blocking rules for Bat Bots, creepy crawlers and irritating IPs
    - see the full post at http://technology.blue-bag.com
    + Following on from other Gists I have posted, this one shows a neat way of using Includes to centralise general blocking rules for Bad Bots, creepy crawlers and irritating IPs
    + see the full post at http://www.blue-bag.com/
  3. @iAugur renamed this gist Jun 17, 2014. 1 changed file with 0 additions and 0 deletions.
  4. @iAugur renamed this gist Jun 17, 2014. 1 changed file with 0 additions and 0 deletions.
  5. @iAugur renamed this gist Jun 17, 2014. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  6. @iAugur created this gist Jun 17, 2014.

    blocked-adresses.conf:
    ## A list of known problem IPs

    # pen test on FCKeditor
    SetEnvIfNoCase REMOTE_ADDR "175\.44\.30\.180" BlockedAddress
    SetEnvIfNoCase REMOTE_ADDR "175\.44\.29\.92" BlockedAddress
    SetEnvIfNoCase REMOTE_ADDR "174\.139\.240\.74" BlockedAddress


    # looking for backups
    SetEnvIfNoCase REMOTE_ADDR "192\.99\.12\.128" BlockedAddress

    # Bad Crawler
    SetEnvIfNoCase REMOTE_ADDR "144\.76\.195\.72" BlockedAddress
    SetEnvIfNoCase REMOTE_ADDR "54\.189\.47\.213" BlockedAddress

    # Java scraper
    SetEnvIfNoCase REMOTE_ADDR "62\.116\.110\.111" BlockedAddress

    # Big hitter - known spammer
    SetEnvIfNoCase REMOTE_ADDR "109\.201\.137\.166" BlockedAddress

    blocked-agents.conf:
    # list obtained from 3rd party

    # blank user-agents - tag with BlockedAgent so the Deny rules in the vhost apply
    SetEnvIfNoCase User-Agent ^$ BlockedAgent
    SetEnvIfNoCase User-Agent "Jakarta" BlockedAgent
    SetEnvIfNoCase User-Agent "User-Agent" BlockedAgent
    SetEnvIfNoCase User-Agent "libwww," BlockedAgent
    SetEnvIfNoCase User-Agent "lwp-trivial" BlockedAgent
    SetEnvIfNoCase User-Agent "Snoopy" BlockedAgent
    SetEnvIfNoCase User-Agent "PHPCrawl" BlockedAgent
    SetEnvIfNoCase User-Agent "WEP Search" BlockedAgent
    SetEnvIfNoCase User-Agent "Missigua Locator" BlockedAgent
    SetEnvIfNoCase User-Agent "ISC Systems iRc" BlockedAgent
    SetEnvIfNoCase User-Agent "lwp-trivial" BlockedAgent

    SetEnvIfNoCase User-Agent "GbPlugin" BlockedAgent
    SetEnvIfNoCase User-Agent "Wget" BlockedAgent
    SetEnvIfNoCase User-Agent "EmailSiphon" BlockedAgent
    SetEnvIfNoCase User-Agent "EmailWolf" BlockedAgent
    SetEnvIfNoCase User-Agent "libwww-perl" BlockedAgent

    ## end of 3rd party list (note: these could also be blocked via robots.txt - see the article)

    ## List derived from actual activity
    # Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)
    SetEnvIfNoCase User-Agent "BLEXBot" BlockedAgent

    # Mozilla/5.0 (compatible; 007ac9 Crawler; http://crawler.007ac9.net/)
    SetEnvIfNoCase User-Agent "007ac9 Crawler" BlockedAgent

    #Mozilla/5.0 (compatible; MJ12bot/v1.4.5; http://www.majestic12.co.uk/bot.php?+)
    SetEnvIfNoCase User-Agent "MJ12bot" BlockedAgent

    # Fetchbot (https://github.com/PuerkitoBio/fetchbot)
    SetEnvIfNoCase User-Agent "Fetchbot" BlockedAgent

    #Mozilla/5.0 (compatible; SISTRIX Crawler; http://crawler.sistrix.net/)
    SetEnvIfNoCase User-Agent "SISTRIX" BlockedAgent

    readme.txt:
    Following on from other Gists I have posted, this one shows a neat way of using Includes to centralise general blocking rules for Bat Bots, creepy crawlers and irritating IPs
    see the full post at http://technology.blue-bag.com

    vhost-sample.conf:
    <VirtualHost *:80>
        ## Note this is heavily reduced just to show the relevant lines
        ## Expires and security options have been removed
        ## Don't just paste this - but refer to it along with your customisations

        ServerName www.example.com

        DocumentRoot /var/www/example.com/live/htdocs

        <Directory /var/www/example.com/live/htdocs>
            Options +FollowSymLinks

            # Disable .htaccess files (remember to account for any rules they implement)
            AllowOverride None

            # Include our blocked lists
            Include /etc/apache2/blocked-addresses.conf
            Include /etc/apache2/blocked-agents.conf

            Order allow,deny
            Allow from all
            # Deny from our blocked lists
            Deny from env=BlockedAddress
            Deny from env=BlockedAgent

            <IfModule mod_rewrite.c>
                RewriteEngine on

                # Intercept Microsoft Office Protocol Discovery
                # OPTIONS requests for this were hitting the site regularly
                RewriteCond %{REQUEST_METHOD} ^OPTIONS
                RewriteCond %{HTTP_USER_AGENT} ^Microsoft\ Office\ Protocol\ Discovery [OR]
                RewriteCond %{HTTP_USER_AGENT} ^Microsoft\ Office\ Existence\ Discovery [OR]
                RewriteCond %{HTTP_USER_AGENT} ^Microsoft\-WebDAV\-MiniRedir.*$
                RewriteRule .* - [R=405,L]

                ##### Security hardening ####
                ## DENY REQUESTS BASED ON REQUEST METHOD ###
                RewriteCond %{REQUEST_METHOD} ^(TRACE|TRACK|OPTIONS|HEAD)$ [NC]
                RewriteRule ^.*$ - [F]
            </IfModule>
        </Directory>

        ## The following log details are included to show
        ## how to use SetEnvIf to include/exclude certain requests for images etc.
        ## Also turn on robots.txt logging to check robot behaviour

        ## Custom logging for combined logs - note they are filtered to not log images, robots.txt, css, js etc.
        UseCanonicalName On
        LogFormat "%V %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" vcommon

        ErrorLog /var/www/log/customer-error.log

        # Possible values include: debug, info, notice, warn, error, crit,
        # alert, emerg.
        LogLevel warn

        ## we aren't logging images, css, js etc.

        ## flag robots.txt requests - set to 0 so they are still logged (to observe robot behaviour)
        SetEnvIf Request_URI "^/robots\.txt$" robots-request=0
        ## flag favicon requests
        SetEnvIf Request_URI "^/favicon\.ico$" favicon-request=1

        ## flag image requests
        SetEnvIf Request_URI "(\.gif|\.png|\.jpg)$" image-request=1

        ## flag CSS and JS requests
        SetEnvIf Request_URI \.css css-request=1
        SetEnvIf Request_URI \.js js-request=1

        ## set do_not_log if any of the above flags are set
        SetEnvIf robots-request 1 do_not_log=1
        SetEnvIf favicon-request 1 do_not_log=1
        SetEnvIf image-request 1 do_not_log=1
        SetEnvIf css-request 1 do_not_log=1
        SetEnvIf js-request 1 do_not_log=1

        ## only log if do_not_log is not set
        CustomLog /var/www/log/customer-access.log vcommon env=!do_not_log
    </VirtualHost>
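
    A note on Apache versions: the Order/Allow/Deny directives in the sample are Apache 2.2 syntax, current when this gist was written. On Apache 2.4 (mod_authz_core) the same environment-variable blocking could be expressed with Require directives - a minimal sketch, assuming the same two included files set BlockedAddress and BlockedAgent:

        <RequireAll>
            # Allow everyone by default, then subtract the blocked sets
            Require all granted
            Require not env=BlockedAddress
            Require not env=BlockedAgent
        </RequireAll>

    After editing the included lists, run a config test and reload (e.g. apachectl configtest, then a graceful reload) so the changes take effect.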