Skip to content

Commit

Permalink
adding option to block known script-bots
Browse files Browse the repository at this point in the history
  • Loading branch information
ansibleguy committed Dec 9, 2023
1 parent f9387b2 commit 2182f84
Show file tree
Hide file tree
Showing 13 changed files with 138 additions and 6 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ ansible-galaxy install -r requirements.yml

* **Default opt-outs**:
* proxy-mode caching
* Blocking of known Script-Bots
* Blocking of known Bad-Crawler-Bots

## Info

Expand Down Expand Up @@ -146,6 +148,11 @@ nginx:
ssl:
mode: 'existing' # pre-existing certificates to be copied to the target server

security:
# very basic filtering of bad bots based on user-agent matching
block_script_bots: true
block_bad_crawler_bots: true

guys_statics:
mode: 'server'
domain: 'static.guy.net'
Expand Down
46 changes: 46 additions & 0 deletions defaults/main/0_hardcoded.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,50 @@ NGINX_HC:
basic_auth: ['file', 'pam']
basic_auth_pam: ['system', 'custom']

user_agents:
# User-agent patterns used for very basic security filtering.
# The site templates compare them against the User-Agent header with nginx's
# case-insensitive regex operator ('~*').
script:
# Patterns of well-known scripting tools and HTTP libraries.
# Entries of this list are joined with '|' into one regex by the site templates.
# NOTE: requests with an empty user-agent are blocked as well when 'block_script_bots' is enabled
full: []

# NOTE: these are sub-strings that may occur anywhere inside the user-agent header
sub:
# cli tools
- 'curl'
- 'wget'
- 'Apache-HttpClient'
- 'nmap'
- 'Metasploit'
# automation tools
- 'headless'
# golang
- 'go-http-client'
# python
- 'python'
- 'httpx'
- 'httpcore'
- 'aiohttp'
- 'httputil'
# php
- 'GuzzleHttp'
- 'phpcrawl'
- 'Zend_Http_Client'
- 'Wordpress'
# others
- 'cpp-httplib'  # C++
- 'java'
- 'perl'
- 'axios'  # JavaScript

# Patterns of unwanted crawlers; only rendered when 'block_bad_crawler_bots' is enabled.
bad_crawlers:
full: []
sub:
- 'spider'
- 'test-bot'
- 'tiny-bot'
- 'fidget-spinner-bot'
# python
- 'scrapy'

NONE_VALUES: [none, '', ' ']
3 changes: 3 additions & 0 deletions defaults/main/2_site.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ defaults_site:
allow_only_methods: ['HEAD', 'GET', 'POST']
# if 'restrict_methods' is disabled - this will still deny 'TRACE' & 'CONNECT' as they might open your server/services up to attacks
deny_dangerous_methods: true
block_script_bots: false
block_bad_crawler_bots: false
block_status_code: 403

redirect:
target: 'https://github.com/ansibleguy'
Expand Down
5 changes: 5 additions & 0 deletions handlers/main.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
---

# NOTE: config test that is notified together with reload/restart;
#   it is meant to fail first so an invalid config never gets (re)loaded
- name: Validate-nginx-config
  ansible.builtin.command:
    cmd: 'nginx -t -c /etc/nginx/nginx.conf'
  # a pure check - it never modifies the target
  changed_when: false

- name: Reload-nginx
ansible.builtin.systemd:
name: 'nginx.service'
Expand Down
12 changes: 12 additions & 0 deletions molecule/default/converge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,17 @@
redirect:
target: 'https://github.com/ansibleguy'

# site with restricted HTTP methods and both user-agent block options enabled
TESTsecurity:
domain: 'nginx6.test.ansibleguy.net'
ssl: "{{ dummy_ssl }}"
mode: 'server'
server:
path: '/var/www/test6'
index: ['test.html']
security:
allow_only_methods: ['GET', 'HEAD']
block_script_bots: true
block_bad_crawler_bots: true

roles:
- ansibleguy.infra_nginx
1 change: 1 addition & 0 deletions tasks/debian/add_basic_auth.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
delegate_to: localhost
become: false
no_log: true
changed_when: false
when: site.basic_auth.provider == 'file'

- name: "Nginx | Site '{{ name }}' | Basic Auth | Pretty error"
Expand Down
2 changes: 1 addition & 1 deletion tasks/debian/add_site.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
owner: 'root'
group: "{{ NGINX_CONFIG.group }}"
tags: [auth]
notify: Reload-nginx
notify: [Validate-nginx-config, Reload-nginx]

- name: "Nginx | Site '{{ name }}' | Enabling"
ansible.builtin.file:
Expand Down
2 changes: 1 addition & 1 deletion tasks/debian/add_status.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
mode: 0640
owner: 'root'
group: "{{ NGINX_CONFIG.group }}"
notify: Reload-nginx
notify: [Validate-nginx-config, Reload-nginx]
tags: [config, sites, base]

- name: Nginx | Status page | Enabling
Expand Down
2 changes: 1 addition & 1 deletion tasks/debian/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
validate: "nginx -t -c %s"
insertafter: "{{ item.after | default(omit) }}"
backrefs: true
notify: Restart-nginx
notify: [Validate-nginx-config, Restart-nginx]
loop: "{{ NGINX_HC.main_config }}"

# todo: implement GeoIP
Expand Down
2 changes: 1 addition & 1 deletion tasks/debian/rm_site.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
loop:
- "/etc/nginx/sites-enabled/site_{{ name }}"
- "/etc/nginx/sites-available/site_{{ name }}"
notify: Restart-nginx
notify: [Validate-nginx-config, Restart-nginx]

- name: "Nginx | Remove Site '{{ name }}' | Removing local certificates"
ansible.builtin.file:
Expand Down
29 changes: 29 additions & 0 deletions templates/etc/nginx/sites-available/inc/site_http_config.j2
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,35 @@
return 405;
}
{% endif %}
{# User-agent based blocking. List entries are inserted into the regex verbatim - regex meta-characters inside them must be escaped in the list itself. #}
{% if site.security.block_script_bots | bool %}
# block well-known script-bots
# requests without a user-agent are rejected, too
if ($http_user_agent = "") {
  return {{ site.security.block_status_code }};
}
{% if NGINX_HC.user_agents.script.full | length > 0 %}
# full match: anchored, so only a user-agent equal to one of the entries is rejected
if ($http_user_agent ~* "^({{ NGINX_HC.user_agents.script.full | join('|') }})$") {
  return {{ site.security.block_status_code }};
}
{% endif %}
{% if NGINX_HC.user_agents.script.sub | length > 0 %}
# sub-string match: the regex is unanchored, so no surrounding '.*' is needed
if ($http_user_agent ~* "({{ NGINX_HC.user_agents.script.sub | join('|') }})") {
  return {{ site.security.block_status_code }};
}
{% endif %}
{% endif %}
{% if site.security.block_bad_crawler_bots | bool %}
# block well-known bad-crawler-bots
{% if NGINX_HC.user_agents.bad_crawlers.full | length > 0 %}
# full match: anchored, so only a user-agent equal to one of the entries is rejected
if ($http_user_agent ~* "^({{ NGINX_HC.user_agents.bad_crawlers.full | join('|') }})$") {
  return {{ site.security.block_status_code }};
}
{% endif %}
{% if NGINX_HC.user_agents.bad_crawlers.sub | length > 0 %}
# sub-string match: the regex is unanchored, so no surrounding '.*' is needed
if ($http_user_agent ~* "({{ NGINX_HC.user_agents.bad_crawlers.sub | join('|') }})") {
  return {{ site.security.block_status_code }};
}
{% endif %}
{% endif %}

{% if NGINX_CONFIG.config | length > 0 %}
# global config
Expand Down
4 changes: 2 additions & 2 deletions templates/etc/nginx/sites-available/inc/site_https.j2
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ server {
{% if site.config_additions_root | length > 0 %}
# additional lines
{% endif %}
{% for line in site.config_additions_root %}
{% for line in site.config_additions_root | ensure_list %}
{{ line }}
{% endfor %}

Expand All @@ -85,7 +85,7 @@ server {
{% if site.config_additions | length > 0 %}
# additional lines
{% endif %}
{% for line in site.config_additions %}
{% for line in site.config_additions | ensure_list %}
{{ line }}
{% endfor %}

Expand Down
29 changes: 29 additions & 0 deletions templates/etc/nginx/sites-available/inc/site_https_config.j2
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,35 @@
return 405;
}
{% endif %}
{# User-agent based blocking. List entries are inserted into the regex verbatim - regex meta-characters inside them must be escaped in the list itself. #}
{% if site.security.block_script_bots | bool %}
# block well-known script-bots
# requests without a user-agent are rejected, too
if ($http_user_agent = "") {
  return {{ site.security.block_status_code }};
}
{% if NGINX_HC.user_agents.script.full | length > 0 %}
# full match: anchored, so only a user-agent equal to one of the entries is rejected
if ($http_user_agent ~* "^({{ NGINX_HC.user_agents.script.full | join('|') }})$") {
  return {{ site.security.block_status_code }};
}
{% endif %}
{% if NGINX_HC.user_agents.script.sub | length > 0 %}
# sub-string match: the regex is unanchored, so no surrounding '.*' is needed
if ($http_user_agent ~* "({{ NGINX_HC.user_agents.script.sub | join('|') }})") {
  return {{ site.security.block_status_code }};
}
{% endif %}
{% endif %}
{% if site.security.block_bad_crawler_bots | bool %}
# block well-known bad-crawler-bots
{% if NGINX_HC.user_agents.bad_crawlers.full | length > 0 %}
# full match: anchored, so only a user-agent equal to one of the entries is rejected
if ($http_user_agent ~* "^({{ NGINX_HC.user_agents.bad_crawlers.full | join('|') }})$") {
  return {{ site.security.block_status_code }};
}
{% endif %}
{% if NGINX_HC.user_agents.bad_crawlers.sub | length > 0 %}
# sub-string match: the regex is unanchored, so no surrounding '.*' is needed
if ($http_user_agent ~* "({{ NGINX_HC.user_agents.bad_crawlers.sub | join('|') }})") {
  return {{ site.security.block_status_code }};
}
{% endif %}
{% endif %}

{% if NGINX_CONFIG.config | length > 0 %}
# global config
Expand Down

0 comments on commit 2182f84

Please sign in to comment.