Skip to content

Commit

Permalink
add cloudflare turnstile
Browse files Browse the repository at this point in the history
  • Loading branch information
stkenny committed Feb 14, 2025
1 parent 52746d6 commit 294db72
Show file tree
Hide file tree
Showing 10 changed files with 428 additions and 0 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ gem 'high_voltage', '~> 3.1'
gem 'is_it_working-cbeer'
gem 'nest'
gem 'resque'
gem 'rack-attack', '~> 6.6'

group :development, :production do
gem 'appsignal'
Expand Down
3 changes: 3 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,8 @@ GEM
rdf
racc (1.8.1)
rack (2.2.10)
rack-attack (6.7.0)
rack (>= 1.0, < 4)
rack-protection (3.2.0)
base64 (>= 0.1.0)
rack (~> 2.2, >= 2.2.4)
Expand Down Expand Up @@ -1278,6 +1280,7 @@ DEPENDENCIES
paper_trail
parallel_tests
qa (~> 5.1)
rack-attack (~> 6.6)
railroady
rails (> 6, < 7.2)
rails-controller-testing
Expand Down
1 change: 1 addition & 0 deletions app/controllers/application_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ class ApplicationController < ActionController::Base
helper Openseadragon::OpenseadragonHelper
before_action :authenticate_user_from_token!
before_action :set_locale, :set_metadata_language
before_action { |controller| BotDetectController.bot_detection_enforce_filter(controller) }

include HttpAcceptLanguage

Expand Down
200 changes: 200 additions & 0 deletions app/controllers/bot_detect_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# This controller has actions for issuing a challenge page for CloudFlare Turnstile product,
# and then redirecting back to desired page.
#
# It also includes logic for configuring rack attack and a Rails controller filter to enforce
# redirection to these actions. All the logic related to bot detection with turnstile is
# mostly in this file -- with very flexible configuration in class_attributes -- to faciliate
# future extraction to a re-usable gem if desired.
#
# See more local docs at https://sciencehistory.atlassian.net/wiki/spaces/HDC/pages/2645098498/Cloudflare+Turnstile+bot+detection
#
class BotDetectController < ApplicationController
# Config for bot detection is held here in class_attributes, kind of wonky, but it works
#
# These are defaults ready for extraction to a gem, in general here at Sci Hist if we want
# to set config we do it in ./config/initializers/rack_attack.rb

class_attribute :enabled, default: false # Must set to true to turn on at all

class_attribute :cf_turnstile_sitekey, default: "1x00000000000000000000AA" # a testing key that always passes
class_attribute :cf_turnstile_secret_key, default: "1x0000000000000000000000000000000AA" # a testing key always passes
# Turnstile testing keys: https://developers.cloudflare.com/turnstile/troubleshooting/testing/

# up to rate_limit_count requests in rate_limit_period before challenged
class_attribute :rate_limit_period, default: 12.hour
class_attribute :rate_limit_count, default: 10

# how long is a challenge pass good for before re-challenge?
class_attribute :session_passed_good_for, default: 24.hours

# An array, can be:
# * a string, path prefix
# * a hash of rails route-decoded params, like `{ controller: "something" }`,
# or `{ controller: "something", action: "index" }
# The hash is more expensive to check and uses some not-technically-public
# Rails api, but it's just so convenient.
#
# Used by default :location_matcher, if set custom may not be used
class_attribute :rate_limited_locations, default: []

# Executed at the _controller_ filter level, to last minute exempt certain
# actions from protection.
class_attribute :allow_exempt, default: ->(controller) { false }


# rate limit per subnet, following lehigh's lead, although we use a smaller
# subnet: /24 for IPv4, and /72 for IPv6
# https://git.drupalcode.org/project/turnstile_protect/-/blob/0dae9f95d48f9d8cae5a8e61e767c69f64490983/src/EventSubscriber/Challenge.php#L140-151
class_attribute :rate_limit_discriminator, default: (lambda do |req|
if req.ip.index(":") # ipv6
IPAddr.new("#{req.ip}/24").to_string
else
IPAddr.new("#{req.ip}/72").to_string
end
rescue IPAddr::InvalidAddressError
req.ip
end)

class_attribute :location_matcher, default: ->(rack_req) {
parsed_route = nil
rate_limited_locations.any? do |val|
case val
when Hash
begin
# #recognize_path may e not techinically public API, and may be expensive, but
# no other way to do this, and it's mentioned in rack-attack:
# https://github.com/rack/rack-attack/blob/86650c4f7ea1af24fe4a89d3040e1309ee8a88bc/docs/advanced_configuration.md#match-actions-in-rails
# We do it lazily only if needed so if you don't want that don't use it.
parsed_route ||= rack_req.env["action_dispatch.routes"].recognize_path(rack_req.url, method: rack_req.request_method)
parsed_route && parsed_route >= val
rescue ActionController::RoutingError
false
end
when String
# string complete path at beginning, must end in ?, or end of string
/\A#{Regexp.escape val}(\/|\?|\Z)/ =~ rack_req.path
end
end
}
class_attribute :cf_turnstile_js_url, default: "https://challenges.cloudflare.com/turnstile/v0/api.js"
class_attribute :cf_turnstile_validation_url, default: "https://challenges.cloudflare.com/turnstile/v0/siteverify"
class_attribute :cf_timeout, default: 3 # max timeout seconds waiting on Cloudfront Turnstile api
helper_method :cf_turnstile_js_url, :cf_turnstile_sitekey

# key stored in Rails session object with channge passed confirmed
class_attribute :session_passed_key, default: "bot_detection-passed"

# key in rack env that says challenge is required
class_attribute :env_challenge_trigger_key, default: "bot_detect.should_challenge"

# for allowing unsubscribe for testing
class_attribute :_track_notification_subscription, instance_accessor: false

# perhaps in an initializer, and after changing any config, run:
#
# Rails.application.config.to_prepare do
# BotDetectController.rack_attack_init
# end
#
# Safe to call more than once if you change config and want to call again, say in testing.
def self.rack_attack_init
self._rack_attack_uninit # make it safe for calling multiple times

## Turnstile bot detection throttling
#
# for paths matched by `rate_limited_locations`, after over rate_limit count requests in rate_limit_period,
# token will be stored in rack env instructing challenge is required.
#
# For actual challenge, need before_action in controller.
#
# You could rate limit detect on wider paths than you actually challenge on, or the same. You probably
# don't want to rate-limit detect on narrower list of paths than you challenge on!
Rack::Attack.track("bot_detect/rate_exceeded",
limit: self.rate_limit_count,
period: self.rate_limit_period) do |req|
if self.enabled && self.location_matcher.call(req)
self.rate_limit_discriminator.call(req)
end
end

self._track_notification_subscription = ActiveSupport::Notifications.subscribe("track.rack_attack") do |_name, _start, _finish, request_id, payload|
rack_request = payload[:request]
rack_env = rack_request.env
match_name = rack_env["rack.attack.matched"] # name of rack-attack rule

if match_name == "bot_detect/rate_exceeded"
match_data = rack_env["rack.attack.match_data"]
match_data_formatted = match_data.slice(:count, :limit, :period).map { |k, v| "#{k}=#{v}"}.join(" ")
discriminator = rack_env["rack.attack.match_discriminator"] # unique key for rate limit, usually includes ip

rack_env[self.env_challenge_trigger_key] = true
end
end
end

def self._rack_attack_uninit
Rack::Attack.track("bot_detect/rate_exceeded") {} # overwrite track name with empty proc
ActiveSupport::Notifications.unsubscribe(self._track_notification_subscription) if self._track_notification_subscription
self._track_notification_subscription = nil
end

# Usually in your ApplicationController,
#
# before_action { |controller| BotDetectController.bot_detection_enforce_filter(controller) }
def self.bot_detection_enforce_filter(controller)
if self.enabled &&
controller.request.env[self.env_challenge_trigger_key] &&
!controller.session[self.session_passed_key].try { |date| Time.now - Time.new(date) < self.session_passed_good_for } &&
!controller.kind_of?(self) && # don't ever guard ourself, that'd be a mess!
! self.allow_exempt.call(controller)

# we can only do GET requests right now
if !controller.request.get?
Rails.logger.warn("#{self}: Asked to protect request we could not, unprotected: #{controller.requet.method} #{controller.request.url}, (#{controller.request.remote_ip}, #{controller.request.user_agent})")
return
end

Rails.logger.info("#{self.name}: Cloudflare Turnstile challenge redirect: (#{controller.request.remote_ip}, #{controller.request.user_agent}): from #{controller.request.url}")
# status code temporary
controller.redirect_to controller.bot_detect_challenge_path(dest: controller.request.original_fullpath), status: 307
end
end


def challenge
end

def verify_challenge
body = {
secret: self.cf_turnstile_secret_key,
response: params["cf_turnstile_response"],
remoteip: request.remote_ip
}

http = HTTP.timeout(self.cf_timeout)
response = http.post(self.cf_turnstile_validation_url,
json: body)

result = response.parse
# {"success"=>true, "error-codes"=>[], "challenge_ts"=>"2025-01-06T17:44:28.544Z", "hostname"=>"example.com", "metadata"=>{"result_with_testing_key"=>true}}
# {"success"=>false, "error-codes"=>["invalid-input-response"], "messages"=>[], "metadata"=>{"result_with_testing_key"=>true}}

if result["success"]
# mark it as succesful in session, and record time. They do need a session/cookies
# to get through the challenge.
session[self.session_passed_key] = Time.now.utc.iso8601
else
Rails.logger.warn("#{self.class.name}: Cloudflare Turnstile validation failed (#{request.remote_ip}, #{request.user_agent}): #{result}")
end

# let's just return the whole thing to client? Is there anything confidential there?
render json: result
rescue HTTP::Error, JSON::ParserError => e
# probably an http timeout? or something weird.
Rails.logger.warn("#{self.class.name}: Cloudflare turnstile validation error (#{request.remote_ip}, #{request.user_agent}): #{e}: #{response&.body}")
render json: {
success: false,
http_exception: e
}
end
end
67 changes: 67 additions & 0 deletions app/views/bot_detect/challenge.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<% content_for(:head) do %>
<script src="<%= cf_turnstile_js_url %>" async defer></script>
<meta name="robots" content="noindex">
<% end %>


<h1 class="brand-alt-h1 mb-4">Traffic control and bot detection...</h2>

<div
class="cf-turnstile"
data-sitekey="<%= cf_turnstile_sitekey %>"
data-callback="turnstileCallback"
></div>

<noscript>
<div class="alert alert-danger">Sorry, Javascript is required to be enabled for our traffic check, and does not appear available.</div>
</noscript>

<p>We strive to make our content freely available. If this check is preventing you from making use of our resources, make sure you have cookies enabled. If you still have trouble, please <a href="mailto:[email protected]">get in touch</a>.</p>

<script type="text/javascript">
async function turnstileCallback(token) {
try {
// I don't know if we should be disabling CSRF for this one, but we'll just use it
const csrfToken = document.querySelector("[name='csrf-token']");

const response = await fetch('<%= bot_detect_challenge_path %>', {
method: 'POST',
headers: {
"X-CSRF-Token": csrfToken?.content,
"Content-Type": "application/json"
},
body: JSON.stringify({ cf_turnstile_response: token }),
});

if (!response.ok) {
throw new Error('bad response: ' + response.status + ": " + response.url);
}

result = await response.json();
if (result["success"] == true) {
const dest = new URLSearchParams(window.location.search).get("dest");
// For security make sure it only has path and on
if (!dest.startsWith("/") || dest.startsWith("//")) {
throw new Error("illegal non-local redirect: " + dest);
}
// replace the challenge page in history
window.location.replace(dest);
} else {
console.error("Turnstile response reported as failure: " + JSON.stringify(result))
_displayChallengeError();
}
} catch(error) {
console.error("Error processing turnstile challenge backend action: " + error);
_displayChallengeError();
}
}

function _displayChallengeError() {
document.querySelector(".cf-turnstile").outerHTML = `
<div class="alert alert-danger" role="alert">
<i class="fa fa-exclamation-triangle" aria-hidden="true"></i>
Check failed. Sorry, something has gone wrong, or your traffic looks unusual to us. You can try refreshing this page to try again.
</div>
`;
}
</script>
4 changes: 4 additions & 0 deletions config/environments/development.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
config.cache_store = :null_store
end

if Settings.cf_turnstile_enabled
config.cache_store = :memory_store
end

# Store uploaded files on the local file system (see config/storage.yml for options)
config.active_storage.service = :local

Expand Down
2 changes: 2 additions & 0 deletions config/environments/test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,6 @@
# config.action_view.raise_on_missing_translations = true

config.active_record.belongs_to_required_by_default = false

Rack::Attack.enabled = false
end
Loading

0 comments on commit 294db72

Please sign in to comment.