diff --git a/lib/Catalyst/Plugin/HTML/Scrubber.pm b/lib/Catalyst/Plugin/HTML/Scrubber.pm index 8479a68..cb90681 100644 --- a/lib/Catalyst/Plugin/HTML/Scrubber.pm +++ b/lib/Catalyst/Plugin/HTML/Scrubber.pm @@ -27,7 +27,7 @@ sub setup { return $c->maybe::next::method(@_); } -sub prepare_parameters { +sub execute { my $c = shift; $c->maybe::next::method(@_); @@ -47,33 +47,113 @@ sub prepare_parameters { sub html_scrub { my ($c, $conf) = @_; - param: - for my $param (keys %{ $c->request->{parameters} }) { - #while (my ($param, $value) = each %{ $c->request->{parameters} }) { - my $value = \$c->request->{parameters}{$param}; - if (ref $$value && ref $$value ne 'ARRAY') { - next param; + # If there's body_data - for e.g. a POSTed JSON body that was decoded - + # then we need to walk through it, scrubbing as appropriate + if (my $body_data = $c->request->body_data) { + $c->_scrub_recurse($conf, $c->request->body_data); + } + + # And if Catalyst::Controller::REST is in use so we have $req->data, + # then scrub that too + if ($c->request->can('data')) { + my $data = $c->request->data; + if ($data) { + $c->_scrub_recurse($conf, $c->request->data); } + } + + # Normal query/POST body parameters: + $c->_scrub_recurse($conf, $c->request->parameters); + +} - # If we only want to operate on certain params, do that checking - # now... - if ($conf && $conf->{ignore_params}) { - my $ignore_params = $c->config->{scrubber}{ignore_params}; - if (ref $ignore_params ne 'ARRAY') { - $ignore_params = [ $ignore_params ]; +# Recursively scrub param values... +sub _scrub_recurse { + my ($c, $conf, $data) = @_; + + # If the thing we've got is a hashref, walk over its keys, checking + # whether we should ignore, otherwise, do the needful + if (ref $data eq 'HASH') { + for my $key (keys %$data) { + if (!$c->_should_scrub_param($conf, $key)) { + next; + } + + # OK, it's fine to fettle with this key - if its value is + # a ref, recurse, otherwise, scrub + if (my $ref = ref $data->{$key}) { + $c->_scrub_recurse($conf, $data->{$key}); + } else { + # Alright, non-ref value, so scrub it + # FIXME why did we have to have this ref-ref handling fun? + #$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value); + $data->{$key} = $c->_scrubber->scrub($data->{$key}); } - for my $ignore_param (@$ignore_params) { - if (ref $ignore_param eq 'Regexp') { - next param if $param =~ $ignore_param; - } else { - next param if $param eq $ignore_param; - } + } + } elsif (ref $data eq 'ARRAY') { + # Simple - scrub all the values + $_ = $c->_scrubber->scrub($_) for @$data; + for (@$data) { + if (ref $_) { + $c->_scrub_recurse($conf, $_); + } else { + $_ = $c->_scrubber->scrub($_); } - } + } + } elsif (ref $data eq 'CODE') { + $c->log->debug("Can't scrub a coderef!"); + } else { + # This shouldn't happen, as we should always start with a ref, + # and non-ref hash/array values should have been handled above. + $c->log->debug("Non-ref to scrub - should this happen?"); + } +} - # If we're still here, we want to scrub this param's value. - $_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value); +sub _should_scrub_param { + my ($c, $conf, $param) = @_; + # If we only want to operate on certain params, do that checking + # now... + if ($conf && $conf->{ignore_params}) { + my $ignore_params = $c->config->{scrubber}{ignore_params}; + if (ref $ignore_params ne 'ARRAY') { + $ignore_params = [ $ignore_params ]; + } + for my $ignore_param (@$ignore_params) { + if (ref $ignore_param eq 'Regexp') { + return if $param =~ $ignore_param; + } else { + return if $param eq $ignore_param; + } + } } + + # If we've not bailed above, we didn't match any ignore_params + # entries, or didn't have any, so we do want to scrub + return 1; +} + + +# Incredibly nasty monkey-patch to rewind filehandle before parsing - see +# https://github.com/perl-catalyst/catalyst-runtime/pull/186 +# First, get the default handlers hashref: +my $default_data_handlers = Catalyst->default_data_handlers(); + +# Wrap the coderef for application/json in one that rewinds the filehandle +# first: +my $orig_json_handler = $default_data_handlers->{'application/json'}; +$default_data_handlers->{'application/json'} = sub { + $_[0]->seek(0,0); # rewind $fh arg + $orig_json_handler->(@_); +}; + + +{ + # and now replace the original default_data_handlers() with a version that + # returns our modified handlers + no warnings 'redefine'; + *Catalyst::default_data_handlers = sub { + return $default_data_handlers; + }; } __PACKAGE__->meta->make_immutable; @@ -124,9 +204,11 @@ See SYNOPSIS for how to configure the plugin, both with its own configuration passing on any options from L to control exactly what scrubbing happens. -=item prepare_parameters +=item dispatch -Sanitize HTML tags in all parameters (unless `ignore_params` exempts them). +Sanitize HTML tags in all parameters (unless `ignore_params` exempts them) - +this includes normal POST params, and serialised data (e.g. a POSTed JSON body) +accessed via `$c->req->body_data` or `$c->req->data`. =back diff --git a/t/00_compile.t b/t/00_compile.t index 282d2b1..e62167c 100644 --- a/t/00_compile.t +++ b/t/00_compile.t @@ -1,4 +1,4 @@ use strict; use Test::More tests => 1; -BEGIN { use_ok 'Catalyst::Plugin::HTML::Scrubber' } +BEGIN { use Catalyst; use_ok 'Catalyst::Plugin::HTML::Scrubber' } diff --git a/t/03_params.t b/t/03_params.t index af944ce..b7ab603 100644 --- a/t/03_params.t +++ b/t/03_params.t @@ -12,26 +12,93 @@ use Test::More; { my $req = GET('/'); my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); + is($res->code, RC_OK, 'response ok'); is($res->content, 'index', 'content ok'); } { my $req = POST('/', [foo => 'bar']); my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo'), 'bar', 'parameter ok'); + is($res->code, RC_OK, 'response ok'); + is($c->req->param('foo'), 'bar', 'Normal POST body param, nothing to strip, left alone'); } { my $req = POST('/', [foo => 'bar']); my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo'), 'bar'); + is($res->code, RC_OK, 'response ok'); + is($c->req->param('foo'), 'bar', 'XSS stripped from normal POST body param'); } { + # we allow in the test app config so this should not be stripped my $req = POST('/', [foo => 'bar']); my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo'), 'bar', 'parameter ok'); + is($res->code, RC_OK, 'response ok'); + is($c->req->param('foo'), 'bar', 'Allowed tag not stripped'); +} +{ + diag "HTML left alone in ignored field - by regex match"; + my $value = '

Bar

Foo

'; + my $req = POST('/', [foo_html => $value]); + my ($res, $c) = ctx_request($req); + is($res->code, RC_OK, 'response ok'); + is( + $c->req->param('foo_html'), + $value, + 'HTML left alone in ignored (by regex) field', + ); +} +{ + diag "HTML left alone in ignored field - by name"; + my $value = '

Bar

Foo

'; + my $req = POST('/', [ignored_param => $value]); + my ($res, $c) = ctx_request($req); + is($res->code, RC_OK, 'response ok'); + is( + $c->req->param('ignored_param'), + $value, + 'HTML left alone in ignored (by name) field', + ); +} + +{ + # Test that data in a JSON body POSTed gets scrubbed too + my $json_body = <", + "baz":{ + "one":"Second-level " + }, + "arr": [ + "one test ", + "two " + ], + "some_html": "Leave this alone: " +} +JSON + my $req = POST('/', + Content_Type => 'application/json', Content => $json_body + ); + my ($res, $c) = ctx_request($req); + is($res->code, RC_OK, 'response ok'); + is( + $c->req->body_data->{foo}, + 'Top-level ', # note trailing space where img was removed + 'Top level body param scrubbed', + ); + is( + $c->req->body_data->{baz}{one}, + 'Second-level ', + 'Second level body param scrubbed', + ); + is( + $c->req->body_data->{arr}[0], + 'one test ', + 'Second level array contents scrubbbed', + ); + is( + $c->req->body_data->{some_html}, + 'Leave this alone: ', + 'Body data param matching ignore_params left alone', + ); } done_testing(); diff --git a/t/05_ignore_params.t b/t/05_ignore_params.t deleted file mode 100644 index b28061f..0000000 --- a/t/05_ignore_params.t +++ /dev/null @@ -1,53 +0,0 @@ -use strict; -use warnings; - -use FindBin qw($Bin); -use lib "$Bin/lib"; - -use Catalyst::Test 'MyApp05'; -use HTTP::Request::Common; -use HTTP::Status; -use Test::More; - -{ - diag "Simple request with no params"; - my $req = GET('/'); - my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($res->content, 'index', 'content ok'); -} -{ - diag "Request wth one param, nothing to strip"; - my $req = POST('/', [foo => 'bar']); - my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo'), 'bar', 'parameter ok'); -} -{ - diag "Request with XSS attempt gets stripped"; - my $req = POST('/', [foo => 'bar']); - my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo'), 'bar', 'XSS was stripped'); -} -{ - diag "HTML left alone in ignored field - by regex match"; - my $value = '

Bar

Foo

'; - my $req = POST('/', [foo_html => $value]); - my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo_html'), $value, 'HTML left alone in ignored field'); -} -{ - diag "HTML left alone in ignored field - by name"; - my $value = '

Bar

Foo

'; - my $req = POST('/', [ignored_param => $value]); - my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($c->req->param('ignored_param'), $value, 'HTML left alone in ignored field'); -} - - - -done_testing(); - diff --git a/t/05_rest.t b/t/05_rest.t new file mode 100644 index 0000000..e680a47 --- /dev/null +++ b/t/05_rest.t @@ -0,0 +1,60 @@ +use strict; +use warnings; + +use FindBin qw($Bin); +use lib "$Bin/lib"; + +use Test::More; + + +eval 'use Catalyst::Controller::REST'; +plan skip_all => 'Catalyst::Controller::REST not available, skip REST tests' if $@; + +use Catalyst::Test 'MyApp05'; +use HTTP::Request::Common; +use HTTP::Status; + +{ + # Test that data in a JSON body POSTed gets scrubbed too + my $json_body = <", + "baz":{ + "one":"Second-level " + }, + "arr": [ + "one test ", + "two " + ], + "some_html": "Leave this alone: " +} +JSON + my $req = POST('/foo', + Content_Type => 'application/json', Content => $json_body + ); + my ($res, $c) = ctx_request($req); + is($res->code, RC_OK, 'response ok'); + is( + $c->req->data->{foo}, + 'Top-level ', # note trailing space where img was removed + 'Top level body param scrubbed', + ); + is( + $c->req->data->{baz}{one}, + 'Second-level ', + 'Second level body param scrubbed', + ); + is( + $c->req->data->{arr}[0], + 'one test ', + 'Second level array contents scrubbbed', + ); + is( + $c->req->data->{some_html}, + 'Leave this alone: ', + 'Body data param matching ignore_params left alone', + ); +} + +done_testing(); + diff --git a/t/lib/MyApp03.pm b/t/lib/MyApp03.pm index 5ba1683..615f94a 100644 --- a/t/lib/MyApp03.pm +++ b/t/lib/MyApp03.pm @@ -9,8 +9,17 @@ extends 'Catalyst'; __PACKAGE__->config( name => 'MyApp03', - scrubber => [allow => [qw/br hr b/],] + scrubber => { + auto => 1, + + ignore_params => [ qr/_html$/, 'ignored_param' ], + + # params for HTML::Scrubber + params => [ + allow => [qw/br hr b/], + ], + } ); __PACKAGE__->setup(); diff --git a/t/lib/MyApp05.pm b/t/lib/MyApp05.pm index ffac01c..3131524 100644 --- a/t/lib/MyApp05.pm +++ b/t/lib/MyApp05.pm @@ -10,13 +10,20 @@ extends 'Catalyst'; __PACKAGE__->config( name => 'MyApp03', scrubber => { - ignore_params => [ - qr/_html$/, - 'ignored_param', + + auto => 1, + + ignore_params => [ qr/_html$/, 'ignored_param' ], + + # params for HTML::Scrubber + params => [ + allow => [qw/br hr b/], ], - }, + } ); -__PACKAGE__->setup(); + + +__PACKAGE__->setup(); 1; diff --git a/t/lib/MyApp05/Controller/Root.pm b/t/lib/MyApp05/Controller/Root.pm index 2a1feeb..937f72c 100644 --- a/t/lib/MyApp05/Controller/Root.pm +++ b/t/lib/MyApp05/Controller/Root.pm @@ -3,15 +3,30 @@ package MyApp05::Controller::Root; use Moose; use namespace::autoclean; -BEGIN { extends 'Catalyst::Controller'; } +BEGIN { extends 'Catalyst::Controller::REST' } -__PACKAGE__->config(namespace => ''); +__PACKAGE__->config( + namespace => '', +); -sub index : Path : Args(0) { +# default to avoid "No default action defined" +sub foo : Path : ActionClass('REST') { } + +sub foo_GET { my ($self, $c) = @_; $c->res->body('index'); } +sub foo_POST { + my ($self, $c) = @_; + $c->res->body('POST received'); +} + +sub index { + my ($self, $c) = @_; + $c->res->body("DEFAULT"); +} + 1;