diff --git a/lib/Catalyst/Plugin/HTML/Scrubber.pm b/lib/Catalyst/Plugin/HTML/Scrubber.pm
index 8479a68..cb90681 100644
--- a/lib/Catalyst/Plugin/HTML/Scrubber.pm
+++ b/lib/Catalyst/Plugin/HTML/Scrubber.pm
@@ -27,7 +27,7 @@ sub setup {
return $c->maybe::next::method(@_);
}
-sub prepare_parameters {
+sub execute {
my $c = shift;
$c->maybe::next::method(@_);
@@ -47,33 +47,113 @@ sub prepare_parameters {
sub html_scrub {
my ($c, $conf) = @_;
- param:
- for my $param (keys %{ $c->request->{parameters} }) {
- #while (my ($param, $value) = each %{ $c->request->{parameters} }) {
- my $value = \$c->request->{parameters}{$param};
- if (ref $$value && ref $$value ne 'ARRAY') {
- next param;
+ # If there's body_data - for e.g. a POSTed JSON body that was decoded -
+ # then we need to walk through it, scrubbing as appropriate
+ if (my $body_data = $c->request->body_data) {
+ $c->_scrub_recurse($conf, $c->request->body_data);
+ }
+
+ # And if Catalyst::Controller::REST is in use so we have $req->data,
+ # then scrub that too
+ if ($c->request->can('data')) {
+ my $data = $c->request->data;
+ if ($data) {
+ $c->_scrub_recurse($conf, $c->request->data);
}
+ }
+
+ # Normal query/POST body parameters:
+ $c->_scrub_recurse($conf, $c->request->parameters);
+
+}
- # If we only want to operate on certain params, do that checking
- # now...
- if ($conf && $conf->{ignore_params}) {
- my $ignore_params = $c->config->{scrubber}{ignore_params};
- if (ref $ignore_params ne 'ARRAY') {
- $ignore_params = [ $ignore_params ];
+# Recursively scrub param values...
+sub _scrub_recurse {
+ my ($c, $conf, $data) = @_;
+
+ # If the thing we've got is a hashref, walk over its keys, checking
+ # whether we should ignore, otherwise, do the needful
+ if (ref $data eq 'HASH') {
+ for my $key (keys %$data) {
+ if (!$c->_should_scrub_param($conf, $key)) {
+ next;
+ }
+
+ # OK, it's fine to fettle with this key - if its value is
+ # a ref, recurse, otherwise, scrub
+ if (my $ref = ref $data->{$key}) {
+ $c->_scrub_recurse($conf, $data->{$key});
+ } else {
+ # Alright, non-ref value, so scrub it
+ # FIXME why did we have to have this ref-ref handling fun?
+ #$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
+ $data->{$key} = $c->_scrubber->scrub($data->{$key});
}
- for my $ignore_param (@$ignore_params) {
- if (ref $ignore_param eq 'Regexp') {
- next param if $param =~ $ignore_param;
- } else {
- next param if $param eq $ignore_param;
- }
+ }
+ } elsif (ref $data eq 'ARRAY') {
+ # Simple - scrub all the values
+ $_ = $c->_scrubber->scrub($_) for @$data;
+ for (@$data) {
+ if (ref $_) {
+ $c->_scrub_recurse($conf, $_);
+ } else {
+ $_ = $c->_scrubber->scrub($_);
}
- }
+ }
+ } elsif (ref $data eq 'CODE') {
+ $c->log->debug("Can't scrub a coderef!");
+ } else {
+ # This shouldn't happen, as we should always start with a ref,
+ # and non-ref hash/array values should have been handled above.
+ $c->log->debug("Non-ref to scrub - should this happen?");
+ }
+}
- # If we're still here, we want to scrub this param's value.
- $_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
+sub _should_scrub_param {
+ my ($c, $conf, $param) = @_;
+ # If we only want to operate on certain params, do that checking
+ # now...
+ if ($conf && $conf->{ignore_params}) {
+ my $ignore_params = $c->config->{scrubber}{ignore_params};
+ if (ref $ignore_params ne 'ARRAY') {
+ $ignore_params = [ $ignore_params ];
+ }
+ for my $ignore_param (@$ignore_params) {
+ if (ref $ignore_param eq 'Regexp') {
+ return if $param =~ $ignore_param;
+ } else {
+ return if $param eq $ignore_param;
+ }
+ }
}
+
+ # If we've not bailed above, we didn't match any ignore_params
+ # entries, or didn't have any, so we do want to scrub
+ return 1;
+}
+
+
+# Incredibly nasty monkey-patch to rewind filehandle before parsing - see
+# https://github.com/perl-catalyst/catalyst-runtime/pull/186
+# First, get the default handlers hashref:
+my $default_data_handlers = Catalyst->default_data_handlers();
+
+# Wrap the coderef for application/json in one that rewinds the filehandle
+# first:
+my $orig_json_handler = $default_data_handlers->{'application/json'};
+$default_data_handlers->{'application/json'} = sub {
+ $_[0]->seek(0,0); # rewind $fh arg
+ $orig_json_handler->(@_);
+};
+
+
+{
+ # and now replace the original default_data_handlers() with a version that
+ # returns our modified handlers
+ no warnings 'redefine';
+ *Catalyst::default_data_handlers = sub {
+ return $default_data_handlers;
+ };
}
__PACKAGE__->meta->make_immutable;
@@ -124,9 +204,11 @@ See SYNOPSIS for how to configure the plugin, both with its own configuration
passing on any options from L to control exactly what
scrubbing happens.
-=item prepare_parameters
+=item dispatch
-Sanitize HTML tags in all parameters (unless `ignore_params` exempts them).
+Sanitize HTML tags in all parameters (unless `ignore_params` exempts them) -
+this includes normal POST params, and serialised data (e.g. a POSTed JSON body)
+accessed via `$c->req->body_data` or `$c->req->data`.
=back
diff --git a/t/00_compile.t b/t/00_compile.t
index 282d2b1..e62167c 100644
--- a/t/00_compile.t
+++ b/t/00_compile.t
@@ -1,4 +1,4 @@
use strict;
use Test::More tests => 1;
-BEGIN { use_ok 'Catalyst::Plugin::HTML::Scrubber' }
+BEGIN { use Catalyst; use_ok 'Catalyst::Plugin::HTML::Scrubber' }
diff --git a/t/03_params.t b/t/03_params.t
index af944ce..b7ab603 100644
--- a/t/03_params.t
+++ b/t/03_params.t
@@ -12,26 +12,93 @@ use Test::More;
{
my $req = GET('/');
my ($res, $c) = ctx_request($req);
- ok($res->code == RC_OK, 'response ok');
+ is($res->code, RC_OK, 'response ok');
is($res->content, 'index', 'content ok');
}
{
my $req = POST('/', [foo => 'bar']);
my ($res, $c) = ctx_request($req);
- ok($res->code == RC_OK, 'response ok');
- is($c->req->param('foo'), 'bar', 'parameter ok');
+ is($res->code, RC_OK, 'response ok');
+ is($c->req->param('foo'), 'bar', 'Normal POST body param, nothing to strip, left alone');
}
{
my $req = POST('/', [foo => 'bar']);
my ($res, $c) = ctx_request($req);
- ok($res->code == RC_OK, 'response ok');
- is($c->req->param('foo'), 'bar');
+ is($res->code, RC_OK, 'response ok');
+ is($c->req->param('foo'), 'bar', 'XSS stripped from normal POST body param');
}
{
+ # we allow in the test app config so this should not be stripped
my $req = POST('/', [foo => 'bar']);
my ($res, $c) = ctx_request($req);
- ok($res->code == RC_OK, 'response ok');
- is($c->req->param('foo'), 'bar', 'parameter ok');
+ is($res->code, RC_OK, 'response ok');
+ is($c->req->param('foo'), 'bar', 'Allowed tag not stripped');
+}
+{
+ diag "HTML left alone in ignored field - by regex match";
+ my $value = 'Bar
Foo
';
+ my $req = POST('/', [foo_html => $value]);
+ my ($res, $c) = ctx_request($req);
+ is($res->code, RC_OK, 'response ok');
+ is(
+ $c->req->param('foo_html'),
+ $value,
+ 'HTML left alone in ignored (by regex) field',
+ );
+}
+{
+ diag "HTML left alone in ignored field - by name";
+ my $value = 'Bar
Foo
';
+ my $req = POST('/', [ignored_param => $value]);
+ my ($res, $c) = ctx_request($req);
+ is($res->code, RC_OK, 'response ok');
+ is(
+ $c->req->param('ignored_param'),
+ $value,
+ 'HTML left alone in ignored (by name) field',
+ );
+}
+
+{
+ # Test that data in a JSON body POSTed gets scrubbed too
+ my $json_body = <",
+ "baz":{
+ "one":"Second-level
"
+ },
+ "arr": [
+ "one test
",
+ "two "
+ ],
+ "some_html": "Leave this alone:
"
+}
+JSON
+ my $req = POST('/',
+ Content_Type => 'application/json', Content => $json_body
+ );
+ my ($res, $c) = ctx_request($req);
+ is($res->code, RC_OK, 'response ok');
+ is(
+ $c->req->body_data->{foo},
+ 'Top-level ', # note trailing space where img was removed
+ 'Top level body param scrubbed',
+ );
+ is(
+ $c->req->body_data->{baz}{one},
+ 'Second-level ',
+ 'Second level body param scrubbed',
+ );
+ is(
+ $c->req->body_data->{arr}[0],
+ 'one test ',
+ 'Second level array contents scrubbbed',
+ );
+ is(
+ $c->req->body_data->{some_html},
+ 'Leave this alone:
',
+ 'Body data param matching ignore_params left alone',
+ );
}
done_testing();
diff --git a/t/05_ignore_params.t b/t/05_ignore_params.t
deleted file mode 100644
index b28061f..0000000
--- a/t/05_ignore_params.t
+++ /dev/null
@@ -1,53 +0,0 @@
-use strict;
-use warnings;
-
-use FindBin qw($Bin);
-use lib "$Bin/lib";
-
-use Catalyst::Test 'MyApp05';
-use HTTP::Request::Common;
-use HTTP::Status;
-use Test::More;
-
-{
- diag "Simple request with no params";
- my $req = GET('/');
- my ($res, $c) = ctx_request($req);
- ok($res->code == RC_OK, 'response ok');
- is($res->content, 'index', 'content ok');
-}
-{
- diag "Request wth one param, nothing to strip";
- my $req = POST('/', [foo => 'bar']);
- my ($res, $c) = ctx_request($req);
- ok($res->code == RC_OK, 'response ok');
- is($c->req->param('foo'), 'bar', 'parameter ok');
-}
-{
- diag "Request with XSS attempt gets stripped";
- my $req = POST('/', [foo => 'bar']);
- my ($res, $c) = ctx_request($req);
- ok($res->code == RC_OK, 'response ok');
- is($c->req->param('foo'), 'bar', 'XSS was stripped');
-}
-{
- diag "HTML left alone in ignored field - by regex match";
- my $value = 'Bar
Foo
';
- my $req = POST('/', [foo_html => $value]);
- my ($res, $c) = ctx_request($req);
- ok($res->code == RC_OK, 'response ok');
- is($c->req->param('foo_html'), $value, 'HTML left alone in ignored field');
-}
-{
- diag "HTML left alone in ignored field - by name";
- my $value = 'Bar
Foo
';
- my $req = POST('/', [ignored_param => $value]);
- my ($res, $c) = ctx_request($req);
- ok($res->code == RC_OK, 'response ok');
- is($c->req->param('ignored_param'), $value, 'HTML left alone in ignored field');
-}
-
-
-
-done_testing();
-
diff --git a/t/05_rest.t b/t/05_rest.t
new file mode 100644
index 0000000..e680a47
--- /dev/null
+++ b/t/05_rest.t
@@ -0,0 +1,60 @@
+use strict;
+use warnings;
+
+use FindBin qw($Bin);
+use lib "$Bin/lib";
+
+use Test::More;
+
+
+eval 'use Catalyst::Controller::REST';
+plan skip_all => 'Catalyst::Controller::REST not available, skip REST tests' if $@;
+
+use Catalyst::Test 'MyApp05';
+use HTTP::Request::Common;
+use HTTP::Status;
+
+{
+ # Test that data in a JSON body POSTed gets scrubbed too
+ my $json_body = <",
+ "baz":{
+ "one":"Second-level
"
+ },
+ "arr": [
+ "one test
",
+ "two "
+ ],
+ "some_html": "Leave this alone:
"
+}
+JSON
+ my $req = POST('/foo',
+ Content_Type => 'application/json', Content => $json_body
+ );
+ my ($res, $c) = ctx_request($req);
+ is($res->code, RC_OK, 'response ok');
+ is(
+ $c->req->data->{foo},
+ 'Top-level ', # note trailing space where img was removed
+ 'Top level body param scrubbed',
+ );
+ is(
+ $c->req->data->{baz}{one},
+ 'Second-level ',
+ 'Second level body param scrubbed',
+ );
+ is(
+ $c->req->data->{arr}[0],
+ 'one test ',
+ 'Second level array contents scrubbbed',
+ );
+ is(
+ $c->req->data->{some_html},
+ 'Leave this alone:
',
+ 'Body data param matching ignore_params left alone',
+ );
+}
+
+done_testing();
+
diff --git a/t/lib/MyApp03.pm b/t/lib/MyApp03.pm
index 5ba1683..615f94a 100644
--- a/t/lib/MyApp03.pm
+++ b/t/lib/MyApp03.pm
@@ -9,8 +9,17 @@ extends 'Catalyst';
__PACKAGE__->config(
name => 'MyApp03',
- scrubber => [allow => [qw/br hr b/],]
+ scrubber => {
+ auto => 1,
+
+ ignore_params => [ qr/_html$/, 'ignored_param' ],
+
+ # params for HTML::Scrubber
+ params => [
+ allow => [qw/br hr b/],
+ ],
+ }
);
__PACKAGE__->setup();
diff --git a/t/lib/MyApp05.pm b/t/lib/MyApp05.pm
index ffac01c..3131524 100644
--- a/t/lib/MyApp05.pm
+++ b/t/lib/MyApp05.pm
@@ -10,13 +10,20 @@ extends 'Catalyst';
__PACKAGE__->config(
name => 'MyApp03',
scrubber => {
- ignore_params => [
- qr/_html$/,
- 'ignored_param',
+
+ auto => 1,
+
+ ignore_params => [ qr/_html$/, 'ignored_param' ],
+
+ # params for HTML::Scrubber
+ params => [
+ allow => [qw/br hr b/],
],
- },
+ }
);
-__PACKAGE__->setup();
+
+
+__PACKAGE__->setup();
1;
diff --git a/t/lib/MyApp05/Controller/Root.pm b/t/lib/MyApp05/Controller/Root.pm
index 2a1feeb..937f72c 100644
--- a/t/lib/MyApp05/Controller/Root.pm
+++ b/t/lib/MyApp05/Controller/Root.pm
@@ -3,15 +3,30 @@ package MyApp05::Controller::Root;
use Moose;
use namespace::autoclean;
-BEGIN { extends 'Catalyst::Controller'; }
+BEGIN { extends 'Catalyst::Controller::REST' }
-__PACKAGE__->config(namespace => '');
+__PACKAGE__->config(
+ namespace => '',
+);
-sub index : Path : Args(0) {
+# default to avoid "No default action defined"
+sub foo : Path : ActionClass('REST') { }
+
+sub foo_GET {
my ($self, $c) = @_;
$c->res->body('index');
}
+sub foo_POST {
+ my ($self, $c) = @_;
+ $c->res->body('POST received');
+}
+
+sub index {
+ my ($self, $c) = @_;
+ $c->res->body("DEFAULT");
+}
+
1;