Skip to content

Commit a8b7b5e

Browse files
authored
Merge pull request #3 from bigpresh/bigpresh/scrub_deserialised_body_data_params
Scrub body_data / data params too (e.g. POSTed JSON)
2 parents 6ac9a8e + 4330290 commit a8b7b5e

File tree

8 files changed

+281
-94
lines changed

8 files changed

+281
-94
lines changed

lib/Catalyst/Plugin/HTML/Scrubber.pm

Lines changed: 106 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ sub setup {
2727
return $c->maybe::next::method(@_);
2828
}
2929

30-
sub prepare_parameters {
30+
sub execute {
3131
my $c = shift;
3232

3333
$c->maybe::next::method(@_);
@@ -47,33 +47,113 @@ sub prepare_parameters {
4747
sub html_scrub {
4848
my ($c, $conf) = @_;
4949

50-
param:
51-
for my $param (keys %{ $c->request->{parameters} }) {
52-
#while (my ($param, $value) = each %{ $c->request->{parameters} }) {
53-
my $value = \$c->request->{parameters}{$param};
54-
if (ref $$value && ref $$value ne 'ARRAY') {
55-
next param;
50+
# If there's body_data - for e.g. a POSTed JSON body that was decoded -
51+
# then we need to walk through it, scrubbing as appropriate
52+
if (my $body_data = $c->request->body_data) {
53+
$c->_scrub_recurse($conf, $c->request->body_data);
54+
}
55+
56+
# And if Catalyst::Controller::REST is in use so we have $req->data,
57+
# then scrub that too
58+
if ($c->request->can('data')) {
59+
my $data = $c->request->data;
60+
if ($data) {
61+
$c->_scrub_recurse($conf, $c->request->data);
5662
}
63+
}
64+
65+
# Normal query/POST body parameters:
66+
$c->_scrub_recurse($conf, $c->request->parameters);
67+
68+
}
5769

58-
# If we only want to operate on certain params, do that checking
59-
# now...
60-
if ($conf && $conf->{ignore_params}) {
61-
my $ignore_params = $c->config->{scrubber}{ignore_params};
62-
if (ref $ignore_params ne 'ARRAY') {
63-
$ignore_params = [ $ignore_params ];
70+
# Recursively scrub param values...
71+
sub _scrub_recurse {
72+
my ($c, $conf, $data) = @_;
73+
74+
# If the thing we've got is a hashref, walk over its keys, checking
75+
# whether we should ignore, otherwise, do the needful
76+
if (ref $data eq 'HASH') {
77+
for my $key (keys %$data) {
78+
if (!$c->_should_scrub_param($conf, $key)) {
79+
next;
80+
}
81+
82+
# OK, it's fine to fettle with this key - if its value is
83+
# a ref, recurse, otherwise, scrub
84+
if (my $ref = ref $data->{$key}) {
85+
$c->_scrub_recurse($conf, $data->{$key});
86+
} else {
87+
# Alright, non-ref value, so scrub it
88+
# FIXME why did we have to have this ref-ref handling fun?
89+
#$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
90+
$data->{$key} = $c->_scrubber->scrub($data->{$key});
6491
}
65-
for my $ignore_param (@$ignore_params) {
66-
if (ref $ignore_param eq 'Regexp') {
67-
next param if $param =~ $ignore_param;
68-
} else {
69-
next param if $param eq $ignore_param;
70-
}
92+
}
93+
} elsif (ref $data eq 'ARRAY') {
94+
# Simple - scrub all the values
95+
$_ = $c->_scrubber->scrub($_) for @$data;
96+
for (@$data) {
97+
if (ref $_) {
98+
$c->_scrub_recurse($conf, $_);
99+
} else {
100+
$_ = $c->_scrubber->scrub($_);
71101
}
72-
}
102+
}
103+
} elsif (ref $data eq 'CODE') {
104+
$c->log->debug("Can't scrub a coderef!");
105+
} else {
106+
# This shouldn't happen, as we should always start with a ref,
107+
# and non-ref hash/array values should have been handled above.
108+
$c->log->debug("Non-ref to scrub - should this happen?");
109+
}
110+
}
73111

74-
# If we're still here, we want to scrub this param's value.
75-
$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
112+
sub _should_scrub_param {
113+
my ($c, $conf, $param) = @_;
114+
# If we only want to operate on certain params, do that checking
115+
# now...
116+
if ($conf && $conf->{ignore_params}) {
117+
my $ignore_params = $c->config->{scrubber}{ignore_params};
118+
if (ref $ignore_params ne 'ARRAY') {
119+
$ignore_params = [ $ignore_params ];
120+
}
121+
for my $ignore_param (@$ignore_params) {
122+
if (ref $ignore_param eq 'Regexp') {
123+
return if $param =~ $ignore_param;
124+
} else {
125+
return if $param eq $ignore_param;
126+
}
127+
}
76128
}
129+
130+
# If we've not bailed above, we didn't match any ignore_params
131+
# entries, or didn't have any, so we do want to scrub
132+
return 1;
133+
}
134+
135+
136+
# Incredibly nasty monkey-patch to rewind filehandle before parsing - see
137+
# https://github.com/perl-catalyst/catalyst-runtime/pull/186
138+
# First, get the default handlers hashref:
139+
my $default_data_handlers = Catalyst->default_data_handlers();
140+
141+
# Wrap the coderef for application/json in one that rewinds the filehandle
142+
# first:
143+
my $orig_json_handler = $default_data_handlers->{'application/json'};
144+
$default_data_handlers->{'application/json'} = sub {
145+
$_[0]->seek(0,0); # rewind $fh arg
146+
$orig_json_handler->(@_);
147+
};
148+
149+
150+
{
151+
# and now replace the original default_data_handlers() with a version that
152+
# returns our modified handlers
153+
no warnings 'redefine';
154+
*Catalyst::default_data_handlers = sub {
155+
return $default_data_handlers;
156+
};
77157
}
78158

79159
__PACKAGE__->meta->make_immutable;
@@ -124,9 +204,11 @@ See SYNOPSIS for how to configure the plugin, both with its own configuration
124204
passing on any options from L<HTML::Scrubber> to control exactly what
125205
scrubbing happens.
126206
127-
=item prepare_parameters
207+
=item dispatch
128208
129-
Sanitize HTML tags in all parameters (unless `ignore_params` exempts them).
209+
Sanitize HTML tags in all parameters (unless `ignore_params` exempts them) -
210+
this includes normal POST params, and serialised data (e.g. a POSTed JSON body)
211+
accessed via `$c->req->body_data` or `$c->req->data`.
130212
131213
=back
132214

t/00_compile.t

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
use strict;
22
use Test::More tests => 1;
33

4-
BEGIN { use_ok 'Catalyst::Plugin::HTML::Scrubber' }
4+
BEGIN { use Catalyst; use_ok 'Catalyst::Plugin::HTML::Scrubber' }

t/03_params.t

Lines changed: 74 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,93 @@ use Test::More;
1212
{
1313
my $req = GET('/');
1414
my ($res, $c) = ctx_request($req);
15-
ok($res->code == RC_OK, 'response ok');
15+
is($res->code, RC_OK, 'response ok');
1616
is($res->content, 'index', 'content ok');
1717
}
1818
{
1919
my $req = POST('/', [foo => 'bar']);
2020
my ($res, $c) = ctx_request($req);
21-
ok($res->code == RC_OK, 'response ok');
22-
is($c->req->param('foo'), 'bar', 'parameter ok');
21+
is($res->code, RC_OK, 'response ok');
22+
is($c->req->param('foo'), 'bar', 'Normal POST body param, nothing to strip, left alone');
2323
}
2424
{
2525
my $req = POST('/', [foo => 'bar<script>alert("0");</script>']);
2626
my ($res, $c) = ctx_request($req);
27-
ok($res->code == RC_OK, 'response ok');
28-
is($c->req->param('foo'), 'bar');
27+
is($res->code, RC_OK, 'response ok');
28+
is($c->req->param('foo'), 'bar', 'XSS stripped from normal POST body param');
2929
}
3030
{
31+
# we allow <b> in the test app config so this should not be stripped
3132
my $req = POST('/', [foo => '<b>bar</b>']);
3233
my ($res, $c) = ctx_request($req);
33-
ok($res->code == RC_OK, 'response ok');
34-
is($c->req->param('foo'), '<b>bar</b>', 'parameter ok');
34+
is($res->code, RC_OK, 'response ok');
35+
is($c->req->param('foo'), '<b>bar</b>', 'Allowed tag not stripped');
36+
}
37+
{
38+
diag "HTML left alone in ignored field - by regex match";
39+
my $value = '<h1>Bar</h1><p>Foo</p>';
40+
my $req = POST('/', [foo_html => $value]);
41+
my ($res, $c) = ctx_request($req);
42+
is($res->code, RC_OK, 'response ok');
43+
is(
44+
$c->req->param('foo_html'),
45+
$value,
46+
'HTML left alone in ignored (by regex) field',
47+
);
48+
}
49+
{
50+
diag "HTML left alone in ignored field - by name";
51+
my $value = '<h1>Bar</h1><p>Foo</p>';
52+
my $req = POST('/', [ignored_param => $value]);
53+
my ($res, $c) = ctx_request($req);
54+
is($res->code, RC_OK, 'response ok');
55+
is(
56+
$c->req->param('ignored_param'),
57+
$value,
58+
'HTML left alone in ignored (by name) field',
59+
);
60+
}
61+
62+
{
63+
# Test that data in a JSON body POSTed gets scrubbed too
64+
my $json_body = <<JSON;
65+
{
66+
"foo": "Top-level <img src=foo.jpg title=fun>",
67+
"baz":{
68+
"one":"Second-level <img src=test.jpg>"
69+
},
70+
"arr": [
71+
"one test <img src=arrtest1.jpg>",
72+
"two <script>window.alert('XSS!');</script>"
73+
],
74+
"some_html": "Leave <b>this</b> alone: <img src=allowed.gif>"
75+
}
76+
JSON
77+
my $req = POST('/',
78+
Content_Type => 'application/json', Content => $json_body
79+
);
80+
my ($res, $c) = ctx_request($req);
81+
is($res->code, RC_OK, 'response ok');
82+
is(
83+
$c->req->body_data->{foo},
84+
'Top-level ', # note trailing space where img was removed
85+
'Top level body param scrubbed',
86+
);
87+
is(
88+
$c->req->body_data->{baz}{one},
89+
'Second-level ',
90+
'Second level body param scrubbed',
91+
);
92+
is(
93+
$c->req->body_data->{arr}[0],
94+
'one test ',
95+
'Second level array contents scrubbbed',
96+
);
97+
is(
98+
$c->req->body_data->{some_html},
99+
'Leave <b>this</b> alone: <img src=allowed.gif>',
100+
'Body data param matching ignore_params left alone',
101+
);
35102
}
36103

37104
done_testing();

t/05_ignore_params.t

Lines changed: 0 additions & 53 deletions
This file was deleted.

t/05_rest.t

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
use strict;
2+
use warnings;
3+
4+
use FindBin qw($Bin);
5+
use lib "$Bin/lib";
6+
7+
use Test::More;
8+
9+
10+
eval 'use Catalyst::Controller::REST';
11+
plan skip_all => 'Catalyst::Controller::REST not available, skip REST tests' if $@;
12+
13+
use Catalyst::Test 'MyApp05';
14+
use HTTP::Request::Common;
15+
use HTTP::Status;
16+
17+
{
18+
# Test that data in a JSON body POSTed gets scrubbed too
19+
my $json_body = <<JSON;
20+
{
21+
"foo": "Top-level <img src=foo.jpg title=fun>",
22+
"baz":{
23+
"one":"Second-level <img src=test.jpg>"
24+
},
25+
"arr": [
26+
"one test <img src=arrtest1.jpg>",
27+
"two <script>window.alert('XSS!');</script>"
28+
],
29+
"some_html": "Leave <b>this</b> alone: <img src=allowed.gif>"
30+
}
31+
JSON
32+
my $req = POST('/foo',
33+
Content_Type => 'application/json', Content => $json_body
34+
);
35+
my ($res, $c) = ctx_request($req);
36+
is($res->code, RC_OK, 'response ok');
37+
is(
38+
$c->req->data->{foo},
39+
'Top-level ', # note trailing space where img was removed
40+
'Top level body param scrubbed',
41+
);
42+
is(
43+
$c->req->data->{baz}{one},
44+
'Second-level ',
45+
'Second level body param scrubbed',
46+
);
47+
is(
48+
$c->req->data->{arr}[0],
49+
'one test ',
50+
'Second level array contents scrubbbed',
51+
);
52+
is(
53+
$c->req->data->{some_html},
54+
'Leave <b>this</b> alone: <img src=allowed.gif>',
55+
'Body data param matching ignore_params left alone',
56+
);
57+
}
58+
59+
done_testing();
60+

t/lib/MyApp03.pm

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,17 @@ extends 'Catalyst';
99

1010
__PACKAGE__->config(
1111
name => 'MyApp03',
12-
scrubber => [allow => [qw/br hr b/],]
12+
scrubber => {
1313

14+
auto => 1,
15+
16+
ignore_params => [ qr/_html$/, 'ignored_param' ],
17+
18+
# params for HTML::Scrubber
19+
params => [
20+
allow => [qw/br hr b/],
21+
],
22+
}
1423
);
1524
__PACKAGE__->setup();
1625

0 commit comments

Comments
 (0)