Skip to content

Scrub body_data / data params too (e.g. POSTed JSON) #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 106 additions & 24 deletions lib/Catalyst/Plugin/HTML/Scrubber.pm
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ sub setup {
return $c->maybe::next::method(@_);
}

sub prepare_parameters {
sub execute {
my $c = shift;

$c->maybe::next::method(@_);
Expand All @@ -47,33 +47,113 @@ sub prepare_parameters {
sub html_scrub {
my ($c, $conf) = @_;

param:
for my $param (keys %{ $c->request->{parameters} }) {
#while (my ($param, $value) = each %{ $c->request->{parameters} }) {
my $value = \$c->request->{parameters}{$param};
if (ref $$value && ref $$value ne 'ARRAY') {
next param;
# If there's body_data - for e.g. a POSTed JSON body that was decoded -
# then we need to walk through it, scrubbing as appropriate
if (my $body_data = $c->request->body_data) {
$c->_scrub_recurse($conf, $c->request->body_data);
}

# And if Catalyst::Controller::REST is in use so we have $req->data,
# then scrub that too
if ($c->request->can('data')) {
my $data = $c->request->data;
if ($data) {
$c->_scrub_recurse($conf, $c->request->data);
}
}

# Normal query/POST body parameters:
$c->_scrub_recurse($conf, $c->request->parameters);

}

# If we only want to operate on certain params, do that checking
# now...
if ($conf && $conf->{ignore_params}) {
my $ignore_params = $c->config->{scrubber}{ignore_params};
if (ref $ignore_params ne 'ARRAY') {
$ignore_params = [ $ignore_params ];
# Recursively scrub param values...
sub _scrub_recurse {
my ($c, $conf, $data) = @_;

# If the thing we've got is a hashref, walk over its keys, checking
# whether we should ignore, otherwise, do the needful
if (ref $data eq 'HASH') {
for my $key (keys %$data) {
if (!$c->_should_scrub_param($conf, $key)) {
next;
}

# OK, it's fine to fettle with this key - if its value is
# a ref, recurse, otherwise, scrub
if (my $ref = ref $data->{$key}) {
$c->_scrub_recurse($conf, $data->{$key});
} else {
# Alright, non-ref value, so scrub it
# FIXME why did we have to have this ref-ref handling fun?
#$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
$data->{$key} = $c->_scrubber->scrub($data->{$key});
}
for my $ignore_param (@$ignore_params) {
if (ref $ignore_param eq 'Regexp') {
next param if $param =~ $ignore_param;
} else {
next param if $param eq $ignore_param;
}
}
} elsif (ref $data eq 'ARRAY') {
# Simple - scrub all the values
$_ = $c->_scrubber->scrub($_) for @$data;
for (@$data) {
if (ref $_) {
$c->_scrub_recurse($conf, $_);
} else {
$_ = $c->_scrubber->scrub($_);
}
}
}
} elsif (ref $data eq 'CODE') {
$c->log->debug("Can't scrub a coderef!");
} else {
# This shouldn't happen, as we should always start with a ref,
# and non-ref hash/array values should have been handled above.
$c->log->debug("Non-ref to scrub - should this happen?");
}
}

# If we're still here, we want to scrub this param's value.
$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
sub _should_scrub_param {
my ($c, $conf, $param) = @_;
# If we only want to operate on certain params, do that checking
# now...
if ($conf && $conf->{ignore_params}) {
my $ignore_params = $c->config->{scrubber}{ignore_params};
if (ref $ignore_params ne 'ARRAY') {
$ignore_params = [ $ignore_params ];
}
for my $ignore_param (@$ignore_params) {
if (ref $ignore_param eq 'Regexp') {
return if $param =~ $ignore_param;
} else {
return if $param eq $ignore_param;
}
}
}

# If we've not bailed above, we didn't match any ignore_params
# entries, or didn't have any, so we do want to scrub
return 1;
}


# Incredibly nasty monkey-patch to rewind filehandle before parsing - see
# https://github.com/perl-catalyst/catalyst-runtime/pull/186
# First, get the default handlers hashref:
my $default_data_handlers = Catalyst->default_data_handlers();

# Wrap the coderef for application/json in one that rewinds the filehandle
# first:
my $orig_json_handler = $default_data_handlers->{'application/json'};
$default_data_handlers->{'application/json'} = sub {
$_[0]->seek(0,0); # rewind $fh arg
$orig_json_handler->(@_);
};


{
# and now replace the original default_data_handlers() with a version that
# returns our modified handlers
no warnings 'redefine';
*Catalyst::default_data_handlers = sub {
return $default_data_handlers;
};
}

__PACKAGE__->meta->make_immutable;
Expand Down Expand Up @@ -124,9 +204,11 @@ See SYNOPSIS for how to configure the plugin, both with its own configuration
passing on any options from L<HTML::Scrubber> to control exactly what
scrubbing happens.

=item prepare_parameters
=item dispatch

Sanitize HTML tags in all parameters (unless `ignore_params` exempts them).
Sanitize HTML tags in all parameters (unless `ignore_params` exempts them) -
this includes normal POST params, and serialised data (e.g. a POSTed JSON body)
accessed via `$c->req->body_data` or `$c->req->data`.

=back

Expand Down
2 changes: 1 addition & 1 deletion t/00_compile.t
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use strict;
use Test::More tests => 1;

BEGIN { use_ok 'Catalyst::Plugin::HTML::Scrubber' }
BEGIN { use Catalyst; use_ok 'Catalyst::Plugin::HTML::Scrubber' }
81 changes: 74 additions & 7 deletions t/03_params.t
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,93 @@ use Test::More;
{
my $req = GET('/');
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is($res->code, RC_OK, 'response ok');
is($res->content, 'index', 'content ok');
}
{
my $req = POST('/', [foo => 'bar']);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is($c->req->param('foo'), 'bar', 'parameter ok');
is($res->code, RC_OK, 'response ok');
is($c->req->param('foo'), 'bar', 'Normal POST body param, nothing to strip, left alone');
}
{
my $req = POST('/', [foo => 'bar<script>alert("0");</script>']);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is($c->req->param('foo'), 'bar');
is($res->code, RC_OK, 'response ok');
is($c->req->param('foo'), 'bar', 'XSS stripped from normal POST body param');
}
{
# we allow <b> in the test app config so this should not be stripped
my $req = POST('/', [foo => '<b>bar</b>']);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is($c->req->param('foo'), '<b>bar</b>', 'parameter ok');
is($res->code, RC_OK, 'response ok');
is($c->req->param('foo'), '<b>bar</b>', 'Allowed tag not stripped');
}
{
diag "HTML left alone in ignored field - by regex match";
my $value = '<h1>Bar</h1><p>Foo</p>';
my $req = POST('/', [foo_html => $value]);
my ($res, $c) = ctx_request($req);
is($res->code, RC_OK, 'response ok');
is(
$c->req->param('foo_html'),
$value,
'HTML left alone in ignored (by regex) field',
);
}
{
diag "HTML left alone in ignored field - by name";
my $value = '<h1>Bar</h1><p>Foo</p>';
my $req = POST('/', [ignored_param => $value]);
my ($res, $c) = ctx_request($req);
is($res->code, RC_OK, 'response ok');
is(
$c->req->param('ignored_param'),
$value,
'HTML left alone in ignored (by name) field',
);
}

{
# Test that data in a JSON body POSTed gets scrubbed too
my $json_body = <<JSON;
{
"foo": "Top-level <img src=foo.jpg title=fun>",
"baz":{
"one":"Second-level <img src=test.jpg>"
},
"arr": [
"one test <img src=arrtest1.jpg>",
"two <script>window.alert('XSS!');</script>"
],
"some_html": "Leave <b>this</b> alone: <img src=allowed.gif>"
}
JSON
my $req = POST('/',
Content_Type => 'application/json', Content => $json_body
);
my ($res, $c) = ctx_request($req);
is($res->code, RC_OK, 'response ok');
is(
$c->req->body_data->{foo},
'Top-level ', # note trailing space where img was removed
'Top level body param scrubbed',
);
is(
$c->req->body_data->{baz}{one},
'Second-level ',
'Second level body param scrubbed',
);
is(
$c->req->body_data->{arr}[0],
'one test ',
'Second level array contents scrubbbed',
);
is(
$c->req->body_data->{some_html},
'Leave <b>this</b> alone: <img src=allowed.gif>',
'Body data param matching ignore_params left alone',
);
}

done_testing();
Expand Down
53 changes: 0 additions & 53 deletions t/05_ignore_params.t

This file was deleted.

60 changes: 60 additions & 0 deletions t/05_rest.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin/lib";

use Test::More;


eval 'use Catalyst::Controller::REST';
plan skip_all => 'Catalyst::Controller::REST not available, skip REST tests' if $@;

use Catalyst::Test 'MyApp05';
use HTTP::Request::Common;
use HTTP::Status;

{
# Test that data in a JSON body POSTed gets scrubbed too
my $json_body = <<JSON;
{
"foo": "Top-level <img src=foo.jpg title=fun>",
"baz":{
"one":"Second-level <img src=test.jpg>"
},
"arr": [
"one test <img src=arrtest1.jpg>",
"two <script>window.alert('XSS!');</script>"
],
"some_html": "Leave <b>this</b> alone: <img src=allowed.gif>"
}
JSON
my $req = POST('/foo',
Content_Type => 'application/json', Content => $json_body
);
my ($res, $c) = ctx_request($req);
is($res->code, RC_OK, 'response ok');
is(
$c->req->data->{foo},
'Top-level ', # note trailing space where img was removed
'Top level body param scrubbed',
);
is(
$c->req->data->{baz}{one},
'Second-level ',
'Second level body param scrubbed',
);
is(
$c->req->data->{arr}[0],
'one test ',
'Second level array contents scrubbbed',
);
is(
$c->req->data->{some_html},
'Leave <b>this</b> alone: <img src=allowed.gif>',
'Body data param matching ignore_params left alone',
);
}

done_testing();

11 changes: 10 additions & 1 deletion t/lib/MyApp03.pm
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,17 @@ extends 'Catalyst';

__PACKAGE__->config(
name => 'MyApp03',
scrubber => [allow => [qw/br hr b/],]
scrubber => {

auto => 1,

ignore_params => [ qr/_html$/, 'ignored_param' ],

# params for HTML::Scrubber
params => [
allow => [qw/br hr b/],
],
}
);
__PACKAGE__->setup();

Expand Down
Loading