Skip to content

Commit 7e79d57

Browse files
authored
Improve Utils.normalize_uri (huginn#1719)
* Improve Utils.normalize_uri: Globally replacing generally unsafe characters in a URL would not fix invalid authorities and paths, so use Addressable::URI to normalize them when necessary. This should fix huginn#1701.
* Remove an unused function.
* Fix the test case to make sure an IPv6 address is supported.
1 parent 4150b1e commit 7e79d57

File tree

3 files changed

+33
-8
lines changed

3 files changed

+33
-8
lines changed

lib/utils.rb

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
require 'jsonpath'
22
require 'cgi'
3+
require 'addressable/uri'
34

45
module Utils
56
def self.unindent(s)
@@ -25,11 +26,29 @@ def self.normalize_uri(uri)
2526
begin
2627
URI(uri)
2728
rescue URI::Error
28-
URI(uri.to_s.gsub(/[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]]+/) { |unsafe|
29-
unsafe.bytes.each_with_object(String.new) { |uc, s|
30-
s << sprintf('%%%02X', uc)
31-
}
32-
}.force_encoding(Encoding::US_ASCII))
29+
begin
30+
URI(uri.to_s.gsub(/[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]]+/) { |unsafe|
31+
unsafe.bytes.each_with_object(String.new) { |uc, s|
32+
s << sprintf('%%%02X', uc)
33+
}
34+
}.force_encoding(Encoding::US_ASCII))
35+
rescue URI::Error => e
36+
begin
37+
auri = Addressable::URI.parse(uri.to_s)
38+
rescue
39+
# Do not leak Addressable::URI::InvalidURIError which
40+
# callers might not expect.
41+
raise e
42+
else
43+
# Addressable::URI#normalize! modifies the query and
44+
# fragment components beyond escaping unsafe characters, so
45+
# avoid using it. Otherwise `?a[]=%2F` would be normalized
46+
# as `?a%5B%5D=/`, for example.
47+
auri.site = auri.normalized_site
48+
auri.path = auri.normalized_path
49+
URI(auri.to_s)
50+
end
51+
end
3352
end
3453
end
3554

spec/data_fixtures/urlTest.html

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
<li><a href="https://www.google.ca/search?q=위키백과:대문">unicode param</a></li>
1313
<li><a href="http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8">percent encoded url</a></li>
1414
<li><a href="https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8">percent encoded param</a></li>
15+
<li><a href="http://[::1]/path[]?query[]=foo">brackets</a></li>
1516
</ul>
1617
</body>
17-
</html>
18+
</html>

spec/models/agents/website_agent_spec.rb

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,8 +1105,8 @@
11051105

11061106
describe "#check" do
11071107
before do
1108-
expect { @checker.check }.to change { Event.count }.by(7)
1109-
@events = Event.last(7)
1108+
expect { @checker.check }.to change { Event.count }.by(8)
1109+
@events = Event.last(8)
11101110
end
11111111

11121112
it "should check hostname" do
@@ -1143,6 +1143,11 @@
11431143
event = @events[6]
11441144
expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
11451145
end
1146+
1147+
it "should check url with unescaped brackets in the path component" do
1148+
event = @events[7]
1149+
expect(event.payload['url']).to eq("http://[::1]/path%5B%5D?query[]=foo")
1150+
end
11461151
end
11471152
end
11481153
end

0 commit comments

Comments
 (0)