From ffedeca556ae5f96feda0d3e50b665359303fff2 Mon Sep 17 00:00:00 2001 From: Benno Bielmeier Date: Thu, 21 Nov 2024 02:48:20 +0100 Subject: [PATCH] feat: Languages.search now only works with Regexp See also #123 --- CHANGELOG.md | 3 +++ README.adoc | 11 +++++------ lib/languages.rb | 5 +++-- sig/languages.rbs | 2 +- test/test_languages.rb | 44 +++++++++++++++--------------------------- 5 files changed, 28 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4663f90..0b5bd71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Update ISO 639 data incorporating approved changes of [2023 series](https://iso639-3.sil.org/sites/iso639-3/files/reports/2023%20Summary%20of%20Outcomes.pdf), [2024 Quarter 2](https://iso639-3.sil.org/sites/iso639-3/files/reports/2024%20Quarter%202%20639%20MA%20newsletter.pdf), and [2024 Quater 3](https://iso639-3.sil.org/sites/iso639-3/files/reports/2024%20Quarter%203%20639%20MA%20newsletter.pdf) +- Interface of `Languages.search` changed. See [#123](https://github.com/bbenno/languages/pull/123) for more details. + - Argument `case_sensitive` has been removed. + - Argument `pattern` can no longer be String. Its type has to be `Regexp` ### Deprecated diff --git a/README.adoc b/README.adoc index 2dd481e..781c1e3 100644 --- a/README.adoc +++ b/README.adoc @@ -58,19 +58,18 @@ invalid = Languages[:invalid] # invalid or unknown names or ISO codes returns n [source] Languages.all -.Get languages by name +.Get languages by name (regexp search) [source] ---- -Languages.search "^Germ" +Languages.search /Germ/i Languages.search /\AJapan/ ---- [CAUTION] -- -Passing a string to `Languages.search` results in case-sensitive search. -If case-insensitive search is intended, use ignorecase regexp like `/search_pattern/i` or pass optional `case_sensitive` parameter. 
-[source] -Languages.search('search_pattern', case_sensitive: false) +Searching languages by name is only allowed via `Regexp` that has been prepared and validated (if it comes from an untrusted user) in terms of case sensitivity and security / timeout. +Support for passing a search pattern of type String has been removed in v0.9.0. +See https://github.com/bbenno/languages/pull/123[#123] for more details. -- .Since ISO 639-3 categorizes the languages by scope and type, one can filter by them diff --git a/lib/languages.rb b/lib/languages.rb index 4e76b09..85a00d2 100644 --- a/lib/languages.rb +++ b/lib/languages.rb @@ -36,8 +36,9 @@ def [](key) end end - def search(pattern, case_sensitive: true) - pattern = Regexp.new(pattern, Regexp::IGNORECASE).freeze unless case_sensitive + def search(pattern) + raise(ArgumentError, 'Pattern must be a Regexp') unless pattern.is_a?(Regexp) + all.select { |l| l.name.match? pattern } end diff --git a/sig/languages.rbs b/sig/languages.rbs index 3bbc931..51a49a0 100644 --- a/sig/languages.rbs +++ b/sig/languages.rbs @@ -8,7 +8,7 @@ module Languages @@data: Array[Language] def self?.[]: (String | Symbol) -> Language? 
- def self?.search: (String|Regexp pattern, ?bool case_sensitive) -> Array[Language] + def self?.search: (Regexp pattern) -> Array[Language] def self?.all: () -> Array[Language] def self?.names: () -> Array[String] diff --git a/test/test_languages.rb b/test/test_languages.rb index 75b0821..81e28c2 100644 --- a/test/test_languages.rb +++ b/test/test_languages.rb @@ -3,6 +3,10 @@ require 'test_helper' class TestLanguages < Minitest::Test + def setup + @search_pattern = /Germ/ + end + def test_that_it_has_a_version_number refute_nil ::Languages::VERSION end @@ -86,15 +90,13 @@ def test_single_language_lookup_key_is_case_insensitive end def test_search_provides_enumerable - assert_kind_of Enumerable, ::Languages.search('Japanese') + assert_kind_of Enumerable, ::Languages.search(@search_pattern) end - def test_search_with_string_pattern - pattern = 'Japanese' - search_result = ::Languages.search(pattern) + def test_search_with_string_pattern_fails + pattern = @search_pattern.source - assert(search_result.map(&:name).all? { |n| n.match?(pattern) }) - refute((Languages.all - search_result).map(&:name).any? { |n| n.match?(pattern) }) + assert_raises(ArgumentError) { ::Languages.search(pattern) } end def test_search_with_regex_pattern @@ -105,30 +107,16 @@ def test_search_with_regex_pattern refute((Languages.all - search_result).map(&:name).any? 
{ |n| n.match?(pattern) }) end - def test_search_is_case_sensitive - pattern1 = 'Germ' - pattern2 = pattern1.downcase - search_result1 = ::Languages.search(pattern1) - search_result2 = ::Languages.search(pattern2) - - refute_equal(search_result1.count, search_result2.count) - end - - def test_search_can_be_case_sensitive_if_specified - pattern1 = 'Germ' - pattern2 = /germ/i - search_result1 = ::Languages.search(pattern1) - search_result2 = ::Languages.search(pattern2) - - assert_equal(search_result1.count, search_result2.count) - end + def test_search_can_be_case_insensitive + case_sensitive_pattern = /tib/ + case_insensitive_pattern = Regexp.new(case_sensitive_pattern.source, Regexp::IGNORECASE) - def test_search_is_case_insensitive_if_specified - pattern = 'Germ' - search_result1 = ::Languages.search(pattern) - search_result2 = ::Languages.search(pattern, case_sensitive: false) + case_sensitive_result = ::Languages.search(case_sensitive_pattern) + case_insensitive_search_result = ::Languages.search(case_insensitive_pattern) - assert_equal(search_result1.count, search_result2.count) + # case sensitive result only contains "Celtiberian" + # case insensitive result additionally contains "Tibet" + refute_equal(case_insensitive_search_result, case_sensitive_result) end def test_reference_to_macrolanguage