diff --git a/spec/cadmium/i18n/stop_words_spec.cr b/spec/cadmium/i18n/stop_words_spec.cr index 26d4b8d..6d4fdb6 100644 --- a/spec/cadmium/i18n/stop_words_spec.cr +++ b/spec/cadmium/i18n/stop_words_spec.cr @@ -2,15 +2,11 @@ include Cadmium::I18n::StopWords stop_words en, fr stop_words all_languages describe Cadmium::I18n::StopWords do - subject = stop_words_fr - it "should return a word from the french stop words list" do - subject[2].should eq("absolument") + stop_words_fr[2].should eq("absolument") end - subject = stop_words_all_languages["ru"] - it "should return a word from the russian stop words list" do - subject[45].should eq("взгляд") + stop_words_all_languages["ru"][45].should eq("взгляд") end end diff --git a/src/cadmium/readability.cr b/src/cadmium/readability.cr index 9a96d60..7359089 100644 --- a/src/cadmium/readability.cr +++ b/src/cadmium/readability.cr @@ -74,7 +74,7 @@ module Cadmium # The average number of sentences per 100 words. Useful for the Coleman-Liau # and Linsear Write score calculation def sentences_per_hundred_words - sentences.size.to_f / (words.size / 100).to_f + sentences.size.to_f / (words.size // 100).to_f end # The average number of characters per word. Useful for the Coleman-Liau @@ -133,7 +133,7 @@ module Cadmium # ARI uses a scale based on age in full-time education. def ari - result = 4.71 * (num_chars / num_words) + 0.5 * (num_words / num_sentences) - 21.43 + result = 4.71 * (num_chars // num_words) + 0.5 * (num_words // num_sentences) - 21.43 result.finite? ? result.round(2) : 0.0 end @@ -156,7 +156,7 @@ module Cadmium # good standard for ordinary text. def lix - result = (num_words / num_sentences).to_f + ((@long_words * 100) / num_words).to_f + result = (num_words // num_sentences).to_f + ((@long_words * 100) // num_words).to_f result.finite? ? result.round(2) : 0.0 end diff --git a/src/cadmium/sentiment.cr b/src/cadmium/sentiment.cr index 05894de..874a62c 100644 --- a/src/cadmium/sentiment.cr +++ b/src/cadmium/sentiment.cr @@ -6,20 +6,20 @@ module Cadmium # Negate the next word in the phrase. NEGATORS = { - "cant" => 1, - "can't" => 1, - "dont" => 1, - "don't" => 1, - "doesnt" => 1, - "doesn't" => 1, - "not" => 1, - "non" => 1, - "wont" => 1, - "won't" => 1, - "isnt" => 1, - "isn't" => 1, - "wasnt" => 1, - "wasn't" => 1, + "cant", + "can't", + "dont", + "don't", + "doesnt", + "doesn't", + "not", + "non", + "wont", + "won't", + "isnt", + "isn't", + "wasnt", + "wasn't", } # Manage the `Tokenizer` that the sentiment analyzer uses. diff --git a/src/cadmium/summarizer/luhn_summarizer.cr b/src/cadmium/summarizer/luhn_summarizer.cr index 4ad22cc..0374608 100644 --- a/src/cadmium/summarizer/luhn_summarizer.cr +++ b/src/cadmium/summarizer/luhn_summarizer.cr @@ -33,10 +33,10 @@ module Cadmium window_size = window_size(terms_in_sentence, normalized_terms) return 0 if window_size <= 0 number_of_normalized_terms = terms_in_sentence.count { |term| normalized_terms.includes?(term) } - (number_of_normalized_terms*number_of_normalized_terms) / window_size + (number_of_normalized_terms*number_of_normalized_terms) // window_size end - private def select_sentences(text, max_num_sentences, normalized_terms_ratio) + private def select_sentences(text, max_num_sentences, normalized_terms_ratio) : Array(String) sentences = Cadmium::Util::Sentence.sentences(text) sentences.sort_by! { |sentence| -sentence_rating(sentence, normalized_terms_ratio) } # This could be improved, performance wise. sentences[0..max_num_sentences]