@@ -29,7 +29,7 @@ namespace stemming
29
29
30
30
Define a valid s-ending as one of:
31
31
- b c d f g h j k l m n o p r t v y
32
-
32
+
33
33
Define a valid öst-ending as one of:
34
34
35
35
- i k l n p r t u v
@@ -58,12 +58,16 @@ namespace stemming
58
58
<b>Step 3:</b>
59
59
60
60
Search for the longest among the following suffixes in R1, and perform the action indicated.
61
- - lig ig els
62
- - Delete
63
- - löst
64
- - Replace with lös
65
- - fullt
66
- - Replace with full
61
+ - lig ig els
62
+ - Delete.
63
+ - öst
64
+ - Replace with ös if preceded by a valid öst-ending.
65
+
66
+ Only the öst suffix itself must be in R1; the valid öst-ending letter preceding it is not necessarily in R1.
67
+ Prior to Snowball 2.3.0, öst-ending was effectively just
68
+ l and was required to be in R1.
69
+ - fullt
70
+ - Replace with full.
67
71
*/
68
72
// ------------------------------------------------------
69
73
template <typename string_typeT = std::wstring>
@@ -378,7 +382,7 @@ namespace stemming
378
382
void step_3 (string_typeT& text)
379
383
{
380
384
if (stem<string_typeT>::is_suffix_in_r1 (text,
381
- /* fullt*/
385
+ /* fullt */
382
386
common_lang_constants::LOWER_F, common_lang_constants::UPPER_F,
383
387
common_lang_constants::LOWER_U, common_lang_constants::UPPER_U,
384
388
common_lang_constants::LOWER_L, common_lang_constants::UPPER_L,
@@ -388,9 +392,18 @@ namespace stemming
388
392
text.erase (text.length ()-1 );
389
393
stem<string_typeT>::update_r_sections (text);
390
394
}
391
- else if (stem<string_typeT>::is_suffix_in_r1 (text,
392
- /* löst*/
393
- common_lang_constants::LOWER_L, common_lang_constants::UPPER_L,
395
+ else if (text.length () >= 4 &&
396
+ (stem<string_typeT>::is_either (text[text.length () - 4 ], common_lang_constants::LOWER_I, common_lang_constants::UPPER_I) ||
397
+ stem<string_typeT>::is_either (text[text.length () - 4 ], common_lang_constants::LOWER_K, common_lang_constants::UPPER_K) ||
398
+ stem<string_typeT>::is_either (text[text.length () - 4 ], common_lang_constants::LOWER_L, common_lang_constants::UPPER_L) ||
399
+ stem<string_typeT>::is_either (text[text.length () - 4 ], common_lang_constants::LOWER_N, common_lang_constants::UPPER_N) ||
400
+ stem<string_typeT>::is_either (text[text.length () - 4 ], common_lang_constants::LOWER_P, common_lang_constants::UPPER_P) ||
401
+ stem<string_typeT>::is_either (text[text.length () - 4 ], common_lang_constants::LOWER_R, common_lang_constants::UPPER_R) ||
402
+ stem<string_typeT>::is_either (text[text.length () - 4 ], common_lang_constants::LOWER_T, common_lang_constants::UPPER_T) ||
403
+ stem<string_typeT>::is_either (text[text.length () - 4 ], common_lang_constants::LOWER_U, common_lang_constants::UPPER_U) ||
404
+ stem<string_typeT>::is_either (text[text.length () - 4 ], common_lang_constants::LOWER_V, common_lang_constants::UPPER_V)) &&
405
+ stem<string_typeT>::is_suffix_in_r1 (text,
406
+ /* öst (with valid character in front of it) */
394
407
common_lang_constants::LOWER_O_UMLAUTS, common_lang_constants::UPPER_O_UMLAUTS,
395
408
common_lang_constants::LOWER_S, common_lang_constants::UPPER_S,
396
409
common_lang_constants::LOWER_T, common_lang_constants::UPPER_T) )
@@ -399,19 +412,19 @@ namespace stemming
399
412
stem<string_typeT>::update_r_sections (text);
400
413
}
401
414
else if (stem<string_typeT>::delete_if_is_in_r1 (text,
402
- /* lig*/
415
+ /* lig */
403
416
common_lang_constants::LOWER_L, common_lang_constants::UPPER_L,
404
417
common_lang_constants::LOWER_I, common_lang_constants::UPPER_I,
405
418
common_lang_constants::LOWER_G, common_lang_constants::UPPER_G, false ) )
406
419
{ return ; }
407
420
else if (stem<string_typeT>::delete_if_is_in_r1 (text,
408
- /* els*/
421
+ /* els */
409
422
common_lang_constants::LOWER_E, common_lang_constants::UPPER_E,
410
423
common_lang_constants::LOWER_L, common_lang_constants::UPPER_L,
411
424
common_lang_constants::LOWER_S, common_lang_constants::UPPER_S, false ) )
412
425
{ return ; }
413
426
else if (stem<string_typeT>::delete_if_is_in_r1 (text,
414
- /* ig */
427
+ /* ig */
415
428
common_lang_constants::LOWER_I, common_lang_constants::UPPER_I,
416
429
common_lang_constants::LOWER_G, common_lang_constants::UPPER_G, false ) )
417
430
{ return ; }
0 commit comments