|
| 1 | +From 6df43f8b17ff16e8b6124dee2bd47cbd05a89e30 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Anne-Edgar WILKE < [email protected]> |
| 3 | +Date: Fri, 28 Aug 2015 00:38:22 +0200 |
| 4 | +Subject: [PATCH] Fix COMPOUNDHYPHENMIN=1 compound hyphenation |
| 5 | + |
| 6 | + FIRST BUG |
| 7 | + --------- |
| 8 | + |
| 9 | + Problem |
| 10 | + |
| 11 | +In a compound word, the word parts of two characters are never |
| 12 | +hyphenated. |
| 13 | + |
| 14 | + Example |
| 15 | + |
| 16 | +To reproduce the bug, go to the hyphen-2.8.8 directory and run the |
| 17 | +following commands: |
| 18 | + |
| 19 | +echo "\ |
| 20 | +UTF-8 |
| 21 | +LEFTHYPHENMIN 1 |
| 22 | +RIGHTHYPHENMIN 1 |
| 23 | +COMPOUNDLEFTHYPHENMIN 1 |
| 24 | +COMPOUNDRIGHTHYPHENMIN 1 |
| 25 | +.post1 |
| 26 | +NEXTLEVEL |
| 27 | +e1 |
| 28 | +a1 |
| 29 | +" > hyphen.pat |
| 30 | + |
| 31 | +./example hyphen.pat <(echo postea) |
| 32 | + |
| 33 | +The output is post=ea, but it should be post=e=a. |
| 34 | + |
| 35 | +If you replace postea with posteaque in the command above, you get |
| 36 | +post=e=a=que, which is correct. Indeed, the component "eaque" is now |
| 37 | +five characters long, so it is hyphenated. |
| 38 | + |
| 39 | +If you replace postea with ea, you get e=a, which is also correct; |
| 40 | +this is because ea is not a compound word. |
| 41 | + |
| 42 | + Solution |
| 43 | + |
| 44 | +In the file hyphen.c, line 966, "if (i - begin > 1)" must be replaced |
| 45 | +with "if (i - begin > 0)". |
| 46 | +Indeed, the word part spans from begin to i inclusive, so |
| 47 | +its length is i - begin + 1. So, if you want to hyphenate the word |
| 48 | +parts of length 2 and above, you have to check that i - begin + 1 >= 2, |
| 49 | +i.e. i - begin > 0. |
| 50 | + |
| 51 | + SECOND BUG |
| 52 | + ---------- |
| 53 | + |
| 54 | + Problem |
| 55 | + |
| 56 | +In a compound word, the word parts are never hyphenated between their |
| 57 | +second-to-last and last characters. |
| 58 | + |
| 59 | + Example |
| 60 | + |
| 61 | +To reproduce the bug, run the following commands: |
| 62 | + |
| 63 | +echo "\ |
| 64 | +UTF-8 |
| 65 | +LEFTHYPHENMIN 1 |
| 66 | +RIGHTHYPHENMIN 1 |
| 67 | +COMPOUNDLEFTHYPHENMIN 1 |
| 68 | +COMPOUNDRIGHTHYPHENMIN 1 |
| 69 | +1que. |
| 70 | +NEXTLEVEL |
| 71 | +e1 |
| 72 | +" > hyphen.pat |
| 73 | + |
| 74 | +./example hyphen.pat <(echo meaque) |
| 75 | + |
| 76 | +The output is mea=que, but it should be me=a=que. |
| 77 | + |
| 78 | +Again, if you replace meaque with mea, you get me=a, which is correct, |
| 79 | +because mea is not a compound word. |
| 80 | + |
| 81 | +If you replace meaque with eamque, you get e=am=que, as expected; this |
| 82 | +shows that there is no similar bug between the first and second |
| 83 | +characters of word parts. |
| 84 | + |
| 85 | + Solution |
| 86 | + |
| 87 | +In the file hyphen.c, line 983, "for (j = 0; j < i - begin - 1; j++)" |
| 88 | +must be replaced with "for (j = 0; j < i - begin; j++)". |
| 89 | +Indeed, the word part has length i - begin + 1. So there are i - begin |
| 90 | +possible places for a hyphen. Thus j must take i - begin different |
| 91 | +values, i.e. range from 0 to i - begin - 1 inclusive. |
| 92 | +--- |
| 93 | + hyphen.c | 4 ++-- |
| 94 | + 1 file changed, 2 insertions(+), 2 deletions(-) |
| 95 | + |
| 96 | +diff --git a/hyphen.c b/hyphen.c |
| 97 | +index 79dc072..4954dbd 100644 |
| 98 | +--- a/hyphen.c |
| 99 | ++++ b/hyphen.c |
| 100 | +@@ -963,7 +963,7 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size, |
| 101 | + for (i = 0; i < word_size; i++) rep2[i] = NULL; |
| 102 | + for (i = 0; i < word_size; i++) if |
| 103 | + (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { |
| 104 | +- if (i - begin > 1) { |
| 105 | ++ if (i - begin > 0) { |
| 106 | + int hyph = 0; |
| 107 | + prep_word[i + 2] = '\0'; |
| 108 | + /* non-standard hyphenation at compound boundary (Schiffahrt) */ |
| 109 | +@@ -980,7 +980,7 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size, |
| 110 | + hnj_hyphen_hyph_(dict, prep_word + begin + 1, i - begin + 1 + hyph, |
| 111 | + hyphens2, &rep2, &pos2, &cut2, clhmin, |
| 112 | + crhmin, (begin > 0 ? 0 : lend), (hyphens[i]&1 ? 0 : rend)); |
| 113 | +- for (j = 0; j < i - begin - 1; j++) { |
| 114 | ++ for (j = 0; j < i - begin; j++) { |
| 115 | + hyphens[begin + j] = hyphens2[j]; |
| 116 | + if (rep2[j] && rep && pos && cut) { |
| 117 | + if (!*rep && !*pos && !*cut) { |
0 commit comments