Correct an error in the previous commit that caused failure to wrap around when nothing is selected.

Coises · Coises · commit 01afcad063e6 · 2025-02-26T14:42:17.000-07:00
Remove \m escape; add [[.x--.]] character names for invalid UTF-8 bytes.

Update help.
diff --git a/help.htm b/help.htm
@@ -308,7 +308,7 @@ <h3>Unicode</h3>
 
 <p>There are no surrogate pairs in <strong>Columns++</strong> regular expressions; each code point matches as a single character. To enter any Unicode character in hexadecimal notation, use the full code point; for example, enter &#x1f642; as <code>\x{1f642}</code>. (The surrogate pair, <code>\x{d83d}\x{de42}</code>, which must be used in <strong>Notepad++</strong> search, <em>will not match</em> in <strong>Columns++</strong>.)</p>
 
-<p><strong>Scintilla</strong>, the display control used in <strong>Notepad++</strong>, represents Unicode internally as UTF-8. (This is true whether the file containing the document is UTF-8, UTF-16 or anything else other than “ANSI.”) When displaying Unicode documents that contain invalid UTF-8, Scintilla shows each byte that cannot be decoded as a hexadecimal code in reversed colors. When matching a regular expression, <strong>Columns++</strong> treats each of these error bytes as if it were the Unicode code point formed by adding <code>0xdc00</code> to the invalid byte. These code points are in the surrogate range and are invalid as Unicode characters. (It is possible to match one of these error bytes by prefixing <code>dc</code> to the hexadecimal value; e.g., <code>0xf7</code> is never a valid <em>byte</em> in UTF-8, but it can be found as <code>\x{dcf7}</code>.)</p>
+<p><strong>Scintilla</strong>, the display control used in <strong>Notepad++</strong>, represents Unicode internally as UTF-8. (This is true whether the file containing the document is UTF-8, UTF-16 or anything else other than “ANSI.”) When displaying Unicode documents that contain invalid UTF-8, Scintilla shows each byte that cannot be decoded as a hexadecimal code in reversed colors. You can match any of these bytes with <code>\i</code>; to match a specific byte, use a symbolic character name made from the hexadecimal code Scintilla displays, e.g., <code>[[.xF7.]]</code>. (When matching a regular expression, <strong>Columns++</strong> treats each of these error bytes as if it were the Unicode code point formed by adding <code>0xdc00</code> to the invalid byte. These code points are in the surrogate range and are invalid as UTF-32 code units.)</p>
 
 <p>The period (<code>.</code>) matches any one code point except the characters which end lines in Scintilla: carriage return (<code>\x0d</code> or <code>\r</code>) and newline (also called line feed, <code>\x0a</code> or <code>\n</code>). This corresponds to the <a href="https://npp-user-manual.org/docs/searching/#single-character-matches">documented</a> behavior of the period, but not the actual behavior in Notepad++ (where there are several other control characters it does not match). Use <code>\X</code> to match a character including any combining code points (marks) which follow it. (In Notepad++ search, <code>.</code> and <code>\X</code> do not work as expected when the code points involved are outside the basic multilingual plane, that is, 0x10000 or greater.)</p>
 
@@ -317,7 +317,6 @@ <h3>Unicode</h3>
 <table class=optionsTable>
 <tr><th>escape</th><th>negation</th><th>character class</th><th>meaning</th></tr>
 <tr><td><code>\i</code></td><td><code>\I</code></td><td><code>[[:invalid:]]</code></td><td>a byte in an invalid UTF-8 sequence</td></tr>
-<tr><td><code>\m</code></td><td><code>\M</code></td><td><code>[[:mark:]]</code></td><td>a combining mark, which displays as part of the previous character</td></tr>
 <tr><td><code>\o</code></td><td><code>\O</code></td><td><code>[[:ascii:]]</code></td><td>an ASCII character, code points 0 through 127</td></tr>
 <tr><td><code>\y</code></td><td><code>\Y</code></td><td><code>[[:defined:]]</code></td><td>any Unicode code point that is assigned and is not a surrogate or a private use character</td></tr>
 </table>
@@ -488,7 +487,9 @@ <h3>Unicode</h3>
 <tr><td><code>[[.sflo.]]</code></td>                                     <td>1bca0</td><td>shorthand format letter overlap</td></tr>
 <tr><td><code>[[.sfco.]]</code></td>                                     <td>1bca1</td><td>shorthand format continuing overlap</td></tr>
 <tr><td><code>[[.sfds.]]</code></td>                                     <td>1bca2</td><td>shorthand format down step</td></tr>
-<tr><td><code>[[.sfus.]]</code></td>                                     <td>1bca3</td><td>shorthand format up step</td></tr></table>
+<tr><td><code>[[.sfus.]]</code></td>                                     <td>1bca3</td><td>shorthand format up step</td></tr>
+<tr><td><code>[[.x80.]]–[[.xff.]]</code></td>                            <td></td>     <td>invalid UTF-8 bytes</td></tr>
+</table>
 
 </section>
 
diff --git a/src/ColumnsPlusPlus.h b/src/ColumnsPlusPlus.h
@@ -415,11 +415,11 @@ class ColumnsPlusPlusData {
         if (searchData.autoSetSelection) {
             if (sci.SelectionMode() != Scintilla::SelectionMode::Stream) return SearchRegionNotReady;
             if (sci.Selections() > 1) return SearchRegionNotReady;
-            if (sci.SelectionEmpty()) return SearchRegionImpliedAll;
         }
         if (sci.IndicatorValueAt(searchData.indicator, 0)) return SearchRegionReady;
         Scintilla::Position ie = sci.IndicatorEnd(searchData.indicator, 0);
-        return ie != 0 && ie != sci.Length() ? SearchRegionReady : SearchRegionNotReady;
+        if (ie != 0 && ie != sci.Length()) return SearchRegionReady;
+        return searchData.autoSetSelection && sci.SelectionEmpty() ? SearchRegionImpliedAll : SearchRegionNotReady;
     }
 
     void syncFindButton() {
diff --git a/src/Unicode/UnicodeRegexTraits.cpp b/src/Unicode/UnicodeRegexTraits.cpp
@@ -184,8 +184,12 @@ const utf32_regex_traits::char_class_type utf32_regex_traits::asciiMasks[] = {
     /* 7E ~     */ CatMask_Sm | mask_ascii | mask_graph | mask_punct,
     /* 7F DEL   */ CatMask_Cc | mask_ascii | mask_cntrl,
 };
-        
+
+
 const std::map<std::string, utf32_regex_traits::char_class_type> utf32_regex_traits::classnames = {
+
+    // Unicode general categories - short names:
+
     {"c*", CatMask_Cc | CatMask_Cf | CatMask_Cn | CatMask_Co},
     {"l*", CatMask_Ll | CatMask_Lm | CatMask_Lo | CatMask_Lt | CatMask_Lu},
     {"m*", CatMask_Mc | CatMask_Me | CatMask_Mn},
@@ -222,6 +226,9 @@ const std::map<std::string, utf32_regex_traits::char_class_type> utf32_regex_tra
     {"zl", CatMask_Zl},
     {"zp", CatMask_Zp},
     {"zs", CatMask_Zs},
+
+    // Unicode character class names:
+
     {"ascii"                   , mask_ascii},
     {"any"                     , 0x3fffffff00000000U},
     {"assigned"                , 0x3fffffee00000000U},
@@ -236,7 +243,6 @@ const std::map<std::string, utf32_regex_traits::char_class_type> utf32_regex_tra
     {"format"                  , CatMask_Cf},
     {"not assigned"            , CatMask_Cn},
     {"private use"             , CatMask_Co},
-    {"invalid"                 , CatMask_Cs},  // No surrogates in UTF-32, but we use some to hold invalid UTF-8 bytes
     {"lowercase letter"        , CatMask_Ll},
     {"modifier letter"         , CatMask_Lm},
     {"other letter"            , CatMask_Lo},
@@ -262,6 +268,9 @@ const std::map<std::string, utf32_regex_traits::char_class_type> utf32_regex_tra
     {"line separator"          , CatMask_Zl},
     {"paragraph separator"     , CatMask_Zp},
     {"space separator"         , CatMask_Zs},
+
+    // POSIX/Boost class names and escapes:
+
     {"alnum"   , mask_alnum     },
     {"alpha"   , mask_alpha     },
     {"blank"   , mask_blank     },
@@ -270,11 +279,8 @@ const std::map<std::string, utf32_regex_traits::char_class_type> utf32_regex_tra
     {"digit"   , mask_digit     },
     {"graph"   , mask_graph     },
     {"h"       , mask_horizontal},
-    {"i"       , CatMask_Cs     },
     {"l"       , mask_lower     },
     {"lower"   , mask_lower     },
-    {"m"       , CatMask_Mc | CatMask_Me | CatMask_Mn},
-    {"o"       , mask_ascii     },
     {"print"   , mask_print     },
     {"punct"   , mask_punct     },
     {"s"       , mask_space     },
@@ -286,19 +292,30 @@ const std::map<std::string, utf32_regex_traits::char_class_type> utf32_regex_tra
     {"w"       , mask_word      },
     {"word"    , mask_word      },
     {"xdigit"  , mask_xdigit    },
+
+    // additional for Columns++:
+
     {"y"       , 0x3fffffe600000000U},
-    {"defined" , 0x3fffffe600000000U}
+    {"defined" , 0x3fffffe600000000U},
+    {"i"       , CatMask_Cs},           // Surrogates are not valid in UTF-8, but we use xDC80-xDCFF
+    {"invalid" , CatMask_Cs},           // to represent invalid UTF-8 bytes
+    {"o"       , mask_ascii}
+
 };
 
+
 const std::map<std::string, utf32_regex_traits::char_type> utf32_regex_traits::character_names = {
+
     {"ht"    , 0x0009},  // Horizontal Tab
     {"lf"    , 0x000a},  // Line Feed
     {"cr"    , 0x000d},  // Carriage Return
     {"sflo"  , 0x1bca0}, // Shorthand Format Letter Overlap
     {"sfco"  , 0x1bca1}, // Shorthand Format Continuing Overlap
     {"sfds"  , 0x1bca2}, // Shorthand Format Down Step
     {"sfus"  , 0x1bca3}, // Shorthand Format Up Step
-                         // from Notepad++ (ScintillaEditView.h):
+
+    // from Notepad++ (ScintillaEditView.h):
+
     {"nul"   , 0x0000},  // Null
     {"soh"   , 0x0001},  // Start of Heading
     {"stx"   , 0x0002},  // Start of Text
@@ -412,7 +429,9 @@ const std::map<std::string, utf32_regex_traits::char_type> utf32_regex_traits::c
     {"iaa"   , 0xfff9},  // interlinear annotation anchor
     {"ias"   , 0xfffa},  // interlinear annotation separator
     {"iat"   , 0xfffb},  // interlinear annotation terminator
-                         // other POSIX names, from Boost (regex_traits_default.hpp):
+
+    // other POSIX names, from Boost (regex_traits_default.hpp):
+
     {"alert"               , 0x07},
     {"backspace"           , 0x08},
     {"tab"                 , 0x09},
@@ -466,9 +485,142 @@ const std::map<std::string, utf32_regex_traits::char_type> utf32_regex_traits::c
     {"left-curly-bracket"  , 0x7b},
     {"vertical-line"       , 0x7c},
     {"right-curly-bracket" , 0x7d},
-    {"tilde"               , 0x7e}
+    {"tilde"               , 0x7e},
+     
+    // invalid UTF-8 bytes:
+
+    {"x80"                 , 0xdc80},
+    {"x81"                 , 0xdc81},
+    {"x82"                 , 0xdc82},
+    {"x83"                 , 0xdc83},
+    {"x84"                 , 0xdc84},
+    {"x85"                 , 0xdc85},
+    {"x86"                 , 0xdc86},
+    {"x87"                 , 0xdc87},
+    {"x88"                 , 0xdc88},
+    {"x89"                 , 0xdc89},
+    {"x8a"                 , 0xdc8a},
+    {"x8b"                 , 0xdc8b},
+    {"x8c"                 , 0xdc8c},
+    {"x8d"                 , 0xdc8d},
+    {"x8e"                 , 0xdc8e},
+    {"x8f"                 , 0xdc8f},
+    {"x90"                 , 0xdc90},
+    {"x91"                 , 0xdc91},
+    {"x92"                 , 0xdc92},
+    {"x93"                 , 0xdc93},
+    {"x94"                 , 0xdc94},
+    {"x95"                 , 0xdc95},
+    {"x96"                 , 0xdc96},
+    {"x97"                 , 0xdc97},
+    {"x98"                 , 0xdc98},
+    {"x99"                 , 0xdc99},
+    {"x9a"                 , 0xdc9a},
+    {"x9b"                 , 0xdc9b},
+    {"x9c"                 , 0xdc9c},
+    {"x9d"                 , 0xdc9d},
+    {"x9e"                 , 0xdc9e},
+    {"x9f"                 , 0xdc9f},
+    {"xa0"                 , 0xdca0},
+    {"xa1"                 , 0xdca1},
+    {"xa2"                 , 0xdca2},
+    {"xa3"                 , 0xdca3},
+    {"xa4"                 , 0xdca4},
+    {"xa5"                 , 0xdca5},
+    {"xa6"                 , 0xdca6},
+    {"xa7"                 , 0xdca7},
+    {"xa8"                 , 0xdca8},
+    {"xa9"                 , 0xdca9},
+    {"xaa"                 , 0xdcaa},
+    {"xab"                 , 0xdcab},
+    {"xac"                 , 0xdcac},
+    {"xad"                 , 0xdcad},
+    {"xae"                 , 0xdcae},
+    {"xaf"                 , 0xdcaf},
+    {"xb0"                 , 0xdcb0},
+    {"xb1"                 , 0xdcb1},
+    {"xb2"                 , 0xdcb2},
+    {"xb3"                 , 0xdcb3},
+    {"xb4"                 , 0xdcb4},
+    {"xb5"                 , 0xdcb5},
+    {"xb6"                 , 0xdcb6},
+    {"xb7"                 , 0xdcb7},
+    {"xb8"                 , 0xdcb8},
+    {"xb9"                 , 0xdcb9},
+    {"xba"                 , 0xdcba},
+    {"xbb"                 , 0xdcbb},
+    {"xbc"                 , 0xdcbc},
+    {"xbd"                 , 0xdcbd},
+    {"xbe"                 , 0xdcbe},
+    {"xbf"                 , 0xdcbf},
+    {"xc0"                 , 0xdcc0},
+    {"xc1"                 , 0xdcc1},
+    {"xc2"                 , 0xdcc2},
+    {"xc3"                 , 0xdcc3},
+    {"xc4"                 , 0xdcc4},
+    {"xc5"                 , 0xdcc5},
+    {"xc6"                 , 0xdcc6},
+    {"xc7"                 , 0xdcc7},
+    {"xc8"                 , 0xdcc8},
+    {"xc9"                 , 0xdcc9},
+    {"xca"                 , 0xdcca},
+    {"xcb"                 , 0xdccb},
+    {"xcc"                 , 0xdccc},
+    {"xcd"                 , 0xdccd},
+    {"xce"                 , 0xdcce},
+    {"xcf"                 , 0xdccf},
+    {"xd0"                 , 0xdcd0},
+    {"xd1"                 , 0xdcd1},
+    {"xd2"                 , 0xdcd2},
+    {"xd3"                 , 0xdcd3},
+    {"xd4"                 , 0xdcd4},
+    {"xd5"                 , 0xdcd5},
+    {"xd6"                 , 0xdcd6},
+    {"xd7"                 , 0xdcd7},
+    {"xd8"                 , 0xdcd8},
+    {"xd9"                 , 0xdcd9},
+    {"xda"                 , 0xdcda},
+    {"xdb"                 , 0xdcdb},
+    {"xdc"                 , 0xdcdc},
+    {"xdd"                 , 0xdcdd},
+    {"xde"                 , 0xdcde},
+    {"xdf"                 , 0xdcdf},
+    {"xe0"                 , 0xdce0},
+    {"xe1"                 , 0xdce1},
+    {"xe2"                 , 0xdce2},
+    {"xe3"                 , 0xdce3},
+    {"xe4"                 , 0xdce4},
+    {"xe5"                 , 0xdce5},
+    {"xe6"                 , 0xdce6},
+    {"xe7"                 , 0xdce7},
+    {"xe8"                 , 0xdce8},
+    {"xe9"                 , 0xdce9},
+    {"xea"                 , 0xdcea},
+    {"xeb"                 , 0xdceb},
+    {"xec"                 , 0xdcec},
+    {"xed"                 , 0xdced},
+    {"xee"                 , 0xdcee},
+    {"xef"                 , 0xdcef},
+    {"xf0"                 , 0xdcf0},
+    {"xf1"                 , 0xdcf1},
+    {"xf2"                 , 0xdcf2},
+    {"xf3"                 , 0xdcf3},
+    {"xf4"                 , 0xdcf4},
+    {"xf5"                 , 0xdcf5},
+    {"xf6"                 , 0xdcf6},
+    {"xf7"                 , 0xdcf7},
+    {"xf8"                 , 0xdcf8},
+    {"xf9"                 , 0xdcf9},
+    {"xfa"                 , 0xdcfa},
+    {"xfb"                 , 0xdcfb},
+    {"xfc"                 , 0xdcfc},
+    {"xfd"                 , 0xdcfd},
+    {"xfe"                 , 0xdcfe},
+    {"xff"                 , 0xdcff}
+
 };
 
+
 const std::set<utf32_regex_traits::string_type> utf32_regex_traits::digraphs = {  // from Boost
     U"ae", U"ch", U"dz", U"lj", U"ll", U"nj", U"ss",
     U"Ae", U"Ch", U"Dz", U"Lj", U"Ll", U"Nj", U"Ss",

Original file line number	Diff line number	Diff line change
`@@ -415,11 +415,11 @@ class ColumnsPlusPlusData {`
`415`	`415`	`if (searchData.autoSetSelection) {`
`416`	`416`	`if (sci.SelectionMode() != Scintilla::SelectionMode::Stream) return SearchRegionNotReady;`
`417`	`417`	`if (sci.Selections() > 1) return SearchRegionNotReady;`
`418`		`- if (sci.SelectionEmpty()) return SearchRegionImpliedAll;`
`419`	`418`	`}`
`420`	`419`	`if (sci.IndicatorValueAt(searchData.indicator, 0)) return SearchRegionReady;`
`421`	`420`	`Scintilla::Position ie = sci.IndicatorEnd(searchData.indicator, 0);`
`422`		`- return ie != 0 && ie != sci.Length() ? SearchRegionReady : SearchRegionNotReady;`
	`421`	`+ if (ie != 0 && ie != sci.Length()) return SearchRegionReady;`
	`422`	`+ return searchData.autoSetSelection && sci.SelectionEmpty() ? SearchRegionImpliedAll : SearchRegionNotReady;`
`423`	`423`	`}`
`424`	`424`
`425`	`425`	`void syncFindButton() {`