@@ -10,8 +10,10 @@ in the source distribution for its full text.
1010#include "XUtils.h"
1111
1212#include <assert.h>
13+ #include <ctype.h> // IWYU pragma: keep
1314#include <errno.h>
1415#include <fcntl.h>
16+ #include <limits.h> // IWYU pragma: keep
1517#include <math.h>
1618#include <stdarg.h>
1719#include <stdint.h>
@@ -224,6 +226,257 @@ size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t
224226 return i ;
225227}
226228
229+ #ifdef HAVE_LIBNCURSESW
230+ static void String_encodeWChar (WCharEncoderState * ps , wchar_t wc ) {
231+ assert (!ps -> buf || ps -> pos < ps -> size );
232+
233+ char tempBuf [MB_LEN_MAX ];
234+ char * dest = ps -> buf ? (char * )ps -> buf + ps -> pos : tempBuf ;
235+
236+ // It is unnecessarily expensive to fix the output string if the caller
237+ // gives an incorrect buffer size. This function would not support any
238+ // truncation of the output string.
239+ size_t len = wcrtomb (dest , wc , & ps -> mbState );
240+ assert (len > 0 );
241+ if (len == (size_t )-1 ) {
242+ assert (len != (size_t )-1 );
243+ fail ();
244+ }
245+ if (ps -> buf && len > ps -> size - ps -> pos ) {
246+ assert (!ps -> buf || len <= ps -> size - ps -> pos );
247+ fail ();
248+ }
249+
250+ ps -> pos += len ;
251+ }
252+ #else
253+ static void String_encodeWChar (WCharEncoderState * ps , int c ) {
254+ assert (!ps -> buf || ps -> pos < ps -> size );
255+
256+ char * buf = ps -> buf ;
257+ if (buf ) {
258+ buf [ps -> pos ] = (char )c ;
259+ }
260+
261+ ps -> pos += 1 ;
262+ }
263+ #endif
264+
265+ void EncodePrintableString (WCharEncoderState * ps , const char * src , size_t maxLen , EncodeWChar encodeWChar ) {
266+ assert (src || maxLen == 0 );
267+
268+ size_t pos = 0 ;
269+ bool wasReplaced = false;
270+
271+ #ifdef HAVE_LIBNCURSESW
272+ const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?' ;
273+ wchar_t ch ;
274+
275+ mbstate_t decState ;
276+ memset (& decState , 0 , sizeof (decState ));
277+ #else
278+ const char replacementChar = '?' ;
279+ char ch ;
280+ #endif
281+
282+ do {
283+ size_t len = 0 ;
284+ bool shouldReplace = false;
285+ ch = 0 ;
286+
287+ if (pos < maxLen ) {
288+ // Read the next character from the byte sequence
289+ #ifdef HAVE_LIBNCURSESW
290+ mbstate_t newState ;
291+ memcpy (& newState , & decState , sizeof (newState ));
292+ len = mbrtowc (& ch , & src [pos ], maxLen - pos , & newState );
293+
294+ assert (len != 0 || ch == 0 );
295+ switch (len ) {
296+ case (size_t )-2 :
297+ errno = EILSEQ ;
298+ shouldReplace = true;
299+ len = maxLen - pos ;
300+ break ;
301+
302+ case (size_t )-1 :
303+ shouldReplace = true;
304+ len = 1 ;
305+ break ;
306+
307+ default :
308+ memcpy (& decState , & newState , sizeof (decState ));
309+ }
310+ #else
311+ len = 1 ;
312+ ch = src [pos ];
313+ #endif
314+ }
315+
316+ pos += len ;
317+
318+ // Filter unprintable characters
319+ if (!shouldReplace && ch != 0 ) {
320+ #ifdef HAVE_LIBNCURSESW
321+ shouldReplace = !iswprint (ch );
322+ #else
323+ shouldReplace = !isprint ((unsigned char )ch );
324+ #endif
325+ }
326+
327+ if (shouldReplace ) {
328+ ch = replacementChar ;
329+ if (wasReplaced ) {
330+ continue ;
331+ }
332+ }
333+ wasReplaced = shouldReplace ;
334+
335+ encodeWChar (ps , ch );
336+ } while (ch != 0 );
337+ }
338+
339+ char * String_makePrintable (const char * str , size_t maxLen ) {
340+ WCharEncoderState encState ;
341+
342+ memset (& encState , 0 , sizeof (encState ));
343+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
344+ size_t size = encState .pos ;
345+ assert (size > 0 );
346+
347+ memset (& encState , 0 , sizeof (encState ));
348+ char * buf = xMalloc (size );
349+ encState .size = size ;
350+ encState .buf = buf ;
351+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
352+ assert (encState .pos == size );
353+
354+ return buf ;
355+ }
356+
357+ bool String_decodeNextWChar (MBStringDecoderState * ps ) {
358+ if (!ps -> str || ps -> maxLen == 0 ) {
359+ return false;
360+ }
361+
362+ // If the previous call of this function encounters an invalid sequence,
363+ // do not continue (because the "mbState" object for mbrtowc() is
364+ // undefined). The caller is supposed to reset the state.
365+ #ifdef HAVE_LIBNCURSESW
366+ bool isStateDefined = ps -> ch != WEOF ;
367+ #else
368+ bool isStateDefined = ps -> ch != EOF ;
369+ #endif
370+ if (!isStateDefined ) {
371+ return false;
372+ }
373+
374+ #ifdef HAVE_LIBNCURSESW
375+ wchar_t wc ;
376+ size_t len = mbrtowc (& wc , ps -> str , ps -> maxLen , & ps -> mbState );
377+ switch (len ) {
378+ case (size_t )-1 :
379+ // Invalid sequence
380+ ps -> ch = WEOF ;
381+ return false;
382+
383+ case (size_t )-2 :
384+ // Incomplete sequence
385+ ps -> str += ps -> maxLen ;
386+ ps -> maxLen = 0 ;
387+ return false;
388+
389+ case 0 :
390+ assert (wc == 0 );
391+
392+ ps -> str = NULL ;
393+ ps -> maxLen = 0 ;
394+ ps -> ch = wc ;
395+ return true;
396+
397+ default :
398+ ps -> str += len ;
399+ ps -> maxLen -= len ;
400+ ps -> ch = wc ;
401+ }
402+ return true;
403+ #else
404+ ps -> ch = * ps -> str ;
405+ if (ps -> ch == 0 ) {
406+ ps -> str = NULL ;
407+ ps -> maxLen = 0 ;
408+ } else {
409+ ps -> str ++ ;
410+ ps -> maxLen -- ;
411+ }
412+ return true;
413+ #endif
414+ }
415+
416+ #ifndef HAVE_STRNLEN
/*
 * Fallback for platforms without strnlen(): returns the number of bytes
 * before the first null byte in str, examining at most maxLen bytes.
 * Returns maxLen if no null byte is found within that range.
 */
static size_t strnlen(const char* str, size_t maxLen) {
   size_t count = 0;
   while (count < maxLen && str[count] != '\0') {
      count++;
   }
   return count;
}
425+ #endif
426+
/*
 * Measures how many terminal columns the leading part of *str occupies.
 *
 * str:      in/out. On entry, points to the string to measure. On return,
 *           it has been advanced past the characters that were counted.
 * maxLen:   maximum number of bytes of *str to examine.
 * maxWidth: maximum number of columns to count; a negative value means
 *           no limit (INT_MAX).
 *
 * Returns the total width of the counted characters. Counting stops at a
 * null character, at the end of the input, at a character that
 * String_decodeNextWChar() cannot decode, at a character for which
 * wcwidth() reports a negative width, or at the first character that
 * would exceed maxWidth.
 */
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
   assert(*str || maxLen == 0);

   if (maxWidth < 0) {
      maxWidth = INT_MAX;
   }

#ifdef HAVE_LIBNCURSESW
   MBStringDecoderState decoder;
   memset(&decoder, 0, sizeof(decoder));
   decoder.str = *str;
   decoder.maxLen = maxLen;

   int usedWidth = 0;

   while (String_decodeNextWChar(&decoder) && decoder.ch != 0) {
      const int charWidth = wcwidth((wchar_t)decoder.ch);
      if (charWidth < 0) {
         assert(charWidth >= 0);
         break;
      }

      // Stop before the character that would not fit any more.
      if (charWidth > maxWidth - usedWidth) {
         break;
      }

      usedWidth += charWidth;

      // A zero-width character only moves *str forward right away when the
      // working encoding is UTF-8. In Unicode, combining characters are
      // always placed after the base character, but some legacy 8-bit
      // encodings instead place combining characters before the base
      // character; there, *str only moves once a character with a
      // non-zero width has been accepted.
      if (CRT_utf8 || charWidth > 0) {
         *str = decoder.str;
      }
   }

   assert(decoder.ch != WEOF);
   return usedWidth;
#else
   // One byte is counted as one column.
   const size_t limit = MINIMUM((unsigned int)maxWidth, maxLen);
   const size_t len = strnlen(*str, limit);
   *str += len;
   return (int)len;
#endif
}
479+
227480int xAsprintf (char * * strp , const char * fmt , ...) {
228481 va_list vl ;
229482 va_start (vl , fmt );
0 commit comments