4444
4545#include <stdio.h>
4646
47+ #ifdef HAVE_WCHAR_H
48+ #include <wchar.h>
49+ #include <wctype.h>
50+ #endif
51+
4752#include "netdissect-ctype.h"
4853
4954#include "netdissect.h"
5661#define HEXDUMP_HEXSTUFF_PER_LINE \
5762 (HEXDUMP_HEXSTUFF_PER_SHORT * HEXDUMP_SHORTS_PER_LINE)
5863
64+ #ifdef HAVE_WCHAR_H
65+
/*
 * The is_utf8_printable() function below is taken from ngrep.
 *
 * Decode one multi-byte character from the start of s and report whether
 * it is printable.
 *
 *   s          bytes to examine; may be NULL
 *   max_len    number of bytes available at s
 *   width_out  if not NULL and the character is printable, receives its
 *              display width in columns (0 for combining characters,
 *              1 for ordinary characters, 2 for wide characters such as
 *              CJK); it is left untouched when 0 is returned
 *
 * Returns the number of bytes the character occupies if it is printable.
 * Returns 0 if s is NULL, max_len is 0, the byte sequence is invalid or
 * incomplete, the character is NUL, or the character is not printable.
 *
 * The bytes are converted with mbrtowc(), which interprets them in the
 * encoding of the current LC_CTYPE locale, so UTF-8 input is only decoded
 * as UTF-8 when that locale uses UTF-8.  Printability is then checked with
 * iswprint() and the width is obtained from wcwidth() where available.
 */
static int is_utf8_printable(const unsigned char *s, size_t max_len, int *width_out) {
    /* A fresh, zeroed state so every call starts in the initial shift state */
    mbstate_t state = {0};
    wchar_t wc;
    size_t len;
    int is_printable;
    int w;

    if (!s || max_len == 0)
        return 0;

    len = mbrtowc(&wc, (const char *)s, max_len, &state);

    /*
     * (size_t)-1: encoding error.
     * (size_t)-2: incomplete multi-byte sequence (more bytes needed).
     * 0:          the character is NUL.
     */
    if (len == (size_t)-1 || len == (size_t)-2 || len == 0)
        return 0;

#if defined(_WIN32) || defined(_WIN64)
    {
        /*
         * Compare through an unsigned long so the range checks are
         * well-defined whatever the width and signedness of wchar_t.
         */
        unsigned long code = (unsigned long)wc;

        /*
         * Windows iswprint() is too conservative, so additionally accept
         * anything outside the surrogate range that is at or above 0x80.
         */
        is_printable = iswprint(wc) ||
            (code >= 0x80 && code < 0xD800) ||     /* Most of BMP except surrogates */
            (code >= 0xE000 && code < 0x110000);   /* Private use + supplementary planes */

        /* But always reject the C0 controls, DEL and the C1 controls */
        if (code < 0x20 || (code >= 0x7F && code < 0xA0))
            is_printable = 0;
    }
#else
    is_printable = iswprint(wc);
#endif

    if (!is_printable)
        return 0;

#if defined(_WIN32) || defined(_WIN64)
    /*
     * wcwidth() is a POSIX function that the Windows C runtime does not
     * provide; fall back to a width of one column.
     */
    w = 1;
#else
    /* wcwidth() returns 0 for combining characters, which is kept as is */
    w = wcwidth(wc);
    if (w < 0)
        w = 1;  /* No width known for this character: assume one column */
#endif

    if (width_out)
        *width_out = w;
    return (int)len;
}
127+ #endif
128+
59129void
60130ascii_print (netdissect_options * ndo ,
61131 const u_char * cp , u_int length )
@@ -71,28 +141,49 @@ ascii_print(netdissect_options *ndo,
71141 truncated = TRUE;
72142 }
73143 ND_PRINT ("\n" );
74- while (length != 0 ) {
75- s = GET_U_1 (cp );
76- cp ++ ;
77- length -- ;
78- if (s == '\r' ) {
79- /*
80- * Don't print CRs at the end of the line; they
81- * don't belong at the ends of lines on UN*X,
82- * and the standard I/O library will give us one
83- * on Windows so we don't need to print one
84- * ourselves.
85- *
86- * In the middle of a line, just print a '.'.
87- */
88- if (length > 1 && GET_U_1 (cp ) != '\n' )
89- ND_PRINT ("." );
144+
145+ while (length > 0 ) {
146+ int utf8_len ;
147+ int j ;
148+
149+ utf8_len = 0 ;
150+
151+ #ifdef HAVE_WCHAR_H
152+ if (ndo -> ndo_utf8 ) {
153+ utf8_len = is_utf8_printable (cp , length , NULL );
154+ }
155+ #endif
156+
157+ if (utf8_len > 0 ) {
158+ /* Valid printable UTF-8 character */
159+ for (j = 0 ; j < utf8_len ; j ++ )
160+ ND_PRINT ("%c" , cp [j ]);
161+ cp += utf8_len ;
162+ length -= utf8_len ;
163+
90164 } else {
91- if (!ND_ASCII_ISGRAPH (s ) &&
92- (s != '\t' && s != ' ' && s != '\n' ))
93- ND_PRINT ("." );
94- else
95- ND_PRINT ("%c" , s );
165+ s = GET_U_1 (cp );
166+ cp ++ ;
167+ length -- ;
168+ if (s == '\r' ) {
169+ /*
170+ * Don't print CRs at the end of the line; they
171+ * don't belong at the ends of lines on UN*X,
172+ * and the standard I/O library will give us one
173+ * on Windows so we don't need to print one
174+ * ourselves.
175+ *
176+ * In the middle of a line, just print a '.'.
177+ */
178+ if (length > 1 && GET_U_1 (cp ) != '\n' )
179+ ND_PRINT ("." );
180+ } else {
181+ if (!ND_ASCII_ISGRAPH (s ) &&
182+ (s != '\t' && s != ' ' && s != '\n' ))
183+ ND_PRINT ("." );
184+ else
185+ ND_PRINT ("%c" , s );
186+ }
96187 }
97188 }
98189 if (truncated )
@@ -104,52 +195,69 @@ hex_and_ascii_print_with_offset(netdissect_options *ndo, const char *indent,
104195 const u_char * cp , u_int length , u_int offset )
105196{
106197 u_int caplength ;
107- u_int i ;
108- u_int s1 , s2 ;
109- u_int nshorts ;
198+ u_int nbytes_unprinted ;
199+ u_int s1 ;
110200 int truncated = FALSE;
111201 char hexstuff [HEXDUMP_SHORTS_PER_LINE * HEXDUMP_HEXSTUFF_PER_SHORT + 1 ], * hsp ;
112- char asciistuff [ASCII_LINELENGTH + 1 ], * asp ;
202+ char asciistuff [ASCII_LINELENGTH + 1 + 4 ], * asp ;
203+ u_int utf8_bytes_to_skip = 0 ;
113204
114205 caplength = ND_BYTES_AVAILABLE_AFTER (cp );
115206 if (length > caplength ) {
116207 length = caplength ;
117208 truncated = TRUE;
118209 }
119- nshorts = length / sizeof (u_short );
120- i = 0 ;
210+ nbytes_unprinted = 0 ;
121211 hsp = hexstuff ; asp = asciistuff ;
122- while (nshorts != 0 ) {
212+ while (length != 0 ) {
123213 s1 = GET_U_1 (cp );
214+
215+ // insert the leading space of short
216+ if ((hsp - hexstuff ) % HEXDUMP_HEXSTUFF_PER_SHORT == 0 ) {
217+ (void )snprintf (hsp , sizeof (hexstuff ) - (hsp - hexstuff ), " " );
218+ hsp ++ ;
219+ }
220+
221+ // add the byte
222+ (void )snprintf (hsp , sizeof (hexstuff ) - (hsp - hexstuff ), "%02x" , s1 );
223+ hsp += 2 ;
224+
225+ if (utf8_bytes_to_skip > 0 ) {
226+ // only pad the new line
227+ if (nbytes_unprinted == (asp - asciistuff )) {
228+ * (asp ++ ) = ' ' ;
229+ }
230+ utf8_bytes_to_skip -- ;
231+ } else {
232+ // try to add the display (utf8) chars
233+ #ifdef HAVE_WCHAR_H
234+ utf8_bytes_to_skip = ndo -> ndo_utf8 ? is_utf8_printable (cp , length , NULL ) : 0 ;
235+ #endif
236+ if (utf8_bytes_to_skip > 0 ) {
237+ u_int j ;
238+ for (j = 0 ; j < utf8_bytes_to_skip ; j ++ ) {
239+ * (asp ++ ) = (char )GET_U_1 (cp + j );
240+ }
241+ utf8_bytes_to_skip -- ;
242+ } else {
243+ * (asp ++ ) = (char )(ND_ASCII_ISGRAPH (s1 ) ? s1 : '.' );
244+ }
245+ }
246+
124247 cp ++ ;
125- s2 = GET_U_1 (cp );
126- cp ++ ;
127- (void )snprintf (hsp , sizeof (hexstuff ) - (hsp - hexstuff ),
128- " %02x%02x" , s1 , s2 );
129- hsp += HEXDUMP_HEXSTUFF_PER_SHORT ;
130- * (asp ++ ) = (char )(ND_ASCII_ISGRAPH (s1 ) ? s1 : '.' );
131- * (asp ++ ) = (char )(ND_ASCII_ISGRAPH (s2 ) ? s2 : '.' );
132- i ++ ;
133- if (i >= HEXDUMP_SHORTS_PER_LINE ) {
248+ nbytes_unprinted ++ ;
249+ if (nbytes_unprinted >= (HEXDUMP_SHORTS_PER_LINE * sizeof (u_short ))) {
134250 * hsp = * asp = '\0' ;
135251 ND_PRINT ("%s0x%04x: %-*s %s" ,
136252 indent , offset , HEXDUMP_HEXSTUFF_PER_LINE ,
137253 hexstuff , asciistuff );
138- i = 0 ; hsp = hexstuff ; asp = asciistuff ;
254+ nbytes_unprinted = 0 ; hsp = hexstuff ; asp = asciistuff ;
139255 offset += HEXDUMP_BYTES_PER_LINE ;
140256 }
141- nshorts -- ;
142- }
143- if (length & 1 ) {
144- s1 = GET_U_1 (cp );
145- cp ++ ;
146- (void )snprintf (hsp , sizeof (hexstuff ) - (hsp - hexstuff ),
147- " %02x" , s1 );
148- hsp += 3 ;
149- * (asp ++ ) = (char )(ND_ASCII_ISGRAPH (s1 ) ? s1 : '.' );
150- ++ i ;
257+ length -- ;
151258 }
152- if (i > 0 ) {
259+
260+ if (nbytes_unprinted > 0 ) {
153261 * hsp = * asp = '\0' ;
154262 ND_PRINT ("%s0x%04x: %-*s %s" ,
155263 indent , offset , HEXDUMP_HEXSTUFF_PER_LINE ,
@@ -159,6 +267,7 @@ hex_and_ascii_print_with_offset(netdissect_options *ndo, const char *indent,
159267 nd_trunc_longjmp (ndo );
160268}
161269
270+
162271void
163272hex_and_ascii_print (netdissect_options * ndo , const char * indent ,
164273 const u_char * cp , u_int length )
0 commit comments