14
14
* See the License for the specific language governing permissions and
15
15
* limitations under the License.
16
16
*/
17
+ #define NOMINMAX /* tell windows not to define min/max macros */
17
18
#include < log4cxx/logstring.h>
18
19
#include < log4cxx/helpers/charsetdecoder.h>
19
20
#include < log4cxx/helpers/bytebuffer.h>
20
21
#include < log4cxx/helpers/exception.h>
21
22
#include < log4cxx/helpers/pool.h>
23
+ #include < log4cxx/helpers/loglog.h>
22
24
#include < apr_xlate.h>
23
25
#if !defined(LOG4CXX)
24
26
#define LOG4CXX 1
@@ -165,21 +167,14 @@ class MbstowcsCharsetDecoder : public CharsetDecoder
165
167
{
166
168
log4cxx_status_t stat = APR_SUCCESS;
167
169
enum { BUFSIZE = 256 };
168
- wchar_t buf[BUFSIZE];
170
+ wchar_t wbuf[BUFSIZE];
171
+ char cbuf[BUFSIZE*4 ];
169
172
170
173
mbstate_t mbstate;
171
174
memset (&mbstate, 0 , sizeof (mbstate));
172
175
173
176
while (in.remaining () > 0 )
174
177
{
175
- size_t requested = in.remaining ();
176
-
177
- if (requested > BUFSIZE - 1 )
178
- {
179
- requested = BUFSIZE - 1 ;
180
- }
181
-
182
- memset (buf, 0 , BUFSIZE * sizeof (wchar_t ));
183
178
const char * src = in.current ();
184
179
185
180
if (*src == 0 )
@@ -189,21 +184,31 @@ class MbstowcsCharsetDecoder : public CharsetDecoder
189
184
}
190
185
else
191
186
{
192
- size_t converted = mbsrtowcs (buf,
187
+ auto available = std::min (sizeof (cbuf) - 1 , in.remaining ());
188
+ strncpy (cbuf, src, available);
189
+ cbuf[available] = 0 ;
190
+ src = cbuf;
191
+ size_t wCharCount = mbsrtowcs (wbuf,
193
192
&src,
194
- requested ,
193
+ BUFSIZE - 1 ,
195
194
&mbstate);
195
+ auto converted = src - cbuf;
196
+ in.position (in.position () + converted);
196
197
197
- if (converted == (size_t ) -1 )
198
+ if (wCharCount == (size_t ) -1 ) // Illegal byte sequence?
198
199
{
199
- stat = APR_BADARG;
200
- in.position (src - in.data ());
200
+ LogString msg (LOG4CXX_STR (" Illegal byte sequence at " ));
201
+ msg.append (std::to_wstring (in.position ()));
202
+ msg.append (LOG4CXX_STR (" of " ));
203
+ msg.append (std::to_wstring (in.limit ()));
204
+ LogLog::warn (msg);
205
+ stat = APR_BADCH;
201
206
break ;
202
207
}
203
208
else
204
209
{
205
- stat = append (out, buf) ;
206
- in. position (in. position () + requested );
210
+ wbuf[wCharCount] = 0 ;
211
+ stat = append (out, wbuf );
207
212
}
208
213
}
209
214
}
@@ -418,73 +423,60 @@ class USASCIICharsetDecoder : public CharsetDecoder
418
423
};
419
424
420
425
/* *
421
- * Charset decoder that uses an embedded CharsetDecoder consistent
422
- * with current locale settings.
426
+ * Charset decoder that uses current locale settings.
423
427
*/
424
428
class LocaleCharsetDecoder : public CharsetDecoder
425
429
{
426
430
public:
427
- LocaleCharsetDecoder () : pool(), decoder(), encoding()
428
- {
429
- }
430
- virtual ~LocaleCharsetDecoder ()
431
+ LocaleCharsetDecoder () : state()
431
432
{
432
433
}
433
- virtual log4cxx_status_t decode (ByteBuffer& in,
434
- LogString& out)
434
+ log4cxx_status_t decode (ByteBuffer& in, LogString& out) override
435
435
{
436
+ log4cxx_status_t result = APR_SUCCESS;
436
437
const char * p = in.current ();
437
438
size_t i = in.position ();
439
+ size_t remain = in.limit () - i;
438
440
#if !LOG4CXX_CHARSET_EBCDIC
439
-
440
- for (; i < in.limit () && ((unsigned int ) *p) < 0x80 ; i++, p++)
441
+ if (std::mbsinit (&this ->state )) // ByteBuffer not partially decoded?
441
442
{
442
- out.append (1 , *p);
443
+ // Copy single byte characters
444
+ for (; 0 < remain && ((unsigned int ) *p) < 0x80 ; --remain, ++i, p++)
445
+ {
446
+ out.append (1 , *p);
447
+ }
443
448
}
444
-
445
- in.position (i);
446
449
#endif
447
-
448
- if (i < in. limit () )
450
+ // Decode characters that may be represented by multiple bytes
451
+ while ( 0 < remain )
449
452
{
450
- Pool subpool;
451
- const char * enc = apr_os_locale_encoding (subpool.getAPRPool ());
453
+ wchar_t ch;
454
+ size_t n = std::mbrtowc (&ch, p, remain, &this ->state );
455
+ if (0 == n) // NULL encountered?
452
456
{
453
- std::unique_lock<std::mutex> lock (mutex);
454
-
455
- if (enc == 0 )
456
- {
457
- if (decoder == 0 )
458
- {
459
- encoding = " C" ;
460
- decoder.reset ( new USASCIICharsetDecoder () );
461
- }
462
- }
463
- else if (encoding != enc)
464
- {
465
- encoding = enc;
466
-
467
- try
468
- {
469
- LOG4CXX_DECODE_CHAR (e, encoding);
470
- decoder = getDecoder (e);
471
- }
472
- catch (IllegalArgumentException&)
473
- {
474
- decoder.reset ( new USASCIICharsetDecoder () );
475
- }
476
- }
457
+ ++i;
458
+ break ;
459
+ }
460
+ if (static_cast <std::size_t >(-1 ) == n) // decoding error?
461
+ {
462
+ result = APR_BADARG;
463
+ break ;
464
+ }
465
+ if (static_cast <std::size_t >(-2 ) == n) // incomplete sequence?
466
+ {
467
+ break ;
477
468
}
478
- return decoder->decode (in, out);
469
+ Transcoder::encode (static_cast <unsigned int >(ch), out);
470
+ remain -= n;
471
+ i += n;
472
+ p += n;
479
473
}
480
-
481
- return APR_SUCCESS ;
474
+ in. position (i);
475
+ return result ;
482
476
}
477
+
483
478
private:
484
- Pool pool;
485
- std::mutex mutex;
486
- CharsetDecoderPtr decoder;
487
- std::string encoding;
479
+ std::mbstate_t state;
488
480
};
489
481
490
482
@@ -561,23 +553,30 @@ CharsetDecoderPtr CharsetDecoder::getISOLatinDecoder()
561
553
CharsetDecoderPtr CharsetDecoder::getDecoder (const LogString& charset)
562
554
{
563
555
if (StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" UTF-8" ), LOG4CXX_STR (" utf-8" )) ||
564
- StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" UTF8" ), LOG4CXX_STR (" utf8" )))
556
+ StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" UTF8" ), LOG4CXX_STR (" utf8" )) ||
557
+ StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" CP65001" ), LOG4CXX_STR (" cp65001" )))
565
558
{
566
559
return std::make_shared<UTF8CharsetDecoder>();
567
560
}
568
561
else if (StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" C" ), LOG4CXX_STR (" c" )) ||
569
562
charset == LOG4CXX_STR (" 646" ) ||
570
563
StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" US-ASCII" ), LOG4CXX_STR (" us-ascii" )) ||
571
564
StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" ISO646-US" ), LOG4CXX_STR (" iso646-US" )) ||
572
- StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" ANSI_X3.4-1968" ), LOG4CXX_STR (" ansi_x3.4-1968" )))
565
+ StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" ANSI_X3.4-1968" ), LOG4CXX_STR (" ansi_x3.4-1968" )) ||
566
+ StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" CP20127" ), LOG4CXX_STR (" cp20127" )))
573
567
{
574
568
return std::make_shared<USASCIICharsetDecoder>();
575
569
}
576
570
else if (StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" ISO-8859-1" ), LOG4CXX_STR (" iso-8859-1" )) ||
577
- StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" ISO-LATIN-1" ), LOG4CXX_STR (" iso-latin-1" )))
571
+ StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" ISO-LATIN-1" ), LOG4CXX_STR (" iso-latin-1" )) ||
572
+ StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" CP1252" ), LOG4CXX_STR (" cp1252" )))
578
573
{
579
574
return std::make_shared<ISOLatinCharsetDecoder>();
580
575
}
576
+ else if (StringHelper::equalsIgnoreCase (charset, LOG4CXX_STR (" LOCALE" ), LOG4CXX_STR (" locale" )))
577
+ {
578
+ return std::make_shared<LocaleCharsetDecoder>();
579
+ }
581
580
582
581
#if APR_HAS_XLATE
583
582
return std::make_shared<APRCharsetDecoder>(charset);
0 commit comments