From 2ee0b7cd12d8a2341df17bc846cae3a7de718a31 Mon Sep 17 00:00:00 2001 From: Mikhail Pilin Date: Fri, 19 Oct 2018 11:41:41 +0300 Subject: [PATCH] Add utf_selector and is_utf_same. --- README.md | 72 +++++++------ include/ww898/utf_converters.hpp | 39 ++++++- test/utf_converters_test.cpp | 171 +++++++++++++++++++++---------- 3 files changed, 190 insertions(+), 92 deletions(-) diff --git a/README.md b/README.md index 84c997f..4c81e77 100644 --- a/README.md +++ b/README.md @@ -29,16 +29,20 @@ Tested on following compilers: ```cpp // यूनिकोड -static char const u8_orig[] = "\xE0\xA4\xAF\xE0\xA5\x82\xE0\xA4\xA8\xE0\xA4\xBF\xE0\xA4\x95\xE0\xA5\x8B\xE0\xA4\xA1"; -using namespace ww898; +static char const u8s[] = "\xE0\xA4\xAF\xE0\xA5\x82\xE0\xA4\xA8\xE0\xA4\xBF\xE0\xA4\x95\xE0\xA5\x8B\xE0\xA4\xA1"; +using namespace ww898::utf; std::u16string u16; -utf::convz(u8_orig, std::back_inserter(u16)); +convz, utf16>(u8s, std::back_inserter(u16)); std::u32string u32; -utf::conv(u16.begin(), u16.end(), std::back_inserter(u32)); +conv>(u16.begin(), u16.end(), std::back_inserter(u32)); std::vector u8; -utf::convz(u32.begin(), std::back_inserter(u8)); -std::wstring wstr; -utf::convz(u8.begin(), std::back_inserter(wstr)); +convz(u32.data(), std::back_inserter(u8)); +std::wstring uw; +conv(u8s, u8s + sizeof(u8s), std::back_inserter(uw)); +static_assert(is_utf_same::value, "Fail"); +static_assert(1 == + (is_utf_same::value ? 1 : 0) + + (is_utf_same::value ? 1 : 0), "Fail"); ``` ## Performance @@ -237,33 +241,33 @@ UTF8 ==> UTFW : 0.340384930s (+196.02%) #### MacOS High Sierra v10.13.6 (Clang v6.0.0) ```cpp -Running 489 test cases... -sizeof wchar_t: 4 -UTFW: UTF32 -Resolution: 2793647583 -UTF8 ==> UTF8 : 0.111039205s -UTF8 ==> UTF16: 0.143631552s -UTF8 ==> UTF32: 0.105463425s -UTF8 ==> UTFW : 0.105106640s -UTF16 ==> UTF8 : 0.158074631s -UTF16 ==> UTF16: 0.055528284s -UTF16 ==> UTF32: 0.063203264s -UTF16 ==> UTFW : 0.063167823s -UTF32 ==> UTF8 : 0.123977591s -UTF32 ==> UTF16: 0.061630976s -UTF32 ==> UTF32: 0.027633560s -UTF32 ==> UTFW : 0.029324893s -UTFW ==> UTF8 : 0.123948012s -UTFW ==> UTF16: 0.064873256s -UTFW ==> UTF32: 0.030606730s -UTFW ==> UTFW : 0.027596372s -codecvt_utf8_utf16: -UTF16 ==> UTF8 : 0.151798551s (-3.97%) -UTF8 ==> UTF16: 0.256203078s (+78.38%) -codecvt_utf8: -UTFW ==> UTF8 : 0.137034385s (+10.56%) -UTF8 ==> UTFW : 0.360953804s (+243.42%) - +Running 489 test cases... +sizeof wchar_t: 4 +UTFW: UTF32 +Resolution: 2793647583 +UTF8 ==> UTF8 : 0.111039205s +UTF8 ==> UTF16: 0.143631552s +UTF8 ==> UTF32: 0.105463425s +UTF8 ==> UTFW : 0.105106640s +UTF16 ==> UTF8 : 0.158074631s +UTF16 ==> UTF16: 0.055528284s +UTF16 ==> UTF32: 0.063203264s +UTF16 ==> UTFW : 0.063167823s +UTF32 ==> UTF8 : 0.123977591s +UTF32 ==> UTF16: 0.061630976s +UTF32 ==> UTF32: 0.027633560s +UTF32 ==> UTFW : 0.029324893s +UTFW ==> UTF8 : 0.123948012s +UTFW ==> UTF16: 0.064873256s +UTFW ==> UTF32: 0.030606730s +UTFW ==> UTFW : 0.027596372s +codecvt_utf8_utf16: +UTF16 ==> UTF8 : 0.151798551s (-3.97%) +UTF8 ==> UTF16: 0.256203078s (+78.38%) +codecvt_utf8: +UTFW ==> UTF8 : 0.137034385s (+10.56%) +UTF8 ==> UTFW : 0.360953804s (+243.42%) + *** No errors detected ``` diff --git a/include/ww898/utf_converters.hpp b/include/ww898/utf_converters.hpp index 724c73f..1b40c48 100644 --- a/include/ww898/utf_converters.hpp +++ b/include/ww898/utf_converters.hpp @@ -456,8 +456,9 @@ template< typename Utf, typename Outf, typename It, + typename Eit, typename Oit> -Oit conv(It && it, It && eit, Oit && oit) +Oit conv(It && it, Eit && eit, Oit && oit) { return detail::conv_strategy::type, @@ -468,7 +469,7 @@ Oit conv(It && it, It && eit, Oit && oit) ? detail::conv_impl::random_interator : detail::conv_impl::normal>()( std::forward(it), - std::forward(eit), + std::forward(eit), std::forward(oit)); } @@ -478,11 +479,39 @@ template< size_t wchar_size> struct wchar_selector {}; -template<> struct wchar_selector<2> { typedef utf16 utfw_type; }; -template<> struct wchar_selector<4> { typedef utf32 utfw_type; }; +template<> struct wchar_selector<2> { typedef utf16 type; }; +template<> struct wchar_selector<4> { typedef utf32 type; }; } -typedef detail::wchar_selector::utfw_type utfw; +typedef detail::wchar_selector::type utfw; + +namespace detail { + +template< + typename Ch> +struct utf_selector {}; + +template<> struct utf_selector< char> { typedef utf8 type; }; +template<> struct utf_selector { typedef utf8 type; }; +template<> struct utf_selector { typedef utf8 type; }; +template<> struct utf_selector { typedef utf16 type; }; +template<> struct utf_selector { typedef utf32 type; }; +template<> struct utf_selector { typedef utfw type; }; + +} + +template< + typename Ch> +using utf_selector = detail::utf_selector::type>; + +template< + typename Ch> +using utf_selector_t = typename utf_selector::type; + +template< + typename Ch1, + typename Ch2> +using is_utf_same = std::is_same, utf_selector_t>; }} diff --git a/test/utf_converters_test.cpp b/test/utf_converters_test.cpp index 41a9acf..b711907 100644 --- a/test/utf_converters_test.cpp +++ b/test/utf_converters_test.cpp @@ -42,7 +42,6 @@ #include #include #include -#include #if defined(__linux__) || defined(__APPLE__) #include @@ -63,16 +62,16 @@ namespace { struct unicode_tuple final { - std::string utf8; - std::u16string utf16; - std::u32string utf32; - std::wstring utfw; + std::string u8 ; + std::u16string u16; + std::u32string u32; + std::wstring uw ; }; struct supported_tuple final { - std::string utf8; - std::u32string utf32; + std::string u8 ; + std::u32string u32; }; unicode_tuple const unicode_test_data[] = @@ -384,17 +383,17 @@ supported_tuple const supported_test_data[] = template< typename Ch> -struct utf_resolver {}; +struct utf_namer {}; -template<> struct utf_resolver { typedef utf::utf8 utf_type; static char const name[]; }; -template<> struct utf_resolver { typedef utf::utf16 utf_type; static char const name[]; }; -template<> struct utf_resolver { typedef utf::utf32 utf_type; static char const name[]; }; -template<> struct utf_resolver { typedef utf::utfw utf_type; static char const name[]; }; +template<> struct utf_namer { static char const value[]; }; +template<> struct utf_namer { static char const value[]; }; +template<> struct utf_namer { static char const value[]; }; +template<> struct utf_namer { static char const value[]; }; -char const utf_resolver::name[] = "UTF8"; -char const utf_resolver::name[] = "UTF16"; -char const utf_resolver::name[] = "UTF32"; -char const utf_resolver::name[] = "UTFW"; +char const utf_namer::value[] = "UTF8"; +char const utf_namer::value[] = "UTF16"; +char const utf_namer::value[] = "UTF32"; +char const utf_namer::value[] = "UTFW"; template< typename Ch, @@ -403,8 +402,8 @@ void run_conv_test( std::basic_string const & buf, std::basic_string const & obuf) { - typedef typename utf_resolver::utf_type utf_type; - typedef typename utf_resolver::utf_type outf_type; + typedef utf::utf_selector_t utf_type; + typedef utf::utf_selector_t outf_type; std::basic_string buf_tmp0; utf::convz(buf.data(), std::back_inserter(buf_tmp0)); @@ -421,7 +420,7 @@ template< void run_size_test( std::basic_string const & buf) { - typedef typename utf_resolver::utf_type utf_type; + typedef utf::utf_selector_t utf_type; size_t total_size0 = 0; for (auto str = buf.data(); *str;) @@ -442,33 +441,80 @@ void run_size_test( } -BOOST_DATA_TEST_CASE(conv_utf8_to_utf8 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf8 , tuple.utf8 ); } -BOOST_DATA_TEST_CASE(conv_utf8_to_utf16 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf8 , tuple.utf16); } -BOOST_DATA_TEST_CASE(conv_utf8_to_utf32 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf8 , tuple.utf32); } -BOOST_DATA_TEST_CASE(conv_utf8_to_utfw , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf8 , tuple.utfw ); } -BOOST_DATA_TEST_CASE(conv_utf16_to_utf8 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf16, tuple.utf8 ); } -BOOST_DATA_TEST_CASE(conv_utf16_to_utf16, boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf16, tuple.utf16); } -BOOST_DATA_TEST_CASE(conv_utf16_to_utf32, boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf16, tuple.utf32); } -BOOST_DATA_TEST_CASE(conv_utf16_to_utfw , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf16, tuple.utfw ); } -BOOST_DATA_TEST_CASE(conv_utf32_to_utf8 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf32, tuple.utf8 ); } -BOOST_DATA_TEST_CASE(conv_utf32_to_utf16, boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf32, tuple.utf16); } -BOOST_DATA_TEST_CASE(conv_utf32_to_utf32, boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf32, tuple.utf32); } -BOOST_DATA_TEST_CASE(conv_utf32_to_utfw , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utf32, tuple.utfw ); } -BOOST_DATA_TEST_CASE(conv_utfw_to_utf8 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utfw , tuple.utf8 ); } -BOOST_DATA_TEST_CASE(conv_utfw_to_utf16 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utfw , tuple.utf16); } -BOOST_DATA_TEST_CASE(conv_utfw_to_utf32 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utfw , tuple.utf32); } -BOOST_DATA_TEST_CASE(conv_utfw_to_utfw , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.utfw , tuple.utfw ); } - -BOOST_DATA_TEST_CASE(conv_utf32_to_utf8_supported, boost::make_iterator_range(supported_test_data), tuple) { run_conv_test(tuple.utf32, tuple.utf8 ); } -BOOST_DATA_TEST_CASE(conv_utf8_to_utf32_supported, boost::make_iterator_range(supported_test_data), tuple) { run_conv_test(tuple.utf8 , tuple.utf32); } - -BOOST_DATA_TEST_CASE(size_utf8 , boost::make_iterator_range(unicode_test_data), tuple) { run_size_test(tuple.utf8 ); } -BOOST_DATA_TEST_CASE(size_utf16, boost::make_iterator_range(unicode_test_data), tuple) { run_size_test(tuple.utf16); } -BOOST_DATA_TEST_CASE(size_utf32, boost::make_iterator_range(unicode_test_data), tuple) { run_size_test(tuple.utf32); } -BOOST_DATA_TEST_CASE(size_utfw , boost::make_iterator_range(unicode_test_data), tuple) { run_size_test(tuple.utfw ); } - -BOOST_DATA_TEST_CASE(size_utf8_supported , boost::make_iterator_range(supported_test_data), tuple) { run_size_test(tuple.utf8 ); } -BOOST_DATA_TEST_CASE(size_utf32_supported, boost::make_iterator_range(supported_test_data), tuple) { run_size_test(tuple.utf32); } +BOOST_DATA_TEST_CASE(conv_u8_to_u8 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u8 , tuple.u8 ); } +BOOST_DATA_TEST_CASE(conv_u8_to_u16 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u8 , tuple.u16); } +BOOST_DATA_TEST_CASE(conv_u8_to_u32 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u8 , tuple.u32); } +BOOST_DATA_TEST_CASE(conv_u8_to_uw , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u8 , tuple.uw ); } +BOOST_DATA_TEST_CASE(conv_u16_to_u8 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u16, tuple.u8 ); } +BOOST_DATA_TEST_CASE(conv_u16_to_u16, boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u16, tuple.u16); } +BOOST_DATA_TEST_CASE(conv_u16_to_u32, boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u16, tuple.u32); } +BOOST_DATA_TEST_CASE(conv_u16_to_uw , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u16, tuple.uw ); } +BOOST_DATA_TEST_CASE(conv_u32_to_u8 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u32, tuple.u8 ); } +BOOST_DATA_TEST_CASE(conv_u32_to_u16, boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u32, tuple.u16); } +BOOST_DATA_TEST_CASE(conv_u32_to_u32, boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u32, tuple.u32); } +BOOST_DATA_TEST_CASE(conv_u32_to_uw , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.u32, tuple.uw ); } +BOOST_DATA_TEST_CASE(conv_uw_to_u8 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.uw , tuple.u8 ); } +BOOST_DATA_TEST_CASE(conv_uw_to_u16 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.uw , tuple.u16); } +BOOST_DATA_TEST_CASE(conv_uw_to_u32 , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.uw , tuple.u32); } +BOOST_DATA_TEST_CASE(conv_uw_to_uw , boost::make_iterator_range(unicode_test_data), tuple) { run_conv_test(tuple.uw , tuple.uw ); } + +BOOST_DATA_TEST_CASE(conv_u32_to_u8_supported, boost::make_iterator_range(supported_test_data), tuple) { run_conv_test(tuple.u32, tuple.u8 ); } +BOOST_DATA_TEST_CASE(conv_u8_to_u32_supported, boost::make_iterator_range(supported_test_data), tuple) { run_conv_test(tuple.u8 , tuple.u32); } + +BOOST_DATA_TEST_CASE(size_u8 , boost::make_iterator_range(unicode_test_data), tuple) { run_size_test(tuple.u8 ); } +BOOST_DATA_TEST_CASE(size_u16, boost::make_iterator_range(unicode_test_data), tuple) { run_size_test(tuple.u16); } +BOOST_DATA_TEST_CASE(size_u32, boost::make_iterator_range(unicode_test_data), tuple) { run_size_test(tuple.u32); } +BOOST_DATA_TEST_CASE(size_uw , boost::make_iterator_range(unicode_test_data), tuple) { run_size_test(tuple.uw ); } + +BOOST_DATA_TEST_CASE(size_u8_supported , boost::make_iterator_range(supported_test_data), tuple) { run_size_test(tuple.u8 ); } +BOOST_DATA_TEST_CASE(size_u32_supported, boost::make_iterator_range(supported_test_data), tuple) { run_size_test(tuple.u32); } + +BOOST_STATIC_ASSERT(utf::is_utf_same::value); +BOOST_STATIC_ASSERT(utf::is_utf_same::value); +BOOST_STATIC_ASSERT(utf::is_utf_same::value); + +BOOST_STATIC_ASSERT(utf::is_utf_same::value); +BOOST_STATIC_ASSERT(utf::is_utf_same::value); +BOOST_STATIC_ASSERT(utf::is_utf_same::value); + +BOOST_STATIC_ASSERT(utf::is_utf_same::value); +BOOST_STATIC_ASSERT(utf::is_utf_same::value); +BOOST_STATIC_ASSERT(utf::is_utf_same::value); + +BOOST_STATIC_ASSERT(utf::is_utf_same::value); +BOOST_STATIC_ASSERT(utf::is_utf_same::value); +BOOST_STATIC_ASSERT(utf::is_utf_same::value); + +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); + +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); + +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); + +#if defined(_WIN32) + +BOOST_STATIC_ASSERT( utf::is_utf_same::value); +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); + +BOOST_STATIC_ASSERT( utf::is_utf_same::value); +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); + +#elif defined(__linux__) || defined(__APPLE__) + +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); +BOOST_STATIC_ASSERT( utf::is_utf_same::value); + +BOOST_STATIC_ASSERT(!utf::is_utf_same::value); +BOOST_STATIC_ASSERT( utf::is_utf_same::value); + +#else + #error Unknown platform +#endif namespace { @@ -507,7 +553,7 @@ uint64_t get_time() throw() #endif #else - #error get_time() is not implmeneted + #error Unknown compiler #endif } @@ -561,7 +607,7 @@ uint64_t get_time_resolution() throw() auto const elapsed_time = end_time - beg_time; return static_cast(elapsed_time * 1000000000ull / elapsed_orig.count()); #else - #error get_time_resolution() is not implmeneted + #error Unknown platform #endif } @@ -593,8 +639,8 @@ void dump_name() { static size_t const name_width = 5; std::cout << - std::left << std::setw(name_width) << utf_resolver::name << " ==> " << - std::left << std::setw(name_width) << utf_resolver::name << ": "; + std::left << std::setw(name_width) << utf_namer::value << " ==> " << + std::left << std::setw(name_width) << utf_namer::value << ": "; } void dump_duration(double const duration) @@ -622,8 +668,8 @@ double run_measure( std::vector const & buf, std::vector const & obuf) { - typedef typename utf_resolver::utf_type utf_type; - typedef typename utf_resolver::utf_type outf_type; + typedef utf::utf_selector_t utf_type; + typedef utf::utf_selector_t outf_type; std::vector res; res.reserve(obuf.capacity()); @@ -682,7 +728,7 @@ BOOST_AUTO_TEST_CASE(performance, WW898_PERFORMANCE_TESTS_MODE) std::cout << "sizeof wchar_t: " << sizeof(wchar_t) << std::endl << - utf_resolver::name << ": UTF" << 8 * sizeof(wchar_t) << std::endl; + utf_namer::value << ": UTF" << 8 * sizeof(wchar_t) << std::endl; auto const resolution = get_time_resolution(); std::cout << "Resolution: " << resolution << std::endl; @@ -794,6 +840,25 @@ BOOST_AUTO_TEST_CASE(performance, WW898_PERFORMANCE_TESTS_MODE) } } +BOOST_AUTO_TEST_CASE(example, WW898_PERFORMANCE_TESTS_MODE) +{ + // यूनिकोड + static char const u8s[] = "\xE0\xA4\xAF\xE0\xA5\x82\xE0\xA4\xA8\xE0\xA4\xBF\xE0\xA4\x95\xE0\xA5\x8B\xE0\xA4\xA1"; + using namespace ww898::utf; + std::u16string u16; + convz, utf16>(u8s, std::back_inserter(u16)); + std::u32string u32; + conv>(u16.begin(), u16.end(), std::back_inserter(u32)); + std::vector u8; + convz(u32.data(), std::back_inserter(u8)); + std::wstring uw; + conv(u8s, u8s + sizeof(u8s), std::back_inserter(uw)); + static_assert(is_utf_same::value, "Fail"); + static_assert(1 == + (is_utf_same::value ? 1 : 0) + + (is_utf_same::value ? 1 : 0), "Fail"); +} + #undef WW898_PERFORMANCE_TESTS_MODE BOOST_AUTO_TEST_SUITE_END()