Skip to content

Commit

Permalink
Add utf_selector and is_utf_same.
Browse files Browse the repository at this point in the history
  • Loading branch information
ww898 committed Oct 21, 2018
1 parent 3e423a4 commit 2ee0b7c
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 92 deletions.
72 changes: 38 additions & 34 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,20 @@ Tested on following compilers:

```cpp
// यूनिकोड
static char const u8_orig[] = "\xE0\xA4\xAF\xE0\xA5\x82\xE0\xA4\xA8\xE0\xA4\xBF\xE0\xA4\x95\xE0\xA5\x8B\xE0\xA4\xA1";
using namespace ww898;
static char const u8s[] = "\xE0\xA4\xAF\xE0\xA5\x82\xE0\xA4\xA8\xE0\xA4\xBF\xE0\xA4\x95\xE0\xA5\x8B\xE0\xA4\xA1";
using namespace ww898::utf;
std::u16string u16;
utf::convz<utf::utf8, utf::utf16>(u8_orig, std::back_inserter(u16));
convz<utf_selector_t<decltype(*u8s)>, utf16>(u8s, std::back_inserter(u16));
std::u32string u32;
utf::conv<utf::utf16, utf::utf32>(u16.begin(), u16.end(), std::back_inserter(u32));
conv<utf16, utf_selector_t<decltype(u32)::value_type>>(u16.begin(), u16.end(), std::back_inserter(u32));
std::vector<char> u8;
utf::convz<utf::utf32, utf::utf8>(u32.begin(), std::back_inserter(u8));
std::wstring wstr;
utf::convz<utf::utf8, utf::utfw>(u8.begin(), std::back_inserter(wstr));
convz<utf32, utf8>(u32.data(), std::back_inserter(u8));
std::wstring uw;
conv<utf8, utfw>(u8s, u8s + sizeof(u8s), std::back_inserter(uw));
static_assert(is_utf_same<decltype(*u8s), decltype(u8)::value_type>::value, "Fail");
static_assert(1 ==
(is_utf_same<decltype(u16)::value_type, decltype(uw)::value_type>::value ? 1 : 0) +
(is_utf_same<decltype(u32)::value_type, decltype(uw)::value_type>::value ? 1 : 0), "Fail");
```
## Performance
Expand Down Expand Up @@ -237,33 +241,33 @@ UTF8 ==> UTFW : 0.340384930s (+196.02%)

#### MacOS High Sierra v10.13.6 (Clang v6.0.0)
```cpp
Running 489 test cases...
sizeof wchar_t: 4
UTFW: UTF32
Resolution: 2793647583
UTF8 ==> UTF8 : 0.111039205s
UTF8 ==> UTF16: 0.143631552s
UTF8 ==> UTF32: 0.105463425s
UTF8 ==> UTFW : 0.105106640s
UTF16 ==> UTF8 : 0.158074631s
UTF16 ==> UTF16: 0.055528284s
UTF16 ==> UTF32: 0.063203264s
UTF16 ==> UTFW : 0.063167823s
UTF32 ==> UTF8 : 0.123977591s
UTF32 ==> UTF16: 0.061630976s
UTF32 ==> UTF32: 0.027633560s
UTF32 ==> UTFW : 0.029324893s
UTFW ==> UTF8 : 0.123948012s
UTFW ==> UTF16: 0.064873256s
UTFW ==> UTF32: 0.030606730s
UTFW ==> UTFW : 0.027596372s
codecvt_utf8_utf16<char16_t>:
UTF16 ==> UTF8 : 0.151798551s (-3.97%)
UTF8 ==> UTF16: 0.256203078s (+78.38%)
codecvt_utf8<wchar_t>:
UTFW ==> UTF8 : 0.137034385s (+10.56%)
UTF8 ==> UTFW : 0.360953804s (+243.42%)

Running 489 test cases...
sizeof wchar_t: 4
UTFW: UTF32
Resolution: 2793647583
UTF8 ==> UTF8 : 0.111039205s
UTF8 ==> UTF16: 0.143631552s
UTF8 ==> UTF32: 0.105463425s
UTF8 ==> UTFW : 0.105106640s
UTF16 ==> UTF8 : 0.158074631s
UTF16 ==> UTF16: 0.055528284s
UTF16 ==> UTF32: 0.063203264s
UTF16 ==> UTFW : 0.063167823s
UTF32 ==> UTF8 : 0.123977591s
UTF32 ==> UTF16: 0.061630976s
UTF32 ==> UTF32: 0.027633560s
UTF32 ==> UTFW : 0.029324893s
UTFW ==> UTF8 : 0.123948012s
UTFW ==> UTF16: 0.064873256s
UTFW ==> UTF32: 0.030606730s
UTFW ==> UTFW : 0.027596372s
codecvt_utf8_utf16<char16_t>:
UTF16 ==> UTF8 : 0.151798551s (-3.97%)
UTF8 ==> UTF16: 0.256203078s (+78.38%)
codecvt_utf8<wchar_t>:
UTFW ==> UTF8 : 0.137034385s (+10.56%)
UTF8 ==> UTFW : 0.360953804s (+243.42%)

*** No errors detected
```

Expand Down
39 changes: 34 additions & 5 deletions include/ww898/utf_converters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,8 +456,9 @@ template<
typename Utf,
typename Outf,
typename It,
typename Eit,
typename Oit>
Oit conv(It && it, It && eit, Oit && oit)
Oit conv(It && it, Eit && eit, Oit && oit)
{
return detail::conv_strategy<Utf, Outf,
typename std::decay<It>::type,
Expand All @@ -468,7 +469,7 @@ Oit conv(It && it, It && eit, Oit && oit)
? detail::conv_impl::random_interator
: detail::conv_impl::normal>()(
std::forward<It>(it),
std::forward<It>(eit),
std::forward<Eit>(eit),
std::forward<Oit>(oit));
}

Expand All @@ -478,11 +479,39 @@ template<
size_t wchar_size>
struct wchar_selector {};

template<> struct wchar_selector<2> { typedef utf16 utfw_type; };
template<> struct wchar_selector<4> { typedef utf32 utfw_type; };
template<> struct wchar_selector<2> { typedef utf16 type; };
template<> struct wchar_selector<4> { typedef utf32 type; };

}

typedef detail::wchar_selector<sizeof(wchar_t)>::utfw_type utfw;
typedef detail::wchar_selector<sizeof(wchar_t)>::type utfw;

namespace detail {

// Maps a character (code unit) type `Ch` to the UTF encoding tag used for it.
//
// The primary template is deliberately empty: a character type without a
// specialization below has no nested `type`, so asking for
// `utf_selector<Ch>::type` is a compile-time error rather than a silent guess.
template<
    typename Ch>
struct utf_selector {};

// Every narrow character flavour carries UTF-8 code units.
template<> struct utf_selector<         char> { typedef utf8  type; };
template<> struct utf_selector<unsigned char> { typedef utf8  type; };
template<> struct utf_selector<signed   char> { typedef utf8  type; };
#if defined(__cpp_char8_t)
// char8_t is a distinct type (not an alias of a narrow char) whenever the
// compiler enables it, so it needs its own mapping. The feature-test macro
// keeps compilers without char8_t support unaffected.
template<> struct utf_selector<char8_t      > { typedef utf8  type; };
#endif
template<> struct utf_selector<char16_t     > { typedef utf16 type; };
template<> struct utf_selector<char32_t     > { typedef utf32 type; };
// wchar_t maps to whatever encoding the `utfw` typedef names.
template<> struct utf_selector<wchar_t      > { typedef utfw  type; };

}

// Public selector: applies std::decay to `CharT` (dropping references and
// cv-qualifiers) before looking up the encoding tag, so that expressions such
// as `decltype(*ptr)` can be passed directly.
template<typename CharT>
using utf_selector =
    detail::utf_selector<
        typename std::decay<CharT>::type>;

// Shorthand for the encoding tag selected for `CharT`,
// i.e. the nested `type` of `utf_selector<CharT>`.
template<typename CharT>
using utf_selector_t =
    typename utf_selector<CharT>::type;

// Compile-time predicate: `value` is true when the two character types select
// the same encoding tag. It is an alias of std::is_same over the selected tags.
template<typename Lhs, typename Rhs>
using is_utf_same =
    std::is_same<
        utf_selector_t<Lhs>,
        utf_selector_t<Rhs>>;

}}
Loading

0 comments on commit 2ee0b7c

Please sign in to comment.