Skip to content

Commit

Permalink
修复 m̄ ê̄ ế ê̌ ề 这几个音无法转换为不含声调结果的问题
Browse files Browse the repository at this point in the history
  • Loading branch information
mozillazg committed Jul 13, 2019
1 parent 7872495 commit 3f83256
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 11 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
Changelog
---------

`0.35.4`_ (2019-07-xx)
+++++++++++++++++++++++

* **[Bugfixed]** 修复 ``m̄`` ``ê̄`` ``ế`` ``ê̌`` ``ề`` 这几个音无法转换为不含声调结果的问题。


`0.35.3`_ (2019-05-11)
++++++++++++++++++++++++

Expand Down Expand Up @@ -795,3 +801,4 @@ __ https://github.com/mozillazg/python-pinyin/issues/8
.. _0.35.1: https://github.com/mozillazg/python-pinyin/compare/v0.35.0...v0.35.1
.. _0.35.2: https://github.com/mozillazg/python-pinyin/compare/v0.35.1...v0.35.2
.. _0.35.3: https://github.com/mozillazg/python-pinyin/compare/v0.35.2...v0.35.3
.. _0.35.4: https://github.com/mozillazg/python-pinyin/compare/v0.35.3...v0.35.4
6 changes: 6 additions & 0 deletions pypinyin/phonetic_symbol.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,13 @@
"ň": "n3",
"ǹ": "n4",

"m̄": "m1", # len('m̄') == 2
"ḿ": "m2",
"m̀": "m4", # len("m̀") == 2

"ê̄": "ê1", # len('ê̄') == 2
"ế": "ê2",
"ê̌": "ê3", # len('ê̌') == 2
"ề": "ê4",
}
# Inverse lookup: numeric-tone form (e.g. "n3") -> tone-marked character.
phonetic_symbol_reverse = dict(
    (tone_number, marked_char)
    for marked_char, tone_number in phonetic_symbol.items()
)
5 changes: 5 additions & 0 deletions pypinyin/style/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@

# Mapping from tone-marked characters to their numeric-tone form.
PHONETIC_SYMBOL_DICT = phonetic_symbol.phonetic_symbol.copy()
# Subset of the mapping above restricted to keys whose length is greater
# than one, i.e. symbols that cannot be handled as a single character.
PHONETIC_SYMBOL_DICT_KEY_LENGTH_NOT_ONE = dict(
    entry
    for entry in PHONETIC_SYMBOL_DICT.items()
    if len(entry[0]) > 1
)
# 匹配带声调字符的正则表达式
RE_PHONETIC_SYMBOL = re.compile(
r'[{0}]'.format(
Expand Down
1 change: 1 addition & 0 deletions pypinyin/style/_constants.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ _INITIALS_NOT_STRICT = ... # type: List[Text]


PHONETIC_SYMBOL_DICT = ... # type: Dict[Text, Text]
PHONETIC_SYMBOL_DICT_KEY_LENGTH_NOT_ONE = ... # type: Dict[Text, Text]

RE_PHONETIC_SYMBOL = ... # type : Any

Expand Down
20 changes: 10 additions & 10 deletions pypinyin/style/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pypinyin.style._constants import (
_INITIALS, _INITIALS_NOT_STRICT,
RE_PHONETIC_SYMBOL, PHONETIC_SYMBOL_DICT,
PHONETIC_SYMBOL_DICT_KEY_LENGTH_NOT_ONE,
RE_NUMBER
)

Expand Down Expand Up @@ -57,24 +58,23 @@ def _replace(match):
return PHONETIC_SYMBOL_DICT[symbol]

# 替换拼音中的带声调字符
return RE_PHONETIC_SYMBOL.sub(_replace, pinyin).replace('m̀', 'm4')
value = RE_PHONETIC_SYMBOL.sub(_replace, pinyin)
for symbol, to in PHONETIC_SYMBOL_DICT_KEY_LENGTH_NOT_ONE.items():
value = value.replace(symbol, to)

return value


def replace_symbol_to_no_symbol(pinyin):
    """Return ``pinyin`` with tone-marked characters replaced by plain ones.

    The conversion is done in two steps so that it stays consistent with
    ``replace_symbol_to_number``: the string is first rewritten to its
    numeric-tone form, then every match of ``RE_NUMBER`` is removed
    (e.g. ``a1`` -> ``a``).

    Going through ``replace_symbol_to_number`` matters for symbols that
    are longer than one character (a letter plus a combining mark).
    Patching a single such symbol by hand with ``str.replace`` leaves
    the other multi-character symbols with their tone still attached.

    :param pinyin: pinyin string that may contain tone-marked characters.
    :return: the same string without tone information.
    """
    # NOTE(review): RE_NUMBER is assumed to match only the tone digits
    # produced by the numeric-tone form -- confirm against its definition.
    value = replace_symbol_to_number(pinyin)
    return RE_NUMBER.sub('', value)


def has_finals(pinyin):
"""判断是否有韵母"""
# 鼻音: 'ḿ', 'm̀', 'ń', 'ň', 'ǹ ' 没有韵母
for symbol in ['ḿ', 'm̀', 'ń', 'ň', 'ǹ']:
# 鼻音: 'm̄', 'ḿ', 'm̀', 'ń', 'ň', 'ǹ ' 没有韵母
for symbol in ['m̄', 'ḿ', 'm̀', 'ń', 'ň', 'ǹ', 'ê̄', 'ế', 'ê̌', 'ề']:
if symbol in pinyin:
return False

Expand Down
16 changes: 15 additions & 1 deletion tests/test_pinyin.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
pinyin, slug, lazy_pinyin, load_single_dict,
load_phrases_dict, NORMAL, TONE, TONE2, TONE3, INITIALS,
FIRST_LETTER, FINALS, FINALS_TONE, FINALS_TONE2, FINALS_TONE3,
BOPOMOFO, BOPOMOFO_FIRST, CYRILLIC, CYRILLIC_FIRST
BOPOMOFO, BOPOMOFO_FIRST, CYRILLIC, CYRILLIC_FIRST, Style
)
from pypinyin.compat import SUPPORT_UCS4
from pypinyin.core import seg
Expand Down Expand Up @@ -545,6 +545,20 @@ def test_m4():
han, heteronym=True, style=FINALS_TONE3) == [['m2', 'ou2', 'm4']]


@pytest.mark.parametrize('han,style,expect', [
    ['呣', Style.TONE, ['ḿ', 'm̀']],
    ['呣', Style.TONE2, ['m2', 'm4']],
    ['嘸', Style.TONE, ['m̄', 'ḿ']],
    ['嘸', Style.TONE2, ['m1', 'm2']],
    ['誒', Style.TONE, ['ê̄', 'ế', 'ê̌', 'ề']],
    ['誒', Style.TONE2, ['ê1', 'ê2', 'ê3', 'ê4']],
])
def test_m_e(han, style, expect):
    """Check the heteronym readings of characters pronounced m / ê.

    For each character the single result entry must contain at least
    every expected reading, in both tone-mark and numeric-tone styles.
    """
    readings = pinyin(han, style=style, heteronym=True)
    # A single input character yields exactly one list of readings.
    assert len(readings) == 1
    # Extra readings are allowed; every expected one must be present.
    assert set(expect).issubset(readings[0])


# Allow running this test module directly as a script by handing
# control to pytest's command-line entry point.
if __name__ == '__main__':
    import pytest
    pytest.cmdline.main()

0 comments on commit 3f83256

Please sign in to comment.