From 803494d783220fc9953e33a004e79e234fe9d915 Mon Sep 17 00:00:00 2001 From: xjchengo Date: Mon, 18 Jan 2016 15:11:35 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E5=8A=A0=E5=BF=AB=E6=8B=BC=E9=9F=B3?= =?UTF-8?q?=E8=BD=AC=E6=96=87=E5=AD=97=E7=9A=84=E9=80=9F=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Pinyin/Pinyin.php | 45 +++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/src/Pinyin/Pinyin.php b/src/Pinyin/Pinyin.php index 195bf003..141a5ed6 100755 --- a/src/Pinyin/Pinyin.php +++ b/src/Pinyin/Pinyin.php @@ -31,14 +31,7 @@ class Pinyin * * @var array */ - protected static $dictionary; - - /** - * Appends words. - * - * @var array - */ - protected static $appends = array(); + protected static $dictionary = array(); /** * Settings. @@ -52,6 +45,12 @@ class Pinyin 'uppercase' => false, 'charset' => 'UTF-8' // GB2312,UTF-8 ); + /** + * Internal charset used by this package. + * + * @var string + */ + protected static $internalCharset = 'UTF-8'; /** * The instance. @@ -67,8 +66,9 @@ class Pinyin */ private function __construct() { - if (is_null(static::$dictionary)) { - self::$dictionary = json_decode(file_get_contents(dirname(__DIR__).'/data/dict.php'), true); + if (!static::$dictionary) { + $list = json_decode(file_get_contents(dirname(__DIR__).'/data/dict.php'), true); + static::appends($list); } } @@ -164,8 +164,8 @@ public static function parse($string, array $settings = array()) $settings = array_merge(self::$settings, $settings); // add charset set - if (!empty($settings['charset']) && $settings['charset'] != 'UTF-8') { - $string = iconv($settings['charset'], 'UTF-8', $string); + if (!empty($settings['charset']) && $settings['charset'] != static::$internalCharset) { + $string = iconv($settings['charset'], static::$internalCharset, $string); } // remove non-Chinese char. @@ -201,7 +201,11 @@ public static function parse($string, array $settings = array()) */ public static function appends(array $appends) { - static::$dictionary = array_merge(self::$dictionary, static::formatWords($appends)); + $list = static::formatWords($appends); + foreach ($list as $key => $value) { + $firstChar = mb_substr($key, 0, 1, static::$internalCharset); + self::$dictionary[$firstChar][$key] = $value; + } } /** @@ -242,7 +246,18 @@ protected function getFirstLetters($pinyin, $settings) */ protected function string2pinyin($string) { - $pinyin = strtr($this->prepare($string), self::$dictionary); + $preparedString = $this->prepare($string); + $count = mb_strlen($preparedString, static::$internalCharset); + $dictionary = []; + + $i = 0; + while ($i < $count) { + $char = mb_substr($preparedString, $i++, 1, static::$internalCharset); + $pinyinGroup = isset(self::$dictionary[$char]) ? self::$dictionary[$char] : []; + $dictionary = array_merge($dictionary, $pinyinGroup); + } + + $pinyin = strtr($preparedString, $dictionary); return trim(str_replace(' ', ' ', $pinyin)); } @@ -272,6 +287,8 @@ public static function formatWords($words) */ protected static function formatDictPinyin($pinyin) { + $pinyin = trim($pinyin); + return preg_replace_callback('/[a-z]{1,}:?\d{1}\s?/i', function ($matches) { return strtolower($matches[0]); }, " {$pinyin} "); From 93e398020c5d8bec6968d87c7d7fe094456e32f2 Mon Sep 17 00:00:00 2001 From: xjchengo Date: Mon, 18 Jan 2016 16:30:25 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E4=BD=BF=E7=94=A8=20PHP=205.3=20=E5=85=BC?= =?UTF-8?q?=E5=AE=B9=E7=9A=84=E8=AF=AD=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Pinyin/Pinyin.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Pinyin/Pinyin.php b/src/Pinyin/Pinyin.php index 141a5ed6..7d646209 100755 --- a/src/Pinyin/Pinyin.php +++ b/src/Pinyin/Pinyin.php @@ -66,7 +66,7 @@ class Pinyin */ private function __construct() { - if (!static::$dictionary) { + if (empty(static::$dictionary)) { $list = json_decode(file_get_contents(dirname(__DIR__).'/data/dict.php'), true); static::appends($list); } @@ -248,12 +248,12 @@ protected function string2pinyin($string) { $preparedString = $this->prepare($string); $count = mb_strlen($preparedString, static::$internalCharset); - $dictionary = []; + $dictionary = array(); $i = 0; while ($i < $count) { $char = mb_substr($preparedString, $i++, 1, static::$internalCharset); - $pinyinGroup = isset(self::$dictionary[$char]) ? self::$dictionary[$char] : []; + $pinyinGroup = isset(self::$dictionary[$char]) ? self::$dictionary[$char] : array(); $dictionary = array_merge($dictionary, $pinyinGroup); }