diff --git a/convert.py b/convert.py index f7ceec1..5b7c0db 100755 --- a/convert.py +++ b/convert.py @@ -18,7 +18,11 @@ if not HANZI_RE.match(line): continue - # Skip list page. + # Skip single character & too long pages + if not 1 < len(line) < 9: + continue + + # Skip list pages if line.endswith('\u5217\u8868'): continue