Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Jan 20, 2025
1 parent f51b50a commit c3763ca
Show file tree
Hide file tree
Showing 2 changed files with 2,907 additions and 2,905 deletions.
50 changes: 25 additions & 25 deletions PolyDeDupe/non_alpha.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,33 @@

# Compiled pattern matching any single character that is NOT one of:
#   * an ASCII letter, ASCII digit, or underscore, or
#   * a code point inside one of the Unicode ranges listed below.
# Each range is listed exactly once; the escapes use lowercase hex digits.
#
# NOTE(review): "\u0080-\u00ff" spans the whole Latin-1 Supplement block, which
# also holds C1 control codes, NO-BREAK SPACE and symbols such as the
# multiplication and division signs, so those are treated as "kept" characters
# too. Confirm this is intended before narrowing the range.
NON_ALPHA = re.compile(
    "[^"
    # European scripts
    "\u0080-\u00ff"  # Latin-1 Supplement (covers many Western European languages)
    "\u0100-\u017f"  # Latin Extended-A (Central European, Baltic, etc.)
    "\u0180-\u024f"  # Latin Extended-B (additional European languages)
    "\u1e00-\u1eff"  # Latin Extended Additional (covers Vietnamese, some African languages)
    "\u0400-\u04ff"  # Cyrillic (covers Russian, Bulgarian, Serbian, etc.)
    "\u0370-\u03ff"  # Greek and Coptic
    "\u1f00-\u1fff"  # Greek Extended
    # Middle Eastern, South Asian and African scripts
    "\u0600-\u06ff"  # Arabic
    "\u07c0-\u07ff"  # N'Ko
    "\u0900-\u097f"  # Devanagari (Hindi, Marathi, Sanskrit)
    "\u1200-\u137f"  # Ethiopic (Amharic, Tigrinya)
    "\u2d30-\u2d7f"  # Tifinagh (Berber languages)
    "\ua500-\ua63f"  # Vai (West African)
    # Additional language ranges
    "\u4e00-\u9fff"  # CJK Unified Ideographs (Chinese)
    "\uac00-\ud7af"  # Hangul Syllables (Korean)
    "\u3040-\u309f\u30a0-\u30ff"  # Hiragana and Katakana (Japanese)
    "\u0b80-\u0bff"  # Tamil
    "\u0c00-\u0c7f"  # Telugu
    "\u0c80-\u0cff"  # Kannada
    "\u0d00-\u0d7f"  # Malayalam
    "\u0980-\u09ff"  # Bengali
    "\u0a00-\u0a7f"  # Gurmukhi (Punjabi)
    "\u0a80-\u0aff"  # Gujarati
    "\u0b00-\u0b7f"  # Oriya
    "\u0750-\u077f"  # Arabic Supplement
    "A-Za-z_0-9"  # General Latin, numerals, and underscore
    "]"
)
Loading

0 comments on commit c3763ca

Please sign in to comment.