-
Notifications
You must be signed in to change notification settings - Fork 0
/
encodeHebrew
executable file
·88 lines (60 loc) · 2.03 KB
/
encodeHebrew
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/perl
#see
#https://en.wikipedia.org/wiki/Braille_ASCII
# "Unicode includes a means for encoding eight-dot Braille; however, Braille ASCII continues to be the preferred format for encoding six-dot Braille."
use v5.12;
use common::sense;
use utf8;
use open qw(:std :utf8);
use Unicode::Normalize qw(decompose reorder NFC NFD);
use JSON;
use LWP::Simple;
use Data::Dumper;
use Getopt::Long;
use lib qw(.);
use H2BEncoder;
# ---------------------------------------------------------------------------
# Command-line configuration.
#
# Output selectors (all off by default; each one enables one `say` in emitLine):
#   -w / --print-hebrew           print the NFD-normalised input, word-wrapped in <span>s
#   -u / --print-braille-unicode  print the Unicode braille, word-wrapped in <span>s
#   -a / --print-braille-ascii    print the Braille-ASCII rendering
#   -d / --debug-reorder          print the result of H2BEncoder::brailleReorder
#
# Modifiers:
#   -t / --highlight-taamim       forwarded to heb2BrUni as its highlightTaamim option
#   -s / --add-space              insert a plain space after every braille blank cell
#   -e / --taammim-encoding MODE  passed to H2BEncoder->new as `mode`       (default 'CP')
#   -m / --dagesh-mode MODE       passed to H2BEncoder->new as `dageshMode` (default 'HEH_BGDCFT')
#
# NOTE(review): the set of valid MODE values is defined by H2BEncoder, which is
# not visible here -- consult that module before documenting them further.
# ---------------------------------------------------------------------------
my $printHebrew         = 0;
my $printBrailleUnicode = 0;
my $printBrailleAscii   = 0;
my $highlightTaamim     = 0;
my $tEncoding           = 'CP';
my $dageshMode          = 'HEH_BGDCFT';
my $debugReorder;
my $addSpace;

# GetOptions returns false when it meets an unknown or malformed option.
# Previously that result was ignored, so a typo in a switch only produced a
# warning and the script carried on with default settings; now we stop with a
# usage message instead.
#
# "taamim-encoding" is accepted as an alias of the historical (double-m)
# "taammim-encoding" so the spelling matches --highlight-taamim; the old name
# keeps working.
GetOptions(
    "taammim-encoding|taamim-encoding|e=s" => \$tEncoding,
    "dagesh-mode|m=s"                      => \$dageshMode,
    "debug-reorder|d"                      => \$debugReorder,
    "print-hebrew|w"                       => \$printHebrew,
    "print-braille-unicode|u"              => \$printBrailleUnicode,
    "print-braille-ascii|a"                => \$printBrailleAscii,
    "add-space|s"                          => \$addSpace,
    "highlight-taamim|t"                   => \$highlightTaamim,
) or die "Usage: $0 [-e|--taammim-encoding MODE] [-m|--dagesh-mode MODE] "
       . "[-d|--debug-reorder] [-w|--print-hebrew] [-u|--print-braille-unicode] "
       . "[-a|--print-braille-ascii] [-s|--add-space] [-t|--highlight-taamim] [FILE...]\n";

# Single encoder instance shared by every processed line.
my $enc = H2BEncoder->new(mode => $tEncoding, dageshMode => $dageshMode);
# Running word index used to build the id="..." attributes below.  It is
# file-scoped, so numbering continues across lines for the whole run.
# NOTE(review): the same counter feeds both the "tr-" and the "br-" ids, so
# when Hebrew and braille output are enabled together the numbers of a word
# and its braille counterpart do not line up -- confirm this is intended.
my $counter =0;

# addWordNums($line, $isBraille)
#
# Splits $line into words and wraps each one in
#   <span class="PREFIXword" id="PREFIXn">word</span>
# where PREFIX is "br-" for braille text and "tr-" otherwise, and n is the
# next value of the global $counter (first word of the run gets 1).
#
#   $line      - text to annotate.
#   $isBraille - true:  words are separated by U+2800 BRAILLE PATTERN BLANK;
#                false: words are separated by whitespace.
#
# Returns the wrapped words joined by a single separator character (U+2800 or
# one space).  An empty line yields the empty string.  The word text is
# inserted verbatim (no HTML escaping).
sub addWordNums{
    my ($line, $isBraille) = @_;

    my $blankCell = "\N{BRAILLE PATTERN BLANK}";
    my $prefix    = $isBraille ? "br-" : "tr-";
    my $joiner    = $isBraille ? $blankCell : " ";

    # Spell the two split modes out explicitly.  A *variable* holding " " is
    # only guaranteed to trigger split's awk-style whitespace mode from Perl
    # 5.18 on; the literal ' ' form behaves the same on every version.  The
    # braille separator is matched as a quoted literal.
    my @words = $isBraille
        ? split(/\Q$blankCell\E/, $line)
        : split(' ', $line);

    return join $joiner, map {
        $counter++;
        qq{<span class="${prefix}word" id="${prefix}$counter">$_</span>};
    } @words;
}
# Main driver: read every input line (from the files left in @ARGV after
# option parsing, or from STDIN when none are given), strip the trailing
# newline and hand the line to emitLine.  Exit with status 0 when input ends.
while (defined(my $inputLine = <>)) {
    chomp $inputLine;
    emitLine($inputLine);
}
exit 0;
# emitLine($line)
#
# Processes one input line (already chomped) and prints whichever renderings
# were requested on the command line, in this fixed order:
#   1. the NFD-normalised text with per-word <span>s   (--print-hebrew)
#   2. the encoder's brailleReorder() result            (--debug-reorder)
#   3. the Unicode braille with per-word <span>s        (--print-braille-unicode)
#   4. the Braille-ASCII text                           (--print-braille-ascii)
#
# Both heb2BrUni conversions and the ASCII conversion are always performed,
# whether or not their output is printed.
sub emitLine {
    my ($rawLine) = @_;

    # Work on the canonically decomposed form of the input.
    my $decomposed = NFD($rawLine);

    if ($printHebrew) {
        say addWordNums($decomposed, 0);
    }
    if ($debugReorder) {
        say $enc->brailleReorder($decomposed);
    }

    # Two conversions of the same line: one without options, which feeds the
    # ASCII output, and one that receives the highlightTaamim setting, which
    # feeds the Unicode/HTML output.
    my $plainBraille  = $enc->heb2BrUni($decomposed);
    my $markedBraille = $enc->heb2BrUni($decomposed, highlightTaamim => $highlightTaamim);

    if ($addSpace) {
        # Follow every blank braille cell with an ordinary space.
        $markedBraille =~ s{\N{BRAILLE PATTERN BLANK}}{\N{BRAILLE PATTERN BLANK} }g;
    }
    if ($printBrailleUnicode) {
        say addWordNums($markedBraille, 1);
    }

    my $asciiBraille = $enc->brUni2BrAscii($plainBraille);
    # Replace every '=' with a space.  Original author's note: there is no
    # real need to encode "zeroes"; spaces read better for now.
    $asciiBraille =~ s/=/ /g;
    if ($printBrailleAscii) {
        say $asciiBraille;
    }
}