-
Notifications
You must be signed in to change notification settings - Fork 0
/
mappcvt.pl
93 lines (80 loc) · 2.5 KB
/
mappcvt.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/perl
# script for helping with conversion to new version of WN database
# see NEWWNVERSION for usage instructions
use strict;
use warnings;
# Old WordNet release to convert from; its index.sense is read from
# /usr/share/wordnet$OLDVERSION/.
my $OLDVERSION="2.1";
# Name of the mapping set; it appears in both the mapping directory name and
# the mapping file names.
my $MAPPINGVERSION="21-30";
# keys are sense keys from the old index.sense, values are "pos:offset" strings
my %sensekeysold;
my %sensekeysnew; # hash of lists; keys are "pos:offset" strings from the new index.sense, list entries are sensekeys
my %mappingover; # hash of lists; keys are old "pos:offset" strings, entries are "pos:offset score" strings for poss new offsets
# read_mapp_file($fn, $pos)
#
# Load one sense-mapping file into the file-level %mappingover hash.
#
#   $fn  - path of the mapping file to read
#   $pos - numeric part-of-speech code used as the prefix of every key/entry
#
# A usable line looks like
#     <old offset> <new offset> <score> [<new offset> <score> ...]
# Every "<new offset> <score>" pair is appended to the list stored under
# "$pos:<old offset>" as the string "$pos:<new offset> <score>"
# (e.g. "1:00092663 0.118").
#
# When $pos is 3 each pair is additionally filed under prefix 5.
# NOTE(review): presumably because WordNet sense keys use type 5 for
# adjective satellites, which share the .adj mapping file - confirm.
#
# Dies if the file cannot be opened.  Lines that do not start with a numeric
# offset followed by a space and more text are skipped.
sub read_mapp_file {
    my ($fn, $pos) = @_;
    open(my $mapp, "<", $fn) or die "Could not open $fn: $!";
    while (my $line = <$mapp>) {
        chomp $line;
        my ($key, $rest) = $line =~ /^([0-9]+) (.+)$/;
        # Blank or malformed line: nothing to record (and nothing to warn about).
        next unless defined $key;
        # A pair ends at a space or at end of line, so the final pair is
        # picked up whether or not the line carries a trailing space.
        while ($rest =~ m/([0-9]+ [^ ]+)(?: |$)/g) {
            my $pair = $1;
            push @{$mappingover{"$pos:$key"}}, "$pos:$pair";
            push @{$mappingover{"5:$key"}}, "5:$pair" if ($pos == 3);
        }
    }
    close $mapp;
}
# Fill %sensekeysold from the old release's index.sense.  Each record starts
# with a sense key ("word%<type>...") followed by a space and a numeric
# offset; the key is mapped to "<type>:<offset>", where <type> is the single
# character right after '%'.
open(my $old_index_fh, "<", "/usr/share/wordnet$OLDVERSION/index.sense") or die "Could not open index.sense for old: $!";
while (my $line = <$old_index_fh>) {
    chomp $line;
    my ($sk, $pos, $offset) = $line =~ /^([^%]+%(.)[^ ]+) ([0-9]+)/;
    # Skip anything that is not a "sensekey offset ..." record instead of
    # storing an entry built from undefined values.
    next unless defined $sk;
    $sensekeysold{$sk} = "$pos:$offset";
}
close $old_index_fh;
# Fill %sensekeysnew from the new release's index.sense.  This is the reverse
# lookup of the old-index table: "<type>:<offset>" maps to the list of all
# sense keys found at that offset, in file order.
open(my $new_index_fh, "<", "/usr/share/wordnet/index.sense") or die "Could not open index.sense for new: $!";
while (my $line = <$new_index_fh>) {
    chomp $line;
    my ($sk, $pos, $offset) = $line =~ /^([^%]+%(.)[^ ]+) ([0-9]+)/;
    # Skip anything that is not a "sensekey offset ..." record instead of
    # pushing an undefined sense key onto a bogus ":" entry.
    next unless defined $sk;
    push @{$sensekeysnew{"$pos:$offset"}}, $sk;
}
close $new_index_fh;
# Load the four per-part-of-speech mapping files.  Each entry pairs a file
# suffix with the numeric code passed to read_mapp_file as its key prefix.
my $path = "/home/kps/seal/mapps/mapping-$MAPPINGVERSION";
foreach my $spec (["adj", 3], ["adv", 4], ["noun", 1], ["verb", 2]) {
    my ($suffix, $code) = @{$spec};
    read_mapp_file("$path/wn$MAPPINGVERSION.$suffix", $code);
}
# Walk en2wn.po and, for every msgstr holding an old sense key, print one
# line of the form
#     s/"<old key>/"<new key>/ # <score> [<new key>/ # <score> ...]
# listing the candidate replacement keys, or
#     s/"<old key>/"/ # NOMAP
# when the old key has no known mapping.
# NOTE(review): the lines look like sed substitutions, but $msgstr is
# interpolated unescaped; a key containing "/" or a regex metacharacter would
# need escaping if the output is fed to sed directly - confirm how it is used.
open(my $po_fh, "<", "en2wn.po") or die "Could not open PO file: $!";
while (my $line = <$po_fh>) {
    chomp $line;
    next unless $line =~ /^msgstr/;

    # Strip the leading 'msgstr "' and the closing quote to get the bare value.
    my $msgstr = $line;
    $msgstr =~ s/^msgstr "//;
    $msgstr =~ s/"$//;

    # Untranslated entries and the PO header carry no sense key.
    next if $msgstr eq 'NULL' or $msgstr eq '' or $msgstr =~ /^Content-Type/;

    my $oldoff = $sensekeysold{$msgstr}; # in form "$pos:$offset";

    # A key missing from the old index leaves $oldoff undefined; test that
    # first so the mapping lookup never uses an undefined hash key.
    if (!defined $oldoff or !exists $mappingover{$oldoff}) {
        print "s/\"$msgstr/\"/ # NOMAP\n";
        # probably maps to NULL in new version!!
        next;
    }

    # Word part of the old sense key (everything before '%').
    my ($w) = $msgstr =~ /^([^%]+)%/;
    print "s/\"$msgstr/\"";
    foreach my $newoff (@{$mappingover{$oldoff}}) {
        # newoff looks like "1:00092663 0.118"
        my ($trueoff, $prob) = $newoff =~ /^([^ ]+) ([^ ]+)/;
        my $count = 0;
        my $first = '';
        # "|| []" keeps an offset absent from the new index from being
        # autovivified into %sensekeysnew.
        foreach my $newsk (@{ $sensekeysnew{$trueoff} || [] }) {
            my ($thisw) = $newsk =~ /^([^%]+)%/;
            # Remember the first key at this offset as a fallback.
            $first = "$newsk/ # $prob " unless $first;
            if ($w eq $thisw) {
                $count++;
                print "$newsk/ # $prob ";
            } #possibly no matches => change to ""...
        }
        # No key at this offset shares the old word: offer the first one.
        print $first if ($count == 0);
    }
    print "\n";
}
close $po_fh;
exit 0;