Skip to content

Commit d7a2b87

Browse files
authored
Merge pull request #3 from Birch-san/refactor
Refactor
2 parents b008ab4 + 9e20249 commit d7a2b87

24 files changed

+1669
-953
lines changed

mecab-web/.gitignore

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,45 @@
11
# compilation products of em++
2-
mecab.js
3-
mecab.html
4-
mecab.wasm
5-
mecab.data
2+
/mecab.js
3+
/mecab.html
4+
/mecab.wasm
5+
/mecab.data
66

77
# outputs of edict2-downloader.sh
8-
edict2.gz
9-
edict2
10-
edict2.eucjp.txt
11-
edict2.utf8.txt
12-
edict2.utf16.txt
13-
edict2.utf16le.txt
8+
/edict2.gz
9+
/edict2
10+
/edict2.eucjp.txt
11+
/edict2.utf8.txt
12+
/edict2.utf16.txt
13+
/edict2.utf16le.txt
1414

1515
# outputs of enamdict-downloader.sh
16-
enamdict
17-
enamdict.gz
18-
enamdict.utf8.txt
16+
/enamdict
17+
/enamdict.gz
18+
/enamdict.utf8.txt
1919

2020
# outputs of kanjidic2-downloader.sh
21-
kanjidic2.xml
22-
kanjidic2.xml.gz
23-
kanjidic2-lf.utf8.txt
21+
/kanjidic2.xml
22+
/kanjidic2.xml.gz
23+
/kanjidic2-lf.utf8.txt
2424

2525
# pre-compressed so that Apache doesn't have to gzip them every time
26-
mecab.js.gz
27-
mecab.wasm.gz
28-
mecab.data.gz
29-
edict2.eucjp.txt.gz
30-
edict2.utf8.txt.gz
31-
edict2.utf16.txt.gz
32-
edict2.utf16le.txt.gz
33-
enamdict.utf8.txt.gz
34-
kanjidic2-lf.utf8.txt.gz
35-
36-
test.html
37-
38-
sw.js
39-
40-
node_modules
41-
web_modules
42-
workbox_modules
43-
44-
dist
45-
dist.tar.gz
26+
/mecab.js.gz
27+
/mecab.wasm.gz
28+
/mecab.data.gz
29+
/edict2.eucjp.txt.gz
30+
/edict2.utf8.txt.gz
31+
/edict2.utf16.txt.gz
32+
/edict2.utf16le.txt.gz
33+
/enamdict.utf8.txt.gz
34+
/kanjidic2-lf.utf8.txt.gz
35+
36+
/test.html
37+
38+
/sw.js
39+
40+
/node_modules
41+
/web_modules
42+
/workbox_modules
43+
44+
/dist
45+
/dist.tar.gz

mecab-web/index.html

Lines changed: 135 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -60,17 +60,103 @@ <h1>MeCab in WebAssembly</h1>
6060
initStore,
6161
initApplication,
6262
} from './src/index.js';
63+
import {
64+
MecabContext,
65+
} from './src/mecab/index.js';
66+
import {
67+
Edict2GlossParser,
68+
EnamdictGlossParser,
69+
} from './src/edict2/index.js';
70+
import {
71+
Kanjidic2Factory,
72+
FuriganaFitterFactory,
73+
MecabPipelineFactory,
74+
Edict2LikeDictionaryFactory,
75+
DictionariesFactory,
76+
} from './src/factory.js';
77+
import { Deferred } from './src/util/index.js';
78+
import { tokenize, toHiragana, } from './web_modules/wanakana.js';
79+
6380
const [store, actions] = initStore();
6481

82+
const kanjidic2 = fetch('kanjidic2-lf.utf8.txt')
83+
.then(response => response.text())
84+
.then(kanjidic2Text => new Kanjidic2Factory()
85+
.construct({
86+
kanjidic2Text,
87+
}));
88+
89+
const furiganaFitter = kanjidic2
90+
.then(kanjidic2 => new FuriganaFitterFactory()
91+
.construct({
92+
kanjidic2,
93+
wanakana: { tokenize, toHiragana, },
94+
}));
95+
96+
const edict2Text = fetch('edict2.utf8.txt')
97+
.then(response => response.text());
98+
const edict2 = Promise.all([edict2Text, furiganaFitter])
99+
.then(([text, furiganaFitter]) => new Edict2LikeDictionaryFactory()
100+
.construct({
101+
text,
102+
glossParser: new Edict2GlossParser(),
103+
furiganaFitter,
104+
}));
105+
106+
const enamdictText = fetch('enamdict.utf8.txt')
107+
.then(response => response.text());
108+
const enamdict = Promise.all([enamdictText, furiganaFitter])
109+
.then(([text, furiganaFitter]) => new Edict2LikeDictionaryFactory()
110+
.construct({
111+
text,
112+
glossParser: new EnamdictGlossParser(),
113+
furiganaFitter,
114+
}));
115+
116+
const mecabDeferred = new Deferred();
117+
const mecabContext = mecabDeferred.promise.then((callbacks) => {
118+
return new MecabContext({
119+
callbacks,
120+
});
121+
});
122+
123+
const dictionaries = Promise.all([edict2, enamdict])
124+
.then(([edict2, enamdict]) => new DictionariesFactory()
125+
.construct({
126+
edict2,
127+
enamdict,
128+
}));
129+
130+
const mecabPipeline = Promise.all([mecabContext, furiganaFitter])
131+
.then(([mecabContext, furiganaFitter]) => new MecabPipelineFactory()
132+
.construct({
133+
mecab: mecabContext.construct({
134+
config: {},
135+
}),
136+
wanakana: { toHiragana, },
137+
furiganaFitter,
138+
}));
139+
140+
const languageTools = Promise.all([dictionaries, mecabPipeline, furiganaFitter])
141+
.then(([dictionaries, mecabPipeline, furiganaFitter]) => ({
142+
dictionaries,
143+
mecabPipeline,
144+
furiganaFitter,
145+
}));
146+
147+
languageTools
148+
.then(languageTools => {
149+
console.log(languageTools);
150+
Object.assign(window, { languageTools, });
151+
return languageTools;
152+
})
153+
.then(languageTools => actions.setLanguageTools(languageTools));
154+
65155
initApplication({
66-
dictionaryTextPromises: {
67-
edict2: fetch('edict2.utf8.txt')
68-
.then(response => response.text()),
69-
enamdict: fetch('enamdict.utf8.txt')
70-
.then(response => response.text()),
71-
kanjidic2: fetch('kanjidic2-lf.utf8.txt')
72-
.then(response => response.text())
73-
.then(text => window.kanjidic = text),
156+
dictionaryLoadPromises: {
157+
edict2,
158+
enamdict,
159+
kanjidic2,
74160
},
75161
store,
76162
actions,
@@ -79,39 +165,36 @@ <h1>MeCab in WebAssembly</h1>
79165
});
80166

81167
store.subscribe(state => {
82-
if (state.ready
83-
&& !state.parses.length
84-
&& state.initialQuery) {
85-
// console.log(state.initialQuery);
86-
const parsed = state.parse(state.kanjidic2Lookup, state.initialQuery);
87-
// console.log(parsed);
88-
actions.addParse(parsed);
89-
}
90-
if (!state.termResults.key
91-
&& state.dictionaryText
92-
&& state.dictionaryText.edict2
93-
&& state.dictionaryText.enamdict
94-
&& state.kanjidic2Lookup)
95-
{
96-
actions.chooseTerm({
97-
token: '双子',
98-
});
168+
if (state.languageTools) {
169+
const { mecabPipeline } = state.languageTools;
170+
if (!state.parses.length
171+
&& state.initialQuery) {
172+
// console.log(state.initialQuery);
173+
const parsed = mecabPipeline.tokenize(state.initialQuery);
174+
// console.log(parsed);
175+
actions.addParse(parsed);
176+
}
177+
if (!state.termResults.key) {
178+
actions.chooseTerm({
179+
token: '双子',
180+
});
181+
}
99182
}
100183
});
101184

102185
const statusElement = document.getElementById('status');
103186
const progressElement = document.getElementById('progress');
104187
const spinnerElement = document.getElementById('spinner');
105-
const wrapped = {};
106-
const toBeFreed = {
107-
mecab_model_t: new Set(), // config
108-
mecab_t: new Set(), // tagger
109-
mecab_lattice_t: new Set(),
110-
};
111-
const currentPointers = {
112-
tagger: undefined,
113-
lattice: undefined,
114-
}
188+
// const wrapped = {};
189+
// const toBeFreed = {
190+
// mecab_model_t: new Set(), // config
191+
// mecab_t: new Set(), // tagger
192+
// mecab_lattice_t: new Set(),
193+
// };
194+
// const currentPointers = {
195+
// tagger: undefined,
196+
// lattice: undefined,
197+
// }
115198

116199
const dicdir = '/naist-jdic';
117200

@@ -148,6 +231,7 @@ <h1>MeCab in WebAssembly</h1>
148231
`);
149232
}],
150233
postRun: [function() {
234+
const wrapped = {};
151235
Object.assign(wrapped, {
152236
mecab_do: Module.cwrap('mecab_do2', 'number', ['string']),
153237
mecab_model_new2: Module.cwrap('mecab_model_new2', 'number', ['string']),
@@ -161,25 +245,28 @@ <h1>MeCab in WebAssembly</h1>
161245

162246
// we could probably skip the destroy since the world's ending anyway..
163247
Module.addOnExit(() => {
164-
toBeFreed.mecab_t.forEach((p_mecab_t) => {
165-
wrapped.mecab_destroy(p_mecab_t);
166-
});
167-
toBeFreed.mecab_model_t.forEach((p_mecab_model_t) => {
168-
wrapped.mecab_model_destroy(p_mecab_model_t);
169-
});
248+
mecabContext.then((mecabContext) => mecabContext.free());
249+
// toBeFreed.mecab_t.forEach((p_mecab_t) => {
250+
// wrapped.mecab_destroy(p_mecab_t);
251+
// });
252+
// toBeFreed.mecab_model_t.forEach((p_mecab_model_t) => {
253+
// wrapped.mecab_model_destroy(p_mecab_model_t);
254+
// });
170255
});
171256

257+
mecabDeferred.resolve(wrapped);
258+
172259
// const args = document.getElementById('args').value;
173260
// const args = '-o output.txt input.txt';
174-
const args = '';
175-
const p_mecab_model = wrapped.mecab_model_new2(args);
176-
toBeFreed.mecab_model_t.add(p_mecab_model);
261+
// const args = '';
262+
// const p_mecab_model = wrapped.mecab_model_new2(args);
263+
// toBeFreed.mecab_model_t.add(p_mecab_model);
177264

178-
const p_tagger = wrapped.mecab_model_new_tagger(p_mecab_model);
179-
toBeFreed.mecab_t.add(p_tagger);
265+
// const p_tagger = wrapped.mecab_model_new_tagger(p_mecab_model);
266+
// toBeFreed.mecab_t.add(p_tagger);
180267

181-
currentPointers.tagger = p_tagger;
182-
actions.setupMecab(wrapped, currentPointers);
268+
// currentPointers.tagger = p_tagger;
269+
// actions.setupMecab(wrapped, currentPointers);
183270
}],
184271
print: (function() {
185272
// var element = document.getElementById('output');

mecab-web/src/edict2/dictionaries.js

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/**
 * A dictionary in the EDICT2 family whose lookup machinery is assembled per
 * token from a set of injected factories.
 *
 * The class holds no dictionary text itself; it only knows how to wire the
 * factories together into a "matches pipeline" for a given token.
 */
export class Edict2LikeDictionary {
  /**
   * @param {object} factories
   * @param {{construct: Function}} factories.headwordReadingRankerFactory
   *   Called as `construct(mecabToken)`.
   * @param {{construct: Function}} factories.headwordReadingPipelineFactory
   *   Called as `construct(headwordReadingRanker)`.
   * @param {{construct: Function}} factories.parsedEntryRelevancePipelineFactory
   *   Called as `construct(mecabToken)`.
   * @param {{construct: Function}} factories.matchPipelineFactory
   *   Called as `construct({ headwordReadingPipeline, relevancePipeline })`.
   * @param {{construct: Function}} factories.matchesPipelineFactory
   *   Called as `construct({ matchPipeline })`.
   */
  constructor({
    headwordReadingRankerFactory,
    headwordReadingPipelineFactory,
    parsedEntryRelevancePipelineFactory,
    matchPipelineFactory,
    matchesPipelineFactory,
  }) {
    this._headwordReadingRankerFactory = headwordReadingRankerFactory;
    this._headwordReadingPipelineFactory = headwordReadingPipelineFactory;
    this._parsedEntryRelevancePipelineFactory = parsedEntryRelevancePipelineFactory;
    this._matchPipelineFactory = matchPipelineFactory;
    this._matchesPipelineFactory = matchesPipelineFactory;
  }

  /**
   * Builds a fresh matches pipeline specialised for one token.
   *
   * Nothing is cached: every call runs all five factories again.
   *
   * @param {object} mecabToken Token handed unchanged to the ranker and
   *   relevance factories; its shape is defined by those factories.
   * @returns {*} Whatever `matchesPipelineFactory.construct` returns.
   */
  getMatchesPipeline(mecabToken) {
    // The ranker is token-specific and feeds the headword/reading pipeline.
    const headwordReadingRanker = this._headwordReadingRankerFactory.construct(mecabToken);
    const headwordReadingPipeline = this._headwordReadingPipelineFactory.construct(headwordReadingRanker);
    const relevancePipeline = this._parsedEntryRelevancePipelineFactory.construct(mecabToken);

    // A single-match pipeline combines both, then gets wrapped for many matches.
    const matchPipeline = this._matchPipelineFactory.construct({
      headwordReadingPipeline,
      relevancePipeline,
    });
    return this._matchesPipelineFactory.construct({
      matchPipeline,
    });
  }
}
30+
31+
/**
 * Facade over the `edict2` and `enamdict` dictionaries that looks a token up
 * in both at once.
 */
export class Dictionaries {
  /**
   * @param {object} options
   * @param {object} options.dictionaries
   * @param {{getMatchesPipeline: Function}} options.dictionaries.edict2
   * @param {{getMatchesPipeline: Function}} options.dictionaries.enamdict
   * @param {{getRecommendedSearchTerm: Function}} options.searchTermRecommender
   *   Used to turn a token into the term that is actually searched for.
   */
  constructor({
    dictionaries: {
      edict2,
      enamdict,
    },
    searchTermRecommender,
  }) {
    this._dictionaries = {
      edict2,
      enamdict,
    };
    this._searchTermRecommender = searchTermRecommender;
  }

  /**
   * Looks up a bare term by wrapping it as `{ token: term }` and delegating
   * to {@link Dictionaries#lookupToken}.
   *
   * @param {*} term Value to use as the token's `token` property.
   * @returns {{edict2: *, enamdict: *}} Same shape as `lookupToken`.
   */
  lookupTerm(term) {
    return this.lookupToken({
      token: term,
    });
  }

  /**
   * Looks a token up in both dictionaries.
   *
   * The search term comes from the recommender, while the token itself is
   * used to build each dictionary's matches pipeline.
   *
   * @param {object} mecabToken Token passed to the recommender and to each
   *   dictionary's `getMatchesPipeline`.
   * @returns {{edict2: *, enamdict: *}} Result of `lookup(term)` on each
   *   dictionary's pipeline.
   */
  lookupToken(mecabToken) {
    const term = this._searchTermRecommender.getRecommendedSearchTerm(mecabToken);

    const edict2MatchesPipeline = this._dictionaries.edict2.getMatchesPipeline(mecabToken);
    const enamdictMatchesPipeline = this._dictionaries.enamdict.getMatchesPipeline(mecabToken);

    return {
      edict2: edict2MatchesPipeline.lookup(term),
      enamdict: enamdictMatchesPipeline.lookup(term),
    };
  }
}

0 commit comments

Comments
 (0)