File tree: 2 files changed, +10 -3 lines changed
Original file line number | Diff line number | Diff line change
1
+ *.pyc
2
+ *.db
3
+ *.txt
4
+ .DS_Store
Original file line number Diff line number Diff line change
1
+ from __future__ import division
1
2
import sqlite3
2
3
import codecs
3
4
import sys
@@ -6,17 +7,19 @@ class Parser:
6
7
SENTENCE_START_SYMBOL = '^'
7
8
SENTENCE_END_SYMBOL = '$'
8
9
9
- def __init__ (self , name , db ):
10
+ def __init__ (self , name , db , split_char = '.' ):
10
11
self .name = name
11
12
self .db = db
13
+ self .split_char = split_char
12
14
13
15
def save_word_pair (self , word1 , word2 ):
14
16
self .db .add_word (word1 , word2 )
15
17
16
18
def parse (self , file_name ):
17
19
txt = codecs .open (file_name , 'r' , 'utf-8' ).read ()
18
- sentences = txt .split ('\n')
20
+ sentences = txt .split (self . split_char )
19
21
i = 0
22
+ l = len (sentences )
20
23
21
24
for sentence in sentences :
22
25
words = sentence .split ()
@@ -30,7 +33,7 @@ def parse(self, file_name):
30
33
self .db .commit ()
31
34
i += 1
32
35
if i % 1000 == 0 :
33
- print i
36
+ print '%d%% complete' % ( 100 * i / l ,)
34
37
sys .stdout .flush ()
35
38
36
39
You can’t perform that action at this time.
0 commit comments