Skip to content

Commit

Permalink
[textsum] Use lib/labeler to unify labels when converting data
Browse files Browse the repository at this point in the history
  • Loading branch information
basicthinker committed Aug 9, 2017
1 parent 7f2491b commit f287e37
Showing 1 changed file with 5 additions and 16 deletions.
21 changes: 5 additions & 16 deletions seq2seq/textsum/convert_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
import pickle
import re

import sys
sys.path.append('../../lib')
import labeler

# Special tokens: markup-style string markers.
# NOTE(review): where these are emitted is not visible in this excerpt.
PARAGRAPH_START = '<p>'
PARAGRAPH_END = '</p>'
Expand All @@ -18,21 +22,6 @@
# Opening and closing marker strings named for a whole document.
DOCUMENT_START = '<d>'
DOCUMENT_END = '</d>'

# Label strings that can be attached to a patch.
BUG = 'bug'
FEATURE = 'feature'
PERFORMANCE = 'performance'
RELIABILITY = 'reliability'
MAINTENANCE = 'maintenance'

# Lookup table from the short code stored under a patch's 'type' key
# to the corresponding label string defined above.
fs_labeler = {
    'b': BUG,
    'f': FEATURE,
    'p': PERFORMANCE,
    'c': RELIABILITY,
    'misc': MAINTENANCE,
}

def to_skip(token):
    """Return True when *token* should be dropped.

    A token is skipped when it contains an ``=`` anywhere, or when it is a
    substring of ``'={}<>()[]--'``.

    Note that ``in`` on two strings is a substring test, not a per-character
    membership test: single punctuation characters such as ``'('`` match,
    so do adjacent runs such as ``'()'`` or ``'--'``, and so does the empty
    string.

    Args:
        token: The string to examine.

    Returns:
        bool: True if the token should be skipped, False otherwise.
    """
    return token in '={}<>()[]--' or '=' in token

Expand All @@ -49,7 +38,7 @@ def get_tokens(string, counter):
return tokens

def parse_patch(patch):
    """Split a patch record into its label and its message lines.

    Args:
        patch: Mapping that provides the 'type', 'message' and 'subject'
            entries read below.

    Returns:
        tuple: ``(label, lines)`` where ``label`` is the FS_LABELER entry for
        ``patch['type']`` and ``lines`` is ``patch['message']`` split into
        lines.

    Raises:
        KeyError: If a required entry is missing from ``patch`` (and,
            presumably, when the type code is unknown -- FS_LABELER is
            assumed to be a dict).
        IndexError: If the message contains no lines.
        AssertionError: If the subject differs from the stripped first line
            of the message.
    """
    # NOTE(review): FS_LABELER is not defined in the visible part of this
    # file; only `import labeler` is visible. Confirm the name is in scope
    # (it may need to be `labeler.FS_LABELER`).
    label = FS_LABELER[patch['type']]
    lines = patch['message'].splitlines()
    # The subject is expected to repeat the first line of the message.
    assert patch['subject'] == lines[0].strip()
    return label, lines
Expand Down

0 comments on commit f287e37

Please sign in to comment.