remove textblob dependency

Rostlab · Jun 23, 2016 · 1f0b5c7 · 1f0b5c7
1 parent 79e6a42
commit 1f0b5c7
Show file tree

Hide file tree

Showing 2 changed files with 0 additions and 85 deletions.
diff --git a/nalaf/features/parsing.py b/nalaf/features/parsing.py
@@ -1,8 +1,4 @@
-from textblob import TextBlob
-from textblob.en.taggers import NLTKTagger
-from textblob.en.np_extractors import FastNPExtractor
 from nalaf.features import FeatureGenerator
-from nalaf import print_debug
 from nalaf.features import get_spacy_nlp_english
 
 class SpacyPosTagger(FeatureGenerator):
@@ -27,81 +23,3 @@ def generate(self, dataset):
                 for token, spacy_token in zip(sentence, spacy_doc):
                     token.features['pos'] = spacy_token.pos_
                     token.features['tag'] = spacy_token.tag_
-
-class NLKTPosTagger(FeatureGenerator):
-    """
-    POS-tag a dataset using the NLTK Pos Tagger
-    See: https://textblob.readthedocs.org/en/dev/_modules/textblob/en/taggers.html#NLTKTagger
-    """
-
-    def __init__(self):
-        self.tagger = NLTKTagger()
-
-    def generate(self, dataset):
-        """
-        :type dataset: nalaf.structures.data.Dataset
-        """
-
-        for part in dataset.parts():
-            for sentence in part.sentences:
-                text_tokens = list(map(lambda x: x.word, sentence))
-                tags = self.tagger.tag(text_tokens, tokenize=False)
-                for token, tag in zip(sentence, tags):
-                    token.features['tag'] = tag[1]
-
-class PosTagFeatureGenerator(FeatureGenerator):
-    """
-    """
-
-    def __init__(self):
-        self.punctuation = ['.', ',', ':', ';', '[', ']', '(', ')', '{', '}', '”', '“', '–', '"', '#', '?', '-']
-
-    def generate(self, dataset):
-        """
-        :type dataset: nalaf.structures.data.Dataset
-        """
-        for part in dataset.parts():
-            tags = TextBlob(part.text).tags
-
-            tag_index = 0
-            for sentence in part.sentences:
-                for token in sentence:
-                    if token.word in self.punctuation:
-                        token.features['tag'] = 'PUN'
-                    else:
-                        remember_index = tag_index
-                        for word, tag in tags[tag_index:]:
-                            if token.word in word:
-                                token.features['tag'] = tag
-                                tag_index += 1
-                                break
-                        tag_index = remember_index
-
-
-class NounPhraseFeatureGenerator(FeatureGenerator):
-    """
-
-    """
-    def __init__(self):
-        self.extractor = FastNPExtractor()
-
-    def generate(self, dataset):
-        """
-        :type dataset: nalaf.structures.data.Dataset
-        """
-        for part in dataset.parts():
-            for sentence in part:
-                # get the chunk of text representing the sentence
-                joined_sentence = part.text[sentence[0].start:sentence[-1].start + len(sentence[-1].word)]
-                phrases = self.extractor.extract(joined_sentence)
-                for phrase in phrases:
-                    # only consider real noun phrases that have more than 1 word
-                    if ' ' in phrase:
-                        # find the phrase offset in part text
-                        phrase_start = part.text.find(phrase, sentence[0].start)
-                        phrase_end = phrase_start + len(phrase)
-
-                        # mark the tokens that are part of that phrase
-                        for token in sentence:
-                            if phrase_start <= token.start < token.end <= phrase_end:
-                                token.features['is_nn'] = 1
diff --git a/setup.py b/setup.py
@@ -37,9 +37,6 @@ def readme():
 
         # 'spacy'
         # Note: it may cause problems on different environments (e.g. Travis CI)-- install it manually
-
-        # 'textblob'
-        # Note: it may cause problems on different environments (e.g. Travis CI)-- install it manually
     ],
     include_package_data=True,
     zip_safe=False,