Merge pull request #23 from dc-aichara/add_flake
Add Flake8 Code Checker
R1j1t authored Sep 22, 2020
2 parents d1d5990 + 1a26af8 commit 96cb79e
Showing 5 changed files with 66 additions and 38 deletions.
8 changes: 8 additions & 0 deletions .flake8
@@ -0,0 +1,8 @@
[flake8]
ignore = W503
exclude = .git,__pycache__,build,peters_code,.ipynb_checkpoints,setup.py
max-complexity = 15
per-file-ignores =
# imported but unused
__init__.py: F401
max-line-length = 80
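A note on this configuration: ignoring W503 ("line break before binary operator") is the usual setting for projects formatted with Black, which wraps long lines with the operator at the start of the new line; the per-file-ignores entry stops flake8 from flagging re-export imports in __init__.py as unused (F401); and max-complexity = 15 enables the McCabe complexity check with a per-function ceiling of 15. With this file at the repository root, a plain flake8 . run locally should pick up the same settings the CI job uses.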
6 changes: 6 additions & 0 deletions .github/workflows/python-package.yml
@@ -33,6 +33,12 @@ jobs:
python -m spacy download en_core_web_sm
- name: Black Code Formatter
run: black . --check
- name: Flake Code Checker
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings.
flake8 . --count --exit-zero --statistics
- name: Test with pytest
run: |
pytest
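The two flake8 invocations above follow a common two-pass pattern: the first run restricts itself to serious error classes (E9 syntax errors plus the F63/F7/F82 groups, which largely cover undefined names and clearly broken statements) and fails the build if any are found; the second run applies the full rule set but with --exit-zero, so remaining findings show up in the job log as warnings without breaking the build.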
27 changes: 14 additions & 13 deletions contextualSpellCheck/contextualSpellCheck.py
File mode changed: 100644 → 100755
@@ -1,14 +1,14 @@
import spacy
import torch
import editdistance
from datetime import datetime
import os
import copy
import warnings, logging
import logging
import os
import warnings
from datetime import datetime

import editdistance
import spacy
import torch
from spacy.tokens import Doc, Token, Span
from spacy.vocab import Vocab

from transformers import AutoModelForMaskedLM, AutoTokenizer


@@ -43,19 +43,20 @@ def __init__(
Defaults to False.
"""
if (
(type(vocab_path) != type(""))
or (type(debug) != type(True))
or (type(performance) != type(True))
not isinstance(vocab_path, str)
or not isinstance(debug, type(True))
or not isinstance(performance, type(True))
):
raise TypeError(
"Please check datatype provided. vocab_path should be str,"
" debug and performance should be bool"
)
try:
int(float(max_edit_dist))
except ValueError as identifier:
except ValueError:
raise ValueError(
f"cannot convert {max_edit_dist} to int. Please provide a valid integer"
f"cannot convert {max_edit_dist} to int. Please provide a "
f"valid integer "
)

if vocab_path != "":
@@ -182,7 +183,7 @@ def check(self, query="", spacy_model="en_core_web_sm"):
(str, `Doc`): returns updated query (if no oov words then "")
and updated Doc Object
"""
if type(query) != str and len(query) == 0:
if not isinstance(query, str) and len(query) == 0:
return "Invalid query, expected non empty `str` but passed", query

nlp = spacy.load(spacy_model, disable=["tagger", "parser"])
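A minimal sketch of the isinstance-based validation this commit moves to (the helper name _validate_args is hypothetical, not the project's actual code); note that isinstance(x, type(True)) is equivalent to the more direct isinstance(x, bool):

def _validate_args(
    vocab_path="", debug=False, performance=False, max_edit_dist=10
):
    # Reject wrong argument types up front, mirroring the constructor's checks.
    if (
        not isinstance(vocab_path, str)
        or not isinstance(debug, bool)
        or not isinstance(performance, bool)
    ):
        raise TypeError(
            "Please check datatype provided. vocab_path should be str,"
            " debug and performance should be bool"
        )
    try:
        # int(float(...)) accepts ints, floats, and numeric strings alike.
        return int(float(max_edit_dist))
    except ValueError:
        raise ValueError(
            f"cannot convert {max_edit_dist} to int. "
            "Please provide a valid integer"
        )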
59 changes: 34 additions & 25 deletions contextualSpellCheck/tests/test_contextualSpellCheck.py
@@ -1,7 +1,8 @@
import pytest
import spacy
from pytest import approx
import warnings, os
import warnings
import os

from ..contextualSpellCheck import ContextualSpellCheck

@@ -18,7 +19,8 @@
"inputSentence, misspell",
[
(
"Income was $9.4 million compared to the prior year of $2.7 million.",
"Income was $9.4 million \
compared to the prior year of $2.7 million.",
[],
),
("who is Rajat Goel?", []),
@@ -43,8 +45,8 @@ def test_no_misspellIdentify(inputSentence, misspell):
def test_type_misspellIdentify(inputSentence, misspell):
print("Start type correction test for spelling mistake identification\n")
doc = nlp(inputSentence)
assert type(checker.misspell_identify(doc)[0]) == type(misspell)
assert type(checker.misspell_identify(doc)[1]) == type(doc)
assert isinstance(checker.misspell_identify(doc)[0], type(misspell))
assert isinstance(checker.misspell_identify(doc)[1], type(doc))
assert checker.misspell_identify(doc)[1] == doc


@@ -62,8 +64,8 @@ def test_identify_misspellIdentify(inputSentence, misspell):
print("Start misspell word identifation test\n")
doc = nlp(inputSentence)
checkerReturn = checker.misspell_identify(doc)[0]
assert type(checkerReturn) == list
## Changed the approach after v0.1.0
assert isinstance(checkerReturn, list)
# Changed the approach after v0.1.0
assert [tok.text_with_ws for tok in checkerReturn] == [
doc[i].text_with_ws for i in misspell
]
@@ -142,9 +144,9 @@ def test_skipURL_misspellIdentify(inputSentence, misspell):
def test_type_candidateGenerator(inputSentence, misspell):
doc = nlp(inputSentence)
misspell, doc = checker.misspell_identify(doc)
assert type(checker.candidate_generator(doc, misspell)) == tuple
assert type(checker.candidate_generator(doc, misspell)[0]) == type(doc)
assert type(checker.candidate_generator(doc, misspell)[1]) == dict
assert isinstance(checker.candidate_generator(doc, misspell), tuple)
assert isinstance(checker.candidate_generator(doc, misspell)[0], type(doc))
assert isinstance(checker.candidate_generator(doc, misspell)[1], dict)


@pytest.mark.parametrize(
@@ -203,7 +205,7 @@ def test_identify_candidateGenerator(inputSentence, misspell):
doc = nlp(inputSentence)
(misspellings, doc) = checker.misspell_identify(doc)
doc, suggestions = checker.candidate_generator(doc, misspellings)
## changed after v1.0 because of deepCopy creatng issue with ==
# changed after v1.0 because of deepCopy creatng issue with ==
# gold_suggestions = {doc[key]: value for key, value in misspell.items()}
assert [tok.i for tok in suggestions] == [key for key in misspell.keys()]
assert [suggString for suggString in suggestions.values()] == [
@@ -226,7 +228,7 @@ def test_identify_candidateGenerator(inputSentence, misspell):
def test_extension_candidateGenerator(inputSentence, misspell):
doc = nlp(inputSentence)
(misspellings, doc) = checker.misspell_identify(doc)
suggestions = checker.candidate_generator(doc, misspellings)
checker.candidate_generator(doc, misspellings)
assert doc._.performed_spellCheck == misspell


@@ -321,8 +323,9 @@ def test_ranking_candidateRanking(inputSentence, misspell):
(misspellings, doc) = checker.misspell_identify(doc)
doc, suggestions = checker.candidate_generator(doc, misspellings)
selectedWord = checker.candidate_ranking(doc, suggestions)
## changes made after v0.1
# assert selectedWord == {doc[key]: value for key, value in misspell.items()}
# changes made after v0.1
# assert selectedWord ==
# {doc[key]: value for key, value in misspell.items()}
assert [tok.i for tok in selectedWord.keys()] == [
tok for tok in misspell.keys()
]
@@ -378,8 +381,8 @@ def test_doc_extensions():
("%", 0.00041),
],
}
assert doc._.contextual_spellCheck == True
assert doc._.performed_spellCheck == True
assert doc._.contextual_spellCheck
assert doc._.performed_spellCheck
# updated after v0.1
assert [tok.i for tok in doc._.suggestions_spellCheck.keys()] == [
tok.i for tok in gold_suggestion.keys()
@@ -422,7 +425,7 @@ def test_doc_extensions():
def test_span_extensions():
try:
nlp.add_pipe(checker)
except:
except BaseException:
print("contextual SpellCheck already in pipeline")
doc = nlp(
"Income was $9.4 milion compared to the prior year of $2.7 milion."
@@ -446,7 +449,7 @@ def test_span_extensions():
doc[5]: [],
}

assert doc[2:6]._.get_has_spellCheck == True
assert doc[2:6]._.get_has_spellCheck
# splitting components to make use of approx function
print(doc[2:6]._.score_spellCheck)
print(gold_score)
@@ -472,7 +475,8 @@ def test_span_extensions():
abs=1e-4,
)

# assert doc[2:6]._.score_spellCheck == approx(gold_score,rel=1e-4, abs=1e-4)
# assert doc[2:6]._.score_spellCheck ==
# approx(gold_score,rel=1e-4, abs=1e-4)
nlp.remove_pipe("contextual spellchecker")


@@ -497,7 +501,7 @@ def test_token_extension():
("USD", 0.00113),
]

assert doc[4]._.get_require_spellCheck == True
assert doc[4]._.get_require_spellCheck
assert doc[4]._.get_suggestion_spellCheck == gold_suggestions
# Match words and score separately to incorporate approx fn in pytest
assert [word_score[0] for word_score in doc[4]._.score_spellCheck] == [
@@ -525,13 +529,14 @@ def test_warning():
# warnings.simplefilter("always")
# Trigger a warning.

assert doc[4]._.get_require_spellCheck == False
assert not doc[4]._.get_require_spellCheck
assert doc[4]._.get_suggestion_spellCheck == ""
assert doc[4]._.score_spellCheck == []
# Verify Warning
assert issubclass(w[-1].category, UserWarning)
assert (
"Position of tokens modified by downstream element in pipeline eg. merge_entities"
"Position of tokens modified by downstream \
element in pipeline eg. merge_entities"
in str(w[-1].message)
)

@@ -546,14 +551,16 @@ def test_warning():
ContextualSpellCheck(vocab_path=True)
assert (
e
== "Please check datatype provided. vocab_path should be str, debug and performance should be bool"
== "Please check datatype provided. \
vocab_path should be str, debug and performance should be bool"
)
max_edit_distance = "non_int_or_float"
with pytest.raises(ValueError) as e:
ContextualSpellCheck(max_edit_dist=max_edit_distance)
assert (
e
== f"cannot convert {max_edit_distance} to int. Please provide a valid integer"
== f"cannot convert {max_edit_distance} to int. \
Please provide a valid integer"
)

try:
@@ -582,8 +589,10 @@ def test_bert_model_name():
model_name = "a_random_model"
error_message = (
f"Can't load config for '{model_name}'. Make sure that:\n\n"
f"- '{model_name}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n"
f"- or '{model_name}' is the correct path to a directory containing a config.json file\n\n"
f"- '{model_name}' is a correct model identifier listed on \
'https://huggingface.co/models'\n\n"
f"- or '{model_name}' is the correct path to a directory \
containing a config.json file\n\n"
)

with pytest.raises(OSError) as e:
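Several of the long strings above are wrapped with backslash continuations to satisfy the new 80-character limit. A minimal sketch of the trade-off: if the continuation line is indented, that indentation becomes part of the string value, whereas adjacent string literals inside parentheses concatenate without it.

# Backslash continuation: any leading whitespace on the second physical line
# ends up inside the string.
wrapped = "Income was $9.4 million \
compared to the prior year of $2.7 million."

# Implicit concatenation of adjacent literals keeps only the quoted text.
joined = (
    "Income was $9.4 million "
    "compared to the prior year of $2.7 million."
)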
4 changes: 4 additions & 0 deletions requirements.txt
@@ -10,3 +10,7 @@ mecab-python3==0.996.5
ipadic==1.0.0
unidic-lite==1.0.6

# Code formatting
flake8==3.8.3
black==20.8b1
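Pinning flake8 and black here lets contributors reproduce the CI checks locally (pip install -r requirements.txt, then flake8 . and black . --check), presumably against the same versions the workflow installs.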
