From 64bd793eac7a0449203c78864d64d8d4612146e3 Mon Sep 17 00:00:00 2001
From: Adrien Ball
Date: Fri, 28 Jun 2019 15:32:46 +0200
Subject: [PATCH] Fix non-deterministic behavior (#817)

* Fix non-deterministic behavior

fixes #778

* Update changelog

* Fix issue with dirhash

* Fix issues

* Fix issue with Python<3.4
---
 CHANGELOG.md                                  |   5 +
 setup.py                                      |   3 +-
 snips_nlu/dataset/validation.py               |   7 +
 .../log_reg_classifier_utils.py               |  10 +-
 .../deterministic_intent_parser.py            |   4 +-
 snips_nlu/tests/integration_test.py           |  51 +-------
 snips_nlu/tests/test_crf_slot_filler.py       |  25 ++++
 .../tests/test_deterministic_intent_parser.py |  38 ++++++
 .../test_intent_classifier_featurizer.py      | 120 ++++++++++++++++++
 .../tests/test_log_reg_classifier_utils.py    |   6 +-
 .../tests/test_log_reg_intent_classifier.py   |  47 ++++++-
 snips_nlu/tests/test_nlu_engine.py            |  45 +++++++
 12 files changed, 298 insertions(+), 63 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ba6bde94..99fdc1cf1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,10 @@
 # Changelog
 All notable changes to this project will be documented in this file.
 
+## [Unreleased]
+### Fixed
+- Fix non-deterministic behavior [#817](https://github.com/snipsco/snips-nlu/pull/817)
+
 ## [0.19.7]
 ### Changed
 - Re-score ambiguous `DeterministicIntentParser` results based on slots [#791](https://github.com/snipsco/snips-nlu/pull/791)
@@ -284,6 +288,7 @@ several commands.
 - Fix compiling issue with `bindgen` dependency when installing from source
 - Fix issue in `CRFSlotFiller` when handling builtin entities
 
+[Unreleased]: https://github.com/snipsco/snips-nlu/compare/0.19.7...lts-0.19.7.x
 [0.19.7]: https://github.com/snipsco/snips-nlu/compare/0.19.6...0.19.7
 [0.19.6]: https://github.com/snipsco/snips-nlu/compare/0.19.5...0.19.6
 [0.19.5]: https://github.com/snipsco/snips-nlu/compare/0.19.4...0.19.5
diff --git a/setup.py b/setup.py
index 59a2b2851..4412dc33f 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,8 @@
         "mock>=2.0,<3.0",
         "snips_nlu_metrics>=0.14.1,<0.15",
         "pylint<2",
-        "coverage>=4.4.2,<5.0"
+        "coverage>=4.4.2,<5.0",
+        "checksumdir~=1.1.6",
     ]
 }
 
diff --git a/snips_nlu/dataset/validation.py b/snips_nlu/dataset/validation.py
index 2af205b15..43c7cf905 100644
--- a/snips_nlu/dataset/validation.py
+++ b/snips_nlu/dataset/validation.py
@@ -47,12 +47,19 @@ def validate_and_format_dataset(dataset):
     if language not in get_all_languages():
         raise DatasetFormatError("Unknown language: '%s'" % language)
 
+    dataset[INTENTS] = {
+        intent_name: intent_data
+        for intent_name, intent_data in sorted(iteritems(dataset[INTENTS]))}
     for intent in itervalues(dataset[INTENTS]):
         _validate_and_format_intent(intent, dataset[ENTITIES])
 
     utterance_entities_values = extract_utterance_entities(dataset)
     builtin_entity_parser = BuiltinEntityParser.build(dataset=dataset)
 
+    dataset[ENTITIES] = {
+        entity_name: entity_data
+        for entity_name, entity_data in sorted(iteritems(dataset[ENTITIES]))}
+
     for entity_name, entity in iteritems(dataset[ENTITIES]):
         uterrance_entities = utterance_entities_values[entity_name]
         if is_builtin_entity(entity_name):
diff --git a/snips_nlu/intent_classifier/log_reg_classifier_utils.py b/snips_nlu/intent_classifier/log_reg_classifier_utils.py
index 5d709b77b..a681c2ffe 100644
--- a/snips_nlu/intent_classifier/log_reg_classifier_utils.py
+++ b/snips_nlu/intent_classifier/log_reg_classifier_utils.py
@@ -2,7 +2,7 @@
 
 import itertools
 import re
-from builtins import next, range, str, zip
+from builtins import next, range, str
 from copy import deepcopy
 from uuid import uuid4
 
@@ -120,14 +120,12 @@ def build_training_data(dataset, language, data_augmentation_config, resources,
 
         noise_class = intent_index
 
-    # Computing dataset statistics
-    nb_utterances = [len(intent[UTTERANCES]) for intent in itervalues(intents)]
-
     augmented_utterances = []
     utterance_classes = []
-    for nb_utterance, intent_name in zip(nb_utterances, intents):
+    for intent_name, intent_data in sorted(iteritems(intents)):
+        nb_utterances = len(intent_data[UTTERANCES])
         min_utterances_to_generate = max(
-            data_augmentation_config.min_utterances, nb_utterance)
+            data_augmentation_config.min_utterances, nb_utterances)
         utterances = augment_utterances(
             dataset, intent_name, language=language,
             min_utterances=min_utterances_to_generate,
diff --git a/snips_nlu/intent_parser/deterministic_intent_parser.py b/snips_nlu/intent_parser/deterministic_intent_parser.py
index 496c49ea1..6699601fa 100644
--- a/snips_nlu/intent_parser/deterministic_intent_parser.py
+++ b/snips_nlu/intent_parser/deterministic_intent_parser.py
@@ -7,7 +7,7 @@
 from collections import defaultdict
 from pathlib import Path
 
-from future.utils import iteritems, iterkeys, itervalues
+from future.utils import iteritems, itervalues
 from snips_nlu_utils import normalize
 
 from snips_nlu.common.dataset_utils import get_slot_name_mappings
@@ -483,7 +483,7 @@ def _get_range_shift(matched_range, ranges_mapping):
 
 
 def _get_group_names_to_slot_names(slot_names_mapping):
     slot_names = {slot_name for mapping in itervalues(slot_names_mapping)
-                  for slot_name in iterkeys(mapping)}
+                  for slot_name in mapping}
     return {"group%s" % i: name
             for i, name in enumerate(sorted(slot_names))}
diff --git a/snips_nlu/tests/integration_test.py b/snips_nlu/tests/integration_test.py
index 38059d115..17c8b5e9b 100644
--- a/snips_nlu/tests/integration_test.py
+++ b/snips_nlu/tests/integration_test.py
@@ -1,8 +1,7 @@
 # coding=utf-8
 from __future__ import print_function, unicode_literals
 
-import json
-from builtins import range, str
+from builtins import str
 
 from future.utils import iteritems
 from snips_nlu_metrics import compute_cross_val_metrics
@@ -51,33 +50,6 @@ def check_metrics(self, results):
                     "Slot f1 score is too low (%.3f) for slot '%s' of intent "
                     "'%s'" % (slot_f1, slot_name, intent_name))
 
-    def test_nlu_engine_training_is_deterministic(self):
-        # We can't write a test to ensure the NLU training is always the same
-        # instead we train the NLU 10 times and check the learnt parameters.
-        # It does not bring any guarantee but it might alert us once in a while
-
-        # Given
-        num_runs = 10
-        random_state = 42
-
-        with PERFORMANCE_DATASET_PATH.open("r") as f:
-            dataset = json.load(f)
-
-        ref_log_reg, ref_crfs = None, None
-        for _ in range(num_runs):
-            # When
-            engine = TrainingEngine(random_state=random_state).fit(dataset)
-            log_reg = _extract_log_reg(engine)
-            crfs = _extract_crfs(engine)
-
-            if ref_log_reg is None:
-                ref_log_reg = log_reg
-                ref_crfs = crfs
-            else:
-                # Then
-                self.assertDictEqual(ref_log_reg, log_reg)
-                self.assertDictEqual(ref_crfs, crfs)
-
 
 def _slot_matching_lambda(lhs_slot, rhs_slot):
     lhs_value = lhs_slot["text"]
@@ -93,24 +65,3 @@ def _slot_matching_lambda(lhs_slot, rhs_slot):
     if rhs_tokens and rhs_tokens[0].lower() in SKIPPED_DATE_PREFIXES:
         rhs_tokens = rhs_tokens[1:]
     return lhs_tokens == rhs_tokens
-
-
-def _extract_log_reg(engine):
-    log_reg = dict()
-    intent_classifier = engine.intent_parsers[1].intent_classifier
-    log_reg["intent_list"] = intent_classifier.intent_list
-    log_reg["coef"] = intent_classifier.classifier.coef_.tolist()
-    log_reg["intercept"] = intent_classifier.classifier.intercept_.tolist()
-    log_reg["t_"] = intent_classifier.classifier.t_
-    return log_reg
-
-
-def _extract_crfs(engine):
-    crfs = dict()
-    slot_fillers = engine.intent_parsers[1].slot_fillers
-    for intent, slot_filler in iteritems(slot_fillers):
-        crfs[intent] = {
-            "state_features": slot_filler.crf_model.state_features_,
-            "transition_features": slot_filler.crf_model.transition_features_
-        }
-    return crfs
diff --git a/snips_nlu/tests/test_crf_slot_filler.py b/snips_nlu/tests/test_crf_slot_filler.py
index a9ec87e83..5c213bf48 100644
--- a/snips_nlu/tests/test_crf_slot_filler.py
+++ b/snips_nlu/tests/test_crf_slot_filler.py
@@ -989,3 +989,28 @@ def __del__(self):
 Features not seen at train time:
 - ngram_1:text"""
         self.assertEqual(expected_log, log)
+
+    def test_training_should_be_reproducible(self):
+        # Given
+        random_state = 42
+        dataset_stream = io.StringIO("""
+---
+type: intent
+name: MakeTea
+utterances:
+- make me a [beverage_temperature:Temperature](hot) cup of tea
+- make me [number_of_cups:snips/number](five) tea cups""")
+        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
+
+        # When
+        slot_filler1 = CRFSlotFiller(random_state=random_state)
+        slot_filler1.fit(dataset, "MakeTea")
+
+        slot_filler2 = CRFSlotFiller(random_state=random_state)
+        slot_filler2.fit(dataset, "MakeTea")
+
+        # Then
+        self.assertDictEqual(slot_filler1.crf_model.state_features_,
+                             slot_filler2.crf_model.state_features_)
+        self.assertDictEqual(slot_filler1.crf_model.transition_features_,
+                             slot_filler2.crf_model.transition_features_)
diff --git a/snips_nlu/tests/test_deterministic_intent_parser.py b/snips_nlu/tests/test_deterministic_intent_parser.py
index a01305eb0..2c21a218c 100644
--- a/snips_nlu/tests/test_deterministic_intent_parser.py
+++ b/snips_nlu/tests/test_deterministic_intent_parser.py
@@ -4,8 +4,10 @@
 import io
 from builtins import range
 
+from checksumdir import dirhash
 from mock import patch
 
+from snips_nlu.common.io_utils import temp_dir
 from snips_nlu.constants import (
     DATA, END, ENTITY, LANGUAGE_EN, RES_ENTITY, RES_INTENT, RES_INTENT_NAME,
     RES_PROBA, RES_SLOTS, RES_VALUE, SLOT_NAME, START, TEXT, STOP_WORDS)
@@ -1201,3 +1203,39 @@ def test_should_get_range_shift(self):
         # When / Then
         self.assertEqual(-1, _get_range_shift((6, 7), ranges_mapping))
         self.assertEqual(2, _get_range_shift((12, 13), ranges_mapping))
+
+    def test_training_should_be_reproducible(self):
+        # Given
+        random_state = 42
+        dataset_stream = io.StringIO("""
+---
+type: intent
+name: MakeTea
+utterances:
+- make me a [beverage_temperature:Temperature](hot) cup of tea
+- make me [number_of_cups:snips/number](five) tea cups
+
+---
+type: intent
+name: MakeCoffee
+utterances:
+- make me [number_of_cups:snips/number](one) cup of coffee please
+- brew [number_of_cups] cups of coffee""")
+        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
+
+        # When
+        parser1 = DeterministicIntentParser(random_state=random_state)
+        parser1.fit(dataset)
+
+        parser2 = DeterministicIntentParser(random_state=random_state)
+        parser2.fit(dataset)
+
+        # Then
+        with temp_dir() as tmp_dir:
+            dir_parser1 = tmp_dir / "parser1"
+            dir_parser2 = tmp_dir / "parser2"
+            parser1.persist(dir_parser1)
+            parser2.persist(dir_parser2)
+            hash1 = dirhash(str(dir_parser1), 'sha256')
+            hash2 = dirhash(str(dir_parser2), 'sha256')
+            self.assertEqual(hash1, hash2)
diff --git a/snips_nlu/tests/test_intent_classifier_featurizer.py b/snips_nlu/tests/test_intent_classifier_featurizer.py
index f2d571d44..56d2e9f8b 100644
--- a/snips_nlu/tests/test_intent_classifier_featurizer.py
+++ b/snips_nlu/tests/test_intent_classifier_featurizer.py
@@ -5,8 +5,10 @@
 from builtins import str, zip, range
 
 import numpy as np
+from checksumdir import dirhash
 from mock import patch, MagicMock
 
+from snips_nlu.common.io_utils import temp_dir
 from snips_nlu.common.utils import json_string
 from snips_nlu.constants import LANGUAGE_EN, STEMS, WORD_CLUSTERS, STOP_WORDS
 from snips_nlu.dataset import Dataset
@@ -309,6 +311,48 @@ def test_fit_cooccurrence_vectorizer_feature_selection(self, mocked_chi2):
         self.assertDictEqual(
             expected_pairs, featurizer.cooccurrence_vectorizer.word_pairs)
 
+    def test_training_should_be_reproducible(self):
+        # Given
+        dataset_stream = io.StringIO("""
+---
+type: intent
+name: MakeTea
+utterances:
+- make me a [beverage_temperature:Temperature](hot) cup of tea
+- make me [number_of_cups:snips/number](five) tea cups
+
+---
+type: intent
+name: MakeCoffee
+utterances:
+- make me [number_of_cups:snips/number](one) cup of coffee please
+- brew [number_of_cups] cups of coffee""")
+        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
+        utterances = [
+            text_to_utterance("please make me two hots cups of tea"),
+            text_to_utterance("i want a cup of coffee"),
+        ]
+        classes = np.array([0, 1])
+        shared = self.get_shared_data(dataset)
+        shared["random_state"] = 42
+
+        # When
+        featurizer1 = Featurizer(**shared)
+        featurizer1.fit(dataset, utterances, classes, max(classes))
+
+        featurizer2 = Featurizer(**shared)
+        featurizer2.fit(dataset, utterances, classes, max(classes))
+
+        # Then
+        with temp_dir() as tmp_dir:
+            dir_featurizer1 = tmp_dir / "featurizer1"
+            dir_featurizer2 = tmp_dir / "featurizer2"
+            featurizer1.persist(dir_featurizer1)
+            featurizer2.persist(dir_featurizer2)
+            hash1 = dirhash(str(dir_featurizer1), 'sha256')
+            hash2 = dirhash(str(dir_featurizer2), 'sha256')
+            self.assertEqual(hash1, hash2)
+
 
 class TestTfidfVectorizer(FixtureTest):
 
@@ -675,6 +719,44 @@ def test_preprocess(self):
 
         self.assertSequenceEqual(expected_data, processed_data)
 
+    def test_training_should_be_reproducible(self):
+        # Given
+        dataset_stream = io.StringIO("""
+---
+type: intent
+name: MakeTea
+utterances:
+- make me a [beverage_temperature:Temperature](hot) cup of tea
+- make me [number_of_cups:snips/number](five) tea cups
+
+---
+type: intent
+name: MakeCoffee
+utterances:
+- make me [number_of_cups:snips/number](one) cup of coffee please
+- brew [number_of_cups] cups of coffee""")
+        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
+        x = [text_to_utterance("please make me two hots cups of tea")]
+        shared = self.get_shared_data(dataset)
+        shared["random_state"] = 42
+
+        # When
+        vectorizer1 = TfidfVectorizer(**shared)
+        vectorizer1.fit(x, dataset)
+
+        vectorizer2 = TfidfVectorizer(**shared)
+        vectorizer2.fit(x, dataset)
+
+        # Then
+        with temp_dir() as tmp_dir:
+            dir_vectorizer1 = tmp_dir / "vectorizer1"
+            dir_vectorizer2 = tmp_dir / "vectorizer2"
+            vectorizer1.persist(dir_vectorizer1)
+            vectorizer2.persist(dir_vectorizer2)
+            hash1 = dirhash(str(dir_vectorizer1), 'sha256')
+            hash2 = dirhash(str(dir_vectorizer2), 'sha256')
+            self.assertEqual(hash1, hash2)
+
 
 class CooccurrenceVectorizerTest(FixtureTest):
 
@@ -1193,3 +1275,41 @@ def test_preprocess(self):
         ]
 
         self.assertSequenceEqual(expected_data, processed_data)
+
+    def test_training_should_be_reproducible(self):
+        # Given
+        dataset_stream = io.StringIO("""
+---
+type: intent
+name: MakeTea
+utterances:
+- make me a [beverage_temperature:Temperature](hot) cup of tea
+- make me [number_of_cups:snips/number](five) tea cups
+
+---
+type: intent
+name: MakeCoffee
+utterances:
+- make me [number_of_cups:snips/number](one) cup of coffee please
+- brew [number_of_cups] cups of coffee""")
+        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
+        x = [text_to_utterance("please make me two hots cups of tea")]
+        shared = self.get_shared_data(dataset)
+        shared["random_state"] = 42
+
+        # When
+        vectorizer1 = CooccurrenceVectorizer(**shared)
+        vectorizer1.fit(x, dataset)
+
+        vectorizer2 = CooccurrenceVectorizer(**shared)
+        vectorizer2.fit(x, dataset)
+
+        # Then
+        with temp_dir() as tmp_dir:
+            dir_vectorizer1 = tmp_dir / "vectorizer1"
+            dir_vectorizer2 = tmp_dir / "vectorizer2"
+            vectorizer1.persist(dir_vectorizer1)
+            vectorizer2.persist(dir_vectorizer2)
+            hash1 = dirhash(str(dir_vectorizer1), 'sha256')
+            hash2 = dirhash(str(dir_vectorizer2), 'sha256')
+            self.assertEqual(hash1, hash2)
diff --git a/snips_nlu/tests/test_log_reg_classifier_utils.py b/snips_nlu/tests/test_log_reg_classifier_utils.py
index 66e27baba..1d6970e10 100644
--- a/snips_nlu/tests/test_log_reg_classifier_utils.py
+++ b/snips_nlu/tests/test_log_reg_classifier_utils.py
@@ -6,7 +6,7 @@
 from itertools import cycle
 
 import numpy as np
-from future.utils import itervalues
+from future.utils import itervalues, iteritems
 from mock import MagicMock, patch
 
 from snips_nlu.constants import (
@@ -58,8 +58,8 @@ def test_should_build_training_data_with_no_noise(
             random_state)
 
         # Then
-        expected_utterances = [utterance for intent
-                               in itervalues(dataset[INTENTS])
+        expected_utterances = [utterance for _, intent
+                               in sorted(iteritems(dataset[INTENTS]))
                                for utterance in intent[UTTERANCES]]
         expected_intent_mapping = ["my_first_intent", "my_second_intent"]
         self.assertListEqual(expected_utterances, utterances)
diff --git a/snips_nlu/tests/test_log_reg_intent_classifier.py b/snips_nlu/tests/test_log_reg_intent_classifier.py
index 334a3d74d..d855f47ad 100644
--- a/snips_nlu/tests/test_log_reg_intent_classifier.py
+++ b/snips_nlu/tests/test_log_reg_intent_classifier.py
@@ -2,10 +2,14 @@
 from __future__ import unicode_literals
 
 import io
-
+import sys
 from builtins import str
+from unittest import skipIf
+
+from checksumdir import dirhash
 from mock import patch
 
+from snips_nlu.common.io_utils import temp_dir
 from snips_nlu.constants import (
     INTENTS, LANGUAGE_EN, RES_INTENT_NAME, RES_PROBA, UTTERANCES)
 from snips_nlu.dataset import Dataset
@@ -470,3 +474,44 @@ def test_log_best_features(self):
         # Then
         self.assertIsInstance(log, str)
         self.assertIn("Top 20", log)
+
+    @skipIf(sys.version_info[0:2] < (3, 5),
+            "The bug fixed here "
+            "https://github.com/scikit-learn/scikit-learn/pull/13422 is "
+            "available for scikit-learn>=0.21.0 in which the support for "
+            "Python<=3.4 has been dropped")
+    def test_training_should_be_reproducible(self):
+        # Given
+        random_state = 40
+        dataset_stream = io.StringIO("""
+---
+type: intent
+name: MakeTea
+utterances:
+- make me a [beverage_temperature:Temperature](hot) cup of tea
+- make me [number_of_cups:snips/number](five) tea cups
+
+---
+type: intent
+name: MakeCoffee
+utterances:
+- make me [number_of_cups:snips/number](one) cup of coffee please
+- brew [number_of_cups] cups of coffee""")
+        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
+
+        # When
+        classifier1 = LogRegIntentClassifier(random_state=random_state)
+        classifier1.fit(dataset)
+
+        classifier2 = LogRegIntentClassifier(random_state=random_state)
+        classifier2.fit(dataset)
+
+        # Then
+        with temp_dir() as tmp_dir:
+            dir_classifier1 = tmp_dir / "classifier1"
+            dir_classifier2 = tmp_dir / "classifier2"
+            classifier1.persist(dir_classifier1)
+            classifier2.persist(dir_classifier2)
+            hash1 = dirhash(str(dir_classifier1), 'sha256')
+            hash2 = dirhash(str(dir_classifier2), 'sha256')
+            self.assertEqual(hash1, hash2)
diff --git a/snips_nlu/tests/test_nlu_engine.py b/snips_nlu/tests/test_nlu_engine.py
index 30a438a26..5d24f1569 100644
--- a/snips_nlu/tests/test_nlu_engine.py
+++ b/snips_nlu/tests/test_nlu_engine.py
@@ -3,13 +3,17 @@
 
 import io
 import shutil
+import sys
 from builtins import str
+from unittest import skipIf
 
+from checksumdir import dirhash
 from mock import MagicMock, patch
 from snips_nlu_parsers import get_all_languages
 
 import snips_nlu
 from snips_nlu import load_resources
+from snips_nlu.common.io_utils import temp_dir
 from snips_nlu.constants import (
     END, LANGUAGE, LANGUAGE_EN, RES_ENTITY, RES_INPUT, RES_INTENT,
     RES_INTENT_NAME, RES_MATCH_RANGE, RES_RAW_VALUE, RES_SLOTS, RES_SLOT_NAME,
@@ -1350,3 +1354,44 @@ def test_should_not_build_custom_parser_when_provided(self):
 
         # Then
         mocked_build_parser.assert_not_called()
+
+    @skipIf(sys.version_info[0:2] < (3, 5),
+            "The bug fixed here "
+            "https://github.com/scikit-learn/scikit-learn/pull/13422 is "
+            "available for scikit-learn>=0.21.0 in which the support for "
+            "Python<=3.4 has been dropped")
+    def test_training_should_be_reproducible(self):
+        # Given
+        random_state = 42
+        dataset_stream = io.StringIO("""
+---
+type: intent
+name: MakeTea
+utterances:
+- make me a hot cup of tea
+- make me five tea cups
+
+---
+type: intent
+name: MakeCoffee
+utterances:
+- make me one cup of coffee please
+- brew two cups of coffee""")
+        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
+
+        # When
+        engine1 = SnipsNLUEngine(random_state=random_state)
+        engine1.fit(dataset)
+
+        engine2 = SnipsNLUEngine(random_state=random_state)
+        engine2.fit(dataset)
+
+        # Then
+        with temp_dir() as tmp_dir:
+            dir_engine1 = tmp_dir / "engine1"
+            dir_engine2 = tmp_dir / "engine2"
+            engine1.persist(dir_engine1)
+            engine2.persist(dir_engine2)
+            hash1 = dirhash(str(dir_engine1), 'sha256')
+            hash2 = dirhash(str(dir_engine2), 'sha256')
+            self.assertEqual(hash1, hash2)
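
For reference, the reproducibility property that the new tests lock down can be checked end to end against an installed engine with the same pattern the patch uses: train twice with a fixed random_state, persist both engines, and compare directory checksums. The sketch below is not part of the patch. It assumes a Python 3 environment with this fix applied, the checksumdir package, and the English resources downloaded (python -m snips_nlu download en); the standard tempfile.TemporaryDirectory stands in for the library-internal temp_dir helper used by the tests.

import io
import tempfile
from pathlib import Path

from checksumdir import dirhash
from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.dataset import Dataset

# Toy dataset in the same YAML format used by the new tests
dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a hot cup of tea
- make me five tea cups""")
dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

load_resources("en")  # requires the "en" resources to be downloaded

with tempfile.TemporaryDirectory() as tmp:
    hashes = []
    for name in ("engine1", "engine2"):
        # Same fixed random_state on both runs
        engine = SnipsNLUEngine(random_state=42).fit(dataset)
        engine_dir = Path(tmp) / name
        engine.persist(engine_dir)
        hashes.append(dirhash(str(engine_dir), "sha256"))

    # With this patch applied, both trained engines serialize identically
    assert hashes[0] == hashes[1]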