Skip to content

Commit

Permalink
Fix non-deterministic behavior (#817)
Browse files Browse the repository at this point in the history
* Fix non-deterministic behavior

fixes #778

* Update changelog

* Fix issue with dirhash

* Fix issues

* Fix issue with Python<3.4
  • Loading branch information
adrienball authored and ClemDoum committed Jul 10, 2019
1 parent 1b9924b commit 64bd793
Show file tree
Hide file tree
Showing 12 changed files with 298 additions and 63 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Changelog
All notable changes to this project will be documented in this file.

## [Unreleased]
### Fixed
- Fix non-deterministic behavior [#817](https://github.com/snipsco/snips-nlu/pull/817)

## [0.19.7]
### Changed
- Re-score ambiguous `DeterministicIntentParser` results based on slots [#791](https://github.com/snipsco/snips-nlu/pull/791)
Expand Down Expand Up @@ -284,6 +288,7 @@ several commands.
- Fix compiling issue with `bindgen` dependency when installing from source
- Fix issue in `CRFSlotFiller` when handling builtin entities

[Unreleased]: https://github.com/snipsco/snips-nlu/compare/0.19.7...lts-0.19.7.x
[0.19.7]: https://github.com/snipsco/snips-nlu/compare/0.19.6...0.19.7
[0.19.6]: https://github.com/snipsco/snips-nlu/compare/0.19.5...0.19.6
[0.19.5]: https://github.com/snipsco/snips-nlu/compare/0.19.4...0.19.5
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@
"mock>=2.0,<3.0",
"snips_nlu_metrics>=0.14.1,<0.15",
"pylint<2",
"coverage>=4.4.2,<5.0"
"coverage>=4.4.2,<5.0",
"checksumdir~=1.1.6",
]
}

Expand Down
7 changes: 7 additions & 0 deletions snips_nlu/dataset/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,19 @@ def validate_and_format_dataset(dataset):
if language not in get_all_languages():
raise DatasetFormatError("Unknown language: '%s'" % language)

dataset[INTENTS] = {
intent_name: intent_data
for intent_name, intent_data in sorted(iteritems(dataset[INTENTS]))}
for intent in itervalues(dataset[INTENTS]):
_validate_and_format_intent(intent, dataset[ENTITIES])

utterance_entities_values = extract_utterance_entities(dataset)
builtin_entity_parser = BuiltinEntityParser.build(dataset=dataset)

dataset[ENTITIES] = {
intent_name: entity_data
for intent_name, entity_data in sorted(iteritems(dataset[ENTITIES]))}

for entity_name, entity in iteritems(dataset[ENTITIES]):
uterrance_entities = utterance_entities_values[entity_name]
if is_builtin_entity(entity_name):
Expand Down
10 changes: 4 additions & 6 deletions snips_nlu/intent_classifier/log_reg_classifier_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import itertools
import re
from builtins import next, range, str, zip
from builtins import next, range, str
from copy import deepcopy
from uuid import uuid4

Expand Down Expand Up @@ -120,14 +120,12 @@ def build_training_data(dataset, language, data_augmentation_config, resources,

noise_class = intent_index

# Computing dataset statistics
nb_utterances = [len(intent[UTTERANCES]) for intent in itervalues(intents)]

augmented_utterances = []
utterance_classes = []
for nb_utterance, intent_name in zip(nb_utterances, intents):
for intent_name, intent_data in sorted(iteritems(intents)):
nb_utterances = len(intent_data[UTTERANCES])
min_utterances_to_generate = max(
data_augmentation_config.min_utterances, nb_utterance)
data_augmentation_config.min_utterances, nb_utterances)
utterances = augment_utterances(
dataset, intent_name, language=language,
min_utterances=min_utterances_to_generate,
Expand Down
4 changes: 2 additions & 2 deletions snips_nlu/intent_parser/deterministic_intent_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from collections import defaultdict
from pathlib import Path

from future.utils import iteritems, iterkeys, itervalues
from future.utils import iteritems, itervalues
from snips_nlu_utils import normalize

from snips_nlu.common.dataset_utils import get_slot_name_mappings
Expand Down Expand Up @@ -483,7 +483,7 @@ def _get_range_shift(matched_range, ranges_mapping):

def _get_group_names_to_slot_names(slot_names_mapping):
slot_names = {slot_name for mapping in itervalues(slot_names_mapping)
for slot_name in iterkeys(mapping)}
for slot_name in mapping}
return {"group%s" % i: name
for i, name in enumerate(sorted(slot_names))}

Expand Down
51 changes: 1 addition & 50 deletions snips_nlu/tests/integration_test.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# coding=utf-8
from __future__ import print_function, unicode_literals

import json
from builtins import range, str
from builtins import str

from future.utils import iteritems
from snips_nlu_metrics import compute_cross_val_metrics
Expand Down Expand Up @@ -51,33 +50,6 @@ def check_metrics(self, results):
"Slot f1 score is too low (%.3f) for slot '%s' of intent "
"'%s'" % (slot_f1, slot_name, intent_name))

def test_nlu_engine_training_is_deterministic(self):
# We can't write a test to ensure the NLU training is always the same
# instead we train the NLU 10 times and check the learnt parameters.
# It does not bring any guarantee but it might alert us once in a while

# Given
num_runs = 10
random_state = 42

with PERFORMANCE_DATASET_PATH.open("r") as f:
dataset = json.load(f)

ref_log_reg, ref_crfs = None, None
for _ in range(num_runs):
# When
engine = TrainingEngine(random_state=random_state).fit(dataset)
log_reg = _extract_log_reg(engine)
crfs = _extract_crfs(engine)

if ref_log_reg is None:
ref_log_reg = log_reg
ref_crfs = crfs
else:
# Then
self.assertDictEqual(ref_log_reg, log_reg)
self.assertDictEqual(ref_crfs, crfs)


def _slot_matching_lambda(lhs_slot, rhs_slot):
lhs_value = lhs_slot["text"]
Expand All @@ -93,24 +65,3 @@ def _slot_matching_lambda(lhs_slot, rhs_slot):
if rhs_tokens and rhs_tokens[0].lower() in SKIPPED_DATE_PREFIXES:
rhs_tokens = rhs_tokens[1:]
return lhs_tokens == rhs_tokens


def _extract_log_reg(engine):
log_reg = dict()
intent_classifier = engine.intent_parsers[1].intent_classifier
log_reg["intent_list"] = intent_classifier.intent_list
log_reg["coef"] = intent_classifier.classifier.coef_.tolist()
log_reg["intercept"] = intent_classifier.classifier.intercept_.tolist()
log_reg["t_"] = intent_classifier.classifier.t_
return log_reg


def _extract_crfs(engine):
crfs = dict()
slot_fillers = engine.intent_parsers[1].slot_fillers
for intent, slot_filler in iteritems(slot_fillers):
crfs[intent] = {
"state_features": slot_filler.crf_model.state_features_,
"transition_features": slot_filler.crf_model.transition_features_
}
return crfs
25 changes: 25 additions & 0 deletions snips_nlu/tests/test_crf_slot_filler.py
Original file line number Diff line number Diff line change
Expand Up @@ -989,3 +989,28 @@ def __del__(self):
Features not seen at train time:
- ngram_1:text"""
self.assertEqual(expected_log, log)

def test_training_should_be_reproducible(self):
# Given
random_state = 42
dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups""")
dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

# When
slot_filler1 = CRFSlotFiller(random_state=random_state)
slot_filler1.fit(dataset, "MakeTea")

slot_filler2 = CRFSlotFiller(random_state=random_state)
slot_filler2.fit(dataset, "MakeTea")

# Then
self.assertDictEqual(slot_filler1.crf_model.state_features_,
slot_filler2.crf_model.state_features_)
self.assertDictEqual(slot_filler1.crf_model.transition_features_,
slot_filler2.crf_model.transition_features_)
38 changes: 38 additions & 0 deletions snips_nlu/tests/test_deterministic_intent_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import io
from builtins import range

from checksumdir import dirhash
from mock import patch

from snips_nlu.common.io_utils import temp_dir
from snips_nlu.constants import (
DATA, END, ENTITY, LANGUAGE_EN, RES_ENTITY, RES_INTENT, RES_INTENT_NAME,
RES_PROBA, RES_SLOTS, RES_VALUE, SLOT_NAME, START, TEXT, STOP_WORDS)
Expand Down Expand Up @@ -1201,3 +1203,39 @@ def test_should_get_range_shift(self):
# When / Then
self.assertEqual(-1, _get_range_shift((6, 7), ranges_mapping))
self.assertEqual(2, _get_range_shift((12, 13), ranges_mapping))

def test_training_should_be_reproducible(self):
# Given
random_state = 42
dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

# When
parser1 = DeterministicIntentParser(random_state=random_state)
parser1.fit(dataset)

parser2 = DeterministicIntentParser(random_state=random_state)
parser2.fit(dataset)

# Then
with temp_dir() as tmp_dir:
dir_parser1 = tmp_dir / "parser1"
dir_parser2 = tmp_dir / "parser2"
parser1.persist(dir_parser1)
parser2.persist(dir_parser2)
hash1 = dirhash(str(dir_parser1), 'sha256')
hash2 = dirhash(str(dir_parser2), 'sha256')
self.assertEqual(hash1, hash2)
120 changes: 120 additions & 0 deletions snips_nlu/tests/test_intent_classifier_featurizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from builtins import str, zip, range

import numpy as np
from checksumdir import dirhash
from mock import patch, MagicMock

from snips_nlu.common.io_utils import temp_dir
from snips_nlu.common.utils import json_string
from snips_nlu.constants import LANGUAGE_EN, STEMS, WORD_CLUSTERS, STOP_WORDS
from snips_nlu.dataset import Dataset
Expand Down Expand Up @@ -309,6 +311,48 @@ def test_fit_cooccurrence_vectorizer_feature_selection(self, mocked_chi2):
self.assertDictEqual(
expected_pairs, featurizer.cooccurrence_vectorizer.word_pairs)

def test_training_should_be_reproducible(self):
# Given
dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
utterances = [
text_to_utterance("please make me two hots cups of tea"),
text_to_utterance("i want a cup of coffee"),
]
classes = np.array([0, 1])
shared = self.get_shared_data(dataset)
shared["random_state"] = 42

# When
featurizer1 = Featurizer(**shared)
featurizer1.fit(dataset, utterances, classes, max(classes))

featurizer2 = Featurizer(**shared)
featurizer2.fit(dataset, utterances, classes, max(classes))

# Then
with temp_dir() as tmp_dir:
dir_featurizer1 = tmp_dir / "featurizer1"
dir_featurizer2 = tmp_dir / "featurizer2"
featurizer1.persist(dir_featurizer1)
featurizer2.persist(dir_featurizer2)
hash1 = dirhash(str(dir_featurizer1), 'sha256')
hash2 = dirhash(str(dir_featurizer2), 'sha256')
self.assertEqual(hash1, hash2)


class TestTfidfVectorizer(FixtureTest):

Expand Down Expand Up @@ -675,6 +719,44 @@ def test_preprocess(self):

self.assertSequenceEqual(expected_data, processed_data)

def test_training_should_be_reproducible(self):
# Given
dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
x = [text_to_utterance("please make me two hots cups of tea")]
shared = self.get_shared_data(dataset)
shared["random_state"] = 42

# When
vectorizer1 = TfidfVectorizer(**shared)
vectorizer1.fit(x, dataset)

vectorizer2 = TfidfVectorizer(**shared)
vectorizer2.fit(x, dataset)

# Then
with temp_dir() as tmp_dir:
dir_vectorizer1 = tmp_dir / "vectorizer1"
dir_vectorizer2 = tmp_dir / "vectorizer2"
vectorizer1.persist(dir_vectorizer1)
vectorizer2.persist(dir_vectorizer2)
hash1 = dirhash(str(dir_vectorizer1), 'sha256')
hash2 = dirhash(str(dir_vectorizer2), 'sha256')
self.assertEqual(hash1, hash2)


class CooccurrenceVectorizerTest(FixtureTest):

Expand Down Expand Up @@ -1193,3 +1275,41 @@ def test_preprocess(self):
]

self.assertSequenceEqual(expected_data, processed_data)

def test_training_should_be_reproducible(self):
# Given
dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
x = [text_to_utterance("please make me two hots cups of tea")]
shared = self.get_shared_data(dataset)
shared["random_state"] = 42

# When
vectorizer1 = CooccurrenceVectorizer(**shared)
vectorizer1.fit(x, dataset)

vectorizer2 = CooccurrenceVectorizer(**shared)
vectorizer2.fit(x, dataset)

# Then
with temp_dir() as tmp_dir:
dir_vectorizer1 = tmp_dir / "vectorizer1"
dir_vectorizer2 = tmp_dir / "vectorizer2"
vectorizer1.persist(dir_vectorizer1)
vectorizer2.persist(dir_vectorizer2)
hash1 = dirhash(str(dir_vectorizer1), 'sha256')
hash2 = dirhash(str(dir_vectorizer2), 'sha256')
self.assertEqual(hash1, hash2)
Loading

0 comments on commit 64bd793

Please sign in to comment.