-
Notifications
You must be signed in to change notification settings - Fork 0
/
cli.py
executable file
·132 lines (100 loc) · 4.23 KB
/
cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import sys
import os
import logging
import click
from doppelspeller import __version__, __build__
from doppelspeller.cli_utils import time_usage
LOGGER = logging.getLogger(__name__)
@click.group(invoke_without_command=True)
@click.version_option(version=__version__)
@click.option('-v', '--verbose', count=True, envvar='LOGGING_LEVEL',
              help='Make output more verbose. Use more v\'s for more verbosity.')
@click.pass_context
def cli(context, verbose):
    # Root command group. It only configures logging; the real work lives in
    # the sub-commands registered on it further down.
    #
    # NOTE: deliberately no docstring here -- click would surface it as the
    # group's --help text, which would change the user-visible output.
    #
    # `verbose` is the number of -v flags (click can also take it from the
    # LOGGING_LEVEL environment variable):
    #   0 or 1 -> WARNING, 2 -> INFO, 3 or more -> DEBUG
    if verbose <= 1:
        level = logging.WARNING
    elif verbose == 2:
        level = logging.INFO
    else:
        level = logging.DEBUG

    # Configure logging BEFORE emitting the banner. Logged ahead of
    # basicConfig, the INFO record is filtered out by the root logger's
    # default WARNING level, whatever verbosity was requested.
    logging.basicConfig(stream=sys.stdout, level=level, format='[%(asctime)s]%(levelname)s|%(name)s|%(message)s')
    LOGGER.info(f"Predict Redeem v{__version__}-{__build__}")
@cli.command()
def stage_example_data_set_on_docker_container(**kwargs):
    """Makes the example data set files available on the Docker container!"""
    # The command relies on the shell for $PROJECT_DATA_PATH expansion and
    # for the *.gz globs, so it is run as a single shell string.
    cmd = (
        "cp -r /doppelspeller/example_dataset/*.gz $PROJECT_DATA_PATH && "
        "cd $PROJECT_DATA_PATH && /bin/gunzip -f -r *.gz"
    )

    pipe = os.popen(cmd)
    try:
        output = pipe.read()
    finally:
        # close() reaps the child process and returns None on success, or the
        # child's status when the command failed.
        status = pipe.close()

    if status is not None:
        # Surface the failure instead of silently returning whatever was
        # captured; the return value itself is unchanged for callers.
        LOGGER.error('Staging the example data set failed (status %s): %s', status, cmd)

    return output
@cli.command()
@time_usage
def train_model(**kwargs):
    """Train the model!"""
    # The training entry point is imported inside the command rather than at
    # module level (presumably to keep CLI start-up light -- confirm). It is
    # aliased so it does not shadow this command's own name.
    from doppelspeller.train import train_model as run_training

    LOGGER.info('Training the model!')

    # Hand the training result straight back to the caller.
    outcome = run_training()
    return outcome
@cli.command()
@time_usage
def generate_predictions(**kwargs):
    """Generate the predictions!"""
    # Project modules are imported inside the command, as in the other
    # commands of this file.
    import doppelspeller.constants as constants
    from doppelspeller.predict import Prediction as Predictor

    LOGGER.info('Generating the predictions!')

    # Build a predictor for the test data type and return its predictions.
    return Predictor(constants.DATA_TYPE_TEST).generate_test_predictions()
@cli.command()
@click.option('-t', '--title-to-search', 'title')
@time_usage
def closest_search_single_title(**kwargs):
    """Closest search single title!"""
    import doppelspeller.constants as c
    from doppelspeller.predict import Prediction

    LOGGER.info('Searching for the closest match!')

    # click passes title=None when --title-to-search is omitted. Treat that
    # the same as an empty string so the user gets the explanatory error
    # below instead of an AttributeError from None.strip().
    raw_title = kwargs.get('title')
    title_to_search = (raw_title or '').strip()
    if not title_to_search:
        raise Exception('Empty value provided for --title-to-search="" (direct call) or title="" (make call)')

    prediction = Prediction(c.DATA_TYPE_SINGLE, title=title_to_search)
    found = prediction.generate_test_predictions(single_prediction=True)

    # Log the title exactly as supplied (unstripped), then the match found.
    LOGGER.info(f'Title: {raw_title}')
    LOGGER.info(f'\n\nClosest match: {found}\n')

    return found
@cli.command()
@time_usage
def get_predictions_accuracy(**kwargs):
    """Print predictions accuracy!"""
    import pandas as pd

    import doppelspeller.constants as c
    import doppelspeller.settings as s

    try:
        actual = pd.read_csv(s.TEST_WITH_ACTUALS_FILE, sep=s.TEST_FILE_DELIMITER)
    except Exception as err:
        # Catch Exception, not a bare except, so Ctrl-C / SystemExit still
        # propagate, and chain the cause so the real read error stays visible.
        raise Exception(
            f'Error reading {s.TEST_WITH_ACTUALS_FILE} (TEST_WITH_ACTUALS_FILE in settings.py)'
        ) from err

    predictions = pd.read_csv(s.FINAL_OUTPUT_FILE, sep=s.TEST_FILE_DELIMITER)

    # Key both tables by the test index, then reduce each to a plain
    # {test_index: title_id} mapping.
    actual = actual.set_index(c.COLUMN_TEST_INDEX)
    predictions = predictions.set_index(c.COLUMN_TEST_INDEX)

    actual = actual.to_dict()[s.TEST_WITH_ACTUALS_TITLE_ID]
    predictions = predictions.to_dict()[c.COLUMN_TITLE_ID]

    correctly_matched_existing, correctly_matched_non_existing = 0, 0
    incorrectly_matched_existing, incorrectly_matched_non_existing = 0, 0

    for key, actual_value in actual.items():
        # A test index missing from the predictions file raises KeyError here.
        prediction_value = predictions[key]
        is_correct = actual_value == prediction_value

        if prediction_value == -1:
            # A prediction of -1 is tallied in the "not-found" buckets.
            if is_correct:
                correctly_matched_non_existing += 1
            else:
                incorrectly_matched_non_existing += 1
        elif is_correct:
            correctly_matched_existing += 1
        else:
            incorrectly_matched_existing += 1

    # The custom error weights a wrong match five times as heavily as a wrong
    # "not-found". The report text is kept flush-left so the emitted message
    # is unchanged.
    LOGGER.info(f"""\n
Correctly matched titles {correctly_matched_existing}
Incorrectly matched titles {incorrectly_matched_existing}
Correctly marked as not-found {correctly_matched_non_existing}
Incorrectly marked as not-found {incorrectly_matched_non_existing}
Custom Error {incorrectly_matched_non_existing + (incorrectly_matched_existing * 5)}
[custom_error = number_of_incorrectly_matched_non_existing + (number_of_incorrectly_matched_existing * 5)]
""")

    return True