Skip to content

Commit 91cf264

Browse files
authored
Merge pull request #8 from oracle/2.6.x
Release version 2.6.1
2 parents d40a0ef + 5e0394f commit 91cf264

File tree

102 files changed

+4268
-1951
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

102 files changed

+4268
-1951
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,13 @@ Multiple extra dependencies can be installed together. For example:
123123

124124
## Contributing
125125

126-
This project welcomes contributions from the community. Before submitting a pull request, please review our contribution guide.
126+
This project welcomes contributions from the community. Before submitting a pull request, please review our contribution guide [CONTRIBUTING.md](https://github.com/oracle/accelerated-data-science/blob/main/CONTRIBUTING.md).
127127

128-
Find Getting Started instructions for developers in [README-development.md](./README-development.md)
128+
Find Getting Started instructions for developers in [README-development.md](https://github.com/oracle/accelerated-data-science/blob/main/README-development.md)
129129

130130
## Security
131131

132-
Please consult the security guide for our responsible security vulnerability disclosure process.
132+
Please consult the security guide [SECURITY.md](https://github.com/oracle/accelerated-data-science/blob/main/SECURITY.md) for our responsible security vulnerability disclosure process.
133133

134134
## License
135135

ads/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,13 @@
1010
import sys
1111

1212
import IPython
13-
import oci
1413
from IPython import get_ipython
1514
from IPython.core.error import UsageError
16-
1715
import matplotlib.font_manager # causes matplotlib to regenerate its fonts
1816
import json
1917

2018
import ocifs
19+
import oci
2120
from ads.common.decorator.deprecate import deprecated
2221
from ads.feature_engineering.accessor.series_accessor import ADSSeriesAccessor
2322
from ads.feature_engineering.accessor.dataframe_accessor import ADSDataFrameAccessor

ads/ads_version.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
{
2-
"version": "2.5.10"
2+
"version": "2.6.1"
33
}

ads/automl/driver.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,29 @@
44
# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
55
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66

7-
from __future__ import print_function, absolute_import, division
7+
from __future__ import absolute_import, division, print_function
88

9+
import numpy as np
10+
import pandas as pd
11+
import scipy
912
import copy
10-
13+
from ads.common import logger, utils
1114
from ads.common.model import ADSModel
12-
from ads.common import logger
1315
from ads.dataset import helper
14-
from ads.dataset.dataset_with_target import ADSDatasetWithTarget
1516
from ads.dataset.classification_dataset import (
1617
BinaryClassificationDataset,
18+
BinaryTextClassificationDataset,
1719
MultiClassClassificationDataset,
1820
MultiClassTextClassificationDataset,
19-
BinaryTextClassificationDataset,
2021
)
21-
from ads.dataset.regression_dataset import RegressionDataset
22-
22+
from ads.dataset.dataset_with_target import ADSDatasetWithTarget
2323
from ads.dataset.pipeline import TransformerPipeline
24+
from ads.dataset.regression_dataset import RegressionDataset
2425
from ads.type_discovery.type_discovery_driver import TypeDiscoveryDriver
2526
from ads.type_discovery.typed_feature import (
2627
ContinuousTypedFeature,
2728
DiscreteTypedFeature,
2829
)
29-
from ads.common import utils
30-
3130

3231
dataset_task_map = {
3332
BinaryClassificationDataset: utils.ml_task_types.BINARY_CLASSIFICATION,
@@ -114,7 +113,7 @@ def __init__(
114113
>>> olabs_automl = OracleAutoMLProvider()
115114
>>> model, baseline = AutoML(train, provider=olabs_automl).train()
116115
"""
117-
from ads.automl.provider import OracleAutoMLProvider, BaselineAutoMLProvider
116+
from ads.automl.provider import BaselineAutoMLProvider, OracleAutoMLProvider
118117

119118
if hasattr(training_data, "transformer_pipeline"):
120119
self.transformer_pipeline = training_data.transformer_pipeline
@@ -154,7 +153,23 @@ def __init__(
154153
or utils._is_dask_series(training_data.y)
155154
else training_data.y
156155
)
157-
self.target_name = y.name
156+
157+
if isinstance(y, pd.DataFrame):
158+
if len(y.columns) != 1:
159+
raise ValueError("Data must be 1-dimensional.")
160+
y = y.squeeze()
161+
elif isinstance(y, np.ndarray):
162+
y = pd.Series(y)
163+
if y.name:
164+
self.target_name = str(y.name)
165+
else:
166+
y.name = str(0)
167+
self.target_name = str(0)
168+
169+
if isinstance(X, np.ndarray):
170+
X = pd.DataFrame(X)
171+
elif isinstance(X, scipy.sparse.csr.csr_matrix):
172+
X = pd.DataFrame(X.todense())
158173
self.feature_names = X.columns.values
159174
self.client = client
160175
class_names = y.unique()
@@ -256,8 +271,8 @@ def train(self, **kwargs):
256271
avail_n_cores = utils.get_cpu_count()
257272

258273
warn_params = [
259-
(10 ** 5, 4, "VM.Standard.E2.4"),
260-
(10 ** 6, 16, "VM.Standard.2.16"),
274+
(10**5, 4, "VM.Standard.E2.4"),
275+
(10**6, 16, "VM.Standard.2.16"),
261276
]
262277

263278
# train using automl and baseline

ads/automl/provider.py

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,9 @@ def decide_estimator(self, **kwargs):
304304

305305
# An installation of oracle labs automl is required only for this class
306306
class OracleAutoMLProvider(AutoMLProvider, ABC):
307-
def __init__(self, n_jobs=-1, loglevel=None, logger_override=None):
307+
def __init__(
308+
self, n_jobs=-1, loglevel=None, logger_override=None, model_n_jobs: int = 1
309+
):
308310
"""
309311
The Oracle AutoML Provider automatically provides a tuned ML pipeline that best models a given training
310312
dataset and a prediction task at hand.
@@ -317,6 +319,9 @@ def __init__(self, n_jobs=-1, loglevel=None, logger_override=None):
317319
loglevel : int
318320
The verbosity of output for Oracle AutoML. Can be specified using the Python logging module
319321
(https://docs.python.org/3/library/logging.html#logging-levels).
322+
model_n_jobs: (optional, int). Defaults to 1.
323+
Specifies the model parallelism used by AutoML.
324+
This will be passed to the underlying model it is training.
320325
"""
321326
try:
322327
self.automl = __import__("automl")
@@ -327,24 +332,13 @@ def __init__(self, n_jobs=-1, loglevel=None, logger_override=None):
327332
super(OracleAutoMLProvider, self).__init__()
328333
if loglevel is None:
329334
loglevel = logging.DEBUG if ads.debug_mode else logging.ERROR
330-
else:
331-
loglevel = loglevel
332-
if logger_override:
333-
logr = logger_override
334-
else:
335-
logr = logging.getLogger(__name__)
336-
if "AMD" in self.cpuinfo.get_cpu_info().get("brand", "UNKNOWN-BRAND"):
337-
# Disable intra-model parallelism for LightGBM and XGBoost libraries
338-
# which seem to be unstable currently on AMD shapes
339-
self.automl.init(
340-
engine="local",
341-
engine_opts={"n_jobs": n_jobs, "model_n_jobs": 1},
342-
loglevel=loglevel,
343-
)
344-
else:
345-
self.automl.init(
346-
engine="local", engine_opts={"n_jobs": n_jobs}, logger=logr
347-
)
335+
336+
self.automl.init(
337+
engine="local",
338+
engine_opts={"n_jobs": n_jobs, "model_n_jobs": model_n_jobs},
339+
logger=logger_override,
340+
loglevel=loglevel,
341+
)
348342

349343
def __repr__(self):
350344
super(OracleAutoMLProvider, self).__repr__()
@@ -620,15 +614,16 @@ def _decide_estimator(self, **kwargs):
620614
)
621615
if not self.col_types:
622616
if len(self.X_train.columns) == 1:
623-
self.col_types = ['text']
617+
self.col_types = ["text"]
624618
elif len(self.X_train.columns) == 2:
625-
self.col_types = ['text', 'text']
619+
self.col_types = ["text", "text"]
626620
else:
627-
raise ValueError("We detected a text classification problem. Pass " \
628-
"in `col_types = [<type of column1>, <type of column2>, ...]`." \
629-
" Valid types are: ['categorical', 'numerical', 'text', 'datetime'," \
621+
raise ValueError(
622+
"We detected a text classification problem. Pass "
623+
"in `col_types = [<type of column1>, <type of column2>, ...]`."
624+
" Valid types are: ['categorical', 'numerical', 'text', 'datetime',"
630625
" 'timedelta']."
631-
)
626+
)
632627

633628
elif self.ml_task_type == ml_task_types.REGRESSION:
634629
est = self.automl.Pipeline(

ads/bds/auth.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ def refresh_ticket(
119119
... auth='KERBEROS',
120120
... kerberos_service_name="hive").cursor()
121121
"""
122+
keytab_path = os.path.abspath(os.path.expanduser(keytab_path))
122123
os.environ[KRB5_CONFIG] = os.path.abspath(os.path.expanduser(kerb5_path))
123124
if not os.path.exists(os.environ[KRB5_CONFIG]):
124125
raise FileNotFoundError(f"krb5 config file not found in {kerb5_path}.")

0 commit comments

Comments
 (0)