ENH: PCA via Array API based on #2096 (#2106)

Status: Draft. Wants to merge 44 commits into base: main.

Commits (44)
8bedde1  ENH: array api dispatching (samir-nasibli, Oct 2, 2024)
b11fcf3  Deselect some scikit-learn Array API tests (samir-nasibli, Oct 4, 2024)
467634a  Merge branch 'intel:main' into enh/array_api_dispatching (samir-nasibli, Oct 4, 2024)
31030f7  Merge branch 'intel:main' into enh/array_api_dispatching (samir-nasibli, Oct 8, 2024)
943796e  deselect more tests (samir-nasibli, Oct 8, 2024)
ef42daa  deselect more tests (samir-nasibli, Oct 8, 2024)
3bc755d  disabled tests for (samir-nasibli, Oct 8, 2024)
76f1876  fix the deselection comment (samir-nasibli, Oct 8, 2024)
ce0b8e1  disabled test for Ridge regression (samir-nasibli, Oct 8, 2024)
404e8c0  Disabled tests and added comment (samir-nasibli, Oct 8, 2024)
ced43bf  ENH: Array API dispatching (samir-nasibli, Oct 8, 2024)
968365f  Merge branch 'intel:main' into enh/array_api_dispatching_testing (samir-nasibli, Oct 9, 2024)
c395d03  Revert adding dpctl into Array PI conformance testing (samir-nasibli, Oct 9, 2024)
9271479  Merge branch 'enh/array_api_dispatching_testing' of https://github.co… (samir-nasibli, Oct 9, 2024)
5784c25  minor refactoring onedal _array_api (samir-nasibli, Oct 9, 2024)
8d7f664  add tests (samir-nasibli, Oct 9, 2024)
63d8f30  addressed memory usage tests (samir-nasibli, Oct 9, 2024)
6bd0280  Address some array api test fails (samir-nasibli, Oct 9, 2024)
90411e7  linting (samir-nasibli, Oct 9, 2024)
2b7bbc5  addressed test_get_namespace (samir-nasibli, Oct 9, 2024)
b7b8f03  adding test case for validate_data check with Array API inputs (samir-nasibli, Oct 9, 2024)
169009d  minor refactoring (samir-nasibli, Oct 9, 2024)
9ca118c  addressed test_patch_map_match fail (samir-nasibli, Oct 9, 2024)
7ddcf40  Added docstrings for get_namespace (samir-nasibli, Oct 9, 2024)
ec90d43  docstrings for Array API tests (samir-nasibli, Oct 9, 2024)
6e7e547  updated minimal scikit-learn version for Array API dispatching (samir-nasibli, Oct 9, 2024)
e5db839  updated minimal scikit-learn version for Array API dispatching in _de… (samir-nasibli, Oct 9, 2024)
f99a92b  fix test test_get_namespace_with_config_context (samir-nasibli, Oct 9, 2024)
4e3286a  Merge branch 'main' into enh/array_api_dispatching_testing (samir-nasibli, Oct 10, 2024)
bc10579  ENH: DBSCAN via Array API (samir-nasibli, Oct 10, 2024)
1cb07a2  refactor onedal/datatypes/_data_conversion.py (samir-nasibli, Oct 11, 2024)
acf5689  minor fix (samir-nasibli, Oct 11, 2024)
9b2a8e9  minor update (samir-nasibli, Oct 11, 2024)
cfd91ea  added _check_sample_weight via Array API (samir-nasibli, Oct 13, 2024)
6f242ec  ENH: PCA via Array API based on #2096 (samir-nasibli, Oct 13, 2024)
eb14aec  Merge branch 'main' into enh/pca_array_api (samir-nasibli, Oct 13, 2024)
3052e91  Linting (samir-nasibli, Oct 13, 2024)
53fae8d  correction for array api (samir-nasibli, Oct 13, 2024)
5b7638c  returned relative import for _is_csr (samir-nasibli, Oct 13, 2024)
aa5c38e  Merge branch 'intel:main' into enh/pca_array_api (samir-nasibli, Oct 14, 2024)
3850b45  Merge branch 'intel:main' into enh/pca_array_api (samir-nasibli, Oct 15, 2024)
154f61f  Merge branch 'intel:main' into enh/pca_array_api (samir-nasibli, Oct 18, 2024)
fb1059f  Merge branch 'intel:main' into enh/pca_array_api (samir-nasibli, Oct 19, 2024)
641318e  Merge branch 'main' into enh/pca_array_api (samir-nasibli, Oct 22, 2024)
Files changed (4)
18 changes: 11 additions & 7 deletions onedal/datatypes/_data_conversion.py
@@ -31,10 +31,14 @@
 import dpctl.tensor as dpt


-def _apply_and_pass(func, *args):
+def _apply_and_pass(func, *args, **kwargs):
     if len(args) == 1:
-        return func(args[0])
-    return tuple(map(func, args))
+        return func(args[0], **kwargs) if len(kwargs) > 0 else func(args[0])
+    return (
+        tuple(func(arg, **kwargs) for arg in args)
+        if len(kwargs) > 0
+        else tuple(func(arg) for arg in args)
+    )


 def from_table(*args):
@@ -58,7 +62,7 @@ def to_table(*args):
 if _is_dpc_backend:
     from ..common._policy import _HostInteropPolicy

-    def _convert_to_supported(policy, *data):
+    def _convert_to_supported(policy, *data, xp=np):
         def func(x):
             return x
@@ -70,13 +74,13 @@ def func(x):
         device = policy._queue.sycl_device

         def convert_or_pass(x):
-            if (x is not None) and (x.dtype == np.float64):
+            if (x is not None) and (x.dtype == xp.float64):
                 warnings.warn(
                     "Data will be converted into float32 from "
                     "float64 because device does not support it",
                     RuntimeWarning,
                 )
-                return x.astype(np.float32)
+                return xp.astype(x, xp.float32)
             else:
                 return x
@@ -87,7 +91,7 @@ def convert_or_pass(x):
 else:

-    def _convert_to_supported(policy, *data):
+    def _convert_to_supported(policy, *data, xp=np):
         def func(x):
             return x
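For context, a minimal standalone sketch of the updated `_apply_and_pass` keyword-forwarding behavior. The `as_dtype` converter and the usage lines are hypothetical illustrations, not code from the PR:

import numpy as np


def _apply_and_pass(func, *args, **kwargs):
    # One positional argument: return the converted value itself, not a 1-tuple.
    if len(args) == 1:
        return func(args[0], **kwargs) if len(kwargs) > 0 else func(args[0])
    # Several arguments: convert each, forwarding keyword arguments
    # (e.g. xp=<array namespace>) to every call, and return a tuple.
    return (
        tuple(func(arg, **kwargs) for arg in args)
        if len(kwargs) > 0
        else tuple(func(arg) for arg in args)
    )


def as_dtype(x, xp=np, dtype=np.float32):
    # Hypothetical stand-in for one of oneDAL's conversion callbacks.
    return xp.asarray(x, dtype=dtype)


a = np.ones(3, dtype=np.float64)
b = np.zeros(3, dtype=np.float64)
single = _apply_and_pass(as_dtype, a, xp=np)   # one array in -> one array out
pair = _apply_and_pass(as_dtype, a, b, xp=np)  # two arrays in -> tuple out
print(single.dtype, pair[0].dtype, pair[1].dtype)  # float32 float32 float32

Defaulting `xp=np` in `_convert_to_supported` keeps the non-DPC build path behaving exactly as before.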
2 changes: 2 additions & 0 deletions onedal/decomposition/incremental_pca.py
@@ -23,6 +23,8 @@
 from .pca import BasePCA


+# TODO:
+# update for BasePCA.
 class IncrementalPCA(BasePCA):
     """
     Incremental estimator for PCA based on oneDAL implementation.
116 changes: 89 additions & 27 deletions onedal/decomposition/pca.py
@@ -23,6 +23,12 @@

 from ..common._base import BaseEstimator
 from ..datatypes import _convert_to_supported, from_table, to_table
+from ..utils._array_api import (
+    _asarray,
+    _convert_to_numpy,
+    get_namespace,
+    sklearn_array_api_dispatch,
+)


 class BasePCA(BaseEstimator, metaclass=ABCMeta):
@@ -42,13 +48,13 @@ def __init__(
         self.is_deterministic = is_deterministic
         self.whiten = whiten

-    def _get_onedal_params(self, data, stage=None):
+    def _get_onedal_params(self, data, xp, stage=None):
         if stage is None:
             n_components = self._resolve_n_components_for_training(data.shape)
         elif stage == "predict":
             n_components = self.n_components_
         return {
-            "fptype": "float" if data.dtype == np.float32 else "double",
+            "fptype": "float" if data.dtype == xp.float32 else "double",
             "method": self.method,
             "n_components": n_components,
             "is_deterministic": self.is_deterministic,
@@ -95,77 +101,125 @@ def _resolve_n_components_for_result(self, shape_tuple):
         elif self.n_components == "mle":
             return _infer_dimension(self.explained_variance_, shape_tuple[0])
         elif 0.0 < self.n_components < 1.0:
+            # TODO:
+            # check for Array API.
             ratio_cumsum = stable_cumsum(self.explained_variance_ratio_)
             return np.searchsorted(ratio_cumsum, self.n_components, side="right") + 1
         elif isinstance(self.n_components, float) and self.n_components == 1.0:
             return min(shape_tuple)
         else:
             return self.n_components

-    def _compute_noise_variance(self, n_components, n_sf_min):
+    def _compute_noise_variance(self, xp, n_components, n_sf_min):
         if n_components < n_sf_min:
             if len(self.explained_variance_) == n_sf_min:
                 return self.explained_variance_[n_components:].mean()
             elif len(self.explained_variance_) < n_sf_min:
                 # TODO Rename variances_ to var_ to align with sklearn/sklearnex IncrementalPCA
+                # TODO:
+                # check xp.sum for Array API.
                 if hasattr(self, "variances_"):
-                    resid_var = self.variances_.sum()
+                    resid_var = xp.sum(self.variances_)
                 elif hasattr(self, "var_"):
-                    resid_var = self.var_.sum()
+                    resid_var = xp.sum(self.var_)

-                resid_var -= self.explained_variance_.sum()
+                resid_var -= xp.sum(self.explained_variance_)
                 return resid_var / (n_sf_min - n_components)
         else:
             return 0.0

-    def _create_model(self):
+    def _create_model(self, xp):
         m = self._get_backend("decomposition", "dim_reduction", "model")
-        m.eigenvectors = to_table(self.components_)
-        m.means = to_table(self.mean_)
+        m.eigenvectors = to_table(_convert_to_numpy(self.components_, xp=xp))
+        m.means = to_table(_convert_to_numpy(self.mean_, xp=xp))
         if self.whiten:
-            m.eigenvalues = to_table(self.explained_variance_)
+            m.eigenvalues = to_table(_convert_to_numpy(self.explained_variance_, xp=xp))
         self._onedal_model = m
         return m

-    def predict(self, X, queue=None):
+    def _predict(self, X, xp, queue=None):
         policy = self._get_policy(queue, X)
-        model = self._create_model()
-        X = _convert_to_supported(policy, X)
-        params = self._get_onedal_params(X, stage="predict")
+        model = self._create_model(xp)
+        X = _convert_to_supported(policy, X, xp=xp)
+        params = self._get_onedal_params(X, xp, stage="predict")

         result = self._get_backend(
-            "decomposition", "dim_reduction", "infer", policy, params, model, to_table(X)
+            "decomposition",
+            "dim_reduction",
+            "infer",
+            policy,
+            params,
+            model,
+            to_table(_convert_to_numpy(X, xp=xp)),
         )
-        return from_table(result.transformed_data)
+        # Since `from_table` data management is enabled only for numpy host data,
+        # copy the numpy host output into an array of the xp namespace.
+        return _asarray(from_table(result.transformed_data), xp=xp, sycl_queue=queue)
+
+    def predict(self, X, queue=None):
+        xp, is_array_api_compliant = get_namespace(X)
+        # TODO: update for queue getting.
+        queue = X.sycl_queue
+        return self._predict(X, xp, queue=queue)


 class PCA(BasePCA):

-    def fit(self, X, y=None, queue=None):
+    @sklearn_array_api_dispatch()
+    def _fit(self, X, xp, is_array_api_compliant, y=None, queue=None):
         n_samples, n_features = X.shape
         n_sf_min = min(n_samples, n_features)
         self._validate_n_components(self.n_components, n_samples, n_features)

         policy = self._get_policy(queue, X)
-        # TODO: investigate why np.ndarray with OWNDATA=FALSE flag
-        # fails to be converted to oneDAL table
+        # TODO:
+        # check whether this copy is a numpy-only issue.
         if isinstance(X, np.ndarray) and not X.flags["OWNDATA"]:
             X = X.copy()
-        X = _convert_to_supported(policy, X)
+        X = _convert_to_supported(policy, X, xp=xp)

-        params = self._get_onedal_params(X)
+        params = self._get_onedal_params(X, xp)
         result = self._get_backend(
-            "decomposition", "dim_reduction", "train", policy, params, to_table(X)
+            "decomposition",
+            "dim_reduction",
+            "train",
+            policy,
+            params,
+            to_table(_convert_to_numpy(X, xp=xp)),
         )

-        self.mean_ = from_table(result.means).ravel()
-        self.variances_ = from_table(result.variances)
-        self.components_ = from_table(result.eigenvectors)
-        self.singular_values_ = from_table(result.singular_values).ravel()
-        self.explained_variance_ = np.maximum(from_table(result.eigenvalues).ravel(), 0)
-        self.explained_variance_ratio_ = from_table(
-            result.explained_variances_ratio
-        ).ravel()
+        # Since `from_table` data management is enabled only for numpy host data,
+        # copy the numpy host outputs into arrays of the xp namespace.
+        self.mean_ = _asarray(
+            from_table(result.means).reshape(-1), xp=xp, sycl_queue=queue
+        )
+        self.variances_ = _asarray(from_table(result.variances), xp=xp, sycl_queue=queue)
+        self.components_ = _asarray(
+            from_table(result.eigenvectors), xp=xp, sycl_queue=queue
+        )
+        self.singular_values_ = _asarray(
+            from_table(result.singular_values).reshape(-1), xp=xp, sycl_queue=queue
+        )
+        # TODO:
+        # check elementwise maximum for Array API.
+        self.explained_variance_ = xp.maximum(
+            _asarray(
+                from_table(result.eigenvalues).reshape(-1), xp=xp, sycl_queue=queue
+            ),
+            0,
+        )
+        self.explained_variance_ratio_ = _asarray(
+            from_table(result.explained_variances_ratio).reshape(-1),
+            xp=xp,
+            sycl_queue=queue,
+        )
         self.n_samples_ = n_samples
         self.n_features_ = n_features
@@ -175,12 +229,20 @@ def fit(self, X, y=None, queue=None):

         n_components = self._resolve_n_components_for_result(X.shape)
         self.n_components_ = n_components
-        self.noise_variance_ = self._compute_noise_variance(n_components, n_sf_min)
+        self.noise_variance_ = self._compute_noise_variance(xp, n_components, n_sf_min)

+        # TODO:
+        # check that slicing works across Array API namespaces here.
         if n_components < params["n_components"]:
             self.explained_variance_ = self.explained_variance_[:n_components]
             self.components_ = self.components_[:n_components]
             self.singular_values_ = self.singular_values_[:n_components]
             self.explained_variance_ratio_ = self.explained_variance_ratio_[:n_components]

         return self

+    def fit(self, X, y=None, queue=None):
+        xp, is_array_api_compliant = get_namespace(X)
+        # TODO: update for queue getting.
+        queue = X.sycl_queue
+        return self._fit(X, xp, is_array_api_compliant, y, queue)
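A hedged usage sketch of the new dispatch flow under the draft's current queue handling (`queue = X.sycl_queue` works only for USM-backed inputs such as dpctl tensors). The device filter string, the data, and the constructor keywords below are illustrative assumptions, not confirmed by the PR:

import dpctl
import dpctl.tensor as dpt

from onedal.decomposition.pca import PCA  # import path as in this PR's tree

# Build a small USM-backed input; fit() reads the SYCL queue off X itself.
queue = dpctl.SyclQueue("gpu")
X = dpt.asarray(
    [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]],
    sycl_queue=queue,
)

est = PCA(n_components=1, method="cov", is_deterministic=True, whiten=False)
est.fit(X)  # get_namespace(X) resolves xp to dpctl.tensor; results stay in xp
print(type(est.components_), est.explained_variance_ratio_)

The main behavioral change for consumers is that fitted attributes such as `components_` now come back in the input's namespace via `_asarray` rather than as numpy host arrays.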
17 changes: 17 additions & 0 deletions onedal/decomposition/tests/test_pca.py
@@ -0,0 +1,17 @@
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# TODO:
+# TBD.
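The new test module is only a license header plus a TBD stub. As a purely illustrative sketch (not the author's planned tests), a first conformance check might look like the following; note that it must use a USM-backed namespace while `fit` still reads `X.sycl_queue` directly:

import pytest

dpctl = pytest.importorskip("dpctl")
dpt = pytest.importorskip("dpctl.tensor")

from onedal.decomposition.pca import PCA  # assumed import path


def test_pca_fit_keeps_input_namespace():
    # Illustrative only: fitted attributes should come back as dpctl
    # tensors, not numpy arrays, once Array API support lands.
    queue = dpctl.SyclQueue()
    X = dpt.asarray(
        [[1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [0.0, 0.0]], sycl_queue=queue
    )
    est = PCA(n_components=1, method="cov", is_deterministic=True, whiten=False)
    est.fit(X)
    assert isinstance(est.explained_variance_ratio_, dpt.usm_ndarray)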