Skip to content

Commit d40a0ef

Browse files
authored
Merge pull request #7 from oracle/2.5.x
Release version 2.5.10
2 parents d594ed0 + 2c9b287 commit d40a0ef

File tree

104 files changed

+4493
-3218
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

104 files changed

+4493
-3218
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,9 @@ Multiple extra dependencies can be installed together. For example:
7474
```python
7575
import ads
7676
from ads.common.auth import default_signer
77+
import oci
7778

78-
ads.set_auth(auth="api_key", profile="DEFAULT")
79+
ads.set_auth(auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT")
7980
bucket_name = <bucket-name>
8081
file_name = <file-name>
8182
namespace = <namespace>

ads/__init__.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import sys
1111

1212
import IPython
13+
import oci
1314
from IPython import get_ipython
1415
from IPython.core.error import UsageError
1516

@@ -31,28 +32,39 @@
3132

3233
debug_mode = os.environ.get("DEBUG_MODE", False)
3334
documentation_mode = os.environ.get("DOCUMENTATION_MODE", "False") == "True"
35+
oci_config_path = oci.config.DEFAULT_LOCATION # "~/.oci/config"
3436
oci_key_profile = "DEFAULT"
3537
test_mode = os.environ.get("TEST_MODE", False)
3638
resource_principal_mode = bool(os.environ.get("RESOURCE_PRINCIPAL_MODE", False))
3739

3840

39-
def set_auth(auth="api_key", profile="DEFAULT"):
41+
def set_auth(auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT"):
    """
    Enable/disable resource principal identity or keypair identity in a notebook session.

    Parameters
    ----------
    auth: {'api_key', 'resource_principal'}, default 'api_key'
        Enable/disable resource principal identity or keypair identity in a notebook session
    oci_config_location: str, default oci.config.DEFAULT_LOCATION, which is '~/.oci/config'
        config file location
    profile: str, default 'DEFAULT'
        profile name for api keys config file
    """
    global resource_principal_mode
    global oci_config_path
    global oci_key_profile
    oci_key_profile = profile
    # Fall back to the SDK default location when the requested config file is
    # missing, so later client construction does not fail on a bad path.
    if os.path.exists(os.path.expanduser(oci_config_location)):
        oci_config_path = oci_config_location
    else:
        logging.warning(
            f"{oci_config_location} does not exist. Default value oci.config.DEFAULT_LOCATION is used instead."
        )
        oci_config_path = oci.config.DEFAULT_LOCATION
    if auth == "api_key":
        resource_principal_mode = False
    elif auth == "resource_principal":
        resource_principal_mode = True
5769

5870

ads/ads_version.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
{
2-
"version": "2.5.9"
2+
"version": "2.5.10"
33
}

ads/automl/provider.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
is_notebook,
2727
)
2828
from ads.dataset.label_encoder import DataFrameLabelEncoder
29+
from ads.dataset.helper import is_text_data
2930

3031
from IPython.core.display import display, HTML
3132

@@ -533,10 +534,11 @@ def train(self, **kwargs):
533534
)
534535

535536
self.train_start_time = time.time()
536-
if "time_budget" in kwargs:
537-
self.time_budget = kwargs.pop("time_budget")
538-
else:
539-
self.time_budget = 0 # unlimited
537+
538+
self.time_budget = kwargs.pop("time_budget", 0) # 0 means unlimited
539+
540+
self.col_types = kwargs.pop("col_types", None)
541+
540542
self.est = self._decide_estimator(**kwargs)
541543
with warnings.catch_warnings():
542544
warnings.simplefilter("ignore")
@@ -546,6 +548,7 @@ def train(self, **kwargs):
546548
X_valid=self.X_valid,
547549
y_valid=self.y_valid,
548550
time_budget=self.time_budget,
551+
col_types=self.col_types,
549552
)
550553
self.train_end_time = time.time()
551554
self.print_summary(max_rows=10)
@@ -613,8 +616,20 @@ def _decide_estimator(self, **kwargs):
613616
or self.ml_task_type == ml_task_types.MULTI_CLASS_TEXT_CLASSIFICATION
614617
):
615618
est = self.automl.Pipeline(
616-
task="classification", text=True, score_metric=score_metric, **kwargs
619+
task="classification", score_metric=score_metric, **kwargs
617620
)
621+
if not self.col_types:
622+
if len(self.X_train.columns) == 1:
623+
self.col_types = ['text']
624+
elif len(self.X_train.columns) == 2:
625+
self.col_types = ['text', 'text']
626+
else:
627+
raise ValueError("We detected a text classification problem. Pass " \
628+
"in `col_types = [<type of column1>, <type of column2>, ...]`." \
629+
" Valid types are: ['categorical', 'numerical', 'text', 'datetime'," \
630+
" 'timedelta']."
631+
)
632+
618633
elif self.ml_task_type == ml_task_types.REGRESSION:
619634
est = self.automl.Pipeline(
620635
task="regression", score_metric=score_metric, **kwargs

ads/bds/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2022 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

ads/bds/auth.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2022 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
7+
8+
import os
9+
import subprocess
10+
from contextlib import contextmanager
11+
12+
13+
DEFAULT_KRB5_CONFIG_PATH = "~/.bds_config/krb5.conf"
14+
KRB5_CONFIG = "KRB5_CONFIG"
15+
16+
17+
class KRB5KinitError(Exception):
18+
"""KRB5KinitError class when kinit -kt command failed to generate cached ticket with the keytab file and the krb5 config file."""
19+
20+
pass
21+
22+
23+
def has_kerberos_ticket():
    """Check whether a cached Kerberos ticket exists.

    Returns
    -------
    bool
        True if ``klist -s`` exits with status 0, i.e. a valid ticket is cached.
    """
    # `klist -s` is silent and exits 0 only when a live ticket is cached;
    # the comparison already yields a bool, no `True if ... else False` needed.
    return subprocess.call(["klist", "-s"]) == 0
26+
27+
28+
def init_ccache_with_keytab(principal: str, keytab_file: str) -> None:
    """Initialize the Kerberos credential cache using a keytab file.

    Parameters
    ----------
    principal: str
        The unique identity to which Kerberos can assign tickets.
    keytab_file: str
        Path to your keytab file.

    Returns
    -------
    None
        Nothing.

    Raises
    ------
    KRB5KinitError
        If ``kinit`` exits with a non-zero status; carries kinit's stderr output.
    """
    # Pass the argument vector directly instead of building a command string
    # and splitting it, which would break on paths containing spaces.
    kinit_proc = subprocess.Popen(
        ["kinit", "-kt", keytab_file, principal], stderr=subprocess.PIPE
    )
    _, stderr_data = kinit_proc.communicate()

    # `!= 0` also catches negative return codes (process killed by a signal),
    # which a `> 0` check would silently ignore.
    if kinit_proc.returncode != 0:
        raise KRB5KinitError(stderr_data)
54+
55+
56+
@contextmanager
def krbcontext(
    principal: str, keytab_path: str, kerb5_path: str = DEFAULT_KRB5_CONFIG_PATH
) -> None:
    """A context manager for Kerberos-related actions.

    Provides a Kerberos context that you can put code inside. On entry the
    credential cache is initialized from the keytab when no cached ticket
    exists; when a valid ticket is already cached, entry is a no-op.

    Parameters
    ----------
    principal: str
        The unique identity to which Kerberos can assign tickets.
    keytab_path: str
        Path to your keytab file.
    kerb5_path: (str, optional).
        Path to your krb5 config file.

    Returns
    -------
    None
        Nothing.

    Examples
    --------
    >>> from ads.bds.auth import krbcontext
    >>> from pyhive import hive
    >>> with krbcontext(principal = "your_principal", keytab_path = "your_keytab_path"):
    >>>     hive_cursor = hive.connect(host="your_hive_host",
    ...                                port="your_hive_port",
    ...                                auth='KERBEROS',
    ...                                kerberos_service_name="hive").cursor()
    """
    # Delegate all ticket handling to refresh_ticket, then hand control
    # to the wrapped block.
    refresh_ticket(kerb5_path=kerb5_path, principal=principal, keytab_path=keytab_path)
    yield
91+
92+
93+
def refresh_ticket(
94+
principal: str, keytab_path: str, kerb5_path: str = DEFAULT_KRB5_CONFIG_PATH
95+
) -> None:
96+
"""generate new cached ticket based on the principal and keytab file path.
97+
98+
Parameters
99+
----------
100+
principal: str
101+
The unique identity to which Kerberos can assign tickets.
102+
keytab_path: str
103+
Path to your keytab file.
104+
kerb5_path: (str, optional).
105+
Path to your krb5 config file.
106+
107+
Returns
108+
-------
109+
None
110+
Nothing.
111+
112+
Examples
113+
--------
114+
>>> from ads.bds.auth import refresh_ticket
115+
>>> from pyhive import hive
116+
>>> refresh_ticket(principal = "your_principal", keytab_path = "your_keytab_path")
117+
>>> hive_cursor = hive.connect(host="your_hive_host",
118+
... port="your_hive_port",
119+
... auth='KERBEROS',
120+
... kerberos_service_name="hive").cursor()
121+
"""
122+
os.environ[KRB5_CONFIG] = os.path.abspath(os.path.expanduser(kerb5_path))
123+
if not os.path.exists(os.environ[KRB5_CONFIG]):
124+
raise FileNotFoundError(f"krb5 config file not found in {kerb5_path}.")
125+
if not has_kerberos_ticket():
126+
init_ccache_with_keytab(principal, keytab_path)

ads/bds/big_data_service.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2022 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
7+
from abc import ABC, abstractmethod
8+
from time import time
9+
from typing import Dict, Iterator, List, Optional, Union
10+
11+
import impala
12+
import impala.dbapi as impyla # noqa
13+
import pandas as pd
14+
from impala.error import Error as ImpylaError # noqa
15+
from impala.error import HiveServer2Error as HS2Error # noqa
16+
17+
18+
class HiveConnection(ABC):
    """Base class Interface."""

    def __init__(self, **params):
        """Store the connection parameters; no connection is opened here."""
        self.params = params
        # Placeholder until a concrete subclass establishes the connection.
        self.con = None

    @abstractmethod
    def get_cursor(self):
        """return the cursor from the connection.

        Returns
        -------
        HiveServer2Cursor:
            cursor using a specific client.
        """
        return None
36+
37+
38+
class ImpylaHiveConnection(HiveConnection):
    """ImpalaHiveConnection class which uses impyla client."""

    def __init__(self, **params):
        """set up the impala connection."""
        # The base class already records params and initializes `con` to None.
        super().__init__(**params)

    def get_cursor(self) -> "impala.hiveserver2.HiveServer2Cursor":
        """return the cursor from the connection.

        Returns
        -------
        impala.hiveserver2.HiveServer2Cursor:
            cursor using impyla client.
        """
        return None
55+
56+
57+
class OracleHiveConnection(ImpylaHiveConnection):
    # Hive connection for Oracle Big Data Service, built on the impyla client.
    # NOTE(review): __init__, insert and query are visible here only as stubs
    # (`pass` / `return None`) — presumably implemented or overridden elsewhere.

    def __init__(
        self,
        host: str,
        port: str,
        **kwargs,
    ):
        """Initiate the connection.

        Parameters
        ----------
        host: str
            Hive host name.
        port: str
            Hive port.
        kwargs:
            Other connection parameters accepted by the client.
        """
        pass

    def insert(
        self,
        table_name: str,
        df: pd.DataFrame,
        if_exists: str,
        partition: List[str] = None,
    ):
        """insert a table from a pandas dataframe.

        Parameters
        ----------
        table_name (str):
            Table Name.
        df (pd.DataFrame):
            Data to be injected to the database.
        if_exists (str):
            Whether to replace, append or fail if the table already exists.
        partition (List[str], optional): Defaults to None.
            For partitioned tables, indicate the partition that's being
            inserted into, either with an ordered list of partition keys or a
            dict of partition field name to value. For example for the
            partition (year=2007, month=7), this can be either (2007, 7) or
            {'year': 2007, 'month': 7}.

        Raises
        ------
        ValueError
            If ``if_exists`` is not one of 'fail', 'replace', 'append'.
        """
        if if_exists not in ["fail", "replace", "append"]:
            # The f-prefix was missing originally, so the message printed the
            # literal text "{if_exists}" instead of the offending value.
            raise ValueError(
                f"Unknown option `if_exists`={if_exists}. Valid options are 'fail', 'replace', 'append'"
            )
        pass

    def _fetch_by_batch(
        self, cursor: "impala.hiveserver2.HiveServer2Cursor", chunksize: int
    ):
        """fetch the data by batch of chunksize."""
        # Yield successive batches until fetchmany returns an empty list.
        while True:
            rows = cursor.fetchmany(chunksize)
            if rows:
                yield rows
            else:
                break

    def query(
        self,
        sql: str,
        bind_variables: Optional[Dict] = None,
        chunksize: Optional[int] = None,
    ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]:
        """Query data which support select statement.

        Parameters
        ----------
        sql (str):
            sql query.
        bind_variables (Optional[Dict]):
            Parameters to be bound to variables in the SQL query, if any.
            Impyla supports all DB API `paramstyle`s, including `qmark`,
            `numeric`, `named`, `format`, `pyformat`.
        chunksize (Optional[int]): . Defaults to None.
            chunksize of each of the dataframe in the iterator.

        Returns
        -------
        Union[pd.DataFrame, Iterator[pd.DataFrame]]:
            A pandas DataFrame or a pandas DataFrame iterator.
        """
        return None

0 commit comments

Comments
 (0)