From 6925cd74721cc474a02a6759490b2b4c8bef3831 Mon Sep 17 00:00:00 2001 From: Shashwat Arghode Date: Wed, 10 Apr 2019 17:15:03 -0700 Subject: [PATCH] Adding kerberos support for Presto (#229) * Adding kerberos support for Presto * Improving formatting of presto.py * making it backward compatible by incorporating review comments and some minor changes * moving request_kwargs at the end * Fixing missing argument error * Fixing missing argument error --- dev_requirements.txt | 1 + pyhive/presto.py | 56 ++++++++++++++++++++++++++++++++++++-------- setup.py | 2 ++ 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index 73c419be..0bf6d8a7 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -10,6 +10,7 @@ pytest-timeout==1.2.0 # actual dependencies: let things break if a package changes requests>=1.0.0 +requests_kerberos>=0.12.0 sasl>=0.2.1 thrift>=0.10.0 #thrift_sasl>=0.1.0 diff --git a/pyhive/presto.py b/pyhive/presto.py index e90d5bfa..cc84bf80 100644 --- a/pyhive/presto.py +++ b/pyhive/presto.py @@ -18,6 +18,8 @@ import logging import requests from requests.auth import HTTPBasicAuth +from requests_kerberos import HTTPKerberosAuth, OPTIONAL +import os try: # Python 3 import urllib.parse as urlparse @@ -80,7 +82,10 @@ class Cursor(common.DBAPICursor): def __init__(self, host, port='8080', username=None, catalog='hive', schema='default', poll_interval=1, source='pyhive', session_props=None, - protocol='http', password=None, requests_session=None, requests_kwargs=None): + protocol='http', password=None, requests_session=None, requests_kwargs=None, + KerberosRemoteServiceName=None, KerberosPrincipal=None, + KerberosConfigPath=None, KerberosKeytabPath=None, + KerberosCredentialCachePath=None, KerberosUseCanonicalHostname=None): """ :param host: hostname to connect to, e.g. 
``presto.example.com`` :param port: int -- port, defaults to 8080 @@ -100,6 +105,18 @@ def __init__(self, host, port='8080', username=None, catalog='hive', class will use the default requests behavior of making a new session per HTTP request. Caller is responsible for closing session. :param requests_kwargs: Additional ``**kwargs`` to pass to requests + :param KerberosRemoteServiceName: string -- Presto coordinator Kerberos service name. + This parameter is required for Kerberos authentication. + :param KerberosPrincipal: string -- The principal to use when authenticating to + the Presto coordinator. + :param KerberosConfigPath: string -- Kerberos configuration file. + (default: /etc/krb5.conf) + :param KerberosKeytabPath: string -- Kerberos keytab file. + :param KerberosCredentialCachePath: string -- Kerberos credential cache. + :param KerberosUseCanonicalHostname: boolean -- Use the canonical hostname of the + Presto coordinator for the Kerberos service principal by first resolving the + hostname to an IP address and then doing a reverse DNS lookup for that IP address. + This is enabled by default. 
""" super(Cursor, self).__init__(poll_interval) # Config @@ -120,15 +137,34 @@ class will use the default requests behavior of making a new session per HTTP re self._requests_session = requests_session or requests requests_kwargs = dict(requests_kwargs) if requests_kwargs is not None else {} - if password is not None and 'auth' in requests_kwargs: - raise ValueError("Cannot use both password and requests_kwargs authentication") - for k in ('method', 'url', 'data', 'headers'): - if k in requests_kwargs: - raise ValueError("Cannot override requests argument {}".format(k)) - if password is not None: - requests_kwargs['auth'] = HTTPBasicAuth(username, password) - if protocol != 'https': - raise ValueError("Protocol must be https when passing a password") + + if KerberosRemoteServiceName is not None: + hostname_override = None + if KerberosUseCanonicalHostname is not None \ + and KerberosUseCanonicalHostname.lower() == 'false': + hostname_override = host + if KerberosConfigPath is not None: + os.environ['KRB5_CONFIG'] = KerberosConfigPath + if KerberosKeytabPath is not None: + os.environ['KRB5_CLIENT_KTNAME'] = KerberosKeytabPath + if KerberosCredentialCachePath is not None: + os.environ['KRB5CCNAME'] = KerberosCredentialCachePath + + requests_kwargs['auth'] = HTTPKerberosAuth(mutual_authentication=OPTIONAL, + principal=KerberosPrincipal, + service=KerberosRemoteServiceName, + hostname_override=hostname_override) + + else: + if password is not None and 'auth' in requests_kwargs: + raise ValueError("Cannot use both password and requests_kwargs authentication") + for k in ('method', 'url', 'data', 'headers'): + if k in requests_kwargs: + raise ValueError("Cannot override requests argument {}".format(k)) + if password is not None: + requests_kwargs['auth'] = HTTPBasicAuth(username, password) + if protocol != 'https': + raise ValueError("Protocol must be https when passing a password") self._requests_kwargs = requests_kwargs self._reset_state() diff --git a/setup.py 
b/setup.py index e4e6b1cc..f9c78895 100755 --- a/setup.py +++ b/setup.py @@ -46,12 +46,14 @@ def run_tests(self): 'presto': ['requests>=1.0.0'], 'hive': ['sasl>=0.2.1', 'thrift>=0.10.0', 'thrift_sasl>=0.1.0'], 'sqlalchemy': ['sqlalchemy>=0.8.7'], + 'kerberos': ['requests_kerberos>=0.12.0'], }, tests_require=[ 'mock>=1.0.0', 'pytest', 'pytest-cov', 'requests>=1.0.0', + 'requests_kerberos>=0.12.0', 'sasl>=0.2.1', 'sqlalchemy>=0.12.0', 'thrift>=0.10.0',