Skip to content

Commit

Permalink
Rename gcs.endpoint to gcs.service.host (apache#1007)
Browse files Browse the repository at this point in the history
* Rename `gcs.endpoint` to `gcs.service.host`

To bring the property name in line with the Java implementation:

https://github.com/apache/iceberg/blob/6ee6d1327d3811dbd5795c4e87efdc41b7a58eaa/gcp/src/main/java/org/apache/iceberg/gcp/GCPProperties.java#L32

* Import

Co-authored-by: Andre Luis Anastacio <[email protected]>

* Use `deprecation_message` instead

Co-authored-by: Andre Luis Anastacio <[email protected]>

* Use `deprecation_message` instead

Co-authored-by: Andre Luis Anastacio <[email protected]>

* Fix message

* Update pyiceberg/io/fsspec.py

Co-authored-by: Kevin Liu <[email protected]>

* Update pyiceberg/io/fsspec.py

Co-authored-by: Kevin Liu <[email protected]>

* Update pyiceberg/io/pyarrow.py

Co-authored-by: Kevin Liu <[email protected]>

---------

Co-authored-by: Andre Luis Anastacio <[email protected]>
Co-authored-by: Kevin Liu <[email protected]>
  • Loading branch information
3 people authored Nov 6, 2024
1 parent ef5c6ef commit c7b55b1
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 7 deletions.
2 changes: 1 addition & 1 deletion mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ For the FileIO there are several configuration options available:
| gcs.cache-timeout | 60 | Configure the cache expiration time in seconds for object metadata cache |
| gcs.requester-pays | False | Configure whether to use requester-pays requests |
| gcs.session-kwargs | {} | Configure a dict of parameters to pass on to aiohttp.ClientSession; can contain, for example, proxy settings. |
| gcs.endpoint | <http://0.0.0.0:4443> | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port) If not given, defaults to the value of environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard Google endpoint. |
| gcs.service.host | <http://0.0.0.0:4443> | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port). If not given, defaults to the value of the environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, the standard Google endpoint is used. |
| gcs.default-bucket-location | US | Configure the default location where buckets are created, like 'US' or 'EUROPE-WEST3'. |
| gcs.version-aware | False | Configure whether to support object versioning on the GCS bucket. |

Expand Down
1 change: 1 addition & 0 deletions pyiceberg/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
GCS_REQUESTER_PAYS = "gcs.requester-pays"
GCS_SESSION_KWARGS = "gcs.session-kwargs"
GCS_ENDPOINT = "gcs.endpoint"
GCS_SERVICE_HOST = "gcs.service.host"
GCS_DEFAULT_LOCATION = "gcs.default-bucket-location"
GCS_VERSION_AWARE = "gcs.version-aware"
PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read"
Expand Down
9 changes: 8 additions & 1 deletion pyiceberg/io/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
GCS_ENDPOINT,
GCS_PROJECT_ID,
GCS_REQUESTER_PAYS,
GCS_SERVICE_HOST,
GCS_SESSION_KWARGS,
GCS_TOKEN,
GCS_VERSION_AWARE,
Expand Down Expand Up @@ -171,6 +172,12 @@ def _gs(properties: Properties) -> AbstractFileSystem:
# https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem
from gcsfs import GCSFileSystem

if properties.get(GCS_ENDPOINT):
deprecation_message(
deprecated_in="0.8.0",
removed_in="0.9.0",
help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
)
return GCSFileSystem(
project=properties.get(GCS_PROJECT_ID),
access=properties.get(GCS_ACCESS, "full_control"),
Expand All @@ -179,7 +186,7 @@ def _gs(properties: Properties) -> AbstractFileSystem:
cache_timeout=properties.get(GCS_CACHE_TIMEOUT),
requester_pays=property_as_bool(properties, GCS_REQUESTER_PAYS, False),
session_kwargs=json.loads(properties.get(GCS_SESSION_KWARGS, "{}")),
endpoint_url=properties.get(GCS_ENDPOINT),
endpoint_url=get_first_property_value(properties, GCS_SERVICE_HOST, GCS_ENDPOINT),
default_location=properties.get(GCS_DEFAULT_LOCATION),
version_aware=property_as_bool(properties, GCS_VERSION_AWARE, False),
)
Expand Down
11 changes: 9 additions & 2 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
AWS_SESSION_TOKEN,
GCS_DEFAULT_LOCATION,
GCS_ENDPOINT,
GCS_SERVICE_HOST,
GCS_TOKEN,
GCS_TOKEN_EXPIRES_AT_MS,
HDFS_HOST,
Expand Down Expand Up @@ -163,7 +164,7 @@
from pyiceberg.utils.concurrent import ExecutorFactory
from pyiceberg.utils.config import Config
from pyiceberg.utils.datetime import millis_to_datetime
from pyiceberg.utils.deprecated import deprecated
from pyiceberg.utils.deprecated import deprecated, deprecation_message
from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
from pyiceberg.utils.singleton import Singleton
from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
Expand Down Expand Up @@ -400,7 +401,13 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste
gcs_kwargs["credential_token_expiration"] = millis_to_datetime(int(expiration))
if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION):
gcs_kwargs["default_bucket_location"] = bucket_location
if endpoint := self.properties.get(GCS_ENDPOINT):
if endpoint := get_first_property_value(self.properties, GCS_SERVICE_HOST, GCS_ENDPOINT):
if self.properties.get(GCS_ENDPOINT):
deprecation_message(
deprecated_in="0.8.0",
removed_in="0.9.0",
help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
)
url_parts = urlparse(endpoint)
gcs_kwargs["scheme"] = url_parts.scheme
gcs_kwargs["endpoint_override"] = url_parts.netloc
Expand Down
6 changes: 3 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@
from pyiceberg.catalog.noop import NoopCatalog
from pyiceberg.expressions import BoundReference
from pyiceberg.io import (
GCS_ENDPOINT,
GCS_PROJECT_ID,
GCS_SERVICE_HOST,
GCS_TOKEN,
GCS_TOKEN_EXPIRES_AT_MS,
fsspec,
Expand Down Expand Up @@ -1873,7 +1873,7 @@ def fsspec_fileio(request: pytest.FixtureRequest) -> FsspecFileIO:
@pytest.fixture
def fsspec_fileio_gcs(request: pytest.FixtureRequest) -> FsspecFileIO:
properties = {
GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
}
Expand All @@ -1885,7 +1885,7 @@ def pyarrow_fileio_gcs(request: pytest.FixtureRequest) -> "PyArrowFileIO":
from pyiceberg.io.pyarrow import PyArrowFileIO

properties = {
GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
GCS_TOKEN_EXPIRES_AT_MS: datetime_to_millis(datetime.now()) + 60 * 1000,
Expand Down

0 comments on commit c7b55b1

Please sign in to comment.