Skip to content

Commit

Permalink
remove usage stats and various cleanup (#1596)
Browse files Browse the repository at this point in the history
## Description
Remove the usage_stats emit_usage telemetry from whylogs and perform various
cleanup:
* remove whylabs API key format length checks
* log more verbose retry warnings on failures when using the whylabs_client
* test cleanup

- [x] I have reviewed the [Guidelines for Contributing](CONTRIBUTING.md)
and the [Code of Conduct](CODE_OF_CONDUCT.md).
  • Loading branch information
jamie256 authored Jan 10, 2025
1 parent e5b5828 commit 976bc4c
Show file tree
Hide file tree
Showing 18 changed files with 5 additions and 282 deletions.
16 changes: 0 additions & 16 deletions python/tests/api/writer/test_whylabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,22 +342,6 @@ def test_option_will_overwrite_defaults(self) -> None:
assert writer._whylabs_client._dataset_id == "new_dataset_id"
assert writer.key_id == "newkeynewk"

def test_api_key_prefers_parameter_over_env_var(self, results, caplog):
with pytest.raises(ValueError):
results.writer("whylabs").option(org_id="org_id", api_key="api_key_123.foo").write(dataset_id="dataset_id")

def test_writer_accepts_dest_param(self, results, caplog):
# TODO: inspect error or mock better to avoid network call and keep test focused.
with pytest.raises(ValueError):
results.writer("whylabs").option(api_key="bad_key_format").write(dataset_id="dataset_id", dest="tmp")

def test_write_response(self, results):
with pytest.raises(ValueError):
response = (
results.writer("whylabs").option(api_key="bad_key_format").write(dataset_id="dataset_id", dest="tmp")
)
assert response[0] is True

def test_changing_api_key_works(self) -> None:
#
# Defaults
Expand Down
3 changes: 0 additions & 3 deletions python/whylogs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
reader,
write,
)
from .api.usage_stats import emit_usage as __emit_usage_stats
from .api.whylabs import init
from .core import DatasetProfileView
from .migration.converters import v0_to_v1_view
Expand Down Expand Up @@ -65,5 +64,3 @@ def package_version(package: str = __package__) -> str:
__version__,
init,
]

__emit_usage_stats("import")
8 changes: 2 additions & 6 deletions python/whylogs/api/fugue/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# flake8: noqa
from whylogs.api.usage_stats import emit_usage
from .profiler import fugue_profile

# This import has a side effect
from .profiler import fugue_profile # type: ignore

emit_usage("fugue")
assert fugue_profile is not None
2 changes: 0 additions & 2 deletions python/whylogs/api/logger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
_log_segment,
)
from whylogs.api.logger.transient import TransientLogger
from whylogs.api.usage_stats import emit_usage
from whylogs.api.whylabs.session.notebook_logger import (
notebook_session_log,
notebook_session_log_comparison,
Expand Down Expand Up @@ -55,7 +54,6 @@ def log(
) -> ResultSet:
if multiple is not None:
result_sets: Dict[str, ResultSet] = {}
emit_usage("multiple")
for alias, data in multiple.items():
result_set = TransientLogger(schema=schema).log(data, trace_id=trace_id)
if dataset_timestamp is not None:
Expand Down
2 changes: 0 additions & 2 deletions python/whylogs/api/pyspark/experimental/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,13 @@
from typing import Dict, Iterable, Optional, Tuple

import whylogs as why
from whylogs.api.usage_stats import emit_usage
from whylogs.core import DatasetSchema
from whylogs.core.metrics.metrics import conf
from whylogs.core.stubs import pd
from whylogs.core.view.column_profile_view import ColumnProfileView
from whylogs.core.view.dataset_profile_view import DatasetProfileView

logger = getLogger(__name__)
emit_usage("pyspark")

try: # type: ignore
from pyspark.ml.functions import vector_to_array
Expand Down
2 changes: 0 additions & 2 deletions python/whylogs/api/pyspark/experimental/segmented_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import whylogs as why
from whylogs.api.logger.result_set import ResultSet, SegmentedResultSet
from whylogs.api.pyspark.experimental.profiler import COL_NAME_FIELD, COL_PROFILE_FIELD
from whylogs.api.usage_stats import emit_usage
from whylogs.core import DatasetSchema
from whylogs.core.segment import Segment
from whylogs.core.segmentation_partition import SegmentationPartition
Expand All @@ -17,7 +16,6 @@
from whylogs.core.view.dataset_profile_view import DatasetProfileView

logger = getLogger(__name__)
emit_usage("pyspark")

try: # type: ignore
from pyspark.ml.functions import vector_to_array
Expand Down
218 changes: 0 additions & 218 deletions python/whylogs/api/usage_stats/__init__.py

This file was deleted.

5 changes: 0 additions & 5 deletions python/whylogs/api/whylabs/session/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,12 @@ def __init__(self, config: SessionConfig) -> None:
If neither exist then this will attempt to create a new session and store the id in the config,
which does require a successful service call to whylabs.
"""
from whylogs.api.usage_stats import emit_usage

super().__init__(config)

# Using lazy initialization to work around circular dependency issues
self._whylabs_session_api = Lazy(self.__create_session_api)
self._user_guid = self._get_or_create_user_guid()
emit_usage("guest_session")

def __create_session_api(self) -> SessionsApi:
from whylogs.api.whylabs.session.whylabs_client_cache import ClientCacheConfig
Expand Down Expand Up @@ -255,15 +253,12 @@ def upload_batch_profile(self, profile: ResultSet) -> Union[UploadResult, NotSup

class ApiKeySession(Session):
def __init__(self, config: SessionConfig) -> None:
from whylogs.api.usage_stats import emit_usage

super().__init__(config)
self.api_key = config.get_api_key()
self.org_id = config.get_org_id()

# Using lazy initialization to work around circular dependency issues
self._whylabs_log_api = Lazy(partial(self.__create_log_api, config))
emit_usage("api_key_session")

def __create_log_api(self, config: SessionConfig) -> LogApi:
from whylogs.api.whylabs.session.whylabs_client_cache import ClientCacheConfig
Expand Down
4 changes: 0 additions & 4 deletions python/whylogs/api/whylabs/session/whylabs_client_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ def __call__(self, config: Configuration) -> None:
def _validate_api_key(self, api_key: Optional[str]) -> str:
if api_key is None:
raise ValueError("Missing API key. Set it via WHYLABS_API_KEY environment variable or as an api_key option")
if len(api_key) < 12:
raise ValueError("API key too short")
if api_key[10] != ".":
raise ValueError("Invalid format. Expecting a dot at an index 10")
return api_key[:10]


Expand Down
2 changes: 0 additions & 2 deletions python/whylogs/api/writer/mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import mlflow

from whylogs.api.usage_stats import emit_usage
from whylogs.api.writer import Writer
from whylogs.api.writer.writer import _Writable
from whylogs.core.utils import deprecated_alias
Expand All @@ -18,7 +17,6 @@ def __init__(self) -> None:
self._file_dir = "whylogs"
self._file_name = None
self._end_run = True
emit_usage("mlflow_writer")

@deprecated_alias(profile="file")
def write(
Expand Down
2 changes: 0 additions & 2 deletions python/whylogs/api/writer/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from botocore.client import BaseClient
from botocore.exceptions import ClientError

from whylogs.api.usage_stats import emit_usage
from whylogs.api.writer import Writer
from whylogs.api.writer.writer import _Writable
from whylogs.core.utils import deprecated_alias
Expand Down Expand Up @@ -63,7 +62,6 @@ def __init__(
self.base_prefix = base_prefix or "profile"
self.bucket_name = bucket_name or ""
self.object_name = object_name or None
emit_usage("s3_writer")

@deprecated_alias(profile="file")
def write(
Expand Down
4 changes: 3 additions & 1 deletion python/whylogs/api/writer/whylabs_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@


def _giveup(e) -> bool:
return (e.status not in _RETRY_CODES,) # type: ignore
result = e.status not in _RETRY_CODES
logger.warning(f"whylabs client communication error: {e}, giveup: {result}")
return result


def _get_column_names(x: Union[DatasetProfile, DatasetProfileView, SegmentedDatasetProfileView, ResultSet]) -> Set[str]:
Expand Down
Loading

0 comments on commit 976bc4c

Please sign in to comment.