Skip to content

Commit

Permalink
Merge pull request #228 from tokern/athena
Browse files: browse the repository at this point in the history
feature: Athena
  • Loading branch information
nicolepng authored Jul 5, 2023
2 parents f89670a + b5911ca commit a1bced1
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 11 deletions.
2 changes: 1 addition & 1 deletion piicatcher/dbinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def get_sample_query(


class Athena(Postgres):
    """Database info for Athena sources.

    Inherits everything from ``Postgres`` and only replaces the template
    used to build the row-sampling query: rows are ordered by ``RAND()``
    and capped with ``LIMIT``.

    The template carries the ``{column_list}``, ``{schema_name}``,
    ``{table_name}`` and ``{num_rows}`` placeholders.
    NOTE(review): presumably these are filled in by the inherited
    ``get_sample_query`` -- confirm against the base class.
    """

    _sample_query_template = (
        "SELECT {column_list} FROM {schema_name}.{table_name} "
        "ORDER BY RAND() LIMIT {num_rows}"
    )


def get_dbinfo(source_type: str, *args, **kwargs) -> DbInfo:
Expand Down
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "piicatcher"
version = "0.21.1"
version = "0.21.2"
description = "Find PII data in databases"
authors = ["Tokern <[email protected]>"]
license = "Apache 2.0"
Expand Down Expand Up @@ -28,7 +28,7 @@ pyyaml = "*"
click = "*"
python-json-logger = "^2.0.2"
commonregex-improved = "1.0.2"
dbcat = "0.14.1"
dbcat = "0.14.2"
typer = "^0.4.0"
goog-stats = "^0.1.2"
tabulate = "^0.8.9"
Expand Down
33 changes: 29 additions & 4 deletions tests/test_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,6 @@ def test_get_sample_query(sqlalchemy_engine):
'SELECT "column" FROM public.table ORDER BY RANDOM() LIMIT 1',
),
("snowflake", "SELECT column FROM public.table TABLESAMPLE BERNOULLI (1 ROWS)"),
(
"athena",
'SELECT "column" FROM public.table TABLESAMPLE BERNOULLI (10) LIMIT 1',
),
],
)
def test_get_sample_query_redshift(mocker, source_type, expected_query):
Expand Down Expand Up @@ -257,6 +253,35 @@ def test_get_select_query_bigquery(mocker, source_type, expected_query):
assert query == expected_query


@pytest.mark.parametrize(
    ("source_type", "expected_query"),
    [
        (
            "athena",
            'SELECT "column" FROM public.table ORDER BY RAND() LIMIT 1',
        ),
    ],
)
def test_get_sample_query_athena(mocker, source_type, expected_query):
    """Check the sampling query generated for an Athena source.

    Builds a minimal source -> schema -> table -> column chain, fixes the
    reported table count at 100 through a mock, and asserts that asking
    for a sample of one row yields the ``ORDER BY RAND() LIMIT 1`` query.
    """
    cat_source = CatSource(name="src", source_type=source_type)
    cat_schema = CatSchema(source=cat_source, name="public")
    cat_table = CatTable(schema=cat_schema, name="table")
    cat_column = CatColumn(table=cat_table, name="column")

    # Pin the table count; the query below is built with connection=None.
    mocker.patch("piicatcher.generators._get_table_count", return_value=100)

    athena_dbinfo = get_dbinfo(cat_source.source_type, cat_schema, cat_table)
    generated_query = _get_query(
        schema=cat_schema,
        table=cat_table,
        column_list=[cat_column],
        dbinfo=athena_dbinfo,
        connection=None,
        sample_size=1,
        source=cat_source,
    )

    assert generated_query == expected_query


def test_row_generator(sqlalchemy_engine):
catalog, source, conn = sqlalchemy_engine
schemata = catalog.search_schema(source_like=source.name, schema_like="%")
Expand Down

0 comments on commit a1bced1

Please sign in to comment.