diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 5bc196b..c1bf3a2 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -41,5 +41,6 @@ jobs: DATABRICKS_ACCESS_TOKEN: ${{ secrets.DATABRICKS_ACCESS_TOKEN }} S3_UPLOAD_BUCKET: ${{ secrets.S3_UPLOAD_BUCKET }} S3_OUTPUT_BUCKET: ${{ secrets.S3_OUTPUT_BUCKET }} + S3_KMS_KEY_ARN: ${{ secrets.S3_KMS_KEY_ARN }} run: | poetry run pytest \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 24a73db..ea47f6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "tonic-textual" -version = "3.6.0" +version = "3.6.1" description = "Wrappers around the Tonic Textual API" authors = ["Adam Kamor ", "Joe Ferrara ", "Ander Steele ", "Ethan Philpott ", "Lyon Van Voorhis ", "Kirill Medvedev ", "Travis Matthews "] license = "MIT" diff --git a/tests/sample.env b/tests/sample.env index 32aec35..767dcc0 100644 --- a/tests/sample.env +++ b/tests/sample.env @@ -9,4 +9,5 @@ AWS_DEFAULT_REGION=us-east-1 AZURE_ACCOUNT_KEY= AZURE_ACCOUNT_NAME= DATABRICKS_URL= -DATABRICKS_ACCESS_TOKEN= \ No newline at end of file +DATABRICKS_ACCESS_TOKEN= +S3_KMS_KEY_ARN= \ No newline at end of file diff --git a/tests/tests/parse_tests/test_pipeline_creation.py b/tests/tests/parse_tests/test_pipeline_creation.py index f2193f0..79c1589 100644 --- a/tests/tests/parse_tests/test_pipeline_creation.py +++ b/tests/tests/parse_tests/test_pipeline_creation.py @@ -31,6 +31,27 @@ def test_s3_pipelines(textual_parse): credentials=creds, ) +# test just checks that exception is not thrown +def test_s3_pipeline_with_kms(textual_parse): + for synth in [False, True]: + for cred_source in ["user_provided", "from_environment"]: + creds = ( + PipelineAwsCredential( + aws_access_key_id=os.environ["S3_UPLOAD_ACCESS_KEY"], + aws_region=os.environ["AWS_DEFAULT_REGION"], + aws_secret_access_key=os.environ["S3_UPLOAD_SECRET_KEY"], + ) + if cred_source == "user_provided" + else 
None + ) + textual_parse.create_s3_pipeline( + f"aws_{cred_source}_{str(synth)}_{uuid.uuid4()}", + aws_credentials_source=cred_source, + synthesize_files=synth, + credentials=creds, + kms_key_arn=os.environ["S3_KMS_KEY_ARN"] + ) + def test_local_pipelines(textual_parse): for synth in [False, True]: diff --git a/tonic_textual/__init__.py b/tonic_textual/__init__.py index 85197cb..b202327 100644 --- a/tonic_textual/__init__.py +++ b/tonic_textual/__init__.py @@ -1 +1 @@ -__version__ = "3.6.0" +__version__ = "3.6.1" diff --git a/tonic_textual/parse_api.py b/tonic_textual/parse_api.py index cc538f6..96a1964 100644 --- a/tonic_textual/parse_api.py +++ b/tonic_textual/parse_api.py @@ -90,6 +90,7 @@ def create_s3_pipeline( credentials: Optional[PipelineAwsCredential] = None, aws_credentials_source: Optional[str] = "user_provided", synthesize_files: Optional[bool] = False, + kms_key_arn:Optional[str] = None ) -> S3Pipeline: """Create a new pipeline with files from Amazon S3. @@ -105,7 +106,8 @@ def create_s3_pipeline( Whether to generate a redacted version of the file in addition to the parsed output. Default value is `False`. aws_credentials_source: Optional[str] For an Amazon S3 pipeline, how to obtain the AWS credentials. Options are `user_provided` and `from_environment`. For `user_provided`, you provide the credentials in the `credentials` parameter. For `from_environment`, the credentials are read from your Textual instance. - + kms_key_arn: Optional[str] + When provided, the KMS key denoted by the ARN will be used to encrypt files prior to writing to the output location via SSE-KMS. This value cannot be changed later. 
Returns ------- S3Pipeline @@ -145,6 +147,9 @@ def create_s3_pipeline( if aws_credentials_source is not None and fs == FileSource.aws: data["awsCredentialSource"] = aws_cred_source + if kms_key_arn is not None: + data["fileSourceConfig"] = { "awsS3ServerSideEncryptionType": "Kms", "awsS3ServerSideEncryptionKey": kms_key_arn} + p = self.client.http_post("/api/parsejobconfig", data=data) return S3Pipeline(p.get("name"), p.get("id"), self.client) except RequestException as req_err: