diff --git a/src/instana/instrumentation/aws/boto3.py b/src/instana/instrumentation/aws/boto3.py index 6cfc3193..7538a126 100644 --- a/src/instana/instrumentation/aws/boto3.py +++ b/src/instana/instrumentation/aws/boto3.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, Sequence, Tuple, Type from instana.instrumentation.aws.dynamodb import collect_dynamodb_attributes +from instana.instrumentation.aws.s3 import collect_s3_attributes from opentelemetry.semconv.trace import SpanAttributes if TYPE_CHECKING: @@ -76,6 +77,10 @@ def make_api_call_with_instana( collect_dynamodb_attributes( wrapped, instance, args, kwargs, parent_context ) + elif instance.meta.service_model.service_name == "s3": + collect_s3_attributes( + wrapped, instance, args, kwargs, parent_context + ) else: with tracer.start_as_current_span( "boto3", span_context=parent_context diff --git a/src/instana/instrumentation/aws/s3.py b/src/instana/instrumentation/aws/s3.py new file mode 100644 index 00000000..1e82159e --- /dev/null +++ b/src/instana/instrumentation/aws/s3.py @@ -0,0 +1,86 @@ +# (c) Copyright IBM Corp. 2021 +# (c) Copyright Instana Inc. 
2020
+
+from typing import TYPE_CHECKING, Any, Callable, Dict, Sequence, Type
+
+from instana.span_context import SpanContext
+
+if TYPE_CHECKING:
+    from botocore.client import BaseClient
+
+try:
+    import wrapt
+
+    from instana.log import logger
+    from instana.singletons import tracer
+    from instana.util.traceutils import (
+        get_tracer_tuple,
+        tracing_is_off,
+    )
+
+    # Maps the name of each wrapped boto3.s3.inject helper to the operation
+    # name reported in the "s3.op" span attribute.
+    operations = {
+        "upload_file": "UploadFile",
+        "upload_fileobj": "UploadFileObj",
+        "download_file": "DownloadFile",
+        "download_fileobj": "DownloadFileObj",
+    }
+
+    def collect_s3_attributes(
+        wrapped: Callable[..., Dict[str, Any]],
+        instance: Type["BaseClient"],
+        args: Sequence[Dict[str, Any]],
+        kwargs: Dict[str, Any],
+        parent_context: SpanContext,
+    ) -> None:
+        """Open an "s3" span and tag it with the operation and bucket.
+
+        This function only records attributes; it does not invoke ``wrapped``.
+
+        Args:
+            wrapped: The wrapped API call (unused here).
+            instance: The client the call was made on (unused here).
+            args: Positional arguments of the API call; ``args[0]`` is stored
+                as ``s3.op`` and ``args[1]`` is read as the parameter mapping
+                that may contain a ``"Bucket"`` entry.
+            kwargs: Keyword arguments of the API call (unused here).
+            parent_context: Span context the new span is started under.
+        """
+        with tracer.start_as_current_span("s3", span_context=parent_context) as span:
+            try:
+                span.set_attribute("s3.op", args[0])
+                # Operations such as ListBuckets carry no bucket parameter.
+                if "Bucket" in args[1]:
+                    span.set_attribute("s3.bucket", args[1]["Bucket"])
+            except Exception as exc:
+                # Best effort: attribute collection must never break the call.
+                span.record_exception(exc)
+                logger.debug("collect_s3_attributes: collect error", exc_info=True)
+
+    def collect_s3_injected_attributes(
+        wrapped: Callable[..., object],
+        instance: Type["BaseClient"],
+        args: Sequence[object],
+        kwargs: Dict[str, Any],
+    ) -> object:
+        """Trace one of the wrapped ``boto3.s3.inject`` transfer helpers.
+
+        Starts an "s3" span, tags it with the operation name and the bucket,
+        runs the wrapped helper and returns its result.
+
+        Args:
+            wrapped: The original helper; its ``__name__`` selects the entry
+                in ``operations``.
+            instance: The client the helper was called on (unused here).
+            args: Positional arguments passed by the caller.
+            kwargs: Keyword arguments passed by the caller.
+
+        Returns:
+            Whatever ``wrapped(*args, **kwargs)`` returns.
+
+        Raises:
+            Exception: Any exception raised by ``wrapped`` is recorded on the
+                span and re-raised unchanged, so callers still see transfer
+                failures.
+        """
+        # If we're not tracing, just call through.
+        if tracing_is_off():
+            return wrapped(*args, **kwargs)
+
+        tracer, parent_span, _ = get_tracer_tuple()
+        parent_context = parent_span.get_span_context() if parent_span else None
+
+        with tracer.start_as_current_span("s3", span_context=parent_context) as span:
+            # Attribute collection is best effort and is kept separate from
+            # the real call, so a collection failure can neither skip the
+            # transfer nor hide its outcome.
+            try:
+                span.set_attribute("s3.op", operations[wrapped.__name__])
+                if "Bucket" in kwargs:
+                    bucket = kwargs["Bucket"]
+                elif wrapped.__name__ in ("download_file", "download_fileobj"):
+                    # Download helpers take the bucket as first positional.
+                    bucket = args[0]
+                else:
+                    # Upload helpers take the bucket as second positional.
+                    bucket = args[1]
+                span.set_attribute("s3.bucket", bucket)
+            except Exception as exc:
+                span.record_exception(exc)
+                logger.debug(
+                    "collect_s3_injected_attributes: collect error", exc_info=True
+                )
+
+            try:
+                return wrapped(*args, **kwargs)
+            except Exception as exc:
+                span.record_exception(exc)
+                logger.debug(
+                    "collect_s3_injected_attributes: call error", exc_info=True
+                )
+                raise
+
+    for method
in [ + "upload_file", + "upload_fileobj", + "download_file", + "download_fileobj", + ]: + wrapt.wrap_function_wrapper( + "boto3.s3.inject", method, collect_s3_injected_attributes + ) + + logger.debug("Instrumenting s3") +except ImportError: + pass diff --git a/src/instana/span/kind.py b/src/instana/span/kind.py index 53947bd3..b93fa207 100644 --- a/src/instana/span/kind.py +++ b/src/instana/span/kind.py @@ -50,6 +50,7 @@ "redis", "rpc-client", "sqlalchemy", + "s3", "tornado-client", "urllib3", "pymongo", diff --git a/src/instana/span/registered_span.py b/src/instana/span/registered_span.py index 41d52ba3..a658f0b8 100644 --- a/src/instana/span/registered_span.py +++ b/src/instana/span/registered_span.py @@ -260,6 +260,10 @@ def _populate_exit_span_data(self, span: "InstanaSpan") -> None: # self.data["rpc"]["baggage"] = span.attributes.pop("rpc.baggage", None) self.data["rpc"]["error"] = span.attributes.pop("rpc.error", None) + elif span.name == "s3": + self.data["s3"]["op"] = span.attributes.pop("s3.op", None) + self.data["s3"]["bucket"] = span.attributes.pop("s3.bucket", None) + elif span.name == "sqlalchemy": self.data["sqlalchemy"]["sql"] = span.attributes.pop("sqlalchemy.sql", None) self.data["sqlalchemy"]["eng"] = span.attributes.pop("sqlalchemy.eng", None) diff --git a/tests/clients/boto3/test_boto3_s3.py b/tests/clients/boto3/test_boto3_s3.py index 6410a6ea..b772ab42 100644 --- a/tests/clients/boto3/test_boto3_s3.py +++ b/tests/clients/boto3/test_boto3_s3.py @@ -50,56 +50,40 @@ def test_s3_create_bucket(self) -> None: spans = self.recorder.queued_spans() assert len(spans) == 2 - filter = lambda span: span.n == "sdk" + filter = lambda span: span.n == "sdk" # noqa: E731 test_span = get_first_span_by_filter(spans, filter) assert test_span - filter = lambda span: span.n == "boto3" - boto_span = get_first_span_by_filter(spans, filter) - assert boto_span + filter = lambda span: span.n == "s3" # noqa: E731 + s3_span = get_first_span_by_filter(spans, filter) + 
assert s3_span - assert boto_span.t == test_span.t - assert boto_span.p == test_span.s + assert s3_span.t == test_span.t + assert s3_span.p == test_span.s assert not test_span.ec - assert not boto_span.ec - - assert boto_span.data["boto3"]["op"] == "CreateBucket" - assert boto_span.data["boto3"]["ep"] == "https://s3.amazonaws.com" - assert boto_span.data["boto3"]["reg"] == "us-east-1" - assert boto_span.data["boto3"]["payload"] == {"Bucket": "aws_bucket_name"} - assert boto_span.data["http"]["status"] == 200 - assert boto_span.data["http"]["method"] == "POST" - assert ( - boto_span.data["http"]["url"] == "https://s3.amazonaws.com:443/CreateBucket" - ) + assert not s3_span.ec + + assert s3_span.data["s3"]["op"] == "CreateBucket" + assert s3_span.data["s3"]["bucket"] == "aws_bucket_name" def test_s3_create_bucket_as_root_exit_span(self) -> None: agent.options.allow_exit_as_root = True self.s3.create_bucket(Bucket="aws_bucket_name") agent.options.allow_exit_as_root = False - result = self.s3.list_buckets() - assert len(result["Buckets"]) == 1 - assert result["Buckets"][0]["Name"] == "aws_bucket_name" + self.s3.list_buckets() spans = self.recorder.queued_spans() assert len(spans) == 1 - boto_span = spans[0] - assert boto_span - assert boto_span.n == "boto3" - assert not boto_span.p - assert not boto_span.ec - - assert boto_span.data["boto3"]["op"] == "CreateBucket" - assert boto_span.data["boto3"]["ep"] == "https://s3.amazonaws.com" - assert boto_span.data["boto3"]["reg"] == "us-east-1" - assert boto_span.data["boto3"]["payload"] == {"Bucket": "aws_bucket_name"} - assert boto_span.data["http"]["status"] == 200 - assert boto_span.data["http"]["method"] == "POST" - assert ( - boto_span.data["http"]["url"] == "https://s3.amazonaws.com:443/CreateBucket" - ) + + s3_span = spans[0] + assert s3_span + + assert not s3_span.ec + + assert s3_span.data["s3"]["op"] == "CreateBucket" + assert s3_span.data["s3"]["bucket"] == "aws_bucket_name" def test_s3_list_buckets(self) -> None: 
with tracer.start_as_current_span("test"): @@ -111,29 +95,22 @@ def test_s3_list_buckets(self) -> None: spans = self.recorder.queued_spans() assert len(spans) == 2 - filter = lambda span: span.n == "sdk" + filter = lambda span: span.n == "sdk" # noqa: E731 test_span = get_first_span_by_filter(spans, filter) assert test_span - filter = lambda span: span.n == "boto3" - boto_span = get_first_span_by_filter(spans, filter) - assert boto_span + filter = lambda span: span.n == "s3" # noqa: E731 + s3_span = get_first_span_by_filter(spans, filter) + assert s3_span - assert boto_span.t == test_span.t - assert boto_span.p == test_span.s + assert s3_span.t == test_span.t + assert s3_span.p == test_span.s assert not test_span.ec - assert not boto_span.ec - - assert boto_span.data["boto3"]["op"] == "ListBuckets" - assert boto_span.data["boto3"]["ep"] == "https://s3.amazonaws.com" - assert boto_span.data["boto3"]["reg"] == "us-east-1" - assert boto_span.data["boto3"]["payload"] == {} - assert boto_span.data["http"]["status"] == 200 - assert boto_span.data["http"]["method"] == "POST" - assert ( - boto_span.data["http"]["url"] == "https://s3.amazonaws.com:443/ListBuckets" - ) + assert not s3_span.ec + + assert s3_span.data["s3"]["op"] == "ListBuckets" + assert not s3_span.data["s3"]["bucket"] def test_s3_vanilla_upload_file(self) -> None: object_name = "aws_key_name" @@ -155,33 +132,22 @@ def test_s3_upload_file(self) -> None: spans = self.recorder.queued_spans() assert len(spans) == 2 - filter = lambda span: span.n == "sdk" + filter = lambda span: span.n == "sdk" # noqa: E731 test_span = get_first_span_by_filter(spans, filter) assert test_span - filter = lambda span: span.n == "boto3" - boto_span = get_first_span_by_filter(spans, filter) - assert boto_span + filter = lambda span: span.n == "s3" # noqa: E731 + s3_span = get_first_span_by_filter(spans, filter) + assert s3_span - assert boto_span.t == test_span.t - assert boto_span.p == test_span.s + assert s3_span.t == test_span.t + 
assert s3_span.p == test_span.s assert not test_span.ec - assert not boto_span.ec - - assert boto_span.data["boto3"]["op"] == "upload_file" - assert boto_span.data["boto3"]["ep"] == "https://s3.amazonaws.com" - assert boto_span.data["boto3"]["reg"] == "us-east-1" - payload = { - "Filename": upload_filename, - "Bucket": "aws_bucket_name", - "Key": "aws_key_name", - } - assert boto_span.data["boto3"]["payload"] == payload - assert boto_span.data["http"]["method"] == "POST" - assert ( - boto_span.data["http"]["url"] == "https://s3.amazonaws.com:443/upload_file" - ) + assert not s3_span.ec + + assert s3_span.data["s3"]["op"] == "UploadFile" + assert s3_span.data["s3"]["bucket"] == "aws_bucket_name" def test_s3_upload_file_obj(self) -> None: object_name = "aws_key_name" @@ -196,30 +162,22 @@ def test_s3_upload_file_obj(self) -> None: spans = self.recorder.queued_spans() assert len(spans) == 2 - filter = lambda span: span.n == "sdk" + filter = lambda span: span.n == "sdk" # noqa: E731 test_span = get_first_span_by_filter(spans, filter) assert test_span - filter = lambda span: span.n == "boto3" - boto_span = get_first_span_by_filter(spans, filter) - assert boto_span + filter = lambda span: span.n == "s3" # noqa: E731 + s3_span = get_first_span_by_filter(spans, filter) + assert s3_span - assert boto_span.t == test_span.t - assert boto_span.p == test_span.s + assert s3_span.t == test_span.t + assert s3_span.p == test_span.s assert not test_span.ec - assert not boto_span.ec - - assert boto_span.data["boto3"]["op"] == "upload_fileobj" - assert boto_span.data["boto3"]["ep"] == "https://s3.amazonaws.com" - assert boto_span.data["boto3"]["reg"] == "us-east-1" - payload = {"Bucket": "aws_bucket_name", "Key": "aws_key_name"} - assert boto_span.data["boto3"]["payload"] == payload - assert boto_span.data["http"]["method"] == "POST" - assert ( - boto_span.data["http"]["url"] - == "https://s3.amazonaws.com:443/upload_fileobj" - ) + assert not s3_span.ec + + assert 
s3_span.data["s3"]["op"] == "UploadFileObj" + assert s3_span.data["s3"]["bucket"] == "aws_bucket_name" def test_s3_download_file(self) -> None: object_name = "aws_key_name" @@ -234,34 +192,22 @@ def test_s3_download_file(self) -> None: spans = self.recorder.queued_spans() assert len(spans) == 2 - filter = lambda span: span.n == "sdk" + filter = lambda span: span.n == "sdk" # noqa: E731 test_span = get_first_span_by_filter(spans, filter) assert test_span - filter = lambda span: span.n == "boto3" - boto_span = get_first_span_by_filter(spans, filter) - assert boto_span + filter = lambda span: span.n == "s3" # noqa: E731 + s3_span = get_first_span_by_filter(spans, filter) + assert s3_span - assert boto_span.t == test_span.t - assert boto_span.p == test_span.s + assert s3_span.t == test_span.t + assert s3_span.p == test_span.s assert not test_span.ec - assert not boto_span.ec - - assert boto_span.data["boto3"]["op"] == "download_file" - assert boto_span.data["boto3"]["ep"] == "https://s3.amazonaws.com" - assert boto_span.data["boto3"]["reg"] == "us-east-1" - payload = { - "Bucket": "aws_bucket_name", - "Key": "aws_key_name", - "Filename": "%s" % download_target_filename, - } - assert boto_span.data["boto3"]["payload"] == payload - assert boto_span.data["http"]["method"] == "POST" - assert ( - boto_span.data["http"]["url"] - == "https://s3.amazonaws.com:443/download_file" - ) + assert not s3_span.ec + + assert s3_span.data["s3"]["op"] == "DownloadFile" + assert s3_span.data["s3"]["bucket"] == "aws_bucket_name" def test_s3_download_file_obj(self) -> None: object_name = "aws_key_name" @@ -277,204 +223,47 @@ def test_s3_download_file_obj(self) -> None: spans = self.recorder.queued_spans() assert len(spans) == 2 - filter = lambda span: span.n == "sdk" + filter = lambda span: span.n == "sdk" # noqa: E731 test_span = get_first_span_by_filter(spans, filter) assert test_span - filter = lambda span: span.n == "boto3" - boto_span = get_first_span_by_filter(spans, filter) - assert 
boto_span + filter = lambda span: span.n == "s3" # noqa: E731 + s3_span = get_first_span_by_filter(spans, filter) + assert s3_span - assert boto_span.t == test_span.t - assert boto_span.p == test_span.s + assert s3_span.t == test_span.t + assert s3_span.p == test_span.s assert not test_span.ec - assert not boto_span.ec - - assert boto_span.data["boto3"]["op"] == "download_fileobj" - assert boto_span.data["boto3"]["ep"] == "https://s3.amazonaws.com" - assert boto_span.data["boto3"]["reg"] == "us-east-1" - assert boto_span.data["http"]["method"] == "POST" - assert ( - boto_span.data["http"]["url"] - == "https://s3.amazonaws.com:443/download_fileobj" - ) - - def test_request_header_capture_before_call(self) -> None: - original_extra_http_headers = agent.options.extra_http_headers - agent.options.extra_http_headers = ["X-Capture-This", "X-Capture-That"] - - # Access the event system on the S3 client - event_system = self.s3.meta.events - - request_headers = {"X-Capture-This": "this", "X-Capture-That": "that"} - - # Create a function that adds custom headers - def add_custom_header_before_call(params, **kwargs): - params["headers"].update(request_headers) - - # Register the function to before-call event. 
- event_system.register( - "before-call.s3.CreateBucket", add_custom_header_before_call - ) - - with tracer.start_as_current_span("test"): - self.s3.create_bucket(Bucket="aws_bucket_name") - - result = self.s3.list_buckets() - assert len(result["Buckets"]) == 1 - assert result["Buckets"][0]["Name"] == "aws_bucket_name" - - spans = self.recorder.queued_spans() - assert len(spans) == 2 - - filter = lambda span: span.n == "sdk" - test_span = get_first_span_by_filter(spans, filter) - assert test_span - - filter = lambda span: span.n == "boto3" - boto_span = get_first_span_by_filter(spans, filter) - assert boto_span - - assert boto_span.t == test_span.t - assert boto_span.p == test_span.s - - assert not test_span.ec - assert not boto_span.ec - - assert boto_span.data["boto3"]["op"] == "CreateBucket" - assert boto_span.data["boto3"]["ep"] == "https://s3.amazonaws.com" - assert boto_span.data["boto3"]["reg"] == "us-east-1" - assert boto_span.data["boto3"]["payload"] == {"Bucket": "aws_bucket_name"} - assert boto_span.data["http"]["status"] == 200 - assert boto_span.data["http"]["method"] == "POST" - assert ( - boto_span.data["http"]["url"] == "https://s3.amazonaws.com:443/CreateBucket" - ) + assert not s3_span.ec - assert "X-Capture-This" in boto_span.data["http"]["header"] - assert boto_span.data["http"]["header"]["X-Capture-This"] == "this" - assert "X-Capture-That" in boto_span.data["http"]["header"] - assert boto_span.data["http"]["header"]["X-Capture-That"] == "that" + assert s3_span.data["s3"]["op"] == "DownloadFileObj" + assert s3_span.data["s3"]["bucket"] == "aws_bucket_name" - agent.options.extra_http_headers = original_extra_http_headers - - def test_request_header_capture_before_sign(self) -> None: - original_extra_http_headers = agent.options.extra_http_headers - agent.options.extra_http_headers = ["X-Custom-1", "X-Custom-2"] - - # Access the event system on the S3 client - event_system = self.s3.meta.events - - request_headers = {"X-Custom-1": "Value1", 
"X-Custom-2": "Value2"} - - # Create a function that adds custom headers - def add_custom_header_before_sign(request, **kwargs): - for name, value in request_headers.items(): - request.headers.add_header(name, value) + def test_s3_list_obj(self) -> None: + bucket_name = "aws_bucket_name" - # Register the function to before-sign event. - event_system.register_first( - "before-sign.s3.CreateBucket", add_custom_header_before_sign - ) + self.s3.create_bucket(Bucket=bucket_name) with tracer.start_as_current_span("test"): - self.s3.create_bucket(Bucket="aws_bucket_name") - - result = self.s3.list_buckets() - assert len(result["Buckets"]) == 1 - assert result["Buckets"][0]["Name"] == "aws_bucket_name" + self.s3.list_objects(Bucket=bucket_name) spans = self.recorder.queued_spans() assert len(spans) == 2 - filter = lambda span: span.n == "sdk" + filter = lambda span: span.n == "sdk" # noqa: E731 test_span = get_first_span_by_filter(spans, filter) assert test_span - filter = lambda span: span.n == "boto3" - boto_span = get_first_span_by_filter(spans, filter) - assert boto_span + filter = lambda span: span.n == "s3" # noqa: E731 + s3_span = get_first_span_by_filter(spans, filter) + assert s3_span - assert boto_span.t == test_span.t - assert boto_span.p == test_span.s + assert s3_span.t == test_span.t + assert s3_span.p == test_span.s assert not test_span.ec - assert not boto_span.ec - - assert boto_span.data["boto3"]["op"] == "CreateBucket" - assert boto_span.data["boto3"]["ep"] == "https://s3.amazonaws.com" - assert boto_span.data["boto3"]["reg"] == "us-east-1" - assert boto_span.data["boto3"]["payload"] == {"Bucket": "aws_bucket_name"} - assert boto_span.data["http"]["status"] == 200 - assert boto_span.data["http"]["method"] == "POST" - assert ( - boto_span.data["http"]["url"] == "https://s3.amazonaws.com:443/CreateBucket" - ) - - assert "X-Custom-1" in boto_span.data["http"]["header"] - assert boto_span.data["http"]["header"]["X-Custom-1"] == "Value1" - assert "X-Custom-2" 
in boto_span.data["http"]["header"] - assert boto_span.data["http"]["header"]["X-Custom-2"] == "Value2" - - agent.options.extra_http_headers = original_extra_http_headers - - def test_response_header_capture(self) -> None: - original_extra_http_headers = agent.options.extra_http_headers - agent.options.extra_http_headers = ["X-Capture-This-Too", "X-Capture-That-Too"] - - # Access the event system on the S3 client - event_system = self.s3.meta.events - - response_headers = { - "X-Capture-This-Too": "this too", - "X-Capture-That-Too": "that too", - } - - # Create a function that sets the custom headers in the after-call event. - def modify_after_call_args(parsed, **kwargs): - parsed["ResponseMetadata"]["HTTPHeaders"].update(response_headers) + assert not s3_span.ec - # Register the function to an event - event_system.register("after-call.s3.CreateBucket", modify_after_call_args) - - with tracer.start_as_current_span("test"): - self.s3.create_bucket(Bucket="aws_bucket_name") - - result = self.s3.list_buckets() - assert len(result["Buckets"]) == 1 - assert result["Buckets"][0]["Name"] == "aws_bucket_name" - - spans = self.recorder.queued_spans() - assert len(spans) == 2 - - filter = lambda span: span.n == "sdk" - test_span = get_first_span_by_filter(spans, filter) - assert test_span - - filter = lambda span: span.n == "boto3" - boto_span = get_first_span_by_filter(spans, filter) - assert boto_span - - assert boto_span.t == test_span.t - assert boto_span.p == test_span.s - - assert not test_span.ec - assert not boto_span.ec - - assert boto_span.data["boto3"]["op"] == "CreateBucket" - assert boto_span.data["boto3"]["ep"] == "https://s3.amazonaws.com" - assert boto_span.data["boto3"]["reg"] == "us-east-1" - assert boto_span.data["boto3"]["payload"] == {"Bucket": "aws_bucket_name"} - assert boto_span.data["http"]["status"] == 200 - assert boto_span.data["http"]["method"] == "POST" - assert ( - boto_span.data["http"]["url"] == "https://s3.amazonaws.com:443/CreateBucket" - ) 
- - assert "X-Capture-This-Too" in boto_span.data["http"]["header"] - assert boto_span.data["http"]["header"]["X-Capture-This-Too"] == "this too" - assert "X-Capture-That-Too" in boto_span.data["http"]["header"] - assert boto_span.data["http"]["header"]["X-Capture-That-Too"] == "that too" - - agent.options.extra_http_headers = original_extra_http_headers + assert s3_span.data["s3"]["op"] == "ListObjects" + assert s3_span.data["s3"]["bucket"] == "aws_bucket_name"