From cafa4b37d0c7fa42e7af58dfd270ee38da9ed051 Mon Sep 17 00:00:00 2001 From: Bodor Laszlo Date: Thu, 25 Jan 2024 03:59:45 +0100 Subject: [PATCH] TEZ-4357: Report url to logs in case of fetcher connection failure (#169). (Laszlo Bodor, reviewed by Ayush Saxena) --- .../tez/runtime/library/common/shuffle/Fetcher.java | 8 ++++---- .../orderedgrouped/FetcherOrderedGrouped.java | 13 +++++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java index c700f29782..fc7ad3823b 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java @@ -494,8 +494,9 @@ protected HostFetchResult doHttpFetch() { } private HostFetchResult setupConnection(Collection attempts) { + StringBuilder baseURI = null; try { - StringBuilder baseURI = ShuffleUtils.constructBaseURIForShuffleHandler(host, + baseURI = ShuffleUtils.constructBaseURIForShuffleHandler(host, port, partition, partitionCount, appId.toString(), dagIdentifier, httpConnectionParams.isSslShuffle()); this.url = ShuffleUtils.constructInputURL(baseURI.toString(), attempts, httpConnectionParams.isKeepAlive()); @@ -547,9 +548,8 @@ private HostFetchResult setupConnection(Collection attem } } else { InputAttemptIdentifier firstAttempt = attempts.iterator().next(); - LOG.warn(String.format( - "Fetch Failure while connecting from %s to: %s:%d, attempt: %s Informing ShuffleManager: ", - localHostname, host, port, firstAttempt), e); + LOG.warn("FETCH_FAILURE: Fetch Failure while connecting from {} to: {}:{}, attempt: {}, url: {}" + + " Informing ShuffleManager", localHostname, host, port, firstAttempt, baseURI, e); return new HostFetchResult(new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values()), new InputAttemptFetchFailure[] { new InputAttemptFetchFailure(firstAttempt) }, true); } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java index c277f57382..9e2706e280 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java @@ -340,8 +340,9 @@ private void invokeCopyFailedForFailedTasks(MapHost host, boolean setupConnection(MapHost host, Collection attempts) throws IOException { boolean connectSucceeded = false; + StringBuilder baseURI = null; try { - StringBuilder baseURI = ShuffleUtils.constructBaseURIForShuffleHandler(host.getHost(), + baseURI = ShuffleUtils.constructBaseURIForShuffleHandler(host.getHost(), host.getPort(), host.getPartitionId(), host.getPartitionCount(), applicationId, dagId, sslShuffle); URL url = ShuffleUtils.constructInputURL(baseURI.toString(), attempts, httpConnectionParams.isKeepAlive()); httpConnection = ShuffleUtils.getHttpConnection(asyncHttp, url, httpConnectionParams, @@ -364,13 +365,13 @@ boolean setupConnection(MapHost host, Collection attempt } ioErrs.increment(1); if (!connectSucceeded) { - LOG.warn(String.format("Failed to connect from %s to %s with %d inputs", localShuffleHost, - host, remaining.size()), ie); + LOG.warn("FETCH_FAILURE: Failed to connect from {} to {} with {} inputs, url: {}", localShuffleHost, + host, remaining.size(), baseURI, ie); connectionErrs.increment(1); } else { - LOG.warn(String.format( - "Failed to verify reply after connecting from %s to %s with %d inputs pending", - localShuffleHost, host, remaining.size()), ie); + LOG.warn( + "FETCH_FAILURE: Failed to verify reply after connecting from {} to {} with {} inputs pending, url: {}", + localShuffleHost, host, remaining.size(), baseURI, ie); } // At this point, either the connection failed, or the initial header verification failed.