From 21611246b1dc4cdc61d3c1970c646fb9a6943351 Mon Sep 17 00:00:00 2001 From: Sourabh Badhya Date: Tue, 9 Jan 2024 21:16:05 +0530 Subject: [PATCH] TEZ-4526: Avoid calling LocationProvider#getPreferredLocations multiple times while generating grouped splits (#323) --- .../apache/tez/mapreduce/grouper/TezSplitGrouper.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java index b4143494f8..176eb1a10e 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java @@ -208,10 +208,12 @@ public List getGroupedSplits(Configuration conf, long totalLength = 0; Map distinctLocations = createLocationsMap(conf); + Map splitToLocationsMap = new HashMap<>(originalSplits.size()); // go through splits and add them to locations for (SplitContainer split : originalSplits) { totalLength += estimator.getEstimatedSize(split); String[] locations = locationProvider.getPreferredLocations(split); + splitToLocationsMap.put(split, locations); if (locations == null || locations.length == 0) { locations = emptyLocations; allSplitsHaveLocalhost = false; @@ -293,7 +295,7 @@ public List getGroupedSplits(Configuration conf, groupedSplits = new ArrayList(originalSplits.size()); for (SplitContainer split : originalSplits) { GroupedSplitContainer newSplit = - new GroupedSplitContainer(1, wrappedInputFormatName, cleanupLocations(locationProvider.getPreferredLocations(split)), + new GroupedSplitContainer(1, wrappedInputFormatName, cleanupLocations(splitToLocationsMap.get(split)), null); newSplit.addSplit(split); groupedSplits.add(newSplit); @@ -314,7 +316,7 @@ public List getGroupedSplits(Configuration conf, Set locSet = new HashSet(); for (SplitContainer split : originalSplits) { locSet.clear(); - String[] locations = locationProvider.getPreferredLocations(split); + String[] locations = splitToLocationsMap.get(split); if (locations == null || locations.length == 0) { locations = emptyLocations; } @@ -408,7 +410,7 @@ public List getGroupedSplits(Configuration conf, groupLocation = null; } else if (doingRackLocal) { for (SplitContainer splitH : group) { - String[] locations = locationProvider.getPreferredLocations(splitH); + String[] locations = splitToLocationsMap.get(splitH); if (locations != null) { for (String loc : locations) { if (loc != null) { @@ -503,7 +505,7 @@ public List getGroupedSplits(Configuration conf, } numRackSplitsToGroup--; rackSet.clear(); - String[] locations = locationProvider.getPreferredLocations(split); + String[] locations = splitToLocationsMap.get(split); if (locations == null || locations.length == 0) { locations = emptyLocations; }