18 | 18 |
19 | 19 | package org.apache.flink.runtime.scheduler.adaptive.allocator;
20 | 20 |
| 21 | +import org.apache.flink.annotation.VisibleForTesting;
21 | 22 | import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup;
22 | 23 | import org.apache.flink.runtime.jobmaster.SlotInfo;
23 | 24 | import org.apache.flink.runtime.scheduler.adaptive.JobSchedulingPlan.SlotAssignment;
24 | 25 | import org.apache.flink.runtime.scheduler.adaptive.allocator.SlotSharingSlotAllocator.ExecutionSlotSharingGroup;
25 | | -import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
| 26 | +import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
| 27 | +
| 28 | +import javax.annotation.Nullable;
26 | 29 |
27 | 30 | import java.util.ArrayList;
28 | 31 | import java.util.Collection;
29 | | -import java.util.HashMap;
30 | | -import java.util.HashSet;
| 32 | +import java.util.Comparator;
31 | 33 | import java.util.Iterator;
32 | 34 | import java.util.List;
33 | 35 | import java.util.Map;
34 | 36 | import java.util.Set;
35 | 37 | import java.util.stream.Collectors;
36 | 38 |
37 | | -/** Simple {@link SlotAssigner} that treats all slots and slot sharing groups equally. */
| 39 | +import static java.util.function.Function.identity;
| 40 | +import static org.apache.flink.runtime.scheduler.adaptive.allocator.AllocatorUtil.checkMinimumRequiredSlots;
| 41 | +import static org.apache.flink.runtime.scheduler.adaptive.allocator.AllocatorUtil.createExecutionSlotSharingGroups;
| 42 | +
| 43 | +/**
| 44 | + * Simple {@link SlotAssigner} that treats all slots and slot sharing groups equally. Specifically,
| 45 | + * when the cluster is deployed in application mode and {@link
| 46 | + * org.apache.flink.configuration.JobManagerOptions#SCHEDULER_PREFER_MINIMAL_TASKMANAGERS_ENABLED}
| 47 | + * is enabled, execution slot sharing groups are preferentially assigned to a minimal number of
| 48 | + * task managers.
| 49 | + */
38 | 50 | public class DefaultSlotAssigner implements SlotAssigner {
39 | 51 |
| 52 | +    @VisibleForTesting static final String APPLICATION_MODE_EXECUTION_TARGET = "embedded";
| 53 | +
| 54 | +    private final @Nullable String executionTarget;
| 55 | +    private final boolean minimalTaskManagerPreferred;
| 56 | +
| 57 | +    DefaultSlotAssigner(@Nullable String executionTarget, boolean minimalTaskManagerPreferred) {
| 58 | +        this.executionTarget = executionTarget;
| 59 | +        this.minimalTaskManagerPreferred = minimalTaskManagerPreferred;
| 60 | +    }
| 61 | +
40 | 62 |     @Override
41 | 63 |     public Collection<SlotAssignment> assignSlots(
42 | 64 |             JobInformation jobInformation,
43 | 65 |             Collection<? extends SlotInfo> freeSlots,
44 | 66 |             VertexParallelism vertexParallelism,
45 | 67 |             JobAllocationsInformation previousAllocations) {
46 | | -        List<ExecutionSlotSharingGroup> allGroups = new ArrayList<>();
| 68 | +        checkMinimumRequiredSlots(jobInformation, freeSlots);
| 69 | +
| 70 | +        final List<ExecutionSlotSharingGroup> allGroups = new ArrayList<>();
47 | 71 |         for (SlotSharingGroup slotSharingGroup : jobInformation.getSlotSharingGroups()) {
48 | 72 |             allGroups.addAll(createExecutionSlotSharingGroups(vertexParallelism, slotSharingGroup));
49 | 73 |         }
50 | 74 |
51 | | -        Iterator<? extends SlotInfo> iterator = freeSlots.iterator();
| 75 | +        final Collection<? extends SlotInfo> pickedSlots =
| 76 | +                pickSlotsIfNeeded(allGroups.size(), freeSlots);
| 77 | +
| 78 | +        Iterator<? extends SlotInfo> iterator = pickedSlots.iterator();
52 | 79 |         Collection<SlotAssignment> assignments = new ArrayList<>();
53 | 80 |         for (ExecutionSlotSharingGroup group : allGroups) {
54 | 81 |             assignments.add(new SlotAssignment(iterator.next(), group));
55 | 82 |         }
56 | 83 |         return assignments;
57 | 84 |     }
58 | 85 |
59 | | -    static List<ExecutionSlotSharingGroup> createExecutionSlotSharingGroups(
60 | | -            VertexParallelism vertexParallelism, SlotSharingGroup slotSharingGroup) {
61 | | -        final Map<Integer, Set<ExecutionVertexID>> sharedSlotToVertexAssignment = new HashMap<>();
62 | | -        slotSharingGroup
63 | | -                .getJobVertexIds()
64 | | -                .forEach(
65 | | -                        jobVertexId -> {
66 | | -                            int parallelism = vertexParallelism.getParallelism(jobVertexId);
67 | | -                            for (int subtaskIdx = 0; subtaskIdx < parallelism; subtaskIdx++) {
68 | | -                                sharedSlotToVertexAssignment
69 | | -                                        .computeIfAbsent(subtaskIdx, ignored -> new HashSet<>())
70 | | -                                        .add(new ExecutionVertexID(jobVertexId, subtaskIdx));
71 | | -                            }
72 | | -                        });
73 | | -        return sharedSlotToVertexAssignment.values().stream()
74 | | -                .map(ExecutionSlotSharingGroup::new)
75 | | -                .collect(Collectors.toList());
| 86 | +    @VisibleForTesting
| 87 | +    Collection<? extends SlotInfo> pickSlotsIfNeeded(
| 88 | +            int requestExecutionSlotSharingGroups, Collection<? extends SlotInfo> freeSlots) {
| 89 | +        Collection<? extends SlotInfo> pickedSlots = freeSlots;
| 90 | +        if (APPLICATION_MODE_EXECUTION_TARGET.equalsIgnoreCase(executionTarget)
| 91 | +                && minimalTaskManagerPreferred
| 92 | +                // Skip the grouping and sorting work when every free slot is needed anyway.
| 93 | +                && freeSlots.size() > requestExecutionSlotSharingGroups) {
| 94 | +            final Map<TaskManagerLocation, ? extends Set<? extends SlotInfo>> slotsPerTaskExecutor =
| 95 | +                    getSlotsPerTaskExecutor(freeSlots);
| 96 | +            pickedSlots =
| 97 | +                    pickSlotsInMinimalTaskExecutors(
| 98 | +                            slotsPerTaskExecutor, requestExecutionSlotSharingGroups);
| 99 | +        }
| 100 | +        return pickedSlots;
| 101 | +    }
| 102 | +
| 103 | +    /**
| 104 | +     * To minimize the number of task executors in use on the resource manager side in
| 105 | +     * application mode and to release idle task executors in a timely manner, slots on the task
| 106 | +     * executors with the most available slots are selected first.
| 107 | +     *
| 108 | +     * @param slotsPerTaskExecutor The free slots grouped by task executor.
| 109 | +     * @return The task executors, ordered by their number of free slots in descending order.
| 110 | +     */
| 111 | +    private Iterator<TaskManagerLocation> getSortedTaskExecutors(
| 112 | +            Map<TaskManagerLocation, ? extends Set<? extends SlotInfo>> slotsPerTaskExecutor) {
| 113 | +        final Comparator<TaskManagerLocation> taskExecutorComparator =
| 114 | +                (leftTml, rightTml) ->
| 115 | +                        Integer.compare(
| 116 | +                                slotsPerTaskExecutor.get(rightTml).size(),
| 117 | +                                slotsPerTaskExecutor.get(leftTml).size());
| 118 | +        return slotsPerTaskExecutor.keySet().stream().sorted(taskExecutorComparator).iterator();
| 119 | +    }
| 120 | +
| 121 | +    /**
| 122 | +     * Picks the slots to be assigned to the requested execution slot sharing groups.
| 123 | +     *
| 124 | +     * @param slotsByTaskExecutor The free slots grouped by task executor.
| 125 | +     * @param requestedGroups The number of requested execution slot sharing groups.
| 126 | +     * @return The picked slots, spread across a minimal number of task executors.
| 127 | +     */
| 128 | +    private Collection<? extends SlotInfo> pickSlotsInMinimalTaskExecutors(
| 129 | +            Map<TaskManagerLocation, ? extends Set<? extends SlotInfo>> slotsByTaskExecutor,
| 130 | +            int requestedGroups) {
| 131 | +        final List<SlotInfo> pickedSlots = new ArrayList<>();
| 132 | +        final Iterator<TaskManagerLocation> sortedTaskExecutors =
| 133 | +                getSortedTaskExecutors(slotsByTaskExecutor);
| 134 | +        while (pickedSlots.size() < requestedGroups) {
| 135 | +            Set<? extends SlotInfo> slotInfos = slotsByTaskExecutor.get(sortedTaskExecutors.next());
| 136 | +            pickedSlots.addAll(slotInfos);
| 137 | +        }
| 138 | +        return pickedSlots;
| 139 | +    }
| 140 | +
| 141 | +    private Map<TaskManagerLocation, ? extends Set<? extends SlotInfo>> getSlotsPerTaskExecutor(
| 142 | +            Collection<? extends SlotInfo> slots) {
| 143 | +        return slots.stream()
| 144 | +                .collect(
| 145 | +                        Collectors.groupingBy(
| 146 | +                                SlotInfo::getTaskManagerLocation,
| 147 | +                                Collectors.mapping(identity(), Collectors.toSet())));
76 | 148 |     }
77 | 149 | }
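
To make the picking strategy concrete, here is a minimal, self-contained sketch of the same greedy idea that getSortedTaskExecutors and pickSlotsInMinimalTaskExecutors implement above. This is not the PR's code: plain strings stand in for TaskManagerLocation and SlotInfo, and the class and method names are illustrative only.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class MinimalTaskExecutorPickSketch {

    // Greedily picks whole task executors, largest free-slot count first,
    // until at least requestedGroups slots have been collected.
    static List<String> pickSlots(
            Map<String, List<String>> slotsByTaskExecutor, int requestedGroups) {
        // Order task executors by their number of free slots, descending
        // (negated size gives a descending sort with an ascending comparator).
        Iterator<String> sortedTaskExecutors =
                slotsByTaskExecutor.keySet().stream()
                        .sorted(Comparator.comparingInt(tm -> -slotsByTaskExecutor.get(tm).size()))
                        .iterator();
        List<String> picked = new ArrayList<>();
        while (picked.size() < requestedGroups) {
            // Take every slot of the next-fullest task executor. Overshooting is harmless:
            // the caller consumes only as many slots as there are groups.
            picked.addAll(slotsByTaskExecutor.get(sortedTaskExecutors.next()));
        }
        return picked;
    }

    public static void main(String[] args) {
        Map<String, List<String>> freeSlots =
                Map.of(
                        "tm-1", Arrays.asList("tm-1/slot-0", "tm-1/slot-1"),
                        "tm-2", Arrays.asList("tm-2/slot-0", "tm-2/slot-1", "tm-2/slot-2"),
                        "tm-3", Arrays.asList("tm-3/slot-0"));
        // Three groups fit onto tm-2 alone; tm-1 and tm-3 are left untouched
        // and could be released by the resource manager.
        System.out.println(pickSlots(freeSlots, 3));
    }
}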
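Two details of the new logic are worth noting. First, pickSlotsInMinimalTaskExecutors may return more slots than requested, because it always takes every free slot of a chosen task executor; that is harmless, since assignSlots draws exactly one slot per execution slot sharing group from the iterator and ignores the surplus. Second, the freeSlots.size() > requestExecutionSlotSharingGroups guard runs the grouping and sorting only when there is a genuine surplus of slots; when the counts are equal, every free slot is needed and no choice of task executors could shrink the footprint.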