Skip to content

Commit 0c61209

Browse files
committed
rls: Avoid missed config update from reentrancy
Since ChildPolicyWrapper() called into the child before childPolicyMap.put(), it is possible for that child to call back into RLS and further update state without that child being known. When CDS is_dynamic=true (since ca99a8c), it registers the cluster with XdsDependencyManager, which adds a watch to XdsClient. If XdsClient already has the results cached then the watch callback can be enqueued immediately onto the syncContext and execute still within the constructor. Calling into the child with the lock held isn't great, as it allows for this type of reentrancy bug. But that'll take larger changes to fix. b/464116731
1 parent 83854d7 commit 0c61209

File tree

2 files changed

+29
-13
lines changed

2 files changed

+29
-13
lines changed

rls/src/main/java/io/grpc/rls/LbPolicyConfiguration.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,10 +248,10 @@ ChildPolicyWrapper createOrGet(String target) {
248248
RefCountedChildPolicyWrapper pooledChildPolicyWrapper = childPolicyMap.get(target);
249249
if (pooledChildPolicyWrapper == null) {
250250
ChildPolicyWrapper childPolicyWrapper = new ChildPolicyWrapper(
251-
target, childPolicy, childLbResolvedAddressFactory, childLbHelperProvider,
252-
childLbStatusListener);
251+
target, childPolicy, childLbHelperProvider, childLbStatusListener);
253252
pooledChildPolicyWrapper = RefCountedChildPolicyWrapper.of(childPolicyWrapper);
254253
childPolicyMap.put(target, pooledChildPolicyWrapper);
254+
childPolicyWrapper.start(childLbResolvedAddressFactory);
255255
return pooledChildPolicyWrapper.getObject();
256256
} else {
257257
ChildPolicyWrapper childPolicyWrapper = pooledChildPolicyWrapper.getObject();
@@ -298,7 +298,6 @@ static final class ChildPolicyWrapper {
298298
public ChildPolicyWrapper(
299299
String target,
300300
ChildLoadBalancingPolicy childPolicy,
301-
final ResolvedAddressFactory childLbResolvedAddressFactory,
302301
ChildLoadBalancerHelperProvider childLbHelperProvider,
303302
ChildLbStatusListener childLbStatusListener) {
304303
this.target = target;
@@ -313,6 +312,9 @@ public ChildPolicyWrapper(
313312
this.childLbConfig = lbConfig.getConfig();
314313
helper.getChannelLogger().log(
315314
ChannelLogLevel.DEBUG, "RLS child lb created. config: {0}", childLbConfig);
315+
}
316+
317+
void start(ResolvedAddressFactory childLbResolvedAddressFactory) {
316318
helper.getSynchronizationContext().execute(
317319
new Runnable() {
318320
@Override

rls/src/test/java/io/grpc/rls/LbPolicyConfigurationTest.java

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import static org.mockito.ArgumentMatchers.any;
2222
import static org.mockito.Mockito.doReturn;
2323
import static org.mockito.Mockito.mock;
24+
import static org.mockito.Mockito.times;
2425
import static org.mockito.Mockito.verify;
2526
import static org.mockito.Mockito.when;
2627

@@ -45,8 +46,8 @@
4546
import io.grpc.rls.LbPolicyConfiguration.ChildPolicyWrapper.ChildPolicyReportingHelper;
4647
import io.grpc.rls.LbPolicyConfiguration.InvalidChildPolicyConfigException;
4748
import io.grpc.rls.LbPolicyConfiguration.RefCountedChildPolicyWrapperFactory;
48-
import java.lang.Thread.UncaughtExceptionHandler;
4949
import java.util.Map;
50+
import java.util.concurrent.atomic.AtomicBoolean;
5051
import org.junit.Before;
5152
import org.junit.Test;
5253
import org.junit.runner.RunWith;
@@ -61,6 +62,9 @@ public class LbPolicyConfigurationTest {
6162
private final LoadBalancer lb = mock(LoadBalancer.class);
6263
private final SubchannelStateManager subchannelStateManager = new SubchannelStateManagerImpl();
6364
private final SubchannelPicker picker = mock(SubchannelPicker.class);
65+
private final SynchronizationContext syncContext = new SynchronizationContext((t, e) -> {
66+
throw new AssertionError(e);
67+
});
6468
private final ChildLbStatusListener childLbStatusListener = mock(ChildLbStatusListener.class);
6569
private final ResolvedAddressFactory resolvedAddressFactory =
6670
new ResolvedAddressFactory() {
@@ -84,15 +88,7 @@ public ResolvedAddresses create(Object childLbConfig) {
8488
@Before
8589
public void setUp() {
8690
doReturn(mock(ChannelLogger.class)).when(helper).getChannelLogger();
87-
doReturn(
88-
new SynchronizationContext(
89-
new UncaughtExceptionHandler() {
90-
@Override
91-
public void uncaughtException(Thread t, Throwable e) {
92-
throw new AssertionError(e);
93-
}
94-
}))
95-
.when(helper).getSynchronizationContext();
91+
doReturn(syncContext).when(helper).getSynchronizationContext();
9692
doReturn(lb).when(lbProvider).newLoadBalancer(any(Helper.class));
9793
doReturn(ConfigOrError.fromConfig(new Object()))
9894
.when(lbProvider).parseLoadBalancingPolicyConfig(ArgumentMatchers.<Map<String, ?>>any());
@@ -190,4 +186,22 @@ public void updateBalancingState_triggersListener() {
190186
// picker governs childPickers will be reported to parent LB
191187
verify(helper).updateBalancingState(ConnectivityState.READY, picker);
192188
}
189+
190+
@Test
191+
public void refCountedGetOrCreate_addsChildBeforeConfiguringChild() {
192+
AtomicBoolean calledAlready = new AtomicBoolean();
193+
when(lb.acceptResolvedAddresses(any(ResolvedAddresses.class))).thenAnswer(i -> {
194+
if (!calledAlready.get()) {
195+
calledAlready.set(true);
196+
// Should end up calling this function again, as this child should already be added to the
197+
// list of children. In practice, this can be caused by CDS is_dynamic=true starting a watch
198+
// when XdsClient already has the cluster cached (e.g., from another channel).
199+
syncContext.execute(() ->
200+
factory.acceptResolvedAddressFactory(resolvedAddressFactory));
201+
}
202+
return Status.OK;
203+
});
204+
ChildPolicyWrapper unused = factory.createOrGet("foo.google.com");
205+
verify(lb, times(2)).acceptResolvedAddresses(any(ResolvedAddresses.class));
206+
}
193207
}

0 commit comments

Comments
 (0)