Skip to content

Commit 9c562c8

Browse files
authored
grpclb: do not ignore subchannels in CONNECTING state when aggregating the overall LB state (#7959)
We should treat both IDLE and CONNECTING subchannels as "connection in progress" when aggregating the overall load balancing state. Otherwise, RPCs could fail prematurely if one subchannel enters TRANSIENT_FAILURE (TF) while all the others are still CONNECTING. Commit 23d2796 made each individual subchannel stay in TF until READY if it was previously in TF, so subchannels in the CONNECTING state are those connecting for the first time. We should give them time to connect.
1 parent afe8831 commit 9c562c8

File tree

2 files changed

+7
-6
lines changed

2 files changed

+7
-6
lines changed

grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,7 @@ private void maybeUpdatePicker() {
776776
case ROUND_ROBIN:
777777
pickList = new ArrayList<>(backendList.size());
778778
Status error = null;
779-
boolean hasIdle = false;
779+
boolean hasPending = false;
780780
for (BackendEntry entry : backendList) {
781781
Subchannel subchannel = entry.subchannel;
782782
Attributes attrs = subchannel.getAttributes();
@@ -785,12 +785,12 @@ private void maybeUpdatePicker() {
785785
pickList.add(entry);
786786
} else if (stateInfo.getState() == TRANSIENT_FAILURE) {
787787
error = stateInfo.getStatus();
788-
} else if (stateInfo.getState() == IDLE) {
789-
hasIdle = true;
788+
} else {
789+
hasPending = true;
790790
}
791791
}
792792
if (pickList.isEmpty()) {
793-
if (error != null && !hasIdle) {
793+
if (error != null && !hasPending) {
794794
pickList.add(new ErrorEntry(error));
795795
state = TRANSIENT_FAILURE;
796796
} else {

grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,9 +1175,10 @@ public void roundRobinMode_subchannelStayTransientFailureUntilReady() {
11751175
deliverSubchannelState(subchannel2, ConnectivityStateInfo.forNonError(CONNECTING));
11761176
inOrder.verify(helper).updateBalancingState(eq(CONNECTING), any(SubchannelPicker.class));
11771177

1178-
// Switch subchannel1 to TRANSIENT_FAILURE, making the general state TRANSIENT_FAILURE too.
1179-
Status error = Status.UNAVAILABLE.withDescription("error1");
1178+
// Switch all subchannels to TRANSIENT_FAILURE, making the general state TRANSIENT_FAILURE too.
1179+
Status error = Status.UNAVAILABLE.withDescription("error");
11801180
deliverSubchannelState(subchannel1, ConnectivityStateInfo.forTransientFailure(error));
1181+
deliverSubchannelState(subchannel2, ConnectivityStateInfo.forTransientFailure(error));
11811182
inOrder.verify(helper).updateBalancingState(eq(TRANSIENT_FAILURE), pickerCaptor.capture());
11821183
assertThat(((RoundRobinPicker) pickerCaptor.getValue()).pickList)
11831184
.containsExactly(new ErrorEntry(error));

0 commit comments

Comments
 (0)