Skip to content

Commit 589fc30

Browse files
committed
GEODE-9060: Remove the member from a copy of replicates as GII candid… (#6246)
* GEODE-9060: Remove the member from a copy of replicates as GII candidate if it's not part of the same distributed system, but leave original replicates unchanged. (cherry picked from commit 76a5afd)
1 parent 8a77331 commit 589fc30

File tree

2 files changed

+48
-4
lines changed

2 files changed

+48
-4
lines changed

geode-core/src/distributedTest/java/org/apache/geode/internal/cache/persistence/PersistentRecoveryOrderDUnitTest.java

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -877,11 +877,43 @@ public void testSplitBrain() {
877877
Throwable thrown = catchThrowable(() -> {
878878
createReplicateRegion(regionName, getDiskDirs(getVMId()));
879879
});
880-
assertThat(thrown).isInstanceOf(ConflictingPersistentDataException.class);
880+
assertThat(thrown)
881+
.isInstanceOf(ConflictingPersistentDataException.class)
882+
.hasMessageContaining("was not part of the same distributed system as the local data");
881883
}
882884
});
883885
}
884886

887+
/**
 * Exercises a split-brain style sequence that is expected to be recoverable: vm2 creates the
 * region and never closes it, while vm0 and vm1 take turns creating, modifying and closing the
 * region. Both vm0 and vm1 must then be able to recreate the region and see the latest value.
 * NOTE(review): presumably recovery succeeds because vm2 stays online as a replicate that knows
 * both members - confirm against PersistenceAdvisorImpl.checkMyStateOnMembers.
 */
@Test
888+
public void testRecoverableSplitBrain() {
889+
// vm2 creates the region first and keeps it open for the rest of the test.
vm2.invoke(() -> {
890+
createReplicateRegion(regionName, getDiskDirs(getVMId()));
891+
});
892+
// vm0 creates the region, writes A=B, then closes its copy of the region.
vm0.invoke(() -> {
893+
createReplicateRegion(regionName, getDiskDirs(getVMId()));
894+
putEntry("A", "B");
895+
getCache().getRegion(regionName).close();
896+
});
897+
898+
// vm1 creates the region while vm0's copy is closed, checks it sees A=B, changes it to A=C,
// then closes its copy as well.
vm1.invoke(() -> {
899+
createReplicateRegion(regionName, getDiskDirs(getVMId()));
900+
validateEntry("A", "B");
901+
updateEntry("A", "C");
902+
getCache().getRegion(regionName).close();
903+
});
904+
905+
// VM0 doesn't know that VM1 ever existed so it will start up.
906+
// After recreating the region, vm0 must observe the update made by vm1 (A=C).
vm0.invoke(() -> {
907+
createReplicateRegion(regionName, getDiskDirs(getVMId()));
908+
validateEntry("A", "C");
909+
});
910+
911+
// vm1 must also be able to recreate the region and still see A=C.
vm1.invoke(() -> {
912+
createReplicateRegion(regionName, getDiskDirs(getVMId()));
913+
validateEntry("A", "C");
914+
});
915+
}
916+
885917
/**
886918
* Test to make sure that if a member crashes while a GII is in progress, we wait for the
887919
* member to come back for starting.

geode-core/src/main/java/org/apache/geode/internal/cache/persistence/PersistenceAdvisorImpl.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -510,18 +510,18 @@ public Set<PersistentMemberID> getPersistedMembers() {
510510
public boolean checkMyStateOnMembers(Set<InternalDistributedMember> replicates)
511511
throws ReplyException {
512512
PersistentStateQueryResults remoteStates = getMyStateOnMembers(replicates);
513+
Set<InternalDistributedMember> copyOfReplicates = null;
513514

514515
persistenceAdvisorObserver.observe(regionPath);
515516

516517
boolean equal = false;
518+
PersistentMemberID myId = getPersistentID();
517519
for (Map.Entry<InternalDistributedMember, PersistentMemberState> entry : remoteStates
518520
.getStateOnPeers().entrySet()) {
519521
InternalDistributedMember member = entry.getKey();
520522
PersistentMemberID remoteId = remoteStates.getPersistentIds().get(member);
521523

522-
PersistentMemberID myId = getPersistentID();
523524
PersistentMemberState stateOnPeer = entry.getValue();
524-
525525
if (PersistentMemberState.REVOKED.equals(stateOnPeer)) {
526526
throw new RevokedPersistentDataException(
527527
String.format(
@@ -533,7 +533,19 @@ public boolean checkMyStateOnMembers(Set<InternalDistributedMember> replicates)
533533
String message = String.format(
534534
"Region %s remote member %s with persistent data %s was not part of the same distributed system as the local data from %s",
535535
regionPath, member, remoteId, myId);
536-
throw new ConflictingPersistentDataException(message);
536+
// Conceptually, a member removed because it does not know the current member should be equal to
537+
// the existing replicates.
538+
// It can still be used as a GII provider candidate, so use copyOfReplicates to avoid modifying
539+
// the replicates.
540+
if (copyOfReplicates == null) {
541+
copyOfReplicates = new HashSet<>(replicates);
542+
}
543+
copyOfReplicates.remove(member);
544+
if (copyOfReplicates.isEmpty()) {
545+
throw new ConflictingPersistentDataException(message);
546+
} else {
547+
logger.info(message);
548+
}
537549
}
538550

539551
if (myId != null && stateOnPeer == PersistentMemberState.EQUAL) {

0 commit comments

Comments
 (0)