@@ -839,7 +839,8 @@ class DDTeamCollectionImpl {
839839 // If there are too few machines to even build teams or there are too few represented datacenters, can't
840840 // build any team.
841841 self->lastBuildTeamsFailed = true ;
842- TraceEvent (SevWarnAlways, " BuildTeamsNotEnoughUniqueMachines" , self->distributorId )
842+ TraceEvent (SevWarnAlways, " BuildTeamsLastBuildTeamsFailed" , self->distributorId )
843+ .detail (" Reason" , " Do not have enough unique machines" )
843844 .detail (" Primary" , self->primary )
844845 .detail (" UniqueMachines" , uniqueMachines)
845846 .detail (" Replication" , self->configuration .storageTeamSize );
@@ -4424,7 +4425,8 @@ bool DDTeamCollection::isValidLocality(Reference<IReplicationPolicy> storagePoli
44244425void DDTeamCollection::evaluateTeamQuality () const {
44254426 int teamCount = teams.size (), serverCount = allServers.size ();
44264427 double teamsPerServer = (double )teamCount * configuration.storageTeamSize / serverCount;
4427-
4428+ const int targetTeamNumPerServer =
4429+ (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (configuration.storageTeamSize + 1 )) / 2 ;
44284430 ASSERT_EQ (serverCount, server_info.size ());
44294431
44304432 int minTeams = std::numeric_limits<int >::max ();
@@ -4440,6 +4442,16 @@ void DDTeamCollection::evaluateTeamQuality() const {
44404442 varTeams += (stc - teamsPerServer) * (stc - teamsPerServer);
44414443 // Use zoneId as server's machine id
44424444 machineTeams[info->getLastKnownInterface ().locality .zoneId ()] += stc;
4445+ // Check invariant: if latest buildTeam succeeds, then each server must have at least
4446+ // targetTeamNumPerServer serverTeams
4447+ // lastBuildTeamsFailed is set only when (1) the machine count is less than the configured team size;
4448+ // (2) no server team candidate is found when creating a server team; or (3) adding a machine team fails
4449+ if (SERVER_KNOBS->DD_VALIDATE_SERVER_TEAM_COUNT_AFTER_BUILD_TEAM && !lastBuildTeamsFailed &&
4450+ stc < targetTeamNumPerServer) {
4451+ TraceEvent (SevError, " NewAddServerNotMatchTargetSTCount" , distributorId)
4452+ .detail (" CurrentServerTeams" , stc)
4453+ .detail (" TargetServerTeams" , targetTeamNumPerServer);
4454+ }
44434455 }
44444456 }
44454457 varTeams /= teamsPerServer * teamsPerServer;
@@ -5063,9 +5075,9 @@ int DDTeamCollection::addBestMachineTeams(int machineTeamsToBuild) {
50635075 // When too many teams exist in simulation, traceAllInfo will buffer too many trace logs before
50645076 // trace has a chance to flush its buffer, which causes assertion failure.
50655077 traceAllInfo (!g_network->isSimulated ());
5066- TraceEvent (SevWarn, " DataDistributionBuildTeams " , distributorId)
5078+ TraceEvent (SevWarn, " BuildTeamsLastBuildTeamsFailed " , distributorId)
50675079 .detail (" Primary" , primary)
5068- .detail (" Reason" , " Unable to make desired machine Teams " )
5080+ .detail (" Reason" , " Unable to make desired machineTeams " )
50695081 .detail (" Hint" , " Check TraceAllInfo event" );
50705082 lastBuildTeamsFailed = true ;
50715083 break ;
@@ -5475,6 +5487,11 @@ int DDTeamCollection::addTeamsBestOf(int teamsToBuild, int desiredTeams, int max
54755487 if (bestServerTeam.size () != configuration.storageTeamSize ) {
54765488 // Not find any team and will unlikely find a team
54775489 lastBuildTeamsFailed = true ;
5490+ TraceEvent (SevWarn, " BuildTeamsLastBuildTeamsFailed" , distributorId)
5491+ .detail (" Reason" , " Unable to find any valid serverTeam" )
5492+ .detail (" Primary" , primary)
5493+ .detail (" BestServerTeam" , describe (bestServerTeam))
5494+ .detail (" ConfigStorageTeamSize" , configuration.storageTeamSize );
54785495 break ;
54795496 }
54805497
0 commit comments