Skip to content

Commit 5446985

Browse files
author
Tianci Shen
committed
feat: add nucleus connectivity validation
1 parent 2f671e3 commit 5446985

File tree

6 files changed

+150
-7
lines changed

6 files changed

+150
-7
lines changed

src/main/java/com/aws/greengrass/deployment/DeploymentConfigMerger.java

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,14 @@
2727
import com.aws.greengrass.lifecyclemanager.exceptions.ServiceLoadException;
2828
import com.aws.greengrass.logging.api.Logger;
2929
import com.aws.greengrass.logging.impl.LogManager;
30+
import com.aws.greengrass.mqttclient.MqttClient;
3031
import com.aws.greengrass.util.Coerce;
3132
import lombok.AccessLevel;
3233
import lombok.AllArgsConstructor;
3334
import lombok.Getter;
3435
import software.amazon.awssdk.services.greengrassv2.model.DeploymentComponentUpdatePolicyAction;
3536

37+
import java.time.Duration;
3638
import java.util.Collection;
3739
import java.util.HashMap;
3840
import java.util.HashSet;
@@ -66,6 +68,8 @@ public class DeploymentConfigMerger {
6668
private Kernel kernel;
6769
private DeviceConfiguration deviceConfiguration;
6870
private DynamicComponentConfigurationValidator validator;
71+
private MqttClient mqttClient;
72+
private ThingGroupHelper thingGroupHelper;
6973

7074
/**
7175
* Merge in new configuration values and new services.
@@ -143,7 +147,7 @@ private void updateActionForDeployment(Map<String, Object> newConfig, Deployment
143147
}
144148

145149
// Validate the AWS region, IoT credentials endpoint as well as the IoT data endpoint.
146-
if (!validateNucleusConfig(totallyCompleteFuture, nucleusConfig)) {
150+
if (!validateNucleusConfig(totallyCompleteFuture, activator, deployment, nucleusConfig)) {
147151
return;
148152
}
149153

@@ -153,6 +157,7 @@ private void updateActionForDeployment(Map<String, Object> newConfig, Deployment
153157
}
154158

155159
private boolean validateNucleusConfig(CompletableFuture<DeploymentResult> totallyCompleteFuture,
160+
DeploymentActivator activator, Deployment deployment,
156161
Map<String, Object> nucleusConfig) {
157162
if (nucleusConfig != null) {
158163
String awsRegion = tryGetAwsRegionFromNewConfig(nucleusConfig);
@@ -166,10 +171,79 @@ private boolean validateNucleusConfig(CompletableFuture<DeploymentResult> totall
166171
.complete(new DeploymentResult(DeploymentResult.DeploymentStatus.FAILED_NO_STATE_CHANGE, e));
167172
return false;
168173
}
174+
175+
Integer timeoutSec = deployment.getDeploymentDocumentObj().getConfigurationValidationPolicy().timeoutInSeconds();
176+
long configTimeout = Duration.ofSeconds(20).toMillis();
177+
if (timeoutSec != null) {
178+
configTimeout = Duration.ofSeconds(timeoutSec).toMillis();
179+
}
180+
181+
Map<String, Object> currentConfig = kernel.getConfig().toPOJO();
182+
if (configTimeout == 0
183+
|| !deviceConfiguration.isDeviceConfiguredToTalkToCloud()
184+
|| !hasNucleusConfigurationChange(currentConfig, nucleusConfig)) {
185+
logger.atInfo().log("Skipping connectivity validation");
186+
return true;
187+
}
188+
try {
189+
// Update Nucleus with new config
190+
logger.atInfo().log("Applying Nucleus config");
191+
Map<String, Object> modifiedConfig = kernel.getConfig().toPOJO();
192+
replaceNucleusConfiguration(modifiedConfig, nucleusConfig);
193+
activator.mergeConfig(modifiedConfig, deployment);
194+
TimeUnit.SECONDS.sleep(5);
195+
196+
// Check that MQTT client has reconnected
197+
logger.atInfo().log("Checking MQTT Reconnected");
198+
mqttClient.waitForReconnect(configTimeout);
199+
200+
// Check that HTTP client works
201+
logger.atInfo().log("Checking HTTP Reconnected");
202+
thingGroupHelper.waitForReconnect(configTimeout);
203+
} catch (Exception e) {
204+
activator.mergeConfig(currentConfig, deployment);
205+
logger.atError().cause(e).log("Nucleus connectivity validation failed");
206+
totallyCompleteFuture
207+
.complete(new DeploymentResult(DeploymentResult.DeploymentStatus.FAILED_NO_STATE_CHANGE, e));
208+
return false;
209+
}
169210
}
170211
return true;
171212
}
172213

214+
private void replaceNucleusConfiguration(Map<String, Object> kernelConfig, Map<String, Object> nucleusConfig) {
215+
Map<String, Object> currentNucleusConfig = getNucleusConfiguration(kernelConfig);
216+
if (nucleusConfig != null){
217+
currentNucleusConfig.put(CONFIGURATION_CONFIG_KEY, nucleusConfig);
218+
}
219+
}
220+
221+
private boolean hasNucleusConfigurationChange(Map<String, Object> kernelConfig, Map<String, Object> nucleusConfig) {
222+
Map<String, Object> currentNucleusConfig = getNucleusConfiguration(kernelConfig);
223+
if (nucleusConfig != null) {
224+
return currentNucleusConfig.equals(nucleusConfig);
225+
}
226+
return false;
227+
}
228+
229+
private Map<String, Object> getNucleusConfiguration(Map<String, Object> kernelConfig){
230+
Map<String, Object> nucleusConfig = null;
231+
if (kernelConfig.containsKey(SERVICES_NAMESPACE_TOPIC)) {
232+
// Get services config if it exists
233+
Map<String, Object> serviceConfig = (Map<String, Object>) kernelConfig.get(SERVICES_NAMESPACE_TOPIC);
234+
if (serviceConfig.containsKey(deviceConfiguration.getNucleusComponentName())) {
235+
// Get nucleus config if it exists
236+
Map<String, Object> currentNucleusConfig = (Map<String, Object>) serviceConfig
237+
.get(deviceConfiguration.getNucleusComponentName());
238+
if (currentNucleusConfig.containsKey(CONFIGURATION_CONFIG_KEY)){
239+
// Replace nucleus config new the new values
240+
nucleusConfig = (Map<String, Object>) currentNucleusConfig.get(CONFIGURATION_CONFIG_KEY);
241+
}
242+
}
243+
}
244+
return nucleusConfig;
245+
}
246+
173247
/**
174248
* Completes the provided future when all of the listed services are running.
175249
*
@@ -307,7 +381,6 @@ public AggregateServicesChangeManager createRollbackManager() {
307381

308382
/**
309383
* Start the new services the merge intends to add.
310-
*
311384
*/
312385
public void startNewServices() {
313386
for (String serviceName : servicesToAdd) {

src/main/java/com/aws/greengrass/deployment/ThingGroupHelper.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
package com.aws.greengrass.deployment;
77

8+
import com.aws.greengrass.deployment.errorcode.DeploymentErrorCode;
9+
import com.aws.greengrass.deployment.exceptions.ComponentConfigurationValidationException;
810
import com.aws.greengrass.deployment.exceptions.DeviceConfigurationException;
911
import com.aws.greengrass.deployment.exceptions.RetryableServerErrorException;
1012
import com.aws.greengrass.logging.api.Logger;
@@ -26,6 +28,7 @@
2628
import java.util.List;
2729
import java.util.Optional;
2830
import java.util.Set;
31+
import java.util.concurrent.TimeUnit;
2932
import java.util.concurrent.atomic.AtomicReference;
3033
import javax.inject.Inject;
3134

@@ -102,4 +105,41 @@ public Optional<Set<String>> listThingGroupsForDevice(int maxAttemptCount) throw
102105
return Optional.of(thingGroupNames);
103106
}, "get-thing-group-hierarchy", logger);
104107
}
108+
109+
public void waitForReconnect(long timeoutMillis) throws Exception {
110+
if (!deviceConfiguration.isDeviceConfiguredToTalkToCloud()) {
111+
return;
112+
}
113+
Duration initialInterval = Duration.ofMillis(timeoutMillis / 8);
114+
Duration maxRetryInterval = Duration.ofMillis(timeoutMillis / 4);
115+
116+
try {
117+
RetryUtils.runWithRetry(clientExceptionRetryConfig.toBuilder()
118+
.maxAttempt(3)
119+
.initialRetryInterval(initialInterval)
120+
.maxRetryInterval(maxRetryInterval)
121+
.build(),
122+
() -> {
123+
ListThingGroupsForCoreDeviceRequest request = ListThingGroupsForCoreDeviceRequest.builder()
124+
.coreDeviceThingName(Coerce.toString(deviceConfiguration.getThingName()))
125+
.build();
126+
127+
ListThingGroupsForCoreDeviceResponse response;
128+
try {
129+
response =
130+
clientFactory.fetchGreengrassV2DataClient().listThingGroupsForCoreDevice(request);
131+
} catch (GreengrassV2DataException e) {
132+
if (RetryUtils.retryErrorCodes(e.statusCode())) {
133+
throw new RetryableServerErrorException("Failed with retryable error " + e.statusCode()
134+
+ " when calling listThingGroupsForCoreDevice", e);
135+
}
136+
throw e;
137+
}
138+
return response;
139+
}, "get-thing-group-hierarchy", logger);
140+
} catch (Exception e) {
141+
throw new ComponentConfigurationValidationException("HTTP client failed to reconnect with new configuration",
142+
DeploymentErrorCode.FAILED_TO_RECONNECT);
143+
}
144+
}
105145
}

src/main/java/com/aws/greengrass/deployment/activator/DeploymentActivator.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ protected DeploymentActivator(Kernel kernel) {
4343
public abstract void activate(Map<String, Object> newConfig, Deployment deployment,
4444
CompletableFuture<DeploymentResult> totallyCompleteFuture);
4545

46+
public void mergeConfig(Map<String, Object> newConfig, Deployment deployment) {
47+
DeploymentDocument deploymentDocument = deployment.getDeploymentDocumentObj();
48+
updateConfiguration(deploymentDocument.getTimestamp(), newConfig);
49+
}
50+
4651
protected boolean takeConfigSnapshot(CompletableFuture<DeploymentResult> totallyCompleteFuture) {
4752
try {
4853
deploymentDirectoryManager.takeConfigSnapshot(deploymentDirectoryManager.getSnapshotFilePath());

src/main/java/com/aws/greengrass/deployment/errorcode/DeploymentErrorCode.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ public enum DeploymentErrorCode {
8787
UNSUPPORTED_REGION(DeploymentErrorType.REQUEST_ERROR),
8888
IOT_CRED_ENDPOINT_FORMAT_NOT_VALID(DeploymentErrorType.REQUEST_ERROR),
8989
IOT_DATA_ENDPOINT_FORMAT_NOT_VALID(DeploymentErrorType.REQUEST_ERROR),
90+
FAILED_TO_RECONNECT(DeploymentErrorType.REQUEST_ERROR),
9091

9192
/* Docker issues */
9293
DOCKER_ERROR(DeploymentErrorType.DEPENDENCY_ERROR),

src/main/java/com/aws/greengrass/mqttclient/MqttClient.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import com.aws.greengrass.config.Topics;
99
import com.aws.greengrass.config.WhatHappened;
1010
import com.aws.greengrass.deployment.DeviceConfiguration;
11+
import com.aws.greengrass.deployment.errorcode.DeploymentErrorCode;
12+
import com.aws.greengrass.deployment.exceptions.ComponentConfigurationValidationException;
1113
import com.aws.greengrass.lifecyclemanager.Kernel;
1214
import com.aws.greengrass.logging.api.LogEventBuilder;
1315
import com.aws.greengrass.logging.api.Logger;
@@ -159,6 +161,7 @@ public class MqttClient implements Closeable {
159161
private int maxPublishRetryCount;
160162
private int maxPublishMessageSize;
161163
private final AtomicBoolean isClosed = new AtomicBoolean(false);
164+
// Set to false when a configuration change triggers MQTT reconfiguration and back to true once no
// broken connections remain. It is polled by waitForReconnect, so it is volatile to make writes
// from the callback/reconnect code visible to the polling thread.
private volatile boolean isReconnected = true;
162165

163166
@Getter(AccessLevel.PROTECTED)
164167
private final MqttClientConnectionEvents callbacks = new MqttClientConnectionEvents() {
@@ -312,6 +315,7 @@ protected MqttClient(DeviceConfiguration deviceConfiguration,
312315

313316
logger.atDebug().kv("modifiedNode", node.getFullName()).kv("changeType", what)
314317
.log("Reconfiguring MQTT clients");
318+
isReconnected = false;
315319
return false;
316320
}, (what) -> {
317321
validateAndSetMqttPublishConfiguration();
@@ -351,6 +355,9 @@ protected MqttClient(DeviceConfiguration deviceConfiguration,
351355
.log("Error while reconnecting MQTT client");
352356
}
353357
}
358+
if (brokenConnections.isEmpty()){
359+
isReconnected = true;
360+
}
354361
} while (!brokenConnections.isEmpty());
355362
}, 1, TimeUnit.SECONDS));
356363

@@ -1041,4 +1048,16 @@ public int getMqttOperationTimeoutMillis() {
10411048
private String getMqttVersion() {
10421049
return Coerce.toString(mqttTopics.findOrDefault(DEFAULT_MQTT_VERSION, MQTT_VERSION_KEY));
10431050
}
1051+
1052+
public void waitForReconnect(long timeoutMillis) throws Exception {
1053+
long timout = System.currentTimeMillis() + timeoutMillis;
1054+
while(System.currentTimeMillis() < timout) {
1055+
if (isReconnected) {
1056+
return;
1057+
}
1058+
TimeUnit.SECONDS.sleep(1);
1059+
}
1060+
throw new ComponentConfigurationValidationException("MQTT client failed to reconnect with new configuration",
1061+
DeploymentErrorCode.FAILED_TO_RECONNECT);
1062+
}
10441063
}

src/test/java/com/aws/greengrass/deployment/DeploymentConfigMergerTest.java

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import com.aws.greengrass.lifecyclemanager.exceptions.ServiceLoadException;
2727
import com.aws.greengrass.logging.api.Logger;
2828
import com.aws.greengrass.logging.impl.LogManager;
29+
import com.aws.greengrass.mqttclient.MqttClient;
2930
import com.aws.greengrass.testcommons.testutilities.GGExtension;
3031
import org.junit.jupiter.api.AfterEach;
3132
import org.junit.jupiter.api.BeforeEach;
@@ -90,6 +91,10 @@ class DeploymentConfigMergerTest {
9091
@Mock
9192
private DynamicComponentConfigurationValidator validator;
9293
@Mock
94+
private MqttClient mqttClient;
95+
@Mock
96+
private ThingGroupHelper thingGroupHelper;
97+
@Mock
9398
private Context context;
9499

95100
@BeforeEach
@@ -307,7 +312,7 @@ void GIVEN_deployment_WHEN_check_safety_selected_THEN_check_safety_before_update
307312
when(deploymentActivatorFactory.getDeploymentActivator(any())).thenReturn(deploymentActivator);
308313
when(context.get(DeploymentActivatorFactory.class)).thenReturn(deploymentActivatorFactory);
309314

310-
DeploymentConfigMerger merger = new DeploymentConfigMerger(kernel, deviceConfiguration, validator);
315+
DeploymentConfigMerger merger = new DeploymentConfigMerger(kernel, deviceConfiguration, validator, mqttClient, thingGroupHelper);
311316

312317
DeploymentDocument doc = new DeploymentDocument();
313318
doc.setConfigurationArn("NoSafetyCheckDeploy");
@@ -345,7 +350,7 @@ void GIVEN_deployment_WHEN_task_cancelled_THEN_update_is_cancelled() throws Thro
345350
});
346351

347352
// GIVEN
348-
DeploymentConfigMerger merger = new DeploymentConfigMerger(kernel, deviceConfiguration, validator);
353+
DeploymentConfigMerger merger = new DeploymentConfigMerger(kernel, deviceConfiguration, validator, mqttClient, thingGroupHelper);
349354
DeploymentDocument doc = mock(DeploymentDocument.class);
350355
when(doc.getDeploymentId()).thenReturn("DeploymentId");
351356
when(doc.getComponentUpdatePolicy()).thenReturn(
@@ -381,7 +386,7 @@ void GIVEN_deployment_WHEN_task_not_cancelled_THEN_update_is_continued() throws
381386
when(context.get(DefaultActivator.class)).thenReturn(defaultActivator);
382387

383388
// GIVEN
384-
DeploymentConfigMerger merger = new DeploymentConfigMerger(kernel, deviceConfiguration, validator);
389+
DeploymentConfigMerger merger = new DeploymentConfigMerger(kernel, deviceConfiguration, validator, mqttClient, thingGroupHelper);
385390
DeploymentDocument doc = mock(DeploymentDocument.class);
386391
when(doc.getDeploymentId()).thenReturn("DeploymentId");
387392
when(doc.getComponentUpdatePolicy()).thenReturn(
@@ -437,7 +442,7 @@ void GIVEN_deployment_activate_WHEN_deployment_has_new_config_THEN_new_config_is
437442
newConfig2.put(DEFAULT_NUCLEUS_COMPONENT_NAME, newConfig3);
438443
newConfig.put(SERVICES_NAMESPACE_TOPIC, newConfig2);
439444
// GIVEN
440-
DeploymentConfigMerger merger = new DeploymentConfigMerger(kernel, deviceConfiguration, validator);
445+
DeploymentConfigMerger merger = new DeploymentConfigMerger(kernel, deviceConfiguration, validator, mqttClient, thingGroupHelper);
441446
DeploymentDocument doc = mock(DeploymentDocument.class);
442447
when(doc.getDeploymentId()).thenReturn("DeploymentId");
443448
when(doc.getComponentUpdatePolicy()).thenReturn(
@@ -498,7 +503,7 @@ void GIVEN_deployment_activate_WHEN_deployment_has_some_new_config_THEN_old_conf
498503
newConfig2.put(DEFAULT_NUCLEUS_COMPONENT_NAME, newConfig3);
499504
newConfig.put(SERVICES_NAMESPACE_TOPIC, newConfig2);
500505
// GIVEN
501-
DeploymentConfigMerger merger = new DeploymentConfigMerger(kernel, deviceConfiguration, validator);
506+
DeploymentConfigMerger merger = new DeploymentConfigMerger(kernel, deviceConfiguration, validator, mqttClient, thingGroupHelper);
502507
DeploymentDocument doc = mock(DeploymentDocument.class);
503508
when(doc.getDeploymentId()).thenReturn("DeploymentId");
504509
when(doc.getComponentUpdatePolicy()).thenReturn(

0 commit comments

Comments
 (0)