Skip to content

Commit 5f2c4dd

Browse files
committed
Collect timesync status in inventory
1 parent 782ff1e commit 5f2c4dd

File tree

18 files changed

+523
-51
lines changed

18 files changed

+523
-51
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dev-tools/ls-apis/tests/api_dependencies.out

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ Nexus Internal API (client: nexus-client)
7171
consumed by: propolis-server (propolis/bin/propolis-server) via 3 paths
7272

7373
NTP Admin (client: ntp-admin-client)
74+
consumed by: omicron-nexus (omicron/nexus) via 2 paths
7475
consumed by: omicron-sled-agent (omicron/sled-agent) via 2 paths
7576

7677
External API (client: oxide-client)

dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,4 +393,8 @@ KEEPER MEMBERSHIP
393393
no membership retrieved
394394

395395

396+
COCKROACH STATUS
397+
no cockroach status retrieved
398+
399+
396400

internal-dns/resolver/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ hickory-resolver.workspace = true
1313
hickory-proto.workspace = true
1414
internal-dns-types.workspace = true
1515
omicron-common.workspace = true
16+
omicron-uuid-kinds.workspace = true
1617
omicron-workspace-hack.workspace = true
1718
qorb.workspace = true
1819
reqwest = { workspace = true, features = ["rustls-tls", "stream"] }

internal-dns/resolver/src/resolver.rs

Lines changed: 236 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,20 @@
22
// License, v. 2.0. If a copy of the MPL was not distributed with this
33
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
44

5+
use hickory_resolver::ResolveError as HickoryResolveError;
6+
use hickory_resolver::ResolveErrorKind as HickoryResolveErrorKind;
57
use hickory_resolver::TokioResolver;
68
use hickory_resolver::config::{
79
LookupIpStrategy, NameServerConfig, ResolveHosts, ResolverConfig,
810
ResolverOpts,
911
};
1012
use hickory_resolver::lookup::SrvLookup;
1113
use hickory_resolver::name_server::TokioConnectionProvider;
12-
use internal_dns_types::names::ServiceName;
14+
use internal_dns_types::names::{DNS_ZONE, ServiceName};
1315
use omicron_common::address::{
1416
AZ_PREFIX, DNS_PORT, Ipv6Subnet, get_internal_dns_server_addresses,
1517
};
18+
use omicron_uuid_kinds::OmicronZoneUuid;
1619
use slog::{debug, error, info, trace};
1720
use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6};
1821

@@ -28,6 +31,37 @@ pub enum ResolveError {
2831
NotFoundByString(String),
2932
}
3033

34+
/// Reports whether a hickory resolver error is a protocol-level
/// "no records found" response.
///
/// Any other error kind, including other protocol errors, yields `false`.
fn is_no_records_found(err: &hickory_resolver::ResolveError) -> bool {
    // Only protocol errors can carry the "no records found" condition.
    let hickory_resolver::ResolveErrorKind::Proto(proto_error) = err.kind()
    else {
        return false;
    };
    matches!(
        proto_error.kind(),
        hickory_resolver::proto::ProtoErrorKind::NoRecordsFound { .. }
    )
}
47+
48+
impl ResolveError {
49+
/// Returns "true" if this error indicates the record is not found.
50+
pub fn is_not_found(&self) -> bool {
51+
match self {
52+
ResolveError::NotFound(_) | ResolveError::NotFoundByString(_) => {
53+
true
54+
}
55+
ResolveError::Resolve(hickory_err)
56+
if is_no_records_found(&hickory_err) =>
57+
{
58+
true
59+
}
60+
_ => false,
61+
}
62+
}
63+
}
64+
3165
/// A wrapper around a set of bootstrap DNS addresses, providing a convenient
3266
/// way to construct a [`qorb::resolvers::dns::DnsResolver`] for specific
3367
/// services.
@@ -314,6 +348,40 @@ impl Resolver {
314348
}
315349
}
316350

351+
/// Returns the targets of the SRV records for a DNS name with their
352+
/// associated zone UUIDs.
353+
///
354+
/// Similar to [`Resolver::lookup_all_socket_v6`], but extracts the
355+
/// OmicronZoneUuid from DNS target names that follow the pattern
356+
/// `{uuid}.host.{DNS_ZONE}`. Returns a list of (OmicronZoneUuid,
357+
/// SocketAddrV6) pairs.
358+
///
359+
/// Returns an error if any target cannot be parsed as a zone UUID pattern.
360+
pub async fn lookup_all_socket_and_zone_v6(
361+
&self,
362+
service: ServiceName,
363+
) -> Result<Vec<(OmicronZoneUuid, SocketAddrV6)>, ResolveError> {
364+
let name = service.srv_name();
365+
trace!(self.log, "lookup_all_socket_and_zone_v6 srv"; "dns_name" => &name);
366+
let response = self.resolver.srv_lookup(&name).await?;
367+
debug!(
368+
self.log,
369+
"lookup_all_socket_and_zone_v6 srv";
370+
"dns_name" => &name,
371+
"response" => ?response
372+
);
373+
374+
let results = self
375+
.lookup_service_targets_with_zones(response)
376+
.await?
377+
.collect::<Vec<_>>();
378+
if !results.is_empty() {
379+
Ok(results)
380+
} else {
381+
Err(ResolveError::NotFound(service))
382+
}
383+
}
384+
317385
// Returns an iterator of SocketAddrs for the specified SRV name.
318386
//
319387
// Acts on a raw string for compatibility with the reqwest::dns::Resolve
@@ -399,6 +467,99 @@ impl Resolver {
399467
.flatten()
400468
}
401469

470+
/// Similar to [`Resolver::lookup_service_targets`], but extracts zone UUIDs from target names.
471+
///
472+
/// Returns an iterator of (OmicronZoneUuid, SocketAddrV6) pairs for targets that match
473+
/// the pattern `{uuid}.host.{DNS_ZONE}`. Returns an error if any target doesn't match
474+
/// this pattern.
475+
async fn lookup_service_targets_with_zones(
476+
&self,
477+
service_lookup: SrvLookup,
478+
) -> Result<
479+
impl Iterator<Item = (OmicronZoneUuid, SocketAddrV6)> + Send,
480+
ResolveError,
481+
> {
482+
let futures =
483+
std::iter::repeat((self.log.clone(), self.resolver.clone()))
484+
.zip(service_lookup.into_iter())
485+
.map(|((log, resolver), srv)| async move {
486+
let target = srv.target();
487+
let port = srv.port();
488+
let target_str = target.to_string();
489+
// Try to parse the zone UUID from the target name
490+
let zone_uuid = match Self::parse_zone_uuid_from_target(&target_str) {
491+
Some(uuid) => uuid,
492+
None => {
493+
error!(
494+
log,
495+
"lookup_service_targets_with_zones: target doesn't match zone pattern";
496+
"target" => ?target_str,
497+
);
498+
return Err((
499+
target.clone(),
500+
HickoryResolveError::from(
501+
HickoryResolveErrorKind::Message(
502+
"target doesn't match zone pattern"
503+
)
504+
)
505+
));
506+
}
507+
};
508+
trace!(
509+
log,
510+
"lookup_service_targets_with_zones: looking up SRV target";
511+
"name" => ?target,
512+
"zone_uuid" => ?zone_uuid,
513+
);
514+
resolver
515+
.ipv6_lookup(target.clone())
516+
.await
517+
.map(|ips| (ips, port, zone_uuid))
518+
.map_err(|err| (target.clone(), err))
519+
});
520+
let log = self.log.clone();
521+
let results = futures::future::join_all(futures).await;
522+
let mut socket_addrs = Vec::new();
523+
for result in results {
524+
match result {
525+
Ok((ips, port, zone_uuid)) => {
526+
// Add all IP addresses for this zone
527+
for aaaa in ips {
528+
socket_addrs.push((
529+
zone_uuid,
530+
SocketAddrV6::new(aaaa.into(), port, 0, 0),
531+
));
532+
}
533+
}
534+
Err((target, err)) => {
535+
error!(
536+
log,
537+
"lookup_service_targets_with_zones: failed looking up target";
538+
"name" => ?target,
539+
"error" => ?err,
540+
);
541+
return Err(ResolveError::Resolve(err));
542+
}
543+
}
544+
}
545+
Ok(socket_addrs.into_iter())
546+
}
547+
548+
/// Parse a zone UUID from a DNS target name following the pattern `{uuid}.host.{DNS_ZONE}`.
549+
fn parse_zone_uuid_from_target(target: &str) -> Option<OmicronZoneUuid> {
550+
// Remove trailing dot if present
551+
let target = target.strip_suffix('.').unwrap_or(target);
552+
553+
// Expected format: "{uuid}.host.{DNS_ZONE}"
554+
let expected_suffix = format!(".host.{}", DNS_ZONE);
555+
556+
if let Some(uuid_str) = target.strip_suffix(&expected_suffix) {
557+
uuid_str.parse::<OmicronZoneUuid>().ok()
558+
} else {
559+
None
560+
}
561+
}
562+
402563
/// Lookup a specific record's IPv6 address
403564
///
404565
/// In general, callers should _not_ be using this function, and instead
@@ -436,7 +597,7 @@ mod test {
436597
use internal_dns_types::names::DNS_ZONE;
437598
use internal_dns_types::names::ServiceName;
438599
use omicron_test_utils::dev::test_setup_log;
439-
use omicron_uuid_kinds::OmicronZoneUuid;
600+
use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid};
440601
use slog::{Logger, o};
441602
use std::collections::HashMap;
442603
use std::net::Ipv6Addr;
@@ -1131,4 +1292,77 @@ mod test {
11311292
dns_server.cleanup_successful();
11321293
logctx.cleanup_successful();
11331294
}
1295+
1296+
#[tokio::test]
1297+
async fn lookup_all_socket_and_zone_v6_success_and_failure() {
1298+
let logctx =
1299+
test_setup_log("lookup_all_socket_and_zone_v6_success_and_failure");
1300+
let dns_server = DnsServer::create(&logctx.log).await;
1301+
let resolver = dns_server.resolver().unwrap();
1302+
1303+
// Create DNS config with both zone and sled services
1304+
let mut dns_config = DnsConfigBuilder::new();
1305+
1306+
// Add a zone service (BoundaryNtp) that should succeed
1307+
let zone_uuid = OmicronZoneUuid::new_v4();
1308+
let zone_ip = Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x1);
1309+
let zone_port = 8080;
1310+
let zone_host = dns_config.host_zone(zone_uuid, zone_ip).unwrap();
1311+
dns_config
1312+
.service_backend_zone(
1313+
ServiceName::BoundaryNtp,
1314+
&zone_host,
1315+
zone_port,
1316+
)
1317+
.unwrap();
1318+
1319+
// Add a sled service (SledAgent) that should fail
1320+
let sled_uuid = SledUuid::new_v4();
1321+
let sled_ip = Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x2);
1322+
let sled_port = 8081;
1323+
let sled_host = dns_config.host_sled(sled_uuid, sled_ip).unwrap();
1324+
dns_config
1325+
.service_backend_sled(
1326+
ServiceName::SledAgent(sled_uuid),
1327+
&sled_host,
1328+
sled_port,
1329+
)
1330+
.unwrap();
1331+
1332+
let dns_config = dns_config.build_full_config_for_initial_generation();
1333+
dns_server.update(&dns_config).await.unwrap();
1334+
1335+
// Test 1: Zone service should succeed
1336+
let zone_results = resolver
1337+
.lookup_all_socket_and_zone_v6(ServiceName::BoundaryNtp)
1338+
.await
1339+
.expect("Should have been able to look up zone service");
1340+
1341+
assert_eq!(zone_results.len(), 1);
1342+
let (returned_zone_uuid, returned_addr) = &zone_results[0];
1343+
assert_eq!(*returned_zone_uuid, zone_uuid);
1344+
assert_eq!(returned_addr.ip(), &zone_ip);
1345+
assert_eq!(returned_addr.port(), zone_port);
1346+
1347+
// Test 2: Sled service should fail (targets don't match zone pattern)
1348+
let sled_error = resolver
1349+
.lookup_all_socket_and_zone_v6(ServiceName::SledAgent(sled_uuid))
1350+
.await
1351+
.expect_err("Should have failed to look up sled service");
1352+
1353+
// The error should be a ResolveError indicating the target doesn't match the zone pattern
1354+
match sled_error {
1355+
ResolveError::Resolve(hickory_err) => {
1356+
assert!(
1357+
hickory_err
1358+
.to_string()
1359+
.contains("target doesn't match zone pattern")
1360+
);
1361+
}
1362+
_ => panic!("Expected ResolveError::Resolve, got {:?}", sled_error),
1363+
}
1364+
1365+
dns_server.cleanup_successful();
1366+
logctx.cleanup_successful();
1367+
}
11341368
}

nexus/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ nexus-mgs-updates.workspace = true
6464
nexus-networking.workspace = true
6565
nexus-saga-recovery.workspace = true
6666
nexus-test-interface.workspace = true
67+
ntp-admin-client.workspace = true
6768
num-integer.workspace = true
6869
omicron-cockroach-metrics.workspace = true
6970
openssl.workspace = true

nexus/db-model/src/inventory.rs

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ use nexus_db_schema::schema::{
3434
inv_last_reconciliation_disk_result,
3535
inv_last_reconciliation_orphaned_dataset,
3636
inv_last_reconciliation_zone_result, inv_mupdate_override_non_boot,
37-
inv_nvme_disk_firmware, inv_omicron_sled_config,
37+
inv_ntp_timesync, inv_nvme_disk_firmware, inv_omicron_sled_config,
3838
inv_omicron_sled_config_dataset, inv_omicron_sled_config_disk,
3939
inv_omicron_sled_config_zone, inv_omicron_sled_config_zone_nic,
4040
inv_physical_disk, inv_root_of_trust, inv_root_of_trust_page,
@@ -61,7 +61,7 @@ use nexus_sled_agent_shared::inventory::{
6161
};
6262
use nexus_types::inventory::{
6363
BaseboardId, Caboose, CockroachStatus, Collection, NvmeFirmware,
64-
PowerState, RotPage, RotSlot,
64+
PowerState, RotPage, RotSlot, TimeSync,
6565
};
6666
use omicron_common::api::external;
6767
use omicron_common::api::internal::shared::NetworkInterface;
@@ -2833,6 +2833,33 @@ impl TryFrom<InvCockroachStatus> for CockroachStatus {
28332833
}
28342834
}
28352835

2836+
#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
2837+
#[diesel(table_name = inv_ntp_timesync)]
2838+
pub struct InvNtpTimesync {
2839+
pub inv_collection_id: DbTypedUuid<CollectionKind>,
2840+
pub zone_id: DbTypedUuid<OmicronZoneKind>,
2841+
pub synced: bool,
2842+
}
2843+
2844+
impl InvNtpTimesync {
2845+
pub fn new(
2846+
inv_collection_id: CollectionUuid,
2847+
timesync: &TimeSync,
2848+
) -> Result<Self, anyhow::Error> {
2849+
Ok(Self {
2850+
inv_collection_id: inv_collection_id.into(),
2851+
zone_id: timesync.zone_id.into(),
2852+
synced: timesync.synced,
2853+
})
2854+
}
2855+
}
2856+
2857+
impl From<InvNtpTimesync> for nexus_types::inventory::TimeSync {
2858+
fn from(value: InvNtpTimesync) -> Self {
2859+
Self { zone_id: value.zone_id.into(), synced: value.synced }
2860+
}
2861+
}
2862+
28362863
#[cfg(test)]
28372864
mod test {
28382865
use nexus_types::inventory::NvmeFirmware;

0 commit comments

Comments
 (0)