diff --git a/.azure-pipelines/docker-sonic-vs/start.sh b/.azure-pipelines/docker-sonic-vs/start.sh index 337661fd09..752c9ff675 100755 --- a/.azure-pipelines/docker-sonic-vs/start.sh +++ b/.azure-pipelines/docker-sonic-vs/start.sh @@ -61,10 +61,15 @@ else qos_cmd="-j /tmp/qos.json" fi + if [ -f /usr/share/sonic/single_asic_voq_fs/default_config.json ]; then + sonic-cfggen -j /usr/share/sonic/single_asic_voq_fs/default_config.json --print-data > /tmp/voq.json + voq_cmd="-j /tmp/voq.json" + fi + sonic-cfggen -p /usr/share/sonic/device/$PLATFORM/$PLATFORM_CONF -k $HWSKU --print-data > /tmp/ports.json # change admin_status from up to down; Test cases dependent sed -i "s/up/down/g" /tmp/ports.json - sonic-cfggen -j /etc/sonic/init_cfg.json $buffers_cmd $qos_cmd -j /tmp/ports.json --print-data > /etc/sonic/config_db.json + sonic-cfggen -j /etc/sonic/init_cfg.json $buffers_cmd $qos_cmd $voq_cmd -j /tmp/ports.json --print-data > /etc/sonic/config_db.json fi sonic-cfggen -t /usr/share/sonic/templates/copp_cfg.j2 > /etc/sonic/copp_cfg.json diff --git a/.azure-pipelines/test-docker-sonic-vs-template.yml b/.azure-pipelines/test-docker-sonic-vs-template.yml index 3639f9de12..0e826a002f 100644 --- a/.azure-pipelines/test-docker-sonic-vs-template.yml +++ b/.azure-pipelines/test-docker-sonic-vs-template.yml @@ -180,6 +180,8 @@ jobs: retry=3 IMAGE_NAME=docker-sonic-vs:$(Build.DefinitionName).$(Build.BuildNumber).asan-${{ parameters.asan }} echo $all_tests | xargs -n 1 | xargs -P 8 -I TEST_MODULE sudo DEFAULT_CONTAINER_REGISTRY=publicmirror.azurecr.io/ ./run-tests.sh "$IMAGE_NAME" "$params" "TEST_MODULE" 3 + single_asic_voq_tests="test_portchannel.py test_neighbor.py test_route.py" + echo $single_asic_voq_tests | xargs -n 1 | xargs -P 3 -I TEST_MODULE sudo ./run-tests.sh "$IMAGE_NAME" "--force-recreate-dvs --switch-mode=single_asic_voq_fs" "TEST_MODULE" 3 rm -rf $(Build.ArtifactStagingDirectory)/download displayName: "Run vs tests" diff --git a/Cargo.lock b/Cargo.lock index e4291a63ed..acc3ab59f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -262,12 +262,12 @@ dependencies = [ [[package]] name = "binrw" -version = "0.14.1" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d4bca59c20d6f40c2cc0802afbe1e788b89096f61bdf7aeea6bf00f10c2909b" +checksum = "81419ff39e6ed10a92a7f125290859776ced35d9a08a665ae40b23e7ca702f30" dependencies = [ "array-init", - "binrw_derive 0.14.1", + "binrw_derive 0.15.0", "bytemuck", ] @@ -286,15 +286,15 @@ dependencies = [ [[package]] name = "binrw_derive" -version = "0.14.1" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8ba42866ce5bced2645bfa15e97eef2c62d2bdb530510538de8dd3d04efff3c" +checksum = "376404e55ec40d0d6f8b4b7df3f87b87954bd987f0cf9a7207ea3b6ea5c9add4" dependencies = [ "either", - "owo-colors 3.5.0", + "owo-colors 4.2.2", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.104", ] [[package]] @@ -396,7 +396,7 @@ dependencies = [ "anstream", "anstyle", "clap_lex", - "strsim 0.11.1", + "strsim", "terminal_size", "unicase", "unicode-width", @@ -465,15 +465,19 @@ version = "0.1.0" dependencies = [ "ahash", "async-trait", - "binrw 0.14.1", + "binrw 0.15.0", "byteorder", "chrono", "clap", "color-eyre", "env_logger", + "genetlink", "ipfixrw", + "libc", "log", - "neli", + "netlink-packet-core 0.7.0", + "netlink-packet-generic 0.3.3", + "netlink-sys", "once_cell", "opentelemetry 0.25.0", "opentelemetry-http", @@ -521,72 +525,6 @@ dependencies = [ "memchr", ] -[[package]] -name = 
"darling" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" -dependencies = [ - "darling_core", - "darling_macro", -] - -[[package]] -name = "darling_core" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim 0.10.0", - "syn 1.0.109", -] - -[[package]] -name = "darling_macro" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" -dependencies = [ - "darling_core", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "derive_builder" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07adf7be193b71cc36b193d0f5fe60b918a3a9db4dad0449f57bcfd519704a3" -dependencies = [ - "derive_builder_macro", -] - -[[package]] -name = "derive_builder_core" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f91d4cfa921f1c05904dc3c57b4a32c38aed3340cce209f3a6fd1478babafc4" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "derive_builder_macro" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f0314b72bed045f3a68671b3c86328386762c93f82d98c65c3cb5e5f573dd68" -dependencies = [ - "derive_builder_core", - "syn 1.0.109", -] - [[package]] name = "derive_more" version = "0.99.20" @@ -774,6 +712,21 @@ dependencies = [ "slab", ] +[[package]] +name = "genetlink" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d8935531e8e0919b17043c668cc18bfac1622f2fab73125f4f018124ee330b8" +dependencies = [ + "futures", + "log", + "netlink-packet-core 0.8.1", + "netlink-packet-generic 0.4.0", + "netlink-proto", + "thiserror 1.0.69", + "tokio", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -1083,12 +1036,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - [[package]] name = "idna" version = "1.1.0" @@ -1335,30 +1282,83 @@ dependencies = [ ] [[package]] -name = "neli" -version = "0.7.0-rc2" -source = "git+https://github.com/jbaublitz/neli.git?tag=neli-v0.7.0-rc2#73528ae1fb0b2af177711f1a7c6228349d770dfb" +name = "netlink-packet-core" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72724faf704479d67b388da142b186f916188505e7e0b26719019c525882eda4" dependencies = [ - "bitflags", + "anyhow", "byteorder", - "derive_builder", - "getset", - "libc", + "netlink-packet-utils", +] + +[[package]] +name = "netlink-packet-core" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3463cbb78394cb0141e2c926b93fc2197e473394b761986eca3b9da2c63ae0f4" +dependencies = [ + "paste", +] + +[[package]] +name = "netlink-packet-generic" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd7eb8ad331c84c6b8cb7f685b448133e5ad82e1ffd5acafac374af4a5a308b" +dependencies = [ + "anyhow", + "byteorder", + "netlink-packet-core 0.7.0", + "netlink-packet-utils", +] + +[[package]] +name = 
"netlink-packet-generic" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f891b2e0054cac5a684a06628f59568f841c93da4e551239da6e518f539e775" +dependencies = [ + "netlink-packet-core 0.8.1", +] + +[[package]] +name = "netlink-packet-utils" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ede8a08c71ad5a95cdd0e4e52facd37190977039a4704eb82a283f713747d34" +dependencies = [ + "anyhow", + "byteorder", + "paste", + "thiserror 1.0.69", +] + +[[package]] +name = "netlink-proto" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b65d130ee111430e47eed7896ea43ca693c387f097dd97376bffafbf25812128" +dependencies = [ + "bytes", + "futures", "log", - "neli-proc-macros", - "parking_lot", + "netlink-packet-core 0.8.1", + "netlink-sys", + "thiserror 2.0.17", ] [[package]] -name = "neli-proc-macros" -version = "0.2.0-rc2" -source = "git+https://github.com/jbaublitz/neli.git?tag=neli-v0.7.0-rc2#73528ae1fb0b2af177711f1a7c6228349d770dfb" +name = "netlink-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "416060d346fbaf1f23f9512963e3e878f1a78e707cb699ba9215761754244307" dependencies = [ - "either", - "proc-macro2", - "quote", - "serde", - "syn 1.0.109", + "bytes", + "futures", + "libc", + "log", + "tokio", ] [[package]] @@ -1432,7 +1432,7 @@ dependencies = [ "js-sys", "once_cell", "pin-project-lite", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1446,7 +1446,7 @@ dependencies = [ "js-sys", "once_cell", "pin-project-lite", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1474,7 +1474,7 @@ dependencies = [ "opentelemetry-proto", "opentelemetry_sdk 0.25.0", "prost", - "thiserror", + "thiserror 1.0.69", "tokio", "tonic", ] @@ -1513,7 +1513,7 @@ dependencies = [ "ordered-float", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1531,7 +1531,7 @@ dependencies = [ "opentelemetry 0.24.0", "percent-encoding", "rand", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1550,7 +1550,7 @@ dependencies = [ "percent-encoding", "rand", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", ] @@ -1605,6 +1605,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.2" @@ -1890,7 +1896,7 @@ dependencies = [ "http", "reqwest", "serde", - "thiserror", + "thiserror 1.0.69", "tower-service", ] @@ -1905,7 +1911,7 @@ dependencies = [ "http", "reqwest", "serde", - "thiserror", + "thiserror 1.0.69", "tower-service", ] @@ -2116,12 +2122,6 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - [[package]] name = "strsim" version = "0.11.1" @@ -2212,7 +2212,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.17" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl 2.0.17", ] [[package]] @@ -2226,6 +2235,17 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "thread_local" version = "1.1.9" diff --git a/Cargo.toml b/Cargo.toml index a2db894e13..dbaefd2337 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,13 +26,16 @@ tokio = { version = "1.37", features = ["full"] } tokio-util = { version = "0.7", features = ["rt"] } tokio-stream = "0.1" -# Netlink for network operations -neli = { git = "https://github.com/jbaublitz/neli.git", tag = "neli-v0.7.0-rc2" } +# Using genetlink and netlink-packet-generic for netlink support +genetlink = "0.2" +netlink-packet-core = "0.7" +netlink-packet-generic = "0.3" +netlink-sys = "0.8" # IPFIX parser for traffic flow analysis ipfixrw = "0.1.0" ahash = "0.8.11" -binrw = "0.14.1" +binrw = "0.15.0" byteorder = "1.5.0" # Configuration and serialization @@ -75,4 +78,4 @@ pretty_assertions = "1" # Build dependencies tonic-build = "0.12" -vergen = { version = "8.2", features = ["build", "git", "gitoxide", "cargo", "rustc", "si"] } \ No newline at end of file +vergen = { version = "8.2", features = ["build", "git", "gitoxide", "cargo", "rustc", "si"] } diff --git a/crates/countersyncd/Cargo.toml b/crates/countersyncd/Cargo.toml index c13b11e030..861a904e60 100644 --- a/crates/countersyncd/Cargo.toml +++ b/crates/countersyncd/Cargo.toml @@ -16,7 +16,11 @@ tokio = { workspace = true } yaml-rust = { workspace = true } # Netlink for network operations -neli = { workspace = true } +genetlink = { workspace = true } +netlink-packet-core = { workspace = true } +netlink-packet-generic = { workspace = true } +netlink-sys = { workspace = true } +libc = "0.2" # IPFIX parser for traffic flow analysis ipfixrw = { workspace = true } diff --git a/crates/countersyncd/src/actor/control_netlink.rs b/crates/countersyncd/src/actor/control_netlink.rs index 81875b4e49..fd4df33c3f 100644 --- a/crates/countersyncd/src/actor/control_netlink.rs +++ b/crates/countersyncd/src/actor/control_netlink.rs @@ -2,21 +2,16 @@ use std::{thread::sleep, time::Duration}; use log::{debug, info, warn}; -#[allow(unused_imports)] -use neli::{ - consts::socket::{Msg, NlFamily}, - router::synchronous::NlRouter, - socket::NlSocket, - utils::Groups, -}; +use netlink_sys::{Socket, SocketAddr, protocols::NETLINK_GENERIC}; use tokio::sync::mpsc::Sender; use std::io; use super::super::message::netlink::NetlinkCommand; +use super::netlink_utils; #[cfg(not(test))] -type SocketType = NlSocket; +type SocketType = Socket; #[cfg(test)] type SocketType = test::MockSocket; @@ -42,6 +37,8 @@ const CTRL_CMD_DELFAMILY: u8 = 2; const CTRL_ATTR_FAMILY_NAME: u16 = 2; /// Size of generic netlink header in bytes const GENL_HEADER_SIZE: usize = 20; +/// Netlink control notify multicast group ID +const NLCTRL_NOTIFY_GROUP_ID: u32 = 1; /// Actor responsible for monitoring netlink family registration/unregistration. 
/// @@ -58,9 +55,9 @@ pub struct ControlNetlinkActor { command_sender: Sender, /// Last time we checked if the family exists last_family_check: std::time::Instant, - /// Reusable netlink resolver for family existence checks + /// Reusable netlink socket for family existence checks #[cfg(not(test))] - resolver: Option, + resolver: Option, #[cfg(test)] #[allow(dead_code)] resolver: Option<()>, @@ -99,42 +96,37 @@ impl ControlNetlinkActor { actor } - /// Establishes a connection to the netlink control socket (legacy interface). + /// Establishes a connection to the netlink control socket. #[cfg(not(test))] fn connect_control_socket() -> Option { - // Create a router to resolve the control group - let (router, _) = match NlRouter::connect(NlFamily::Generic, Some(0), Groups::empty()) { - Ok(result) => result, + // Create a raw netlink socket + let mut socket = match Socket::new(NETLINK_GENERIC) { + Ok(s) => s, Err(e) => { - warn!("Failed to connect control router: {:?}", e); + warn!("Failed to create netlink socket: {:?}", e); return None; } }; - // Resolve the "notify" multicast group for nlctrl family - let notify_group_id = match router.resolve_nl_mcast_group("nlctrl", "notify") { - Ok(group_id) => { - debug!("Resolved nlctrl notify group ID: {}", group_id); - group_id - } - Err(e) => { - warn!("Failed to resolve nlctrl notify group: {:?}", e); - return None; - } - }; + // Bind the socket with automatic port assignment + let addr = SocketAddr::new(0, 0); + if let Err(e) = socket.bind(&addr) { + warn!("Failed to bind control socket: {:?}", e); + return None; + } - // Connect to NETLINK_GENERIC with the notify group - let socket = match SocketType::connect( - NlFamily::Generic, - Some(0), - Groups::new_groups(&[notify_group_id]), - ) { - Ok(socket) => socket, - Err(e) => { - warn!("Failed to connect control socket: {:?}", e); - return None; - } - }; + // Subscribe to nlctrl notify group (group ID 1 for nlctrl notify) + // The nlctrl family uses a well-known multicast group ID + if let Err(e) = socket.add_membership(NLCTRL_NOTIFY_GROUP_ID) { + warn!("Failed to add multicast membership: {:?}", e); + return None; + } + + // Set non-blocking mode + if let Err(e) = socket.set_non_blocking(true) { + warn!("Failed to set non-blocking mode: {:?}", e); + return None; + } debug!("Successfully connected control socket and subscribed to nlctrl notifications"); Some(socket) @@ -147,30 +139,17 @@ impl ControlNetlinkActor { None } - /// Creates a netlink resolver for family/group resolution. - /// - /// # Returns - /// - /// Some(router) if creation is successful, None otherwise + /// Creates a netlink socket for family/group resolution. + /// Now delegates to netlink_utils module. #[cfg(not(test))] - fn create_nl_resolver() -> Option { - match NlRouter::connect(NlFamily::Generic, Some(0), Groups::empty()) { - Ok((router, _)) => { - debug!("Created netlink resolver for family/group resolution"); - Some(router) - } - Err(e) => { - warn!("Failed to create netlink resolver: {:?}", e); - None - } - } + fn create_nl_resolver() -> Option { + netlink_utils::create_nl_resolver() } /// Mock netlink resolver for testing. 
#[cfg(test)] #[allow(dead_code)] - fn create_nl_resolver() -> Option { - // Return None for tests to avoid complexity + fn create_nl_resolver() -> Option<()> { None } @@ -194,18 +173,18 @@ impl ControlNetlinkActor { } } - if let Some(ref resolver) = self.resolver { - match resolver.resolve_genl_family(&self.family) { - Ok(family_info) => { - debug!("Family '{}' exists with ID: {}", self.family, family_info); + if let Some(ref mut resolver) = self.resolver { + match netlink_utils::resolve_family_id(resolver, &self.family) { + Ok(family_id) => { + debug!("Family '{}' exists with ID: {}", self.family, family_id); true } Err(e) => { debug!("Family '{}' resolution failed: {:?}", self.family, e); // Only clear resolver on specific errors that indicate it's stale - // For "family not found" errors, keep the resolver as it's still valid - if e.to_string().contains("No such file or directory") - || e.to_string().contains("Connection refused") + let err_str = format!("{:?}", e); + if err_str.contains("No such file or directory") + || err_str.contains("Connection refused") { debug!("Clearing resolver due to connection error"); self.resolver = None; @@ -233,13 +212,14 @@ impl ControlNetlinkActor { socket: Option<&mut SocketType>, target_family: &str, ) -> Result { + debug!("Attempting to receive control message"); let socket = socket.ok_or_else(|| { io::Error::new(io::ErrorKind::NotConnected, "No control socket available") })?; let mut buffer = vec![0; BUFFER_SIZE]; - match socket.recv(&mut buffer, Msg::DONTWAIT) { - Ok((size, _)) => { + match socket.recv_from(&mut buffer, 0) { + Ok((size, _addr)) => { if size == 0 { return Ok(false); } @@ -410,8 +390,9 @@ impl ControlNetlinkActor { // Log heartbeat every minute to show the actor is running if heartbeat_counter % HEARTBEAT_LOG_INTERVAL == 0 { info!( - "ControlNetlinkActor is running normally - monitoring family '{}'", - actor.family + "ControlNetlinkActor is running normally - monitoring family '{}', family_was_available={}", + actor.family, + family_was_available, ); } @@ -470,16 +451,16 @@ impl ControlNetlinkActor { family_was_available = family_available; } else if family_available { // Family is available but we haven't sent a reconnect recently - // Send periodic reconnect commands to ensure DataNetlinkActor stays connected + // Send periodic soft reconnect commands to ensure DataNetlinkActor stays connected // This handles cases where DataNetlinkActor disconnected due to socket errors - // Since DataNetlinkActor.connect() now skips unnecessary reconnects, we can be more conservative + // SoftReconnect only reconnects if socket is unhealthy, avoiding unnecessary reconnections if heartbeat_counter - last_periodic_reconnect_counter >= PERIODIC_RECONNECT_INTERVAL { - debug!("Sending periodic reconnect command to ensure data socket stays connected (counter: {}, last: {}, interval: {})", + debug!("Sending periodic soft reconnect command to check data socket health (counter: {}, last: {}, interval: {})", heartbeat_counter, last_periodic_reconnect_counter, PERIODIC_RECONNECT_INTERVAL); - if let Err(e) = actor.command_sender.send(NetlinkCommand::Reconnect).await { - warn!("Failed to send periodic reconnect command: {:?}", e); + if let Err(e) = actor.command_sender.send(NetlinkCommand::SoftReconnect).await { + warn!("Failed to send periodic soft reconnect command: {:?}", e); break; // Channel is closed, exit } last_periodic_reconnect_counter = heartbeat_counter; @@ -512,7 +493,7 @@ pub mod test { pub struct MockSocket; impl MockSocket { - pub fn recv(&mut 
self, _buf: &mut [u8], _flags: Msg) -> Result<(usize, Groups), io::Error> { + pub fn recv_from(&mut self, _buf: &mut [u8], _flags: i32) -> Result<(usize, SocketAddr), io::Error> { // Always return WouldBlock to simulate no control messages Err(io::Error::new( io::ErrorKind::WouldBlock, diff --git a/crates/countersyncd/src/actor/data_netlink.rs b/crates/countersyncd/src/actor/data_netlink.rs index 3c4ea7817f..a22ee55f16 100644 --- a/crates/countersyncd/src/actor/data_netlink.rs +++ b/crates/countersyncd/src/actor/data_netlink.rs @@ -5,18 +5,13 @@ use std::{ time::{Duration, Instant}, }; +use std::os::unix::io::AsRawFd; #[cfg(test)] -use std::os::unix::io::{AsRawFd, RawFd}; +use std::os::unix::io::RawFd; use log::{debug, info, warn}; -#[allow(unused_imports)] -use neli::{ - consts::socket::{Msg, NlFamily}, - router::synchronous::NlRouter, - socket::NlSocket, - utils::Groups, -}; +use netlink_sys::{Socket, SocketAddr, protocols::NETLINK_GENERIC}; use tokio::sync::mpsc::{Receiver, Sender}; use std::io; @@ -25,14 +20,15 @@ use super::super::message::{ buffer::SocketBufferMessage, netlink::{NetlinkCommand, SocketConnect}, }; +use super::netlink_utils; #[cfg(not(test))] -type SocketType = NlSocket; +type SocketType = Socket; #[cfg(test)] type SocketType = test::MockSocket; /// Path to the sonic constants configuration file -const SONIC_CONSTANTS: &str = "/usr/share/sonic/countersyncd/constants.yml"; +const SONIC_CONSTANTS: &str = "/etc/sonic/constants.yml"; /// Size of the buffer used for receiving netlink messages const BUFFER_SIZE: usize = 0x1FFFF; @@ -45,7 +41,7 @@ const ENOBUFS: i32 = 105; const MAX_LOCAL_RECONNECT_ATTEMPTS: u32 = 3; /// Socket health check timeout - if no data received for this duration, socket is considered unhealthy -const SOCKET_HEALTH_TIMEOUT_SECS: u64 = 10; +const SOCKET_HEALTH_TIMEOUT_SECS: u64 = 60; /// Heartbeat logging interval (in iterations) - log every 5 minutes at 10ms per iteration const HEARTBEAT_LOG_INTERVAL: u32 = 30000; // 30000 * 10ms = 5 minutes @@ -238,11 +234,11 @@ pub struct DataNetlinkActor { group: String, /// The active netlink socket connection (None if disconnected) socket: Option, - /// Reusable netlink resolver for family/group resolution (None if not available) + /// Reusable netlink socket for family/group resolution (None if not available) #[allow(dead_code)] - nl_resolver: Option, + nl_resolver: Option, /// Timestamp of when we last received data on the socket (for health checking) - last_data_time: Option, + last_data_time: Instant, /// List of channels to send received buffer messages to buffer_recipients: LinkedList>, /// Channel for receiving control commands @@ -270,7 +266,7 @@ impl DataNetlinkActor { group: group.to_string(), socket: None, nl_resolver, - last_data_time: None, + last_data_time: Instant::now(), buffer_recipients: LinkedList::new(), command_recipient, message_parser: NetlinkMessageParser::new(), @@ -291,29 +287,21 @@ impl DataNetlinkActor { self.buffer_recipients.push_back(recipient); } - /// Creates a netlink resolver for family/group resolution. + /// Creates a netlink socket for family/group resolution. /// /// # Returns /// - /// Some(router) if creation is successful, None otherwise + /// Some(socket) if creation is successful, None otherwise + /// Creates a netlink socket for family/group resolution. + /// Now delegates to netlink_utils module. 
#[cfg(not(test))] - fn create_nl_resolver() -> Option { - match NlRouter::connect(NlFamily::Generic, Some(0), Groups::empty()) { - Ok((router, _)) => { - debug!("Created netlink resolver for family/group resolution"); - Some(router) - } - Err(e) => { - warn!("Failed to create netlink resolver: {:?}", e); - None - } - } + fn create_nl_resolver() -> Option { + netlink_utils::create_nl_resolver() } /// Mock netlink resolver for testing. #[cfg(test)] - fn create_nl_resolver() -> Option { - // Return None for tests to avoid complexity + fn create_nl_resolver() -> Option { None } @@ -335,17 +323,17 @@ impl DataNetlinkActor { ); // Try to use existing netlink resolver first - let group_id = if let Some(ref resolver) = self.nl_resolver { - match resolver.resolve_nl_mcast_group(family, group) { + let group_id = if let Some(ref mut resolver) = self.nl_resolver { + match netlink_utils::resolve_multicast_group(resolver, family, group) { Ok(id) => { - debug!( + info!( "Resolved group ID {} for family '{}', group '{}' (using netlink resolver)", id, family, group ); id } Err(e) => { - debug!( + info!( "Failed to resolve group with netlink resolver: {:?}, recreating resolver", e ); @@ -353,10 +341,10 @@ impl DataNetlinkActor { self.nl_resolver = Self::create_nl_resolver(); // Try again with new resolver - if let Some(ref resolver) = self.nl_resolver { - match resolver.resolve_nl_mcast_group(family, group) { + if let Some(ref mut resolver) = self.nl_resolver { + match netlink_utils::resolve_multicast_group(resolver, family, group) { Ok(id) => { - debug!("Resolved group ID {} for family '{}', group '{}' (using new netlink resolver)", id, family, group); + info!("Resolved group ID {} for family '{}', group '{}' (using new netlink resolver)", id, family, group); id } Err(e) => { @@ -369,7 +357,7 @@ impl DataNetlinkActor { } } } else { - // Fallback to creating temporary router + // Fallback to creating temporary socket return Self::connect_fallback(family, group); } } @@ -378,10 +366,10 @@ impl DataNetlinkActor { // Create netlink resolver if not available self.nl_resolver = Self::create_nl_resolver(); - if let Some(ref resolver) = self.nl_resolver { - match resolver.resolve_nl_mcast_group(family, group) { + if let Some(ref mut resolver) = self.nl_resolver { + match netlink_utils::resolve_multicast_group(resolver, family, group) { Ok(id) => { - debug!("Resolved group ID {} for family '{}', group '{}' (using new netlink resolver)", id, family, group); + info!("Resolved group ID {} for family '{}', group '{}' (using new netlink resolver)", id, family, group); id } Err(e) => { @@ -397,7 +385,7 @@ impl DataNetlinkActor { } } } else { - // Fallback to creating temporary router + // Fallback to creating temporary socket return Self::connect_fallback(family, group); } }; @@ -406,39 +394,46 @@ impl DataNetlinkActor { "Creating socket for family '{}' with group_id {}", family, group_id ); - let socket = match SocketType::connect( - NlFamily::Generic, - // 0 is pid of kernel -> socket is connected to kernel - Some(0), - Groups::empty(), - ) { - Ok(socket) => socket, + + // Create a raw netlink socket using netlink-sys + let mut socket = match Socket::new(NETLINK_GENERIC) { + Ok(s) => s, Err(e) => { - warn!("Failed to connect socket: {:?}", e); + warn!("Failed to create netlink socket: {:?}", e); return None; } }; + // Bind the socket with automatic port assignment + let addr = SocketAddr::new(0, 0); + if let Err(e) = socket.bind(&addr) { + warn!("Failed to bind socket: {:?}", e); + return None; + } + debug!("Adding 
multicast membership for group_id {}", group_id); - match socket.add_mcast_membership(Groups::new_groups(&[group_id])) { - Ok(_) => { - info!( - "Successfully connected to family '{}', group '{}' with group_id: {}", - family, group, group_id - ); - debug!("Socket created successfully, ready to receive multicast messages on group_id: {}", group_id); - Some(socket) - } - Err(e) => { - warn!( - "Failed to add mcast membership for group_id {}: {:?}", - group_id, e - ); - // Explicitly drop the socket to ensure it's closed - drop(socket); - None - } + if let Err(e) = socket.add_membership(group_id) { + warn!( + "Failed to add mcast membership for group_id {}: {:?}", + group_id, e + ); + // Explicitly drop the socket to ensure it's closed + drop(socket); + return None; } + + // Set non-blocking mode + if let Err(e) = socket.set_non_blocking(true) { + warn!("Failed to set non-blocking mode: {:?}", e); + return None; + } + + info!( + "Successfully connected to family '{}', group '{}' with group_id: {}", + family, group, group_id + ); + debug!("Socket created successfully, ready to receive multicast messages on group_id: {}", group_id); + Some(socket) } /// Mock connection method using shared router for testing. @@ -464,25 +459,28 @@ impl DataNetlinkActor { family, group ); - let (sock, _) = match NlRouter::connect( - NlFamily::Generic, - // 0 is pid of kernel -> socket is connected to kernel - Some(0), - Groups::empty(), - ) { - Ok(result) => result, + // Create a temporary netlink socket for resolution + let mut temp_socket = match Socket::new(NETLINK_GENERIC) { + Ok(mut s) => { + let addr = SocketAddr::new(0, 0); + if let Err(e) = s.bind(&addr) { + warn!("Failed to bind temporary socket: {:?}", e); + return None; + } + s + } Err(e) => { - warn!("Failed to connect to netlink router: {:?}", e); + warn!("Failed to create temporary netlink socket: {:?}", e); warn!("Possible causes: insufficient permissions, netlink not supported, or kernel module not loaded"); return None; } }; debug!( - "Router connected, resolving group ID for family '{}', group '{}'", + "Temporary socket created, resolving group ID for family '{}', group '{}'", family, group ); - let group_id = match sock.resolve_nl_mcast_group(family, group) { + let group_id = match netlink_utils::resolve_multicast_group(&mut temp_socket, family, group) { Ok(id) => { debug!( "Resolved group ID {} for family '{}', group '{}'", @@ -499,8 +497,6 @@ impl DataNetlinkActor { "This suggests the family '{}' is not registered in the kernel", family ); - // Explicitly drop the temporary router to ensure it's closed - drop(sock); return None; } }; @@ -509,79 +505,88 @@ impl DataNetlinkActor { "Creating socket for family '{}' with group_id {}", family, group_id ); - let socket = match SocketType::connect( - NlFamily::Generic, - // 0 is pid of kernel -> socket is connected to kernel - Some(0), - Groups::empty(), - ) { - Ok(socket) => socket, + + // Create a raw netlink socket + let mut socket = match Socket::new(NETLINK_GENERIC) { + Ok(s) => s, Err(e) => { - warn!("Failed to connect socket: {:?}", e); - // Explicitly drop the temporary router to ensure it's closed - drop(sock); + warn!("Failed to create netlink socket: {:?}", e); return None; } }; + // Bind the socket + let addr = SocketAddr::new(0, 0); + if let Err(e) = socket.bind(&addr) { + warn!("Failed to bind socket: {:?}", e); + return None; + } + debug!("Adding multicast membership for group_id {}", group_id); - match socket.add_mcast_membership(Groups::new_groups(&[group_id])) { - Ok(_) => { - 
info!( - "Successfully connected to family '{}', group '{}' with group_id: {}", - family, group, group_id - ); - debug!("Socket created successfully, ready to receive multicast messages on group_id: {}", group_id); - // Explicitly drop the temporary router since we no longer need it - drop(sock); - Some(socket) - } - Err(e) => { - warn!( - "Failed to add mcast membership for group_id {}: {:?}", - group_id, e - ); - // Explicitly drop both socket and temporary router to ensure they're closed - drop(socket); - drop(sock); - None - } + if let Err(e) = socket.add_membership(group_id) { + warn!( + "Failed to add mcast membership for group_id {}: {:?}", + group_id, e + ); + return None; } + + // Set non-blocking mode + if let Err(e) = socket.set_non_blocking(true) { + warn!("Failed to set non-blocking mode: {:?}", e); + return None; + } + + info!( + "Successfully connected to family '{}', group '{}' with group_id: {}", + family, group, group_id + ); + debug!("Socket created successfully, ready to receive multicast messages on group_id: {}", group_id); + Some(socket) } /// Attempts to establish a connection on demand. /// - /// This will be called when receiving a Reconnect command from ControlNetlinkActor. - /// Implements socket health checking - if current socket hasn't received data recently, - /// it will be closed and replaced with a new connection. - fn connect(&mut self) { - // Check if current socket is healthy - if let Some(_socket) = &self.socket { - if let Some(last_data_time) = self.last_data_time { - let time_since_last_data = Instant::now().duration_since(last_data_time); - if time_since_last_data.as_secs() > SOCKET_HEALTH_TIMEOUT_SECS { + /// This will be called when receiving a Reconnect or SoftReconnect command from ControlNetlinkActor. + /// + /// # Arguments + /// + /// * `force` - If true, always reconnect regardless of socket health status. + /// If false, only reconnect if socket is unhealthy (no data received recently). 
+ /// + /// # Behavior + /// + /// - `force=true` (Reconnect): Always closes current socket and creates new connection + /// - `force=false` (SoftReconnect): Only reconnects if socket hasn't received data for SOCKET_HEALTH_TIMEOUT_SECS + fn connect(&mut self, force: bool) { + // Check if current socket is healthy (only when not forcing reconnection) + if !force { + if let Some(_socket) = &self.socket { + let time_since_last_data = Instant::now().duration_since(self.last_data_time); + if time_since_last_data > Duration::from_secs(SOCKET_HEALTH_TIMEOUT_SECS) { warn!( "Socket unhealthy - no data received for {} seconds, forcing reconnection", time_since_last_data.as_secs() - ); - // Close the unhealthy socket - self.socket = None; - self.last_data_time = None; + ); + // Close the unhealthy socket + self.socket = None; } else { - debug!( + info!( "Socket healthy - data received {} seconds ago, skipping reconnect", time_since_last_data.as_secs() ); return; } - } else { - // Socket exists but no data ever received - consider it new - debug!("Socket exists but no data received yet, skipping reconnect"); - return; + } + } else { + // Force reconnection: close current socket if exists + if self.socket.is_some() { + info!("Force reconnection requested, closing current socket"); + self.socket = None; } } - debug!( + info!( "Establishing new connection for family '{}', group '{}'", self.family, self.group ); @@ -591,7 +596,7 @@ impl DataNetlinkActor { "Successfully connected to family '{}', group '{}'", self.family, self.group ); - self.last_data_time = None; // Reset data time for new socket + self.last_data_time = Instant::now(); // Reset data time for new socket } else { warn!( "Failed to connect to family '{}', group '{}'", @@ -613,7 +618,6 @@ impl DataNetlinkActor { self.family, self.group ); self.socket = None; - self.last_data_time = None; // Clear the resolver as it might be stale self.nl_resolver = None; } @@ -632,7 +636,7 @@ impl DataNetlinkActor { ); self.family = family.to_string(); self.group = group.to_string(); - self.connect(); + self.connect(true); // Force reconnection on reset } /// Attempts to receive messages from the netlink socket. 
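> Reviewer note: the Reconnect/SoftReconnect split introduced here reduces to one rule — a hard reconnect always tears the socket down, a soft one only does so when the socket has been silent longer than SOCKET_HEALTH_TIMEOUT_SECS. A standalone sketch of that predicate (the constant value is the one set earlier in this file; the helper name is illustrative, not code from the patch):

```rust
use std::time::{Duration, Instant};

/// Socket health timeout, matching the value set earlier in this file.
const SOCKET_HEALTH_TIMEOUT_SECS: u64 = 60;

/// Illustrative predicate: should connect() drop and replace the socket?
fn should_reconnect(last_data_time: Instant, force: bool) -> bool {
    force || last_data_time.elapsed() > Duration::from_secs(SOCKET_HEALTH_TIMEOUT_SECS)
}

fn main() {
    let just_received = Instant::now();
    // SoftReconnect leaves a recently active socket alone...
    assert!(!should_reconnect(just_received, false));
    // ...while a hard Reconnect always recycles it.
    assert!(should_reconnect(just_received, true));
}
```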
@@ -653,12 +657,33 @@ impl DataNetlinkActor { let mut buffer = vec![0; BUFFER_SIZE]; - // Try to receive with MSG_DONTWAIT to make it non-blocking + // Try to receive with non-blocking mode (socket should already be set to non-blocking) debug!("Attempting to receive netlink message..."); - let result = socket.recv(&mut buffer, Msg::DONTWAIT); + + #[cfg(not(test))] + let result = { + let fd = socket.as_raw_fd(); + unsafe { + libc::recv( + fd, + buffer.as_mut_ptr() as *mut libc::c_void, + buffer.len(), + 0 + ) + } + }; + + #[cfg(test)] + let result = match socket.recv(&mut buffer, 0) { + Ok(size) => size as isize, + Err(e) => { + return Err(e); + } + }; match result { - Ok((size, _groups)) => { + size if size > 0 => { + let size = size as usize; debug!("Received netlink data, size: {} bytes", size); if size == 0 { @@ -677,13 +702,25 @@ impl DataNetlinkActor { Ok(messages) } - Err(e) => { - debug!( - "Socket recv failed: {:?} (raw_os_error: {:?})", - e, - e.raw_os_error() - ); - Err(e) + size if size == 0 => { + Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "Connection closed", + )) + } + _ => { + // size < 0, errno is set + let err = io::Error::last_os_error(); + // WouldBlock is expected for non-blocking sockets with no data + // Only log other errors + if err.kind() != io::ErrorKind::WouldBlock { + debug!( + "Socket recv failed: {:?} (raw_os_error: {:?})", + err, + err.raw_os_error() + ); + } + Err(err) } } } @@ -768,7 +805,11 @@ impl DataNetlinkActor { consecutive_failures = 0; // Reset failure counter on reconnect command } NetlinkCommand::Reconnect => { - actor.connect(); + actor.connect(true); // Force reconnection + consecutive_failures = 0; // Reset failure counter on reconnect command + } + NetlinkCommand::SoftReconnect => { + actor.connect(false); // Check health before reconnecting consecutive_failures = 0; // Reset failure counter on reconnect command } NetlinkCommand::Close => { @@ -786,7 +827,7 @@ impl DataNetlinkActor { match Self::try_recv(actor.socket.as_mut(), &mut actor.message_parser).await { Ok(messages) => { consecutive_failures = 0; // Reset failure counter on successful receive - actor.last_data_time = Some(Instant::now()); // Update data reception timestamp + actor.last_data_time = Instant::now(); // Update data reception timestamp if messages.is_empty() { debug!("Received data but no complete messages yet (partial message)"); @@ -846,7 +887,7 @@ impl DataNetlinkActor { "Attempting quick reconnect #{}", consecutive_failures ); - actor.connect(); + actor.connect(true); // Force reconnection on error } else { debug!("Too many consecutive failures, waiting for reconnect command from ControlNetlinkActor"); } @@ -1006,8 +1047,8 @@ pub mod test { /// /// # Returns /// - /// Ok((size, groups)) on success, Err on failure or empty message - pub fn recv(&mut self, buf: &mut [u8], _flags: Msg) -> Result<(usize, Groups), io::Error> { + /// Ok(size) on success, Err on failure or empty message + pub fn recv(&mut self, buf: &mut [u8], _flags: i32) -> Result { sleep(Duration::from_millis(1)); if self.budget == 0 { @@ -1026,7 +1067,7 @@ pub mod test { let copy_len = std::cmp::min(msg.len(), buf.len()); buf[..copy_len].copy_from_slice(&msg[..copy_len]); - Ok((copy_len, Groups::empty())) + Ok(copy_len) } else { Err(io::Error::new( io::ErrorKind::ConnectionAborted, diff --git a/crates/countersyncd/src/actor/mod.rs b/crates/countersyncd/src/actor/mod.rs index 58545b74a7..35a1a00c68 100644 --- a/crates/countersyncd/src/actor/mod.rs +++ b/crates/countersyncd/src/actor/mod.rs @@ 
-2,6 +2,7 @@ pub mod control_netlink; pub mod counter_db; pub mod data_netlink; pub mod ipfix; +pub mod netlink_utils; pub mod stats_reporter; pub mod swss; pub mod otel; diff --git a/crates/countersyncd/src/actor/netlink_utils.rs b/crates/countersyncd/src/actor/netlink_utils.rs new file mode 100644 index 0000000000..4613e4a034 --- /dev/null +++ b/crates/countersyncd/src/actor/netlink_utils.rs @@ -0,0 +1,376 @@ +/// Shared netlink utilities for family and multicast group resolution. +/// +/// This module provides common functionality used by both ControlNetlinkActor +/// and DataNetlinkActor to avoid code duplication. + +use std::io; +use std::os::fd::AsRawFd; + +use log::{debug, warn}; +use netlink_packet_core::{NetlinkMessage, NetlinkPayload, NLM_F_ACK, NLM_F_REQUEST}; +use netlink_packet_generic::{ + ctrl::{ + nlas::{GenlCtrlAttrs, McastGrpAttrs}, + GenlCtrl, GenlCtrlCmd, + }, + GenlMessage, +}; +use netlink_sys::{protocols::NETLINK_GENERIC, Socket, SocketAddr}; + +/// Creates a netlink socket for family/group resolution. +/// +/// The socket is configured in blocking mode for request-response operations. +/// +/// # Returns +/// +/// Some(socket) if creation is successful, None otherwise +#[cfg(not(test))] +pub fn create_nl_resolver() -> Option { + match Socket::new(NETLINK_GENERIC) { + Ok(mut socket) => { + let addr = SocketAddr::new(0, 0); + if let Err(e) = socket.bind(&addr) { + warn!("Failed to bind resolver socket: {:?}", e); + return None; + } + // Set to blocking mode for request-response operations + if let Err(e) = socket.set_non_blocking(false) { + warn!("Failed to set resolver socket to blocking mode: {:?}", e); + return None; + } + debug!("Created netlink socket for family/group resolution (blocking mode)"); + Some(socket) + } + Err(e) => { + warn!("Failed to create netlink socket: {:?}", e); + None + } + } +} + +/// Mock netlink resolver for testing. +#[cfg(test)] +pub fn create_nl_resolver() -> Option { + None +} + +/// Resolves a generic netlink family name to its ID. 
+/// +/// # Arguments +/// +/// * `socket` - The netlink socket to use for resolution +/// * `family_name` - The name of the generic netlink family +/// +/// # Returns +/// +/// Ok(family_id) if successful, Err otherwise +#[cfg(not(test))] +pub fn resolve_family_id(socket: &mut Socket, family_name: &str) -> Result { + debug!( + "resolve_family_id: Starting resolution for family '{}'", + family_name + ); + + // Drain any pending data from socket before sending request + // This prevents reading stale responses + let mut drain_buf = vec![0; 8192]; + loop { + match socket.recv_from(&mut drain_buf, libc::MSG_DONTWAIT) { + Ok((n, _addr)) if n > 0 => { + debug!( + "resolve_family_id: Drained {} stale bytes from socket", + n + ); + continue; + } + _ => break, + } + } + + // Create a GET_FAMILY request + let mut genlmsg: GenlMessage = GenlMessage::from_payload(GenlCtrl { + cmd: GenlCtrlCmd::GetFamily, + nlas: vec![GenlCtrlAttrs::FamilyName(family_name.to_owned())], + }); + genlmsg.finalize(); + + let mut nlmsg = NetlinkMessage::from(genlmsg); + nlmsg.header.flags = NLM_F_REQUEST | NLM_F_ACK; // Request with ACK + nlmsg.header.sequence_number = 1; // Set sequence number + nlmsg.finalize(); + + // Send the request + let mut buf = vec![0; nlmsg.buffer_len()]; + nlmsg.serialize(&mut buf[..]); + debug!( + "resolve_family_id: Sending request of {} bytes (seq={})", + buf.len(), + nlmsg.header.sequence_number + ); + + // Debug: print request hex + let req_preview = buf.len().min(36); + let hex_str: Vec = buf[..req_preview] + .iter() + .map(|b| format!("{:02x}", b)) + .collect(); + debug!("resolve_family_id: Request hex: {}", hex_str.join(" ")); + + let kernel_addr = SocketAddr::new(0, 0); + let sent_bytes = socket.send_to(&buf[..], &kernel_addr, 0)?; + debug!( + "resolve_family_id: Sent {} bytes to kernel", + sent_bytes + ); + + // Receive response using raw recv syscall to avoid netlink-sys buffer issues + let mut response_buf = vec![0u8; 8192]; + debug!("resolve_family_id: Calling raw recv() to receive response..."); + + let fd = socket.as_raw_fd(); + let bytes_read = unsafe { + libc::recv( + fd, + response_buf.as_mut_ptr() as *mut libc::c_void, + response_buf.len(), + 0, + ) + }; + + if bytes_read < 0 { + return Err(io::Error::last_os_error()); + } + let bytes_read = bytes_read as usize; + debug!( + "resolve_family_id: Received {} bytes from kernel", + bytes_read + ); + + if bytes_read == 0 { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "No response received from kernel", + )); + } + + // Debug: print first 36 bytes of response in hex + let resp_preview = bytes_read.min(36); + let resp_hex: Vec = response_buf[..resp_preview] + .iter() + .map(|b| format!("{:02x}", b)) + .collect(); + debug!("resolve_family_id: Response hex: {}", resp_hex.join(" ")); + + // Debug: check netlink header + if bytes_read >= 16 { + let nl_len = u32::from_le_bytes([ + response_buf[0], + response_buf[1], + response_buf[2], + response_buf[3], + ]); + let nl_type = u16::from_le_bytes([response_buf[4], response_buf[5]]); + let nl_flags = u16::from_le_bytes([response_buf[6], response_buf[7]]); + let nl_seq = u32::from_le_bytes([ + response_buf[8], + response_buf[9], + response_buf[10], + response_buf[11], + ]); + let nl_pid = u32::from_le_bytes([ + response_buf[12], + response_buf[13], + response_buf[14], + response_buf[15], + ]); + debug!( + "resolve_family_id: Netlink header - len:{} type:{} flags:{} seq:{} pid:{}", + nl_len, nl_type, nl_flags, nl_seq, nl_pid + ); + } + + // Parse the response + let response = 
NetlinkMessage::>::deserialize(&response_buf[..bytes_read]) + .map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Failed to parse response: {:?}", e), + ) + })?; + + match response.payload { + NetlinkPayload::InnerMessage(genlmsg) => { + for nla in &genlmsg.payload.nlas { + if let GenlCtrlAttrs::FamilyId(id) = nla { + return Ok(*id); + } + } + Err(io::Error::new( + io::ErrorKind::NotFound, + "Family ID not found in response", + )) + } + NetlinkPayload::Error(err) => Err(io::Error::new( + io::ErrorKind::Other, + format!("Netlink error: {:?}", err), + )), + _ => Err(io::Error::new( + io::ErrorKind::InvalidData, + "Unexpected response type", + )), + } +} + +/// Resolves a generic netlink multicast group to its ID. +/// +/// # Arguments +/// +/// * `socket` - The netlink socket to use for resolution +/// * `family_name` - The name of the generic netlink family +/// * `group_name` - The name of the multicast group +/// +/// # Returns +/// +/// Ok(group_id) if successful, Err otherwise +#[cfg(not(test))] +pub fn resolve_multicast_group( + socket: &mut Socket, + family_name: &str, + group_name: &str, +) -> Result { + debug!( + "resolve_multicast_group: Starting for family '{}', group '{}'", + family_name, group_name + ); + + // Drain any pending data from socket before sending request + let mut drain_buf = vec![0; 8192]; + loop { + match socket.recv_from(&mut drain_buf, libc::MSG_DONTWAIT) { + Ok((n, _addr)) if n > 0 => { + debug!( + "resolve_multicast_group: Drained {} stale bytes from socket", + n + ); + continue; + } + _ => break, + } + } + + // Create a GET_FAMILY request (this will include multicast group info in response) + let mut genlmsg: GenlMessage = GenlMessage::from_payload(GenlCtrl { + cmd: GenlCtrlCmd::GetFamily, + nlas: vec![GenlCtrlAttrs::FamilyName(family_name.to_owned())], + }); + genlmsg.finalize(); + + let mut nlmsg = NetlinkMessage::from(genlmsg); + nlmsg.header.flags = NLM_F_REQUEST | NLM_F_ACK; + nlmsg.header.sequence_number = 1; + nlmsg.finalize(); + + // Send the request + let mut buf = vec![0; nlmsg.buffer_len()]; + nlmsg.serialize(&mut buf[..]); + debug!("resolve_multicast_group: Sending {} bytes", buf.len()); + + let kernel_addr = SocketAddr::new(0, 0); + socket.send_to(&buf[..], &kernel_addr, 0)?; + + // Receive response using raw recv syscall to avoid netlink-sys buffer issues + let mut response_buf = vec![0u8; 8192]; + + let fd = socket.as_raw_fd(); + let bytes_read = unsafe { + libc::recv( + fd, + response_buf.as_mut_ptr() as *mut libc::c_void, + response_buf.len(), + 0, + ) + }; + + if bytes_read < 0 { + return Err(io::Error::last_os_error()); + } + let bytes_read = bytes_read as usize; + debug!("resolve_multicast_group: Received {} bytes", bytes_read); + + if bytes_read == 0 { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "No response received from kernel", + )); + } + + // Parse the response + let response = NetlinkMessage::>::deserialize(&response_buf[..bytes_read]) + .map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Failed to parse response: {:?}", e), + ) + })?; + + match response.payload { + NetlinkPayload::InnerMessage(genlmsg) => { + debug!( + "resolve_multicast_group: Parsing {} attributes", + genlmsg.payload.nlas.len() + ); + // Look for multicast groups + for nla in &genlmsg.payload.nlas { + if let GenlCtrlAttrs::McastGroups(groups) = nla { + debug!( + "resolve_multicast_group: Found {} multicast groups", + groups.len() + ); + for group_nlas in groups { + let mut found_name = None; + let mut 
found_id = None; + + for group_attr in group_nlas { + match group_attr { + McastGrpAttrs::Name(name) => { + debug!( + "resolve_multicast_group: Found group name '{}'", + name + ); + if name == group_name { + found_name = Some(name.clone()); + } + } + McastGrpAttrs::Id(id) => { + debug!("resolve_multicast_group: Found group id {}", id); + found_id = Some(*id); + } + } + } + + if found_name.is_some() && found_id.is_some() { + let group_id = found_id.unwrap(); + debug!( + "resolve_multicast_group: Successfully resolved '{}' to ID {}", + group_name, group_id + ); + return Ok(group_id); + } + } + } + } + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("Multicast group '{}' not found", group_name), + )) + } + NetlinkPayload::Error(err) => Err(io::Error::new( + io::ErrorKind::Other, + format!("Netlink error: {:?}", err), + )), + _ => Err(io::Error::new( + io::ErrorKind::InvalidData, + "Unexpected response type", + )), + } +} diff --git a/crates/countersyncd/src/actor/otel.rs b/crates/countersyncd/src/actor/otel.rs index 2281c80edc..a5d117b5a0 100644 --- a/crates/countersyncd/src/actor/otel.rs +++ b/crates/countersyncd/src/actor/otel.rs @@ -1,16 +1,15 @@ -use std::{sync::Arc, time::Duration, collections::HashMap}; +use std::time::Duration; use tokio::{sync::mpsc::Receiver, sync::oneshot, select}; -use opentelemetry::metrics::MetricsError; use opentelemetry_proto::tonic::{ common::v1::{KeyValue as ProtoKeyValue, AnyValue, any_value::Value, InstrumentationScope}, - metrics::v1::{Metric, Gauge as ProtoGauge, ResourceMetrics, ScopeMetrics, NumberDataPoint}, + metrics::v1::{Metric, Gauge as ProtoGauge, ResourceMetrics, ScopeMetrics}, resource::v1::Resource as ProtoResource, }; use crate::message::{ - saistats::{SAIStats, SAIStatsMessage}, - otel::{OtelMetrics, OtelMetricsMessageExt}, + saistats::SAIStatsMessage, + otel::OtelMetrics, }; -use log::{info, error, debug, warn}; +use log::{info, error, debug}; use opentelemetry_proto::tonic::collector::metrics::v1::metrics_service_client::MetricsServiceClient; use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequest; use tonic::transport::Endpoint; @@ -235,47 +234,6 @@ impl OtelActor { } } - pub fn print_conversion_report(sai_stats: &SAIStats, otel_metrics: &OtelMetrics) { - info!("[Conversion Report] SAI Stats → OpenTelemetry Gauges"); - info!("Conversion timestamp: {}", sai_stats.observation_time); - info!("Input: {} SAI statistics", sai_stats.stats.len()); - info!("Output: {} OpenTelemetry gauges", otel_metrics.len()); - - info!("BEFORE - Original SAI Statistics:"); - for (index, sai_stat) in sai_stats.stats.iter().enumerate().take(10) { - info!( - "[{:2}] Object: {:20} | Type: {:3} | Stat: {:3} | Counter: {:>12}", - index + 1, - sai_stat.object_name, - sai_stat.type_id, - sai_stat.stat_id, - sai_stat.counter - ); - } - - info!("AFTER - Converted OpenTelemetry Gauges:"); - for (index, gauge) in otel_metrics.gauges.iter().enumerate().take(10) { - let data_point = &gauge.data_points[0]; - info!( - "[{:2}] Metric: {:35} | Value: {:>12} | Time: {}ns", - index + 1, - gauge.name, - data_point.value, - data_point.time_unix_nano - ); - - // Show key attributes on the same line - let attrs: Vec = data_point.attributes.iter() - .map(|attr| format!("{}={}", attr.key, attr.value)) - .collect(); - if !attrs.is_empty() { - info!("Attributes: [{}]", attrs.join(", ")); - } - info!("Description: {}", gauge.description); - } - info!("Conversion completed successfully!"); - } - /// Shutdown the actor async fn shutdown(self) { 
info!("Shutting down OtelActor..."); diff --git a/crates/countersyncd/src/main.rs b/crates/countersyncd/src/main.rs index bb39709ded..7b5d54f21f 100644 --- a/crates/countersyncd/src/main.rs +++ b/crates/countersyncd/src/main.rs @@ -227,7 +227,7 @@ async fn main() -> Result<(), Box> { let (stats_report_sender, stats_report_receiver) = channel(args.stats_reporter_capacity); let (counter_db_sender, counter_db_receiver) = channel(args.counter_db_capacity); let (otel_sender, otel_receiver) = channel(args.otel_capacity); - let (otel_shutdown_sender, otel_shutdown_receiver) = tokio::sync::oneshot::channel(); + let (otel_shutdown_sender, _otel_shutdown_receiver) = tokio::sync::oneshot::channel(); // Get netlink family and group configuration from SONiC constants let (family, group) = get_genl_family_group(); diff --git a/crates/countersyncd/src/message/netlink.rs b/crates/countersyncd/src/message/netlink.rs index 6ec1b6ad3c..ae9d4c7146 100644 --- a/crates/countersyncd/src/message/netlink.rs +++ b/crates/countersyncd/src/message/netlink.rs @@ -9,5 +9,6 @@ pub struct SocketConnect { pub enum NetlinkCommand { Close, Reconnect, + SoftReconnect, SocketConnect(SocketConnect), } diff --git a/crates/countersyncd/src/message/otel.rs b/crates/countersyncd/src/message/otel.rs index 3ab8bdf53f..63f532e6c2 100644 --- a/crates/countersyncd/src/message/otel.rs +++ b/crates/countersyncd/src/message/otel.rs @@ -3,13 +3,11 @@ //! This module defines data structures for converting SAI statistics //! to OpenTelemetry gauge format for export to observability systems. -use std::sync::Arc; use crate::message::saistats::{SAIStat, SAIStats}; use opentelemetry_proto::tonic::{ common::v1::{KeyValue as ProtoKeyValue, AnyValue, any_value::Value}, metrics::v1::{NumberDataPoint, number_data_point}, }; -use log::{info, error, debug, warn}; /// OpenTelemetry Gauge representation for SAI statistics /// @@ -171,32 +169,12 @@ impl OtelMetrics { } } -/// Type alias for Arc-wrapped OtelMetrics for efficient sharing -pub type OtelMetricsMessage = Arc; - -/// Extension trait for creating OtelMetricsMessage instances -pub trait OtelMetricsMessageExt { - /// Converts OtelMetrics to Arc-wrapped message - fn into_message(self) -> OtelMetricsMessage; - - /// Creates OtelMetricsMessage from SAI statistics - fn from_sai_stats(sai_stats: &SAIStats) -> OtelMetricsMessage; -} - -impl OtelMetricsMessageExt for OtelMetrics { - fn into_message(self) -> OtelMetricsMessage { - Arc::new(self) - } - - fn from_sai_stats(sai_stats: &SAIStats) -> OtelMetricsMessage { - Arc::new(OtelMetrics::from_sai_stats(sai_stats)) - } -} - #[cfg(test)] mod tests { use super::*; - use crate::message::saistats::{SAIStat, SAIStats, SAIStatsMessageExt}; + use std::sync::Arc; + use crate::message::saistats::{SAIStat, SAIStats}; + use log::{info, debug}; /// Helper function to create test SAI statistics (similar to saistats.rs pattern) fn create_test_sai_stats(observation_time: u64, stat_count: usize) -> SAIStats { @@ -336,11 +314,8 @@ mod tests { fn test_otel_metrics_message_creation() { let sai_stats = create_test_sai_stats(555555, 2); - // Test using into_message() - let otel_metrics = OtelMetrics::from_sai_stats(&sai_stats); - let message1 = otel_metrics.into_message(); - - // Test using from_sai_stats() + // Wrap metrics in Arc manually for sharing scenarios + let message1 = Arc::new(OtelMetrics::from_sai_stats(&sai_stats)); let message2 = OtelMetrics::from_sai_stats(&sai_stats); assert_eq!(message1.service_name, message2.service_name); diff --git 
a/gearsyncd/gearboxparser.cpp b/gearsyncd/gearboxparser.cpp index 879624fd25..1760a449a8 100644 --- a/gearsyncd/gearboxparser.cpp +++ b/gearsyncd/gearboxparser.cpp @@ -158,6 +158,12 @@ bool GearboxParser::parse() attr = std::make_pair("macsec_ipg", std::to_string(val.get())); attrs.push_back(attr); } + if (phy.find("macsec_supported") != phy.end()) + { + val = phy["macsec_supported"]; + attr = std::make_pair("macsec_supported", val.get() ? "true" : "false"); + attrs.push_back(attr); + } if (phy.find("hwinfo") == phy.end()) { SWSS_LOG_ERROR("missing 'hwinfo' field in 'phys' item %d in gearbox configuration", iter); diff --git a/lib/gearboxutils.cpp b/lib/gearboxutils.cpp index bc35ed3456..c987b28bc8 100644 --- a/lib/gearboxutils.cpp +++ b/lib/gearboxutils.cpp @@ -137,6 +137,9 @@ std::map GearboxUtils::loadPhyMap(Table *gearboxTable) { gearbox_phy_t phy = {}; + // default capability if absent + phy.macsec_supported = true; + gearboxTable->get(k, ovalues); for (auto &val : ovalues) { @@ -193,6 +196,10 @@ std::map GearboxUtils::loadPhyMap(Table *gearboxTable) { phy.macsec_ipg = std::stoi(val.second); } + else if (val.first == "macsec_supported") + { + phy.macsec_supported = (val.second == "true"); + } } gearboxPhyMap[phy.phy_id] = phy; } diff --git a/lib/gearboxutils.h b/lib/gearboxutils.h index a239aa3a10..070e30c8a1 100644 --- a/lib/gearboxutils.h +++ b/lib/gearboxutils.h @@ -64,6 +64,7 @@ typedef struct uint32_t bus_id; uint32_t context_id; uint32_t macsec_ipg; + bool macsec_supported; } gearbox_phy_t; typedef struct diff --git a/orchagent/high_frequency_telemetry/hftelorch.cpp b/orchagent/high_frequency_telemetry/hftelorch.cpp index 1765fab05f..94c79e2333 100644 --- a/orchagent/high_frequency_telemetry/hftelorch.cpp +++ b/orchagent/high_frequency_telemetry/hftelorch.cpp @@ -469,44 +469,56 @@ void HFTelOrch::doTask(Consumer &consumer) string key = kfvKey(t); string op = kfvOp(t); - if (table_name == CFG_HIGH_FREQUENCY_TELEMETRY_PROFILE_TABLE_NAME) + try { - if (op == SET_COMMAND) + if (table_name == CFG_HIGH_FREQUENCY_TELEMETRY_PROFILE_TABLE_NAME) { - status = profileTableSet(key, kfvFieldsValues(t)); + if (op == SET_COMMAND) + { + status = profileTableSet(key, kfvFieldsValues(t)); + } + else if (op == DEL_COMMAND) + { + status = profileTableDel(key); + } + else + { + SWSS_LOG_ERROR("Unknown operation type %s\n", op.c_str()); + } } - else if (op == DEL_COMMAND) + else if (table_name == CFG_HIGH_FREQUENCY_TELEMETRY_GROUP_TABLE_NAME) { - status = profileTableDel(key); + auto tokens = tokenize(key, '|'); + if (tokens.size() != 2) + { + SWSS_LOG_THROW("Invalid key %s in the %s", key.c_str(), table_name.c_str()); + } + if (op == SET_COMMAND) + { + status = groupTableSet(tokens[0], tokens[1], kfvFieldsValues(t)); + } + else if (op == DEL_COMMAND) + { + status = groupTableDel(tokens[0], tokens[1]); + } + else + { + SWSS_LOG_ERROR("Unknown operation type %s\n", op.c_str()); + } } else { - SWSS_LOG_ERROR("Unknown operation type %s\n", op.c_str()); + SWSS_LOG_ERROR("Unknown table %s\n", table_name.c_str()); } } - else if (table_name == CFG_HIGH_FREQUENCY_TELEMETRY_GROUP_TABLE_NAME) + catch (const std::exception &e) { - auto tokens = tokenize(key, '|'); - if (tokens.size() != 2) - { - SWSS_LOG_THROW("Invalid key %s in the %s", key.c_str(), table_name.c_str()); - } - if (op == SET_COMMAND) - { - status = groupTableSet(tokens[0], tokens[1], kfvFieldsValues(t)); - } - else if (op == DEL_COMMAND) - { - status = groupTableDel(tokens[0], tokens[1]); - } - else - { - SWSS_LOG_ERROR("Unknown operation type 
%s\n", op.c_str()); - } - } - else - { - SWSS_LOG_ERROR("Unknown table %s\n", table_name.c_str()); + SWSS_LOG_ERROR("Failed to process the task for table %s, key %s, operation %s: %s", + table_name.c_str(), + key.c_str(), + op.c_str(), + e.what()); + status = task_process_status::task_failed; } if (status == task_process_status::task_need_retry) diff --git a/orchagent/high_frequency_telemetry/hftelprofile.cpp b/orchagent/high_frequency_telemetry/hftelprofile.cpp index 2d25f4be85..c453d2af81 100644 --- a/orchagent/high_frequency_telemetry/hftelprofile.cpp +++ b/orchagent/high_frequency_telemetry/hftelprofile.cpp @@ -706,25 +706,38 @@ sai_object_id_t HFTelProfile::getTAMTelTypeObjID(sai_object_type_t object_type) attr.value.s32 = SAI_TAM_TELEMETRY_TYPE_COUNTER_SUBSCRIPTION; attrs.push_back(attr); - attr.id = SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_PORT_STATS; - attr.value.booldata = true; - attrs.push_back(attr); - - attr.id = SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_PORT_STATS_INGRESS; - attr.value.booldata = true; - attrs.push_back(attr); - - attr.id = SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_PORT_STATS_EGRESS; - attr.value.booldata = true; - attrs.push_back(attr); + if (object_type == SAI_OBJECT_TYPE_PORT) + { + attr.id = SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_PORT_STATS; + attr.value.booldata = true; + attrs.push_back(attr); - attr.id = SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_MMU_STATS; - attr.value.booldata = true; - attrs.push_back(attr); + attr.id = SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_PORT_STATS_INGRESS; + attr.value.booldata = true; + attrs.push_back(attr); - attr.id = SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_OUTPUT_QUEUE_STATS; - attr.value.booldata = true; - attrs.push_back(attr); + attr.id = SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_PORT_STATS_EGRESS; + attr.value.booldata = true; + attrs.push_back(attr); + } + else if (object_type == SAI_OBJECT_TYPE_BUFFER_POOL || + object_type == SAI_OBJECT_TYPE_INGRESS_PRIORITY_GROUP) + { + attr.id = SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_MMU_STATS; + attr.value.booldata = true; + attrs.push_back(attr); + } + else if (object_type == SAI_OBJECT_TYPE_QUEUE) + { + attr.id = SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_OUTPUT_QUEUE_STATS; + attr.value.booldata = true; + attrs.push_back(attr); + } + else + { + SWSS_LOG_THROW("Unsupported object type %s for high frequency telemetry", + sai_serialize_object_type(object_type).c_str()); + } attr.id = SAI_TAM_TEL_TYPE_ATTR_MODE ; attr.value.s32 = SAI_TAM_TEL_TYPE_MODE_SINGLE_TYPE; diff --git a/orchagent/intfsorch.cpp b/orchagent/intfsorch.cpp index 867bf044b4..b536160273 100644 --- a/orchagent/intfsorch.cpp +++ b/orchagent/intfsorch.cpp @@ -38,6 +38,7 @@ extern NeighOrch *gNeighOrch; extern string gMySwitchType; extern int32_t gVoqMySwitchId; extern bool gTraditionalFlexCounter; +extern bool isChassisDbInUse(); const int intfsorch_pri = 35; @@ -98,7 +99,7 @@ IntfsOrch::IntfsOrch(DBConnector *db, string tableName, VRFOrch *vrf_orch, DBCon RIF_PLUGIN_FIELD, rifRateSha); - if(gMySwitchType == "voq") + if(isChassisDbInUse()) { //Add subscriber to process VOQ system interface tableName = CHASSIS_APP_SYSTEM_INTERFACE_TABLE_NAME; @@ -1310,7 +1311,7 @@ bool IntfsOrch::addRouterIntfs(sai_object_id_t vrf_id, Port &port, string loopba SWSS_LOG_NOTICE("Create router interface %s MTU %u", port.m_alias.c_str(), port.m_mtu); - if(gMySwitchType == "voq") + if(isChassisDbInUse()) { // Sync the interface of local port/LAG to the SYSTEM_INTERFACE table of CHASSIS_APP_DB voqSyncAddIntf(port.m_alias); @@ -1363,7 +1364,7 @@ bool IntfsOrch::removeRouterIntfs(Port &port) 
SWSS_LOG_NOTICE("Remove router interface for port %s", port.m_alias.c_str()); - if(gMySwitchType == "voq") + if(isChassisDbInUse()) { // Sync the removal of interface of local port/LAG to the SYSTEM_INTERFACE table of CHASSIS_APP_DB voqSyncDelIntf(port.m_alias); diff --git a/orchagent/macsecorch.cpp b/orchagent/macsecorch.cpp index dbce7eb1a8..51febf62ea 100644 --- a/orchagent/macsecorch.cpp +++ b/orchagent/macsecorch.cpp @@ -355,12 +355,25 @@ class MACsecOrchContext { return nullptr; } - if (port->m_line_side_id != SAI_NULL_OBJECT_ID) + + const auto *phy = get_gearbox_phy(); + bool force_npu = true; + if (phy) + { + force_npu = !phy->macsec_supported; + } + + if (!force_npu && port->m_line_side_id != SAI_NULL_OBJECT_ID) { m_port_id = std::make_unique(port->m_line_side_id); } else { + if (force_npu && port->m_line_side_id != SAI_NULL_OBJECT_ID) + { + SWSS_LOG_NOTICE("MACsec: %s -> backend=NPU (phy marked unsupported), using NPU port", + m_port_name ? m_port_name->c_str() : ""); + } m_port_id = std::make_unique(port->m_port_id); } } @@ -381,12 +394,27 @@ class MACsecOrchContext { switchId = port->m_switch_id; } + if (switchId == SAI_NULL_OBJECT_ID) { SWSS_LOG_ERROR("Switch ID cannot be found"); return nullptr; } m_switch_id = std::make_unique(switchId); + + const auto *phy = port ? m_orch->m_port_orch->getGearboxPhy(*port) : nullptr; + bool force_npu = true; + if (phy) + { + force_npu = !phy->macsec_supported; + } + + if (force_npu && (*m_switch_id != gSwitchId)) + { + SWSS_LOG_NOTICE("MACsec: %s -> backend=NPU (phy marked unsupported), override switch to global", + m_port_name ? m_port_name->c_str() : ""); + *m_switch_id = gSwitchId; + } } return m_switch_id.get(); } @@ -2507,28 +2535,32 @@ bool MACsecOrch::deleteMACsecSA(sai_object_id_t sa_id) FlexCounterManager& MACsecOrch::MACsecSaStatManager(MACsecOrchContext &ctx) { - if (ctx.get_gearbox_phy() != nullptr) + const auto *phy = ctx.get_gearbox_phy(); + if (phy && phy->macsec_supported) return m_gb_macsec_sa_stat_manager; return m_macsec_sa_stat_manager; } FlexCounterManager& MACsecOrch::MACsecSaAttrStatManager(MACsecOrchContext &ctx) { - if (ctx.get_gearbox_phy() != nullptr) + const auto *phy = ctx.get_gearbox_phy(); + if (phy && phy->macsec_supported) return m_gb_macsec_sa_attr_manager; return m_macsec_sa_attr_manager; } FlexCounterManager& MACsecOrch::MACsecFlowStatManager(MACsecOrchContext &ctx) { - if (ctx.get_gearbox_phy() != nullptr) + const auto *phy = ctx.get_gearbox_phy(); + if (phy && phy->macsec_supported) return m_gb_macsec_flow_stat_manager; return m_macsec_flow_stat_manager; } Table& MACsecOrch::MACsecCountersMap(MACsecOrchContext &ctx) { - if (ctx.get_gearbox_phy() != nullptr) + const auto *phy = ctx.get_gearbox_phy(); + if (phy && phy->macsec_supported) return m_gb_macsec_counters_map; return m_macsec_counters_map; } diff --git a/orchagent/main.cpp b/orchagent/main.cpp index 8013965801..ce9d5b1d73 100644 --- a/orchagent/main.cpp +++ b/orchagent/main.cpp @@ -17,6 +17,8 @@ extern "C" { #include #include #include +#include +#include #include #include @@ -78,6 +80,12 @@ string gMyHostName = ""; string gMyAsicName = ""; bool gTraditionalFlexCounter = false; uint32_t create_switch_timeout = 0; +bool gMultiAsicVoq = false; + +bool isChassisDbInUse() +{ + return gMultiAsicVoq; +} void usage() { @@ -210,6 +218,18 @@ void getCfgSwitchType(DBConnector *cfgDb, string &switch_type, string &switch_su } +bool isChassisAppDbPresent() +{ + std::ifstream file(SonicDBConfig::DEFAULT_SONIC_DB_CONFIG_FILE); + if (!file.is_open()) return 
false; + + nlohmann::json db_config; + file >> db_config; + + return db_config.contains("DATABASES") && + db_config["DATABASES"].contains("CHASSIS_APP_DB"); +} + bool getSystemPortConfigList(DBConnector *cfgDb, DBConnector *appDb, vector &sysportcfglist) { Table cfgDeviceMetaDataTable(cfgDb, CFG_DEVICE_METADATA_TABLE_NAME); @@ -627,7 +647,18 @@ int main(int argc, char **argv) //Connect to CHASSIS_APP_DB in redis-server in control/supervisor card as per //connection info in database_config.json - chassis_app_db = make_shared("CHASSIS_APP_DB", 0, true); + if (isChassisAppDbPresent()) + { + gMultiAsicVoq = true; + try + { + chassis_app_db = make_shared("CHASSIS_APP_DB", 0, true); + } + catch (const std::exception& e) + { + SWSS_LOG_NOTICE("CHASSIS_APP_DB not available, operating in standalone VOQ mode"); + } + } } else if (gMySwitchType == "fabric") { @@ -877,7 +908,7 @@ int main(int argc, char **argv) } shared_ptr orchDaemon; - + DBConnector *chassis_db = chassis_app_db.get(); /* * Declare shared pointers for dpu specific databases. * These dpu databases exist on the npu for smartswitch. @@ -894,7 +925,7 @@ int main(int argc, char **argv) else if (gMySwitchType != "fabric") { - orchDaemon = make_shared(&appl_db, &config_db, &state_db, chassis_app_db.get(), zmq_server.get()); + orchDaemon = make_shared(&appl_db, &config_db, &state_db, chassis_db, zmq_server.get()); if (gMySwitchType == "voq") { orchDaemon->setFabricEnabled(true); @@ -904,7 +935,7 @@ int main(int argc, char **argv) } else { - orchDaemon = make_shared(&appl_db, &config_db, &state_db, chassis_app_db.get(), zmq_server.get()); + orchDaemon = make_shared(&appl_db, &config_db, &state_db, chassis_db, zmq_server.get()); } if (gRingMode) { diff --git a/orchagent/neighorch.cpp b/orchagent/neighorch.cpp index 88cba35779..1eb425a90a 100644 --- a/orchagent/neighorch.cpp +++ b/orchagent/neighorch.cpp @@ -25,6 +25,8 @@ extern BfdOrch *gBfdOrch; extern size_t gMaxBulkSize; extern string gMyHostName; +extern bool isChassisDbInUse(); + const int neighorch_pri = 30; NeighOrch::NeighOrch(DBConnector *appDb, string tableName, IntfsOrch *intfsOrch, FdbOrch *fdbOrch, PortsOrch *portsOrch, DBConnector *chassisAppDb) : @@ -46,7 +48,7 @@ NeighOrch::NeighOrch(DBConnector *appDb, string tableName, IntfsOrch *intfsOrch, gBfdOrch->attach(this); } - if(gMySwitchType == "voq") + if(isChassisDbInUse()) { //Add subscriber to process VOQ system neigh tableName = CHASSIS_APP_SYSTEM_NEIGH_TABLE_NAME; @@ -794,7 +796,7 @@ bool NeighOrch::getNeighborEntry(const NextHopKey &nexthop, NeighborEntry &neigh { return false; } - if (gMySwitchType == "voq") + if (m_intfsOrch->isRemoteSystemPortIntf(nexthop.alias)) { gPortsOrch->getInbandPort(inbp); assert(inbp.m_alias.length()); @@ -1230,7 +1232,7 @@ bool NeighOrch::addNeighbor(NeighborContext& ctx) NeighborUpdate update = { neighborEntry, macAddress, true }; notify(SUBJECT_TYPE_NEIGH_CHANGE, static_cast(&update)); - if(gMySwitchType == "voq") + if(isChassisDbInUse()) { //Sync the neighbor to add to the CHASSIS_APP_DB voqSyncAddNeigh(alias, ip_address, macAddress, neighbor_entry); @@ -1379,7 +1381,7 @@ bool NeighOrch::removeNeighbor(NeighborContext& ctx, bool disable) NeighborUpdate update = { neighborEntry, MacAddress(), false }; notify(SUBJECT_TYPE_NEIGH_CHANGE, static_cast(&update)); - if(gMySwitchType == "voq") + if(isChassisDbInUse()) { //Sync the neighbor to delete from the CHASSIS_APP_DB voqSyncDelNeigh(alias, ip_address); diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index ab38ec8d81..d9e08e57e7 
100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -177,11 +177,13 @@ bool OrchDaemon::init() TableConnector conf_asic_sensors(m_configDb, CFG_ASIC_SENSORS_TABLE_NAME); TableConnector conf_switch_hash(m_configDb, CFG_SWITCH_HASH_TABLE_NAME); TableConnector conf_switch_trim(m_configDb, CFG_SWITCH_TRIMMING_TABLE_NAME); + TableConnector conf_switch_fast_linkup(m_configDb, CFG_SWITCH_FAST_LINKUP_TABLE_NAME); TableConnector conf_suppress_asic_sdk_health_categories(m_configDb, CFG_SUPPRESS_ASIC_SDK_HEALTH_EVENT_NAME); vector switch_tables = { conf_switch_hash, conf_switch_trim, + conf_switch_fast_linkup, conf_asic_sensors, conf_suppress_asic_sdk_health_categories, app_switch_table diff --git a/orchagent/p4orch/acl_rule_manager.cpp b/orchagent/p4orch/acl_rule_manager.cpp index d04cb14fcf..a2eb8b17ff 100644 --- a/orchagent/p4orch/acl_rule_manager.cpp +++ b/orchagent/p4orch/acl_rule_manager.cpp @@ -860,7 +860,6 @@ ReturnCode AclRuleManager::setMatchValue(const acl_entry_attr_union_t attr_name, case SAI_ACL_ENTRY_ATTR_FIELD_IP_IDENTIFICATION: case SAI_ACL_ENTRY_ATTR_FIELD_OUTER_VLAN_ID: case SAI_ACL_ENTRY_ATTR_FIELD_INNER_VLAN_ID: - case SAI_ACL_ENTRY_ATTR_FIELD_VRF_ID: case SAI_ACL_ENTRY_ATTR_FIELD_INNER_ETHER_TYPE: case SAI_ACL_ENTRY_ATTR_FIELD_INNER_L4_SRC_PORT: case SAI_ACL_ENTRY_ATTR_FIELD_INNER_L4_DST_PORT: { @@ -1030,6 +1029,20 @@ ReturnCode AclRuleManager::setMatchValue(const acl_entry_attr_union_t attr_name, value->aclfield.mask.u32 = 0xFFFFFFFF; break; } + case SAI_ACL_ENTRY_ATTR_FIELD_VRF_ID: { + const std::vector& value_and_mask = + tokenize(attr_value, kDataMaskDelimiter); + const std::string vrf_id = trim(value_and_mask[0]); + if (vrf_id.empty() || !m_vrfOrch->isVRFexists(vrf_id)) { + return ReturnCode(StatusCode::SWSS_RC_NOT_FOUND) + << "VRF ID " << QuotedVar(vrf_id) << " was not found."; + } + value->aclfield.data.oid = m_vrfOrch->getVRFid(vrf_id); + if (value_and_mask.size() > 1) { + SWSS_LOG_INFO("Mask ignored for VRF ID."); + } + break; + } case SAI_ACL_ENTRY_ATTR_FIELD_IPMC_NPU_META_DST_HIT: { const std::vector& value_and_mask = @@ -1399,7 +1412,9 @@ ReturnCode AclRuleManager::setActionValue(const acl_entry_attr_union_t attr_name case SAI_ACL_ENTRY_ATTR_ACTION_SET_DSCP: case SAI_ACL_ENTRY_ATTR_ACTION_SET_ECN: case SAI_ACL_ENTRY_ATTR_ACTION_SET_INNER_VLAN_PRI: - case SAI_ACL_ENTRY_ATTR_ACTION_SET_OUTER_VLAN_PRI: { + case SAI_ACL_ENTRY_ATTR_ACTION_SET_OUTER_VLAN_PRI: + case SAI_ACL_ENTRY_ATTR_ACTION_SET_ACL_META_DATA: { + try { value->aclaction.parameter.u8 = to_uint(attr_value); @@ -1516,7 +1531,7 @@ ReturnCode AclRuleManager::createAclRule(P4AclRule &acl_rule) { SWSS_LOG_ENTER(); - // Track if the entry creats a new counter or meter + // Track if the entry creates a new counter or meter bool created_meter = false; bool created_counter = false; const auto &table_name_and_rule_key = concatTableNameAndRuleKey(acl_rule.acl_table_name, acl_rule.acl_rule_key); diff --git a/orchagent/p4orch/acl_util.cpp b/orchagent/p4orch/acl_util.cpp index 052e31934a..240c6740c7 100644 --- a/orchagent/p4orch/acl_util.cpp +++ b/orchagent/p4orch/acl_util.cpp @@ -929,7 +929,6 @@ bool isDiffMatchFieldValue(const acl_entry_attr_union_t attr_name, const sai_att case SAI_ACL_ENTRY_ATTR_FIELD_IP_IDENTIFICATION: case SAI_ACL_ENTRY_ATTR_FIELD_OUTER_VLAN_ID: case SAI_ACL_ENTRY_ATTR_FIELD_INNER_VLAN_ID: - case SAI_ACL_ENTRY_ATTR_FIELD_VRF_ID: case SAI_ACL_ENTRY_ATTR_FIELD_INNER_ETHER_TYPE: case SAI_ACL_ENTRY_ATTR_FIELD_INNER_L4_SRC_PORT: case SAI_ACL_ENTRY_ATTR_FIELD_INNER_L4_DST_PORT: { @@ 
-955,6 +954,9 @@ bool isDiffMatchFieldValue(const acl_entry_attr_union_t attr_name, const sai_att return memcmp(value.aclfield.data.mac, old_value.aclfield.data.mac, sizeof(sai_mac_t)) || memcmp(value.aclfield.mask.mac, old_value.aclfield.mask.mac, sizeof(sai_mac_t)); } + case SAI_ACL_ENTRY_ATTR_FIELD_VRF_ID: { + return value.aclfield.data.oid != old_value.aclfield.data.oid; + } case SAI_ACL_ENTRY_ATTR_FIELD_IPMC_NPU_META_DST_HIT: { return value.aclfield.data.booldata != old_value.aclfield.data.booldata; diff --git a/orchagent/p4orch/acl_util.h b/orchagent/p4orch/acl_util.h index 652838c130..38b86ba2a5 100644 --- a/orchagent/p4orch/acl_util.h +++ b/orchagent/p4orch/acl_util.h @@ -355,6 +355,7 @@ using P4AclRuleTables = std::map>; #define P4_ACTION_SET_L4_DST_PORT "SAI_ACL_ENTRY_ATTR_ACTION_SET_L4_DST_PORT" #define P4_ACTION_SET_DO_NOT_LEARN "SAI_ACL_ENTRY_ATTR_ACTION_SET_DO_NOT_LEARN" #define P4_ACTION_SET_VRF "SAI_ACL_ENTRY_ATTR_ACTION_SET_VRF" +#define P4_ACTION_SET_ACL_META_DATA "SAI_ACL_ENTRY_ATTR_ACTION_SET_ACL_META_DATA" #define P4_ACTION_SET_QOS_QUEUE "QOS_QUEUE" #define P4_PACKET_ACTION_FORWARD "SAI_PACKET_ACTION_FORWARD" @@ -362,6 +363,9 @@ using P4AclRuleTables = std::map>; #define P4_PACKET_ACTION_COPY "SAI_PACKET_ACTION_COPY" #define P4_PACKET_ACTION_PUNT "SAI_PACKET_ACTION_TRAP" #define P4_PACKET_ACTION_LOG "SAI_PACKET_ACTION_LOG" +#define P4_PACKET_ACTION_COPY_CANCEL "SAI_PACKET_ACTION_COPY_CANCEL" +#define P4_PACKET_ACTION_DENY "SAI_PACKET_ACTION_DENY" + #define P4_PACKET_ACTION_REDIRECT "REDIRECT" @@ -428,7 +432,12 @@ using P4AclRuleTables = std::map>; #define GENL_PACKET_TRAP_GROUP_NAME_PREFIX "trap.group.cpu.queue." #define EMPTY_STRING "" -#define P4_CPU_QUEUE_MAX_NUM 8 + +// TODO : To avoid existing p4 tests failure, extend the queue +// temporarily, should set to 7-14 later. 
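For clarity on the widened range, a small hypothetical sketch of how a cpu_queue value maps to a user-defined trap under the P4_CPU_QUEUE_MIN_NUM/P4_CPU_QUEUE_MAX_NUM defines that follow. The offset arithmetic mirrors the updated acl_manager_test expectation further down in this patch (gUserDefinedTrapStartOid + queue_num - P4_CPU_QUEUE_MIN_NUM + 1), and the range check stands in for the p4orch-side validation that now rejects cpu_queue = 18.

# Illustrative only; constants mirror the defines below, and the base OID is
# the test fixture's gUserDefinedTrapStartOid.
P4_CPU_QUEUE_MIN_NUM = 1    # planned to become 7 once existing p4 tests are updated
P4_CPU_QUEUE_MAX_NUM = 15   # planned to become 14


def user_trap_oid(queue_num, user_defined_trap_start_oid):
    if not (P4_CPU_QUEUE_MIN_NUM <= queue_num <= P4_CPU_QUEUE_MAX_NUM):
        raise ValueError("invalid cpu_queue")  # e.g. "18" is rejected
    return user_defined_trap_start_oid + queue_num - P4_CPU_QUEUE_MIN_NUM + 1

# user_trap_oid(8, base) == base + 8, matching the acl_rate_limit_copy test below.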
+#define P4_CPU_QUEUE_MIN_NUM 1 // 7 +#define P4_CPU_QUEUE_MAX_NUM 15 // 14 + #define IPV6_SINGLE_WORD_BYTES_LENGTH 4 #define BYTE_BITWIDTH 8 @@ -540,7 +549,7 @@ static const acl_table_attr_format_lookup_t aclMatchTableAttrFormatLookup = { {SAI_ACL_TABLE_ATTR_FIELD_IPV6_NEXT_HEADER, Format::HEX_STRING}, {SAI_ACL_TABLE_ATTR_FIELD_ROUTE_DST_USER_META, Format::HEX_STRING}, {SAI_ACL_TABLE_ATTR_FIELD_ACL_USER_META, Format::HEX_STRING}, - {SAI_ACL_TABLE_ATTR_FIELD_VRF_ID, Format::HEX_STRING}, + {SAI_ACL_TABLE_ATTR_FIELD_VRF_ID, Format::STRING}, {SAI_ACL_TABLE_ATTR_FIELD_IPMC_NPU_META_DST_HIT, Format::HEX_STRING}, }; @@ -614,6 +623,8 @@ static const acl_packet_action_lookup_t aclPacketActionLookup = { {P4_PACKET_ACTION_FORWARD, SAI_PACKET_ACTION_FORWARD}, {P4_PACKET_ACTION_DROP, SAI_PACKET_ACTION_DROP}, {P4_PACKET_ACTION_COPY, SAI_PACKET_ACTION_COPY}, {P4_PACKET_ACTION_PUNT, SAI_PACKET_ACTION_TRAP}, {P4_PACKET_ACTION_LOG, SAI_PACKET_ACTION_LOG}, + {P4_PACKET_ACTION_COPY_CANCEL, SAI_PACKET_ACTION_COPY_CANCEL}, + {P4_PACKET_ACTION_DENY, SAI_PACKET_ACTION_DENY}, }; static const acl_rule_attr_lookup_t aclActionLookup = { @@ -643,6 +654,7 @@ static const acl_rule_attr_lookup_t aclActionLookup = { {P4_ACTION_SET_QOS_QUEUE, SAI_ACL_ENTRY_ATTR_ACTION_SET_USER_TRAP_ID}, {P4_ACTION_SET_DO_NOT_LEARN, SAI_ACL_ENTRY_ATTR_ACTION_SET_DO_NOT_LEARN}, {P4_ACTION_SET_VRF, SAI_ACL_ENTRY_ATTR_ACTION_SET_VRF}, + {P4_ACTION_SET_ACL_META_DATA, SAI_ACL_ENTRY_ATTR_ACTION_SET_ACL_META_DATA}, }; static const acl_packet_color_policer_attr_lookup_t aclPacketColorPolicerAttrLookup = { diff --git a/orchagent/p4orch/neighbor_manager.cpp b/orchagent/p4orch/neighbor_manager.cpp index d453f228fc..8651aacd70 100644 --- a/orchagent/p4orch/neighbor_manager.cpp +++ b/orchagent/p4orch/neighbor_manager.cpp @@ -30,7 +30,7 @@ extern CrmOrch *gCrmOrch; namespace { -std::vector getSaiAttrs(const P4NeighborEntry &neighbor_entry) +std::vector prepareSaiAttrs(const P4NeighborEntry &neighbor_entry) { std::vector attrs; sai_attribute_t attr; @@ -62,7 +62,7 @@ P4NeighborEntry::P4NeighborEntry(const std::string &router_interface_id, const s neighbor_key = KeyGenerator::generateNeighborKey(router_intf_id, neighbor_id); } -ReturnCodeOr NeighborManager::getSaiEntry(const P4NeighborEntry &neighbor_entry) +ReturnCodeOr NeighborManager::prepareSaiEntry(const P4NeighborEntry &neighbor_entry) { const std::string &router_intf_key = neighbor_entry.router_intf_key; sai_object_id_t router_intf_oid; @@ -185,8 +185,8 @@ ReturnCode NeighborManager::createNeighbor(P4NeighborEntry &neighbor_entry) << " already exists in centralized map"); } - ASSIGN_OR_RETURN(neighbor_entry.neigh_entry, getSaiEntry(neighbor_entry)); - auto attrs = getSaiAttrs(neighbor_entry); + ASSIGN_OR_RETURN(neighbor_entry.neigh_entry, prepareSaiEntry(neighbor_entry)); + auto attrs = prepareSaiAttrs(neighbor_entry); CHECK_ERROR_AND_LOG_AND_RETURN(sai_neighbor_api->create_neighbor_entry( &neighbor_entry.neigh_entry, static_cast(attrs.size()), attrs.data()), @@ -555,13 +555,13 @@ std::string NeighborManager::verifyStateCache(const P4NeighborAppDbEntry &app_db std::string NeighborManager::verifyStateAsicDb(const P4NeighborEntry *neighbor_entry) { - auto sai_entry_or = getSaiEntry(*neighbor_entry); + auto sai_entry_or = prepareSaiEntry(*neighbor_entry); if (!sai_entry_or.ok()) { return std::string("Failed to get SAI entry: ") + sai_entry_or.status().message(); } sai_neighbor_entry_t sai_entry = *sai_entry_or; - auto attrs = getSaiAttrs(*neighbor_entry); + auto attrs = 
prepareSaiAttrs(*neighbor_entry); std::vector exp = saimeta::SaiAttributeList::serialize_attr_list( SAI_OBJECT_TYPE_NEIGHBOR_ENTRY, (uint32_t)attrs.size(), attrs.data(), /*countOnly=*/false); diff --git a/orchagent/p4orch/neighbor_manager.h b/orchagent/p4orch/neighbor_manager.h index a2dcae132a..df6801828a 100644 --- a/orchagent/p4orch/neighbor_manager.h +++ b/orchagent/p4orch/neighbor_manager.h @@ -69,7 +69,7 @@ class NeighborManager : public ObjectManagerInterface ReturnCode processDeleteRequest(const std::string &neighbor_key); std::string verifyStateCache(const P4NeighborAppDbEntry &app_db_entry, const P4NeighborEntry *neighbor_entry); std::string verifyStateAsicDb(const P4NeighborEntry *neighbor_entry); - ReturnCodeOr getSaiEntry(const P4NeighborEntry &neighbor_entry); + ReturnCodeOr prepareSaiEntry(const P4NeighborEntry &neighbor_entry); P4OidMapper *m_p4OidMapper; P4NeighborTable m_neighborTable; diff --git a/orchagent/p4orch/tests/acl_manager_test.cpp b/orchagent/p4orch/tests/acl_manager_test.cpp index d38221bf8a..63b9dbc1a6 100644 --- a/orchagent/p4orch/tests/acl_manager_test.cpp +++ b/orchagent/p4orch/tests/acl_manager_test.cpp @@ -23,6 +23,7 @@ #include "table.h" #include "tokenize.h" #include "vrforch.h" +#include "logger.h" using ::p4orch::kTableKeyDelimiter; @@ -637,8 +638,7 @@ P4AclTableDefinitionAppDbEntry getDefaultAclTableDefAppDbEntry() app_db_entry.match_field_lookup["inner_vlan_id"] = BuildMatchFieldJsonStrKindSaiField(P4_MATCH_INNER_VLAN_ID); app_db_entry.match_field_lookup["inner_vlan_cfi"] = BuildMatchFieldJsonStrKindSaiField(P4_MATCH_INNER_VLAN_CFI); app_db_entry.match_field_lookup["vrf_id"] = - BuildMatchFieldJsonStrKindSaiField(P4_MATCH_VRF_ID, P4_FORMAT_HEX_STRING, - /*bitwidth=*/16); + BuildMatchFieldJsonStrKindSaiField(P4_MATCH_VRF_ID, P4_FORMAT_STRING); app_db_entry.match_field_lookup["ipmc_table_hit"] = BuildMatchFieldJsonStrKindSaiField(P4_MATCH_IPMC_TABLE_HIT, P4_FORMAT_HEX_STRING, /*bitwidth=*/1); @@ -716,9 +716,34 @@ P4AclTableDefinitionAppDbEntry getDefaultAclTableDefAppDbEntry() app_db_entry.action_field_lookup["do_not_learn"].push_back( {.sai_action = P4_ACTION_SET_DO_NOT_LEARN, .p4_param_name = EMPTY_STRING}); app_db_entry.action_field_lookup["set_vrf"].push_back({.sai_action = P4_ACTION_SET_VRF, .p4_param_name = "vrf"}); + app_db_entry.action_field_lookup["set_metadata"].push_back( + {.sai_action = P4_ACTION_SET_ACL_META_DATA, + .p4_param_name = "acl_metadata"}); app_db_entry.action_field_lookup["qos_queue"].push_back( {.sai_action = P4_ACTION_SET_QOS_QUEUE, .p4_param_name = "cpu_queue"}); + + // action/acl_rate_limit_copy = [ + // {"action":"SAI_PACKET_ACTION_FORWARD","packet_color":"SAI_PACKET_COLOR_GREEN"}, + // {"action":"SAI_PACKET_ACTION_COPY_CANCEL","packet_color":"SAI_PACKET_COLOR_YELLOW"}, + // {"action":"SAI_PACKET_ACTION_COPY_CANCEL","packet_color":"SAI_PACKET_COLOR_RED"}, + // {"action":"QOS_QUEUE","param":"qos_queue"} + // ] + + app_db_entry.packet_action_color_lookup["acl_rate_limit_copy"].push_back( + {.packet_action = P4_PACKET_ACTION_FORWARD, + .packet_color = P4_PACKET_COLOR_GREEN}); + app_db_entry.packet_action_color_lookup["acl_rate_limit_copy"].push_back( + {.packet_action = P4_PACKET_ACTION_COPY_CANCEL, + .packet_color = P4_PACKET_COLOR_YELLOW}); + app_db_entry.packet_action_color_lookup["acl_rate_limit_copy"].push_back( + {.packet_action = P4_PACKET_ACTION_COPY_CANCEL, + .packet_color = P4_PACKET_COLOR_RED}); + app_db_entry.action_field_lookup["acl_rate_limit_copy"].push_back( + {.sai_action = P4_ACTION_SET_QOS_QUEUE, 
.p4_param_name = "qos_queue"}); + + + // "action/acl_trap" = [ // {"action": "SAI_PACKET_ACTION_TRAP", "packet_color": // "SAI_PACKET_COLOR_GREEN"}, @@ -2721,6 +2746,14 @@ TEST_F(AclManagerTest, CreateAclRuleWithInvalidSaiMatchFails) app_db_entry.match_fvs.erase("arp_tpa"); acl_table->udf_group_attr_index_lookup = saved_udf_group_attr_index_lookup; + // ACL rule has invalid VRF ID. + app_db_entry.match_fvs["vrf_id"] = "invalid"; + acl_rule_key = + KeyGenerator::generateAclRuleKey(app_db_entry.match_fvs, "100"); + EXPECT_EQ(StatusCode::SWSS_RC_NOT_FOUND, + ProcessAddRuleRequest(acl_rule_key, app_db_entry)); + app_db_entry.match_fvs.erase("vrf_id"); + // ACL rule has undefined match field app_db_entry.match_fvs["undefined"] = "1"; acl_rule_key = KeyGenerator::generateAclRuleKey(app_db_entry.match_fvs, "100"); @@ -2800,7 +2833,7 @@ TEST_F(AclManagerTest, AclRuleWithValidMatchFields) app_db_entry.match_fvs["inner_vlan_pri"] = "200"; app_db_entry.match_fvs["inner_vlan_id"] = "200"; app_db_entry.match_fvs["inner_vlan_cfi"] = "200"; - app_db_entry.match_fvs["vrf_id"] = "0x777"; + app_db_entry.match_fvs["vrf_id"] = gVrfName; app_db_entry.match_fvs["ipmc_table_hit"] = "0x1"; const auto &acl_rule_key = KeyGenerator::generateAclRuleKey(app_db_entry.match_fvs, "100"); @@ -2898,8 +2931,9 @@ TEST_F(AclManagerTest, AclRuleWithValidMatchFields) EXPECT_EQ(SAI_ACL_IP_FRAG_HEAD, acl_rule->match_fvs[SAI_ACL_ENTRY_ATTR_FIELD_ACL_IP_FRAG].aclfield.data.u32); EXPECT_EQ(SAI_PACKET_VLAN_SINGLE_OUTER_TAG, acl_rule->match_fvs[SAI_ACL_ENTRY_ATTR_FIELD_PACKET_VLAN].aclfield.data.u32); - EXPECT_EQ(0x777, acl_rule->match_fvs[SAI_ACL_ENTRY_ATTR_FIELD_VRF_ID].aclfield.data.u16); - EXPECT_EQ(0xFFFF, acl_rule->match_fvs[SAI_ACL_ENTRY_ATTR_FIELD_VRF_ID].aclfield.mask.u16); + EXPECT_EQ( + gVrfOid, + acl_rule->match_fvs[SAI_ACL_ENTRY_ATTR_FIELD_VRF_ID].aclfield.data.oid); EXPECT_EQ(true, acl_rule->match_fvs[SAI_ACL_ENTRY_ATTR_FIELD_IPMC_NPU_META_DST_HIT] .aclfield.data.booldata); @@ -2958,6 +2992,124 @@ TEST_F(AclManagerTest, AclRuleWithColorPacketActionsButNoRateLimit) acl_rule->action_fvs[SAI_ACL_ENTRY_ATTR_ACTION_SET_USER_TRAP_ID].aclaction.parameter.oid); } +TEST_F(AclManagerTest, AclRuleWithColorPacketActionsButWithRateLimit) { + ASSERT_NO_FATAL_FAILURE(AddDefaultIngressTable()); + + // Create app_db_entry with color packet action, but no rate limit attributes + P4AclRuleAppDbEntry app_db_entry; + app_db_entry.acl_table_name = kAclIngressTableName; + app_db_entry.priority = 100; + // ACL rule match fields + app_db_entry.match_fvs["ether_type"] = "0x0800"; + app_db_entry.match_fvs["ipv6_dst"] = "fdf8:f53b:82e4::53"; + app_db_entry.match_fvs["ether_dst"] = "AA:BB:CC:DD:EE:FF"; + app_db_entry.match_fvs["ether_src"] = "AA:BB:CC:DD:EE:FF"; + app_db_entry.match_fvs["ipv6_next_header"] = "1"; + app_db_entry.match_fvs["src_ipv6_64bit"] = "fdf8:f53b:82e4::"; + app_db_entry.match_fvs["arp_tpa"] = "0xff112231"; + app_db_entry.match_fvs["udf2"] = "0x9876 & 0xAAAA"; + app_db_entry.db_key = + "ACL_PUNT_TABLE:{\"match/ether_type\": \"0x0800\",\"match/ipv6_dst\": " + "\"fdf8:f53b:82e4::53\",\"match/ether_dst\": \"AA:BB:CC:DD:EE:FF\", " + "\"match/ether_src\": \"AA:BB:CC:DD:EE:FF\", \"match/ipv6_next_header\": " + "\"1\", \"match/src_ipv6_64bit\": " + "\"fdf8:f53b:82e4::\",\"match/arp_tpa\": \"0xff112231\",\"match/udf2\": " + "\"0x9876 & 0xAAAA\",\"priority\":100}"; + + const auto& acl_rule_key = + KeyGenerator::generateAclRuleKey(app_db_entry.match_fvs, "100"); + + // Set user defined trap for QOS_QUEUE, and color packet actions in meter 
+ int queue_num = 8; + app_db_entry.action = "acl_rate_limit_copy"; + app_db_entry.action_param_fvs["qos_queue"] = std::to_string(queue_num); + // Install rule + EXPECT_CALL(mock_sai_acl_, create_acl_entry(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<0>(kAclIngressRuleOid1), + Return(SAI_STATUS_SUCCESS))); + + EXPECT_CALL(mock_sai_acl_, create_acl_counter(_, _, _, _)) + .WillOnce(Return(SAI_STATUS_SUCCESS)); + EXPECT_CALL( + mock_sai_policer_, + create_policer( + _, Eq(gSwitchId), Eq(9), + Truly(std::bind(MatchSaiPolicerAttribute, 9, SAI_METER_TYPE_PACKETS, + SAI_PACKET_ACTION_FORWARD, + SAI_PACKET_ACTION_COPY_CANCEL, + SAI_PACKET_ACTION_COPY_CANCEL, + 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, + std::placeholders::_1)))) + .WillOnce( + DoAll(SetArgPointee<0>(kAclMeterOid1), Return(SAI_STATUS_SUCCESS))); + EXPECT_EQ(StatusCode::SWSS_RC_SUCCESS, + ProcessAddRuleRequest(acl_rule_key, app_db_entry)); + auto acl_rule = GetAclRule(kAclIngressTableName, acl_rule_key); + ASSERT_NE(nullptr, acl_rule); + // Check action field value + EXPECT_EQ(gUserDefinedTrapStartOid + queue_num - P4_CPU_QUEUE_MIN_NUM + 1, + acl_rule->action_fvs[SAI_ACL_ENTRY_ATTR_ACTION_SET_USER_TRAP_ID] + .aclaction.parameter.oid); +} + +TEST_F(AclManagerTest, AclRuleWithMockedPacketAction) { + ASSERT_NO_FATAL_FAILURE(AddDefaultIngressTable()); + auto app_db_entry = getDefaultAclRuleAppDbEntryWithoutAction(); + const auto& acl_rule_key = + KeyGenerator::generateAclRuleKey(app_db_entry.match_fvs, "100"); + + // set packet action + app_db_entry.action = "set_packet_action"; + app_db_entry.action_param_fvs["packet_action"] = + "SAI_PACKET_ACTION_COPY_CANCEL"; + + // Install rule + EXPECT_CALL(mock_sai_acl_, create_acl_entry(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<0>(kAclIngressRuleOid1), + Return(SAI_STATUS_SUCCESS))); + EXPECT_CALL(mock_sai_acl_, create_acl_counter(_, _, _, _)) + .WillOnce(Return(SAI_STATUS_SUCCESS)); + EXPECT_CALL(mock_sai_policer_, create_policer(_, _, _, _)) + .WillOnce( + DoAll(SetArgPointee<0>(kAclMeterOid1), Return(SAI_STATUS_SUCCESS))); + EXPECT_EQ(StatusCode::SWSS_RC_SUCCESS, + ProcessAddRuleRequest(acl_rule_key, app_db_entry)); + auto* acl_rule = GetAclRule(kAclIngressTableName, acl_rule_key); + ASSERT_NE(nullptr, acl_rule); + + // Check action field value + EXPECT_EQ(SAI_PACKET_ACTION_COPY_CANCEL, + acl_rule->action_fvs[SAI_ACL_ENTRY_ATTR_ACTION_PACKET_ACTION] + .aclaction.parameter.s32); + + // update packet action + app_db_entry.action_param_fvs["packet_action"] = "SAI_PACKET_ACTION_DENY"; + EXPECT_CALL(mock_sai_acl_, + set_acl_entry_attribute(Eq(kAclIngressRuleOid1), _)) + .WillOnce(Return(SAI_STATUS_SUCCESS)); + EXPECT_EQ(StatusCode::SWSS_RC_SUCCESS, + ProcessUpdateRuleRequest(app_db_entry, *acl_rule)); + acl_rule = GetAclRule(kAclIngressTableName, acl_rule_key); + ASSERT_NE(nullptr, acl_rule); + + // Check action field value + EXPECT_EQ(SAI_PACKET_ACTION_DENY, + acl_rule->action_fvs[SAI_ACL_ENTRY_ATTR_ACTION_PACKET_ACTION] + .aclaction.parameter.s32); + + // Remove rule + EXPECT_CALL(mock_sai_acl_, remove_acl_entry(Eq(kAclIngressRuleOid1))) + .WillOnce(Return(SAI_STATUS_SUCCESS)); + EXPECT_CALL(mock_sai_acl_, remove_acl_counter(_)) + .WillOnce(Return(SAI_STATUS_SUCCESS)); + EXPECT_CALL(mock_sai_policer_, remove_policer(Eq(kAclMeterOid1))) + .WillOnce(Return(SAI_STATUS_SUCCESS)); + EXPECT_EQ(StatusCode::SWSS_RC_SUCCESS, + ProcessDeleteRuleRequest(kAclIngressTableName, acl_rule_key)); + EXPECT_EQ(nullptr, GetAclRule(kAclIngressTableName, acl_rule_key)); +} + + #pragma GCC diagnostic warning 
"-Wdisabled-optimization" TEST_F(AclManagerTest, AclRuleWithValidAction) @@ -4285,7 +4437,7 @@ TEST_F(AclManagerTest, CreateAclRuleWithInvalidActionFails) app_db_entry.action_param_fvs.erase("target"); // Invalid cpu queue number app_db_entry.action = "qos_queue"; - app_db_entry.action_param_fvs["cpu_queue"] = "10"; + app_db_entry.action_param_fvs["cpu_queue"] = "18"; EXPECT_EQ(StatusCode::SWSS_RC_INVALID_PARAM, ProcessAddRuleRequest(acl_rule_key, app_db_entry)); app_db_entry.action_param_fvs["cpu_queue"] = "invalid"; EXPECT_EQ(StatusCode::SWSS_RC_INVALID_PARAM, ProcessAddRuleRequest(acl_rule_key, app_db_entry)); @@ -5155,12 +5307,13 @@ TEST_F(AclManagerTest, AclRuleVerifyStateTest) attributes.push_back(swss::FieldValueTuple{"meter/pir", "200"}); attributes.push_back(swss::FieldValueTuple{"meter/pburst", "200"}); attributes.push_back(swss::FieldValueTuple{"controller_metadata", "..."}); - const auto &acl_rule_json_key = "{\"match/ether_type\":\"0x0800\",\"match/" - "ipv6_dst\":\"fdf8:f53b:82e4::53 & " - "fdf8:f53b:82e4::53\",\"match/arp_tpa\": \"0xff112231\", " - "\"match/in_ports\": \"Ethernet1,Ethernet2\", \"match/out_ports\": " - "\"Ethernet4,Ethernet5\", \"priority\":15,\"match/ipmc_table_hit\":" - "\"0x1\"}"; + const auto& acl_rule_json_key = + "{\"match/ether_type\":\"0x0800\",\"match/" + "ipv6_dst\":\"fdf8:f53b:82e4::53 & " + "fdf8:f53b:82e4::53\",\"match/arp_tpa\": \"0xff112231\", " + "\"match/in_ports\": \"Ethernet1,Ethernet2\", \"match/out_ports\": " + "\"Ethernet4,Ethernet5\", \"priority\":15,\"match/ipmc_table_hit\":" + "\"0x1\",\"match/vrf_id\":\"b4-traffic\"}"; const auto &rule_tuple_key = std::string(kAclIngressTableName) + kTableKeyDelimiter + acl_rule_json_key; EnqueueRuleTuple(std::string(kAclIngressTableName), swss::KeyOpFieldsValuesTuple({rule_tuple_key, SET_COMMAND, attributes})); @@ -5182,21 +5335,37 @@ TEST_F(AclManagerTest, AclRuleVerifyStateTest) table.set( "SAI_OBJECT_TYPE_ACL_ENTRY:oid:0x3e9", std::vector{ - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_TABLE_ID", "oid:0x7000000000606"}, + swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_TABLE_ID", + "oid:0x7000000000606"}, swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_PRIORITY", "15"}, swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_ADMIN_STATE", "true"}, - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_FIELD_DST_IPV6", "fdf8:f53b:82e4::53&mask:fdf8:f53b:82e4::53"}, - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_FIELD_ETHER_TYPE", "2048&mask:0xffff"}, - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_FIELD_ACL_IP_TYPE", - "SAI_ACL_IP_TYPE_ANY&mask:0xffffffffffffffff"}, - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_USER_DEFINED_FIELD_GROUP_MIN", "2:255,17&mask:2:0xff,0xff"}, - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_FIELD_IPMC_NPU_META_DST_HIT", "true"}, - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_USER_DEFINED_FIELD_GROUP_1", "2:34,49&mask:2:0xff,0xff"}, - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_FIELD_IN_PORTS", "2:oid:0x112233,oid:0x1fed3"}, - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_FIELD_OUT_PORTS", "2:oid:0x9988,oid:0x56789abcdef"}, - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_ACTION_MIRROR_INGRESS", "1:oid:0x2329"}, - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_ACTION_SET_POLICER", "oid:0x7d1"}, - swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_ACTION_COUNTER", "oid:0xbb9"}}); + swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_FIELD_DST_IPV6", + "fdf8:f53b:82e4::53&mask:fdf8:f53b:82e4::53"}, + swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_FIELD_ETHER_TYPE", + "2048&mask:0xffff"}, + swss::FieldValueTuple{ + "SAI_ACL_ENTRY_ATTR_FIELD_ACL_IP_TYPE", + 
"SAI_ACL_IP_TYPE_ANY&mask:0xffffffffffffffff"}, + swss::FieldValueTuple{ + "SAI_ACL_ENTRY_ATTR_USER_DEFINED_FIELD_GROUP_MIN", + "2:255,17&mask:2:0xff,0xff"}, + swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_FIELD_VRF_ID", + "oid:0x6f"}, + swss::FieldValueTuple{ + "SAI_ACL_ENTRY_ATTR_FIELD_IPMC_NPU_META_DST_HIT", "true"}, + swss::FieldValueTuple{ + "SAI_ACL_ENTRY_ATTR_USER_DEFINED_FIELD_GROUP_1", + "2:34,49&mask:2:0xff,0xff"}, + swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_FIELD_IN_PORTS", + "2:oid:0x112233,oid:0x1fed3"}, + swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_FIELD_OUT_PORTS", + "2:oid:0x9988,oid:0x56789abcdef"}, + swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_ACTION_MIRROR_INGRESS", + "1:oid:0x2329"}, + swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_ACTION_SET_POLICER", + "oid:0x7d1"}, + swss::FieldValueTuple{"SAI_ACL_ENTRY_ATTR_ACTION_COUNTER", + "oid:0xbb9"}}); table.set("SAI_OBJECT_TYPE_ACL_COUNTER:oid:0xbb9", std::vector{ swss::FieldValueTuple{"SAI_ACL_COUNTER_ATTR_TABLE_ID", "oid:0x7000000000606"}, @@ -5221,38 +5390,47 @@ TEST_F(AclManagerTest, AclRuleVerifyStateTest) EXPECT_FALSE(VerifyRuleState(std::string(APP_P4RT_TABLE_NAME) + ":invalid", attributes).empty()); EXPECT_FALSE(VerifyRuleState(std::string(APP_P4RT_TABLE_NAME) + ":invalid:invalid", attributes).empty()); EXPECT_FALSE(VerifyRuleState(std::string(APP_P4RT_TABLE_NAME) + ":ACL_PUNT_TABLE:invalid", attributes).empty()); - EXPECT_FALSE(VerifyRuleState(std::string(APP_P4RT_TABLE_NAME) + - ":ACL_PUNT_TABLE:{\"match/ether_type\":\"0x0800\",\"match/" - "ipv6_dst\":\"fdf8:f53b:82e4::53 & " - "fdf8:f53b:82e4::53\",\"priority\":0,\"match/ipmc_table_hit\":" - "\"0x1\"}", - attributes) - .empty()); - EXPECT_FALSE(VerifyRuleState(std::string(APP_P4RT_TABLE_NAME) + - ":ACL_PUNT_TABLE:{\"match/ether_type\":\"0x0800\",\"match/" - "ipv6_dst\":\"127.0.0.1/24\",\"priority\":15," - "\"match/ipmc_table_hit\":\"0x1\"}", - attributes) - .empty()); + EXPECT_FALSE( + VerifyRuleState( + std::string(APP_P4RT_TABLE_NAME) + + ":ACL_PUNT_TABLE:{\"match/ether_type\":\"0x0800\",\"match/" + "ipv6_dst\":\"fdf8:f53b:82e4::53 & " + "fdf8:f53b:82e4::53\",\"priority\":0,\"match/ipmc_table_hit\":" + "\"0x1\",\"match/vrf_id\":\"b4-traffic\"}", + attributes) + .empty()); + EXPECT_FALSE( + VerifyRuleState( + std::string(APP_P4RT_TABLE_NAME) + + ":ACL_PUNT_TABLE:{\"match/ether_type\":\"0x0800\",\"match/" + "ipv6_dst\":\"127.0.0.1/24\",\"priority\":15," + "\"match/ipmc_table_hit\":\"0x1\",\"match/" + "vrf_id\":\"b4-traffic\"}", + attributes) + .empty()); // Verification should fail if entry does not exist. - EXPECT_FALSE(VerifyRuleState(std::string(APP_P4RT_TABLE_NAME) + - ":ACL_PUNT_TABLE:{\"match/ether_type\":\"0x0800\",\"match/" - "ipv6_dst\":\"fdf8:f53b:82e4::54 & " - "fdf8:f53b:82e4::54\",\"priority\":15,\"match/ipmc_table_hit\":" - "\"0x1\"}", - attributes) - .empty()); + EXPECT_FALSE( + VerifyRuleState( + std::string(APP_P4RT_TABLE_NAME) + + ":ACL_PUNT_TABLE:{\"match/ether_type\":\"0x0800\",\"match/" + "ipv6_dst\":\"fdf8:f53b:82e4::54 & " + "fdf8:f53b:82e4::54\",\"priority\":15,\"match/ipmc_table_hit\":" + "\"0x1\",\"match/vrf_id\":\"b4-traffic\"}", + attributes) + .empty()); // Verification should fail with invalid attribute. 
EXPECT_FALSE(VerifyTableState(db_key, std::vector{{kAction, "invalid"}}).empty()); auto *acl_table = GetAclTable(kAclIngressTableName); EXPECT_NE(acl_table, nullptr); - const auto &acl_rule_key = "match/arp_tpa=0xff112231:match/ether_type=0x0800:match/" - "in_ports=Ethernet1,Ethernet2:match/ipmc_table_hit=0x1:" - "match/ipv6_dst=fdf8:f53b:82e4::53 & " - "fdf8:f53b:82e4::53:match/out_ports=Ethernet4,Ethernet5:priority=15"; + const auto& acl_rule_key = + "match/arp_tpa=0xff112231:match/ether_type=0x0800:match/" + "in_ports=Ethernet1,Ethernet2:match/ipmc_table_hit=0x1:" + "match/ipv6_dst=fdf8:f53b:82e4::53 & " + "fdf8:f53b:82e4::53:match/out_ports=Ethernet4,Ethernet5:match/" + "vrf_id=b4-traffic:priority=15"; auto *acl_rule = GetAclRule(kAclIngressTableName, acl_rule_key); ASSERT_NE(acl_rule, nullptr); diff --git a/orchagent/p4orch/tests/test_main.cpp b/orchagent/p4orch/tests/test_main.cpp index 0ad50751bc..976cc9ec44 100644 --- a/orchagent/p4orch/tests/test_main.cpp +++ b/orchagent/p4orch/tests/test_main.cpp @@ -38,6 +38,12 @@ sai_object_id_t gUnderlayIfId = 0x101; string gMyAsicName = ""; event_handle_t g_events_handle; +bool gMultiAsicVoq = false; +bool isChassisDbInUse() +{ + return gMultiAsicVoq; +} + #define DEFAULT_BATCH_SIZE 128 #define DEFAULT_MAX_BULK_SIZE 1000 extern int gBatchSize; diff --git a/orchagent/port/portcnt.h b/orchagent/port/portcnt.h index 3442af130f..83b49a3725 100644 --- a/orchagent/port/portcnt.h +++ b/orchagent/port/portcnt.h @@ -98,6 +98,11 @@ class PortConfig final bool is_set = false; } link_training; // Port link training + struct { + bool value; + bool is_set = false; + } fast_linkup; // Port fast link-up enable + struct { std::string value; bool is_set = false; diff --git a/orchagent/port/porthlpr.cpp b/orchagent/port/porthlpr.cpp index aef5dbc3f1..af99456c9d 100644 --- a/orchagent/port/porthlpr.cpp +++ b/orchagent/port/porthlpr.cpp @@ -697,6 +697,32 @@ bool PortHelper::parsePortLinkTraining(PortConfig &port, const std::string &fiel return true; } +bool PortHelper::parsePortFastLinkup(PortConfig &port, const std::string &field, const std::string &value) const +{ + SWSS_LOG_ENTER(); + + if (value.empty()) + { + SWSS_LOG_ERROR("Failed to parse field(%s): empty value is not allowed", field.c_str()); + return false; + } + if (value == "true") + { + port.fast_linkup.value = true; + } + else if (value == "false") + { + port.fast_linkup.value = false; + } + else + { + SWSS_LOG_ERROR("Failed to parse field(%s): invalid value(%s)", field.c_str(), value.c_str()); + return false; + } + port.fast_linkup.is_set = true; + return true; +} + template bool PortHelper::parsePortSerdes(T &serdes, const std::string &field, const std::string &value) const { @@ -1092,6 +1118,13 @@ bool PortHelper::parsePortConfig(PortConfig &port) const return false; } } + else if (field == PORT_FAST_LINKUP) + { + if (!this->parsePortFastLinkup(port, field, value)) + { + return false; + } + } else if (field == PORT_UNRELIABLE_LOS) { if (!this->parsePortUnreliableLos(port, field, value)) diff --git a/orchagent/port/porthlpr.h b/orchagent/port/porthlpr.h index 23e6d51678..292794acf3 100644 --- a/orchagent/port/porthlpr.h +++ b/orchagent/port/porthlpr.h @@ -73,4 +73,5 @@ class PortHelper final bool parsePortPtIntfId(PortConfig &port, const std::string &field, const std::string &value) const; bool parsePortPtTimestampTemplate(PortConfig &port, const std::string &field, const std::string &value) const; bool parsePortMediaType(PortConfig &port, const std::string &field, const std::string &value) const; 
+ bool parsePortFastLinkup(PortConfig &port, const std::string &field, const std::string &value) const; }; diff --git a/orchagent/port/portschema.h b/orchagent/port/portschema.h index 6f42f6957a..f0deba604a 100644 --- a/orchagent/port/portschema.h +++ b/orchagent/port/portschema.h @@ -105,3 +105,4 @@ #define PORT_MODE "mode" #define PORT_UNRELIABLE_LOS "unreliable_los" #define PORT_MEDIA_TYPE "media_type" +#define PORT_FAST_LINKUP "fast_linkup" diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index a8b679444f..85a29b6fa8 100644 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -69,6 +69,8 @@ extern int32_t gVoqMySwitchId; extern string gMyHostName; extern string gMyAsicName; extern event_handle_t g_events_handle; +extern bool isChassisDbInUse(); +extern bool gMultiAsicVoq; // defines ------------------------------------------------------------------------------------------------------------ @@ -969,6 +971,12 @@ PortsOrch::PortsOrch(DBConnector *db, DBConnector *stateDb, vectorquerySwitchCapability(SAI_OBJECT_TYPE_PORT, SAI_PORT_ATTR_FAST_LINKUP_ENABLED)) + { + m_fastLinkupPortAttrSupported = true; + } + if (gMySwitchType != "dpu") { // System Ports not supported on dpu @@ -1007,7 +1015,7 @@ PortsOrch::PortsOrch(DBConnector *db, DBConnector *stateDb, vectorset_port_attribute(port.m_port_id, &attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to set fast_linkup %d on port %s", enable, port.m_alias.c_str()); + return handleSaiSetStatus(SAI_API_PORT, status); + } + SWSS_LOG_INFO("Set port %s fast_linkup %s", port.m_alias.c_str(), enable ? "true" : "false"); + return task_success; +} + task_process_status PortsOrch::setPortUnreliableLOS(Port &port, bool enabled) { SWSS_LOG_ENTER(); @@ -4654,6 +4684,19 @@ void PortsOrch::doPortTask(Consumer &consumer) } } + // Handle fast_linkup boolean + if (pCfg.fast_linkup.is_set) + { + auto status = setPortFastLinkupEnabled(p, pCfg.fast_linkup.value); + // For fast_linkup attribute, task_need_retry is not a meaningful return, so treat any failure as a permanent failure and erase the task. + if (status != task_success) + { + SWSS_LOG_ERROR("Failed to set port %s fast_linkup to %s", p.m_alias.c_str(), pCfg.fast_linkup.value ? 
"true" : "false"); + it = taskMap.erase(it); + continue; + } + } + if (pCfg.link_event_damping_algorithm.is_set) { if (p.m_link_event_damping_algorithm != pCfg.link_event_damping_algorithm.value) @@ -4793,11 +4836,6 @@ void PortsOrch::doPortTask(Consumer &consumer) p.m_alias.c_str(), pCfg.speed.value ); } - else - { - /* Always update Gearbox speed on Gearbox ports */ - setGearboxPortsAttr(p, SAI_PORT_ATTR_SPEED, &pCfg.speed.value); - } } if (pCfg.adv_speeds.is_set) @@ -5085,6 +5123,7 @@ void PortsOrch::doPortTask(Consumer &consumer) p.m_fec_mode = pCfg.fec.value; p.m_override_fec = pCfg.fec.override_fec; + p.m_fec_cfg = true; m_portList[p.m_alias] = p; SWSS_LOG_NOTICE( @@ -6056,7 +6095,7 @@ void PortsOrch::doLagMemberTask(Consumer &consumer) } } - if ((gMySwitchType == "voq") && (port.m_type != Port::SYSTEM)) + if (isChassisDbInUse() && (port.m_type != Port::SYSTEM)) { //Sync to SYSTEM_LAG_MEMBER_TABLE of CHASSIS_APP_DB voqSyncAddLagMember(lag, port, status); @@ -7674,13 +7713,16 @@ bool PortsOrch::addLag(string lag_alias, uint32_t spa_id, int32_t switch_id) switch_id = gVoqMySwitchId; system_lag_alias = gMyHostName + "|" + gMyAsicName + "|" + lag_alias; - // Allocate unique lag id - spa_id = m_lagIdAllocator->lagIdAdd(system_lag_alias, 0); - - if ((int32_t)spa_id <= 0) + if (gMultiAsicVoq) { - SWSS_LOG_ERROR("Failed to allocate unique LAG id for local lag %s rv:%d", lag_alias.c_str(), spa_id); - return false; + // Allocate unique lag id + spa_id = m_lagIdAllocator->lagIdAdd(system_lag_alias, 0); + + if ((int32_t)spa_id <= 0) + { + SWSS_LOG_ERROR("Failed to allocate unique LAG id for local lag %s rv:%d", lag_alias.c_str(), spa_id); + return false; + } } } @@ -7792,7 +7834,7 @@ bool PortsOrch::removeLag(Port lag) m_counterLagTable->hdel("", lag.m_alias); - if (gMySwitchType == "voq") + if (isChassisDbInUse()) { // Free the lag id, if this is local LAG @@ -7905,7 +7947,7 @@ bool PortsOrch::addLagMember(Port &lag, Port &port, string member_status) LagMemberUpdate update = { lag, port, true }; notify(SUBJECT_TYPE_LAG_MEMBER_CHANGE, static_cast(&update)); - if (gMySwitchType == "voq") + if (isChassisDbInUse()) { //Sync to SYSTEM_LAG_MEMBER_TABLE of CHASSIS_APP_DB voqSyncAddLagMember(lag, port, member_status); @@ -7953,7 +7995,7 @@ bool PortsOrch::removeLagMember(Port &lag, Port &port) LagMemberUpdate update = { lag, port, false }; notify(SUBJECT_TYPE_LAG_MEMBER_CHANGE, static_cast(&update)); - if (gMySwitchType == "voq") + if (isChassisDbInUse()) { //Sync to SYSTEM_LAG_MEMBER_TABLE of CHASSIS_APP_DB voqSyncDelLagMember(lag, port); @@ -9190,7 +9232,7 @@ void PortsOrch::updatePortOperStatus(Port &port, sai_port_oper_status_t status) } } - if(gMySwitchType == "voq") + if(isChassisDbInUse()) { if (gIntfsOrch->isLocalSystemPortIntf(port.m_alias)) { @@ -10444,7 +10486,8 @@ void PortsOrch::voqSyncAddLag (Port &lag) // Sync only local lag add to CHASSIS_APP_DB - if (switch_id != gVoqMySwitchId) + if (switch_id != gVoqMySwitchId || + !gMultiAsicVoq) { return; } diff --git a/orchagent/portsorch.h b/orchagent/portsorch.h index e7873eeb1e..66cb2c55ef 100644 --- a/orchagent/portsorch.h +++ b/orchagent/portsorch.h @@ -264,6 +264,7 @@ class PortsOrch : public Orch, public Subject bool setPortPtIntfId(const Port& port, sai_uint16_t intf_id); bool setPortPtTimestampTemplate(const Port& port, sai_port_path_tracing_timestamp_type_t ts_type); + task_process_status setPortFastLinkupEnabled(Port &port, bool enable); private: unique_ptr m_counterNameMapUpdater; @@ -373,6 +374,7 @@ class PortsOrch : public Orch, 
public Subject bool saiHwTxSignalSupported = false; bool saiTxReadyNotifySupported = false; bool m_supportsHostIfTxQueue = false; + bool m_fastLinkupPortAttrSupported = false; swss::SelectableTimer *m_port_state_poller = nullptr; diff --git a/orchagent/switchorch.cpp b/orchagent/switchorch.cpp index 9ed36e9a36..b3a5afac60 100644 --- a/orchagent/switchorch.cpp +++ b/orchagent/switchorch.cpp @@ -167,6 +167,7 @@ SwitchOrch::SwitchOrch(DBConnector *db, vector& connectors, Tabl querySwitchPortMirrorCapability(); querySwitchHashDefaults(); setSwitchIcmpOffloadCapability(); + setFastLinkupCapability(); auto executorT = new ExecutableTimer(m_sensorsPollerTimer, this, "ASIC_SENSORS_POLL_TIMER"); Orch::addExecutor(executorT); @@ -1476,6 +1477,10 @@ void SwitchOrch::doTask(Consumer &consumer) { doCfgSwitchTrimmingTableTask(consumer); } + else if (tableName == CFG_SWITCH_FAST_LINKUP_TABLE_NAME) + { + doCfgSwitchFastLinkupTableTask(consumer); + } else if (tableName == CFG_SUPPRESS_ASIC_SDK_HEALTH_EVENT_NAME) { doCfgSuppressAsicSdkHealthEventTableTask(consumer); @@ -2051,6 +2056,190 @@ bool SwitchOrch::querySwitchCapability(sai_object_type_t sai_object, sai_attr_id } } +void SwitchOrch::setFastLinkupCapability() +{ + SWSS_LOG_ENTER(); + + std::vector fvVector; + + // Determine support by checking create/set capability on polling time attribute (enabled in real SAI) + bool supported = querySwitchCapability(SAI_OBJECT_TYPE_SWITCH, SAI_SWITCH_ATTR_FAST_LINKUP_POLLING_TIMEOUT); + m_fastLinkupCap.supported = supported; + + if (!supported) + { + fvVector.emplace_back(SWITCH_CAPABILITY_TABLE_FAST_LINKUP_CAPABLE, "false"); + set_switch_capability(fvVector); + return; + } + fvVector.emplace_back(SWITCH_CAPABILITY_TABLE_FAST_LINKUP_CAPABLE, "true"); + + // Query allowed ranges if supported by SAI + sai_attribute_t attr; + attr.id = SAI_SWITCH_ATTR_FAST_LINKUP_POLLING_TIMEOUT_RANGE; + sai_status_t status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); + if (status == SAI_STATUS_SUCCESS) + { + m_fastLinkupCap.has_ranges = true; + m_fastLinkupCap.polling_min = attr.value.u16range.min; + m_fastLinkupCap.polling_max = attr.value.u16range.max; + fvVector.emplace_back( + SWITCH_CAPABILITY_TABLE_FAST_LINKUP_POLLING_TIMER_RANGE, + to_string(m_fastLinkupCap.polling_min) + "," + to_string(m_fastLinkupCap.polling_max)); + } + else + { + SWSS_LOG_ERROR("Failed to get fast linkup polling range: %s", sai_serialize_status(status).c_str()); + } + + attr.id = SAI_SWITCH_ATTR_FAST_LINKUP_GUARD_TIMEOUT_RANGE; + status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); + if (status == SAI_STATUS_SUCCESS) + { + m_fastLinkupCap.has_ranges = true; + m_fastLinkupCap.guard_min = attr.value.u16range.min; + m_fastLinkupCap.guard_max = attr.value.u16range.max; + fvVector.emplace_back( + SWITCH_CAPABILITY_TABLE_FAST_LINKUP_GUARD_TIMER_RANGE, + to_string(m_fastLinkupCap.guard_min) + "," + to_string(m_fastLinkupCap.guard_max)); + } + else + { + SWSS_LOG_ERROR("Failed to get fast linkup guard range: %s", sai_serialize_status(status).c_str()); + } + set_switch_capability(fvVector); +} + +bool SwitchOrch::setSwitchFastLinkup(const FastLinkupConfig &cfg) +{ + SWSS_LOG_ENTER(); + + if (!m_fastLinkupCap.supported) + { + SWSS_LOG_NOTICE("Fast link-up is not supported on this platform"); + return false; + } + + // Validate ranges if known + if (cfg.has_polling && m_fastLinkupCap.has_ranges) + { + if (cfg.polling_time < m_fastLinkupCap.polling_min || cfg.polling_time > m_fastLinkupCap.polling_max) + { + SWSS_LOG_NOTICE("Invalid 
polling_time %u; allowed [%u,%u]", cfg.polling_time, m_fastLinkupCap.polling_min, m_fastLinkupCap.polling_max); + return false; + } + } + if (cfg.has_guard && m_fastLinkupCap.has_ranges) + { + if (cfg.guard_time < m_fastLinkupCap.guard_min || cfg.guard_time > m_fastLinkupCap.guard_max) + { + SWSS_LOG_NOTICE("Invalid guard_time %u; allowed [%u,%u]", cfg.guard_time, m_fastLinkupCap.guard_min, m_fastLinkupCap.guard_max); + return false; + } + } + + // Apply attributes conditionally + sai_status_t status; + if (cfg.has_polling) + { + sai_attribute_t attr; + attr.id = SAI_SWITCH_ATTR_FAST_LINKUP_POLLING_TIMEOUT; + attr.value.u16 = cfg.polling_time; + status = sai_switch_api->set_switch_attribute(gSwitchId, &attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_NOTICE("Failed to set FAST_LINKUP_POLLING_TIME=%u: %s", cfg.polling_time, sai_serialize_status(status).c_str()); + return false; + } + } + + if (cfg.has_guard) + { + sai_attribute_t attr; + attr.id = SAI_SWITCH_ATTR_FAST_LINKUP_GUARD_TIMEOUT; + attr.value.u8 = cfg.guard_time; + status = sai_switch_api->set_switch_attribute(gSwitchId, &attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_NOTICE("Failed to set FAST_LINKUP_GUARD_TIME=%u: %s", cfg.guard_time, sai_serialize_status(status).c_str()); + return false; + } + } + + if (cfg.has_ber) + { + sai_attribute_t attr; + attr.id = SAI_SWITCH_ATTR_FAST_LINKUP_BER_THRESHOLD; + attr.value.u8 = cfg.ber_threshold; + status = sai_switch_api->set_switch_attribute(gSwitchId, &attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_NOTICE("Failed to set FAST_LINKUP_BER_THRESHOLD=%u: %s", cfg.ber_threshold, sai_serialize_status(status).c_str()); + return false; + } + } + SWSS_LOG_INFO("Fast link-up set: polling_time=%s, guard_time=%s, ber_threshold=%s", + cfg.has_polling ? std::to_string(cfg.polling_time).c_str() : "N/A", + cfg.has_guard ? std::to_string(cfg.guard_time).c_str() : "N/A", + cfg.has_ber ? std::to_string(cfg.ber_threshold).c_str() : "N/A"); + return true; +} + +void SwitchOrch::doCfgSwitchFastLinkupTableTask(Consumer &consumer) +{ + SWSS_LOG_ENTER(); + + auto &map = consumer.m_toSync; + auto it = map.begin(); + + while (it != map.end()) + { + auto keyOpFieldsValues = it->second; + auto key = kfvKey(keyOpFieldsValues); + auto op = kfvOp(keyOpFieldsValues); + + if (op == SET_COMMAND) + { + FastLinkupConfig cfg; + for (const auto &cit : kfvFieldsValues(keyOpFieldsValues)) + { + auto fieldName = fvField(cit); + auto fieldValue = fvValue(cit); + if (fieldName == "polling_time") + { + try { cfg.polling_time = to_uint(fieldValue); cfg.has_polling = true; } + catch (...) { SWSS_LOG_ERROR("Invalid polling_time value %s", fieldValue.c_str()); } + } + else if (fieldName == "guard_time") + { + try { cfg.guard_time = to_uint(fieldValue); cfg.has_guard = true; } + catch (...) { SWSS_LOG_ERROR("Invalid guard_time value %s", fieldValue.c_str()); } + } + else if (fieldName == "ber_threshold") + { + try { cfg.ber_threshold = to_uint(fieldValue); cfg.has_ber = true; } + catch (...) 
{ SWSS_LOG_ERROR("Invalid ber_threshold value %s", fieldValue.c_str()); } + } + else + { + SWSS_LOG_WARN("Unknown field %s in SWITCH_FAST_LINKUP", fieldName.c_str()); + } + } + + if (!setSwitchFastLinkup(cfg)) + { + SWSS_LOG_ERROR("Failed to configure fast link-up from CONFIG_DB"); + } + } + else + { + SWSS_LOG_ERROR("Unsupported operation %s for SWITCH_FAST_LINKUP", op.c_str()); + } + + it = map.erase(it); + } +} + // Bind ACL table (with bind type switch) to switch bool SwitchOrch::bindAclTableToSwitch(acl_stage_type_t stage, sai_object_id_t table_id) { diff --git a/orchagent/switchorch.h b/orchagent/switchorch.h index d55bd72f8b..ffe99987fb 100644 --- a/orchagent/switchorch.h +++ b/orchagent/switchorch.h @@ -21,6 +21,11 @@ #define SWITCH_CAPABILITY_TABLE_PATH_TRACING_CAPABLE "PATH_TRACING_CAPABLE" #define SWITCH_CAPABILITY_TABLE_ICMP_OFFLOAD_CAPABLE "ICMP_OFFLOAD_CAPABLE" +// Fast link-up capabilities +#define SWITCH_CAPABILITY_TABLE_FAST_LINKUP_CAPABLE "FAST_LINKUP_CAPABLE" +#define SWITCH_CAPABILITY_TABLE_FAST_LINKUP_POLLING_TIMER_RANGE "FAST_LINKUP_POLLING_TIMER_RANGE" +#define SWITCH_CAPABILITY_TABLE_FAST_LINKUP_GUARD_TIMER_RANGE "FAST_LINKUP_GUARD_TIMER_RANGE" + #define ASIC_SDK_HEALTH_EVENT_ELIMINATE_INTERVAL 3600 #define SWITCH_CAPABILITY_TABLE_ASIC_SDK_HEALTH_EVENT_CAPABLE "ASIC_SDK_HEALTH_EVENT" #define SWITCH_CAPABILITY_TABLE_REG_FATAL_ASIC_SDK_HEALTH_CATEGORY "REG_FATAL_ASIC_SDK_HEALTH_CATEGORY" @@ -89,6 +94,7 @@ class SwitchOrch : public Orch void doTask(swss::SelectableTimer &timer); void doCfgSwitchHashTableTask(Consumer &consumer); void doCfgSwitchTrimmingTableTask(Consumer &consumer); + void doCfgSwitchFastLinkupTableTask(Consumer &consumer); void doCfgSensorsTableTask(Consumer &consumer); void doCfgSuppressAsicSdkHealthEventTableTask(Consumer &consumer); void doAppSwitchTableTask(Consumer &consumer); @@ -109,6 +115,25 @@ class SwitchOrch : public Orch void querySwitchHashDefaults(); void setSwitchIcmpOffloadCapability(); + // Fast link-up + struct FastLinkupConfig + { + bool has_polling = false; uint16_t polling_time = 0; + bool has_guard = false; uint8_t guard_time = 0; + bool has_ber = false; uint8_t ber_threshold= 0; + }; + + struct FastLinkupCapabilities + { + bool supported = false; + bool has_ranges = false; + uint16_t polling_min = 0, polling_max = 0; + uint16_t guard_min = 0, guard_max = 0; + }; + + void setFastLinkupCapability(); + bool setSwitchFastLinkup(const FastLinkupConfig &cfg); + // Switch trimming bool setSwitchTrimmingSizeSai(const SwitchTrimming &trim) const; bool setSwitchTrimmingDscpModeSai(const SwitchTrimming &trim) const; @@ -195,4 +220,7 @@ class SwitchOrch : public Orch // Switch OA helper SwitchHelper swHlpr; SwitchTrimmingHelper trimHlpr; + + // Fast link-up cache + FastLinkupCapabilities m_fastLinkupCap; }; diff --git a/tests/conftest.py b/tests/conftest.py index 89420cd74c..527f4b59b3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -44,6 +44,8 @@ # Voq asics will have 16 fabric ports created (defined in Azure/sonic-buildimage#7629). 
FABRIC_NUM_PORTS = 16 +SINGLE_ASIC_VOQ_FS = "single_asic_voq_fs" + def ensure_system(cmd): rc, output = subprocess.getstatusoutput(cmd) if rc: @@ -112,6 +114,12 @@ def pytest_addoption(parser): default=False, help="Collect the test coverage information") + parser.addoption("--switch-mode", + action="store", + default=None, + type=str, + help="Set switch mode information") + def random_string(size=4, chars=string.ascii_uppercase + string.digits): return "".join(random.choice(chars) for x in range(size)) @@ -294,7 +302,8 @@ def __init__( newctnname: str = None, ctnmounts: Dict[str, str] = None, buffer_model: str = None, - enable_coverage: bool = False + enable_coverage: bool = False, + switch_mode: str = None ): self.basicd = ["redis-server", "rsyslogd"] self.swssd = [ @@ -317,6 +326,7 @@ def __init__( self.vct = vct self.ctn = None self.enable_coverage = enable_coverage + self.switch_mode = switch_mode self.cleanup = not keeptb @@ -583,7 +593,10 @@ def check_swss_ready(self, timeout: int = 300) -> None: self.get_config_db() metadata = self.config_db.get_entry('DEVICE_METADATA|localhost', '') if metadata.get('switch_type', 'npu') in ['voq', 'fabric']: - num_ports = NUM_PORTS + FABRIC_NUM_PORTS + if self.switch_mode and self.switch_mode == SINGLE_ASIC_VOQ_FS: + num_ports = NUM_PORTS + else: + num_ports = NUM_PORTS + FABRIC_NUM_PORTS # Verify that all ports have been initialized and configured app_db = self.get_app_db() @@ -603,8 +616,9 @@ def _polling_function(): # Verify that fabric ports are monitored in STATE_DB if metadata.get('switch_type', 'npu') in ['voq', 'fabric']: - self.get_state_db() - self.state_db.wait_for_n_keys("FABRIC_PORT_TABLE", FABRIC_NUM_PORTS) + if not self.switch_mode or (self.switch_mode and self.switch_mode != SINGLE_ASIC_VOQ_FS): + self.get_state_db() + self.state_db.wait_for_n_keys("FABRIC_PORT_TABLE", FABRIC_NUM_PORTS) def net_cleanup(self) -> None: """Clean up network, remove extra links.""" @@ -1880,6 +1894,7 @@ def manage_dvs(request) -> str: force_recreate = request.config.getoption("--force-recreate-dvs") graceful_stop = request.config.getoption("--graceful-stop") enable_coverage = request.config.getoption("--enable-coverage") + switch_mode = request.config.getoption("--switch-mode") dvs = None curr_dvs_env = [] # lgtm[py/unused-local-variable] @@ -1911,7 +1926,13 @@ def update_dvs(log_path, new_dvs_env=[]): dvs.get_logs() dvs.destroy() - dvs = DockerVirtualSwitch(name, imgname, keeptb, new_dvs_env, log_path, max_cpu, forcedvs, buffer_model = buffer_model, enable_coverage=enable_coverage) + vol = {} + if switch_mode and switch_mode == SINGLE_ASIC_VOQ_FS: + cwd = os.getcwd() + voq_configs = cwd + "/single_asic_voq_fs" + vol[voq_configs] = {"bind": "/usr/share/sonic/single_asic_voq_fs", "mode": "ro"} + + dvs = DockerVirtualSwitch(name, imgname, keeptb, new_dvs_env, log_path, max_cpu, forcedvs, buffer_model = buffer_model, enable_coverage=enable_coverage, ctnmounts=vol, switch_mode=switch_mode) curr_dvs_env = new_dvs_env diff --git a/tests/gearbox.py b/tests/gearbox.py new file mode 100644 index 0000000000..1f98dc1b3a --- /dev/null +++ b/tests/gearbox.py @@ -0,0 +1,105 @@ +""" +Generic helper functions for gearbox testing. + +This module provides reusable utility functions for gearbox-related tests, +including port management and configuration setup. 
+""" + +import json + + +class TestGearboxHelper: + """Helper class for gearbox-related test operations.""" + + @staticmethod + def get_first_gearbox_port(gearbox): + """ + Get the first port from Gearbox object (reads from _GEARBOX_TABLE in APPL_DB). + + Args: + gearbox: Gearbox fixture + + Returns: + tuple: (port_name, phy_id) - First available gearbox port and its PHY ID + """ + assert len(gearbox.interfaces) > 0, "No interfaces found in gearbox" + + # Get first interface + first_idx = next(iter(gearbox.interfaces)) + first_intf = gearbox.interfaces[first_idx] + + port_name = first_intf.get("name") + phy_id = first_intf.get("phy_id") + + assert port_name, "First interface has no 'name' field" + assert phy_id is not None, "First interface has no 'phy_id' field" + + return port_name, phy_id + + @staticmethod + def configure_gearbox_macsec_support(dvs, gearbox, phy_id=None, macsec_supported=None): + """ + Configure MACsec support on a gearbox PHY by modifying gearbox_config.json and restarting DVS. + + This is necessary because: + 1. gearsyncd reads gearbox_config.json only at startup + 2. PortsOrch caches _GEARBOX_TABLE only at startup (initGearbox) + 3. MACsecOrch reads from PortsOrch's cache, not from _GEARBOX_TABLE + 4. Full DVS restart is the only reliable way to reload the configuration + because partial service restarts cause inconsistent port state + + Args: + dvs: Docker Virtual Switch instance + gearbox: Gearbox fixture + phy_id: PHY ID (string, e.g., "1"). If None, uses the first PHY from Gearbox object. + macsec_supported: None (remove field), True, or False + """ + # If phy_id not provided, use the first PHY from Gearbox object + if phy_id is None: + assert len(gearbox.phys) > 0, "No PHYs found in gearbox" + phy_id = next(iter(gearbox.phys)) + print(f"No phy_id provided, using first PHY: {phy_id}") + + # Resolve symlink to get actual config path + config_path = "/usr/share/sonic/hwsku/gearbox_config.json" + rc, actual_path = dvs.runcmd(f"readlink -f {config_path}") + if rc == 0 and actual_path.strip(): + config_path = actual_path.strip() + + # Read current config + rc, config_json = dvs.runcmd(f"cat {config_path}") + assert rc == 0, f"Failed to read gearbox_config.json from {config_path}" + config = json.loads(config_json) + + phy_id = int(phy_id) + + # Find and modify the PHY configuration + phy_found = False + for phy in config.get("phys", []): + if phy.get("phy_id") == phy_id: + phy_found = True + if macsec_supported is None: + # Remove the field if it exists + if "macsec_supported" in phy: + del phy["macsec_supported"] + else: + # Set the field + phy["macsec_supported"] = macsec_supported + break + + assert phy_found, f"PHY {phy_id} not found in gearbox_config.json" + + # Write modified config back using heredoc + config_str = json.dumps(config, indent=2) + heredoc = "__GEARBOX_JSON__" + rc, _ = dvs.runcmd( + "bash -lc 'cat > {path} <<\"{tag}\"\n{payload}\n{tag}\n'".format( + path=config_path, + tag=heredoc, + payload=config_str, + ) + ) + assert rc == 0, f"Failed to write modified config to {config_path}" + + # Restart DVS to reload configuration + dvs.restart() diff --git a/tests/macsec.py b/tests/macsec.py new file mode 100644 index 0000000000..7501f9fb8f --- /dev/null +++ b/tests/macsec.py @@ -0,0 +1,185 @@ +""" +Generic helper functions for MACsec testing. + +This module provides reusable utility functions for MACsec-related tests, +including port configuration, secure channel/association management, and verification. 
+""" + +from swsscommon import swsscommon +from dvslib.dvs_database import DVSDatabase +from test_macsec import WPASupplicantMock + + +class TestMacsecHelper: + """Helper class for MACsec-related test operations.""" + + @staticmethod + def enable_macsec_on_port(dvs, port_name, with_secure_channels=True): + """ + Enable MACsec on a port with optional secure channels and associations. + + Args: + dvs: Docker Virtual Switch instance + port_name: Port name to enable MACsec on + with_secure_channels: If True, create Secure Channels and Associations with static keys + + Returns: + WPASupplicantMock: The WPA supplicant mock instance + """ + wpa = WPASupplicantMock(dvs) + wpa.init_macsec_port(port_name) + + # Configure MACsec port with protection and encryption enabled + wpa.config_macsec_port(port_name, { + "enable": True, + "enable_protect": True, + "enable_encrypt": True, + "send_sci": True, + }) + + # If requested, create Secure Channels and Associations with static keys + if with_secure_channels: + local_mac_address = "00-15-5D-78-FF-C1" + peer_mac_address = "00-15-5D-78-FF-C2" + macsec_port_identifier = 1 + an = 0 # Association Number + sak = "0" * 32 # SAK: 128-bit key (32 hex chars) + auth_key = "0" * 32 # Auth key: 128-bit key (32 hex chars) + packet_number = 1 + ssci = 1 # Short SCI + salt = "0" * 24 # Salt for XPN cipher suites + + # Create Transmit Secure Channel (local) - MUST come first! + wpa.create_transmit_sc( + port_name, + local_mac_address, + macsec_port_identifier) + + # Create Receive Secure Channel (from peer) + wpa.create_receive_sc( + port_name, + peer_mac_address, + macsec_port_identifier) + + # Create Receive Secure Association with static keys + wpa.create_receive_sa( + port_name, + peer_mac_address, + macsec_port_identifier, + an, + sak, + auth_key, + packet_number, + ssci, + salt) + + # Create Transmit Secure Association with static keys + wpa.create_transmit_sa( + port_name, + local_mac_address, + macsec_port_identifier, + an, + sak, + auth_key, + packet_number, + ssci, + salt) + + # Enable Receive SA + wpa.set_enable_receive_sa( + port_name, + peer_mac_address, + macsec_port_identifier, + an, + True) + + # Enable MACsec control + wpa.set_macsec_control(port_name, True) + + # Enable Transmit SA + wpa.set_enable_transmit_sa( + port_name, + local_mac_address, + macsec_port_identifier, + an, + True) + + return wpa + + @staticmethod + def cleanup_macsec(dvs, port_name): + """ + Cleanup MACsec configuration on a port to prevent test pollution. 
+ + Args: + dvs: Docker Virtual Switch instance + port_name: Port name to cleanup + """ + try: + wpa = WPASupplicantMock(dvs) + app_db = dvs.get_app_db() + + # Disable MACsec control first + wpa.set_macsec_control(port_name, False) + + # Delete all SAs for this port (must delete before SCs) + for table in ["MACSEC_EGRESS_SA_TABLE", "MACSEC_INGRESS_SA_TABLE"]: + for key in app_db.get_keys(table): + if key.startswith(f"{port_name}:"): + app_db.delete_entry(table, key) + + # Delete all SCs for this port + for table in ["MACSEC_EGRESS_SC_TABLE", "MACSEC_INGRESS_SC_TABLE"]: + for key in app_db.get_keys(table): + if key.startswith(f"{port_name}:"): + app_db.delete_entry(table, key) + + # Finally delete the MACsec port entry + wpa.deinit_macsec_port(port_name) + + except Exception as e: + print(f"Cleanup encountered error: {e}") + + @staticmethod + def verify_macsec_in_gb_asic_db(dvs, should_exist=True): + """ + Verify MACsec objects exist (or don't exist) in GB_ASIC_DB + + Args: + dvs: Docker Virtual Switch instance + should_exist: True if objects should exist, False otherwise + + Returns: + bool: True if verification passes + """ + + gb_asic_db = DVSDatabase(swsscommon.GB_ASIC_DB, dvs.redis_sock) + + macsec_keys = gb_asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_MACSEC") + + if should_exist: + return len(macsec_keys) > 0 # Should have at least one object + else: + return len(macsec_keys) == 0 # Should have no objects + + @staticmethod + def verify_macsec_in_asic_db(dvs, should_exist=True): + """ + Verify MACsec objects exist (or don't exist) in ASIC_DB (NPU) + + Args: + dvs: Docker Virtual Switch instance + should_exist: True if objects should exist, False otherwise + + Returns: + bool: True if verification passes + """ + asic_db = dvs.get_asic_db() + + macsec_keys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_MACSEC") + + if should_exist: + return len(macsec_keys) > 0 + else: + return len(macsec_keys) == 0 + diff --git a/tests/mock_tests/mock_orchagent_main.cpp b/tests/mock_tests/mock_orchagent_main.cpp index c14753c62a..6fa8b19a8b 100644 --- a/tests/mock_tests/mock_orchagent_main.cpp +++ b/tests/mock_tests/mock_orchagent_main.cpp @@ -24,3 +24,9 @@ sai_redis_communication_mode_t gRedisCommunicationMode = SAI_REDIS_COMMUNICATION VRFOrch *gVrfOrch; void syncd_apply_view() {} + +bool gMultiAsicVoq = false; +bool isChassisDbInUse() +{ + return gMultiAsicVoq; +} diff --git a/tests/p4rt/acl.py b/tests/p4rt/acl.py index 6623bb3fcf..a9e8d51efb 100644 --- a/tests/p4rt/acl.py +++ b/tests/p4rt/acl.py @@ -42,6 +42,7 @@ class P4RtAclTableDefinitionWrapper(util.DBInterface): ACTION_COPY_AND_SET_TC = "action/copy_and_set_tc" ACTION_PUNT_AND_SET_TC = "action/punt_and_set_tc" ACTION_SET_QOS_QUEUE = "action/qos_queue" + ACTION_SET_ACL_RATE_LIMIT_COPY = "action/acl_rate_limit_copy" METER_UNIT = "meter/unit" COUNTER_UNIT = "counter/unit" diff --git a/tests/p4rt/test_p4rt_acl.py b/tests/p4rt/test_p4rt_acl.py index 515354e101..cf010f8cdf 100644 --- a/tests/p4rt/test_p4rt_acl.py +++ b/tests/p4rt/test_p4rt_acl.py @@ -176,6 +176,8 @@ def test_AclRulesAddUpdateDelPass(self, dvs, testlog): punt_and_set_tc = '[{"action":"SAI_PACKET_ACTION_TRAP","packet_color":"SAI_PACKET_COLOR_RED"},{"action":"SAI_ACL_ENTRY_ATTR_ACTION_SET_TC","param":"traffic_class"}]' qos_queue = '[{"action":"SAI_PACKET_ACTION_TRAP"},{"action":"QOS_QUEUE","param":"cpu_queue"}]' + acl_rate_limit_copy = 
'[{"action":"SAI_PACKET_ACTION_FORWARD","packet_color":"SAI_PACKET_COLOR_GREEN"},{"action":"SAI_PACKET_ACTION_COPY_CANCEL","packet_color":"SAI_PACKET_COLOR_YELLOW"},{"action":"SAI_PACKET_ACTION_COPY_CANCEL","packet_color":"SAI_PACKET_COLOR_RED"},{"action":"QOS_QUEUE","param":"qos_queue"}]' + attr_list = [ (self._p4rt_acl_table_definition_obj.STAGE_FIELD, stage), (self._p4rt_acl_table_definition_obj.PRIORITY_FIELD, priority), @@ -201,6 +203,7 @@ def test_AclRulesAddUpdateDelPass(self, dvs, testlog): punt_and_set_tc, ), (self._p4rt_acl_table_definition_obj.ACTION_SET_QOS_QUEUE, qos_queue), + (self._p4rt_acl_table_definition_obj.ACTION_SET_ACL_RATE_LIMIT_COPY, acl_rate_limit_copy), (self._p4rt_acl_table_definition_obj.METER_UNIT, meter_unit), (self._p4rt_acl_table_definition_obj.COUNTER_UNIT, counter_unit), ] @@ -1021,6 +1024,107 @@ def test_AclRulesAddUpdateDelPass(self, dvs, testlog): ] util.verify_attr(fvs, attr_list) + + # update ACL rule 2 with acl_rate_limit_copy action + action = "acl_rate_limit_copy" + + attr_list = [ + (self._p4rt_acl_rule_obj.ACTION, action), + ("param/qos_queue", "7"), + (self._p4rt_acl_rule_obj.METER_CIR, meter_cir), + (self._p4rt_acl_rule_obj.METER_CBURST, meter_cbs), + (self._p4rt_acl_rule_obj.METER_PIR, meter_pir), + (self._p4rt_acl_rule_obj.METER_PBURST, meter_pbs), + ] + + self._p4rt_acl_rule_obj.set_app_db_entry( + table_name_with_rule_key2, attr_list) + util.verify_response( + self.response_consumer, + table_name_with_rule_key2, + attr_list, + "SWSS_RC_SUCCESS", + ) + + # query application database for ACL rules + acl_rules = util.get_keys( + self._p4rt_acl_rule_obj.appl_db, + self._p4rt_acl_rule_obj.APP_DB_TBL_NAME + ":" + table_name, + ) + assert len(acl_rules) == len(original_appl_acl_rules) + 3 + + # query application database for updated ACL rule + (status, fvs) = util.get_key( + self._p4rt_acl_rule_obj.appl_db, + self._p4rt_acl_table_definition_obj.APP_DB_TBL_NAME, + table_name_with_rule_key2, + ) + assert status == True + util.verify_attr(fvs, attr_list) + + # query ASIC database for updated ACL meter + (status, fvs) = util.get_key( + self._p4rt_acl_meter_obj.asic_db, + self._p4rt_acl_meter_obj.ASIC_DB_TBL_NAME, + meter_asic_db_key2, + ) + assert status == True + attr_list = [ + ( + self._p4rt_acl_meter_obj.SAI_ATTR_GREEN_PACKET_ACTION, + "SAI_PACKET_ACTION_FORWARD", + ), + ( + self._p4rt_acl_meter_obj.SAI_ATTR_YELLOW_PACKET_ACTION, + "SAI_PACKET_ACTION_COPY_CANCEL", + ), + ( + self._p4rt_acl_meter_obj.SAI_ATTR_RED_PACKET_ACTION, + "SAI_PACKET_ACTION_COPY_CANCEL", + ), + (self._p4rt_acl_meter_obj.SAI_ATTR_METER_TYPE, "SAI_METER_TYPE_PACKETS"), + (self._p4rt_acl_meter_obj.SAI_ATTR_METER_MODE, "SAI_POLICER_MODE_TR_TCM"), + (self._p4rt_acl_meter_obj.SAI_ATTR_METER_CIR, meter_cir), + (self._p4rt_acl_meter_obj.SAI_ATTR_METER_CBS, meter_cbs), + (self._p4rt_acl_meter_obj.SAI_ATTR_METER_PIR, meter_pir), + (self._p4rt_acl_meter_obj.SAI_ATTR_METER_PBS, meter_pbs), + ] + util.verify_attr(fvs, attr_list) + + # query ASIC database for updated ACL rule + (status, fvs) = util.get_key( + self._p4rt_acl_rule_obj.asic_db, + self._p4rt_acl_rule_obj.ASIC_DB_TBL_NAME, + rule_asic_db_key2, + ) + assert status == True + attr_list = [ + ( + self._p4rt_acl_rule_obj.SAI_ATTR_ACTION_SET_USER_TRAP_ID, + user_trap_asic_db_key, + ), + ( + self._p4rt_acl_rule_obj.SAI_ATTR_ACTION_PACKET_ACTION, + "disabled", + ), + (self._p4rt_acl_rule_obj.SAI_ATTR_MATCH_ETHER_TYPE, "2048&mask:0xffff"), + ( + self._p4rt_acl_rule_obj.SAI_ATTR_MATCH_IP_TYPE, + 
"SAI_ACL_IP_TYPE_IP&mask:0xffffffffffffffff", + ), + ( + self._p4rt_acl_rule_obj.SAI_ATTR_MATCH_DST_MAC, + "AA:BB:CC:DD:EE:FF&mask:FF:FF:FF:FF:FF:FF", + ), + (self._p4rt_acl_rule_obj.SAI_ATTR_TABLE_ID, table_asic_db_key), + (self._p4rt_acl_rule_obj.SAI_ATTR_SET_POLICER, meter_asic_db_key2), + (self._p4rt_acl_rule_obj.SAI_ATTR_COUNTER, counter_asic_db_key2), + (self._p4rt_acl_rule_obj.SAI_ATTR_ADMIN_STATE, "true"), + (self._p4rt_acl_rule_obj.SAI_ATTR_PRIORITY, "100"), + ] + util.verify_attr(fvs, attr_list) + + # remove ACL rule 3 self._p4rt_acl_rule_obj.remove_app_db_entry(table_name_with_rule_key3) util.verify_response( diff --git a/tests/single_asic_voq_fs/default_config.json b/tests/single_asic_voq_fs/default_config.json new file mode 100644 index 0000000000..85292999ee --- /dev/null +++ b/tests/single_asic_voq_fs/default_config.json @@ -0,0 +1,276 @@ +{ + "DEVICE_METADATA": { + "localhost": { + "asic_name" : "Asic0", + "switch_type": "voq", + "switch_id": "0", + "max_cores": "8" + } + }, + "SYSTEM_PORT": { + "VS|Asic0|Cpu0": { + "speed": "10000", + "system_port_id": "0", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "0" + }, + "VS|Asic0|Ethernet0": { + "speed": "40000", + "system_port_id": "1", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "1" + }, + "VS|Asic0|Ethernet4": { + "speed": "40000", + "system_port_id": "2", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "2" + }, + "VS|Asic0|Ethernet8": { + "speed": "40000", + "system_port_id": "3", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "3" + }, + "VS|Asic0|Ethernet12": { + "speed": "40000", + "system_port_id": "4", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "4" + }, + "VS|Asic0|Ethernet16": { + "speed": "40000", + "system_port_id": "5", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "5" + }, + "VS|Asic0|Ethernet20": { + "speed": "40000", + "system_port_id": "6", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "6" + }, + "VS|Asic0|Ethernet24": { + "speed": "40000", + "system_port_id": "7", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "7" + }, + "VS|Asic0|Ethernet28": { + "speed": "40000", + "system_port_id": "8", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "8" + }, + "VS|Asic0|Ethernet32": { + "speed": "40000", + "system_port_id": "9", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "9" + }, + "VS|Asic0|Ethernet36": { + "speed": "40000", + "system_port_id": "10", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "10" + }, + "VS|Asic0|Ethernet40": { + "speed": "40000", + "system_port_id": "11", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "11" + }, + "VS|Asic0|Ethernet44": { + "speed": "40000", + "system_port_id": "12", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "12" + }, + "VS|Asic0|Ethernet48": { + "speed": "40000", + "system_port_id": "13", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "13" + }, + "VS|Asic0|Ethernet52": { + "speed": "40000", + "system_port_id": "14", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "14" + }, + "VS|Asic0|Ethernet56": { + "speed": "40000", + "system_port_id": "15", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "15" + 
}, + "VS|Asic0|Ethernet60": { + "speed": "40000", + "system_port_id": "16", + "switch_id": "0", + "core_index": "0", + "num_voq":8, + "core_port_index": "16" + }, + "VS|Asic0|Ethernet64": { + "speed": "40000", + "system_port_id": "17", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "1" + }, + "VS|Asic0|Ethernet68": { + "speed": "40000", + "system_port_id": "18", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "2" + }, + "VS|Asic0|Ethernet72": { + "speed": "40000", + "system_port_id": "19", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "3" + }, + "VS|Asic0|Ethernet76": { + "speed": "40000", + "system_port_id": "20", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "4" + }, + "VS|Asic0|Ethernet80": { + "speed": "40000", + "system_port_id": "21", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "5" + }, + "VS|Asic0|Ethernet84": { + "speed": "40000", + "system_port_id": "22", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "6" + }, + "VS|Asic0|Ethernet88": { + "speed": "40000", + "system_port_id": "23", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "7" + }, + "VS|Asic0|Ethernet92": { + "speed": "40000", + "system_port_id": "24", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "8" + }, + "VS|Asic0|Ethernet96": { + "speed": "40000", + "system_port_id": "25", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "9" + }, + "VS|Asic0|Ethernet100": { + "speed": "40000", + "system_port_id": "26", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "10" + }, + "VS|Asic0|Ethernet104": { + "speed": "40000", + "system_port_id": "27", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "11" + }, + "VS|Asic0|Ethernet108": { + "speed": "40000", + "system_port_id": "28", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "12" + }, + "VS|Asic0|Ethernet112": { + "speed": "40000", + "system_port_id": "29", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "13" + }, + "VS|Asic0|Ethernet116": { + "speed": "40000", + "system_port_id": "30", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "14" + }, + "VS|Asic0|Ethernet120": { + "speed": "40000", + "system_port_id": "31", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "15" + }, + "VS|Asic0|Ethernet124": { + "speed": "40000", + "system_port_id": "32", + "switch_id": "0", + "core_index": "1", + "num_voq":8, + "core_port_index": "16" + } + } +} diff --git a/tests/test_fast_linkup.py b/tests/test_fast_linkup.py new file mode 100644 index 0000000000..dc8da5513e --- /dev/null +++ b/tests/test_fast_linkup.py @@ -0,0 +1,105 @@ +import time +import pytest + +from swsscommon import swsscommon + + +def set_cfg_entry(db, table, key, pairs): + tbl = swsscommon.ProducerStateTable(db, table) + fvs = swsscommon.FieldValuePairs(pairs) + tbl.set(key, fvs) + time.sleep(1) + + +def get_switch_oid(dvs): + db = swsscommon.DBConnector(swsscommon.ASIC_DB, dvs.redis_sock, 0) + tbl = swsscommon.Table(db, "ASIC_STATE:SAI_OBJECT_TYPE_SWITCH") + entries = list(tbl.getKeys()) + return entries[0] + + +def expect_switch_attrs(dvs, oid, expected): + asic_db = swsscommon.DBConnector(swsscommon.ASIC_DB, dvs.redis_sock, 0) + tbl = swsscommon.Table(asic_db, 
"ASIC_STATE:SAI_OBJECT_TYPE_SWITCH") + dvs.asic_db.wait_for_field_match("ASIC_STATE:SAI_OBJECT_TYPE_SWITCH", oid, expected) + + +class TestFastLinkupSwss(object): + def test_capability_state_db(self, dvs, testlog): + # Verify capability fields exist (at least the CAPABLE key is written) + state_db = swsscommon.DBConnector(swsscommon.STATE_DB, dvs.redis_sock, 0) + cap_tbl = swsscommon.Table(state_db, "SWITCH_CAPABILITY") + status, fvs = cap_tbl.get("switch") + assert status + cap_map = {k: v for k, v in fvs} + assert "FAST_LINKUP_CAPABLE" in cap_map + # Optional ranges + # Do not assert exact values; presence indicates SAI support + # FAST_LINKUP_POLLING_TIMER_RANGE / FAST_LINKUP_GUARD_TIMER_RANGE may be absent on platforms without ranges + + def test_global_config_applies_sai(self, dvs, testlog): + # Apply global config via CONFIG_DB and expect ASIC DB attrs set + app_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, dvs.redis_sock, 0) + set_cfg_entry(app_db, "SWITCH_FAST_LINKUP", "GLOBAL", + [("polling_time", "60"), ("guard_time", "10"), ("ber_threshold", "12")]) + + switch_oid = get_switch_oid(dvs) + # Values are stored as strings in ASIC DB tables + expected = { + 'SAI_SWITCH_ATTR_FAST_LINKUP_POLLING_TIMEOUT': '60', + 'SAI_SWITCH_ATTR_FAST_LINKUP_GUARD_TIMEOUT': '10', + 'SAI_SWITCH_ATTR_FAST_LINKUP_BER_THRESHOLD': '12', + } + expect_switch_attrs(dvs, switch_oid, expected) + + def test_global_config_out_of_range_rejected(self, dvs, testlog): + # If ranges are published, send out-of-range and assert ASIC DB not updated with that value + state_db = swsscommon.DBConnector(swsscommon.STATE_DB, dvs.redis_sock, 0) + cap_tbl = swsscommon.Table(state_db, "SWITCH_CAPABILITY") + status, fvs = cap_tbl.get("switch") + cap_map = {k: v for k, v in fvs} if status else {} + poll_rng = cap_map.get("FAST_LINKUP_POLLING_TIMER_RANGE") + guard_rng = cap_map.get("FAST_LINKUP_GUARD_TIMER_RANGE") + if poll_rng and guard_rng: + poll_min, poll_max = [int(x) for x in poll_rng.split(',')] + guard_min, guard_max = [int(x) for x in guard_rng.split(',')] + + cfg_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, dvs.redis_sock, 0) + # Attempt invalid values (below min) + set_cfg_entry(cfg_db, "SWITCH_FAST_LINKUP", "GLOBAL", + [("polling_time", str(max(poll_min - 1, 0))), ("guard_time", str(max(guard_min - 1, 0)))]) + # Give orch time to process; check ASIC DB does not reflect those values (negative match) + switch_oid = get_switch_oid(dvs) + try: + dvs.asic_db.wait_for_field_negative_match("ASIC_STATE:SAI_OBJECT_TYPE_SWITCH", switch_oid, + {'SAI_SWITCH_ATTR_FAST_LINKUP_POLLING_TIMEOUT': str(max(poll_min - 1, 0))}) + dvs.asic_db.wait_for_field_negative_match("ASIC_STATE:SAI_OBJECT_TYPE_SWITCH", switch_oid, + {'SAI_SWITCH_ATTR_FAST_LINKUP_GUARD_TIMEOUT': str(max(guard_min - 1, 0))}) + except Exception: + # On VS without validation paths, skip strict assertion + pass + + def test_port_fast_linkup_enable(self, dvs, testlog): + # Toggle per-port fast_linkup and validate via ASIC DB when supported + cfg_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, dvs.redis_sock, 0) + port_tbl = swsscommon.Table(cfg_db, "PORT") + + # Read a port key + app_db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0) + app_port_tbl = swsscommon.Table(app_db, "PORT_TABLE") + port_keys = app_port_tbl.getKeys() + assert len(port_keys) > 0 + first_port = port_keys[0] + + # Modify in CONFIG_DB + status, values = port_tbl.get(first_port) + assert status + current = {k: v for k, v in values} + current["fast_linkup"] = "true" + 
port_tbl.set(first_port, swsscommon.FieldValuePairs(list(current.items()))) + time.sleep(1) + current["fast_linkup"] = "false" + port_tbl.set(first_port, swsscommon.FieldValuePairs(list(current.items()))) + time.sleep(1) + + diff --git a/tests/test_hft.py b/tests/test_hft.py index 7a23b11a0a..a533fcbb05 100644 --- a/tests/test_hft.py +++ b/tests/test_hft.py @@ -50,6 +50,19 @@ def create_hft_group_without_fields(self, dvs, profile_name="test", group_name=" fvs = swsscommon.FieldValuePairs([("NULL", "NULL")]) tbl.set(key, fvs) + def create_hft_group_empty_group_name(self, dvs, profile_name="test"): + """Create HFT group in CONFIG_DB with empty group_name (invalid configuration).""" + config_db = swsscommon.DBConnector(4, dvs.redis_sock, 0) + tbl = swsscommon.Table(config_db, "HIGH_FREQUENCY_TELEMETRY_GROUP") + + # Invalid key with only profile_name (no group_name) + key = profile_name + fvs = swsscommon.FieldValuePairs([ + ("object_names", "Ethernet0"), + ("object_counters", "IF_IN_OCTETS") + ]) + tbl.set(key, fvs) + def delete_hft_profile(self, dvs, name="test"): """Delete HFT profile from CONFIG_DB.""" config_db = swsscommon.DBConnector(4, dvs.redis_sock, 0) @@ -63,6 +76,14 @@ def delete_hft_group(self, dvs, profile_name="test", group_name="PORT"): key = f"{profile_name}|{group_name}" tbl._del(key) + def delete_hft_group_empty_group_name(self, dvs, profile_name="test"): + """Delete HFT group with empty group_name from CONFIG_DB.""" + config_db = swsscommon.DBConnector(4, dvs.redis_sock, 0) + tbl = swsscommon.Table(config_db, "HIGH_FREQUENCY_TELEMETRY_GROUP") + # Key with only profile_name (no group_name) + key = profile_name + tbl._del(key) + def get_asic_db_objects(self, dvs): """Get all relevant HFT-related objects from ASIC_STATE DB.""" asic_db = swsscommon.DBConnector(1, dvs.redis_sock, 0) @@ -89,6 +110,8 @@ def get_asic_db_objects(self, dvs): asic_db, "ASIC_STATE:SAI_OBJECT_TYPE_PORT") buffer_pool_tbl = swsscommon.Table( asic_db, "ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL") + queue_tbl = swsscommon.Table( + asic_db, "ASIC_STATE:SAI_OBJECT_TYPE_QUEUE") return { "tam_transport": self._get_table_entries(tam_transport_tbl), @@ -103,7 +126,8 @@ def get_asic_db_objects(self, dvs): hostif_trap_tbl), "host_trap_group": self._get_table_entries(host_trap_group_tbl), "ports": self._get_table_entries(ports_tbl), - "buffer_pool": self._get_table_entries(buffer_pool_tbl) + "buffer_pool": self._get_table_entries(buffer_pool_tbl), + "queues": self._get_table_entries(queue_tbl) } def _get_table_entries(self, table): @@ -165,10 +189,19 @@ def verify_asic_db_objects(self, asic_db, groups=[(1, 1)], watermark_count=0): "SAI_TAM_TELEMETRY_TYPE_COUNTER_SUBSCRIPTION", \ "Expected tam telemetry type to be " \ "SAI_TAM_TELEMETRY_TYPE_COUNTER_SUBSCRIPTION" - assert tam_tel_type[ - "SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_PORT_STATS"] == \ - "true", \ - "Expected tam telemetry to be switch enable port stats" + enable_capability = False + enable_capability = enable_capability or tam_tel_type.get( + "SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_PORT_STATS", "false") == \ + "true" + enable_capability = enable_capability or tam_tel_type.get( + "SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_MMU_STATS", "false") == \ + "true" + enable_capability = enable_capability or tam_tel_type.get( + "SAI_TAM_TEL_TYPE_ATTR_SWITCH_ENABLE_OUTPUT_QUEUE_STATS", "false") == \ + "true" + assert enable_capability, \ + "Expected tam telemetry to have at least one enable " \ + "capability set to true" assert tam_tel_type["SAI_TAM_TEL_TYPE_ATTR_MODE"] == \ 
"SAI_TAM_TEL_TYPE_MODE_SINGLE_TYPE", \ "Expected tam telemetry to be mode single type" @@ -228,8 +261,8 @@ def verify_asic_db_objects(self, asic_db, groups=[(1, 1)], watermark_count=0): # Fix: Use only the object ID subscription_oid = tam_counter_sub[ "SAI_TAM_COUNTER_SUBSCRIPTION_ATTR_OBJECT_ID"] - assert (subscription_oid in asic_db["ports"] or subscription_oid in asic_db["buffer_pool"]), \ - "Expected tam counter subscription to reference port" + assert (subscription_oid in asic_db["ports"] or subscription_oid in asic_db["buffer_pool"] or subscription_oid in asic_db["queues"]), \ + "Expected tam counter subscription to reference port, buffer_pool, or queue" # Only check if we have counter subscriptions if counters_number > 0: @@ -469,6 +502,27 @@ def test_hft_missing_fields_with_disabled_status(self, dvs, testlog): self.delete_hft_group(dvs) self.delete_hft_profile(dvs) + def test_hft_buffer_queue_group(self, dvs, testlog): + """Test HFT with QUEUE (buffer queue) objects.""" + self.create_hft_profile(dvs) + self.create_hft_group(dvs, + group_name="QUEUE", + object_names="Ethernet0|7", + object_counters="PACKETS") + + time.sleep(5) + + asic_db = self.get_asic_db_objects(dvs) + self.verify_asic_db_objects(asic_db, groups=[(1, 1)]) + + self.delete_hft_group(dvs, group_name="QUEUE") + time.sleep(2) + + asic_db = self.get_asic_db_objects(dvs) + self.verify_asic_db_objects(asic_db, groups=[]) + + self.delete_hft_profile(dvs) + def test_hft_multiple_groups(self, dvs, testlog): """Test HFT with multiple groups and objects.""" # Create HFT profile and groups @@ -514,6 +568,64 @@ def test_hft_multiple_groups(self, dvs, testlog): # Clean up profile self.delete_hft_profile(dvs) + def test_hft_invalid_counters_no_orchagent_restart(self, dvs, testlog): + """Test that invalid object_counters don't cause orchagent to restart.""" + # Get orchagent PID inside the DVS container and record it + (exitcode, out) = dvs.runcmd("pgrep -x orchagent") + if exitcode != 0 or not out.strip(): + # orchagent not found; skip + return + + original_pid = out.strip().splitlines()[0].strip() + + # Create HFT profile and group with invalid counters + self.create_hft_profile(dvs) + self.create_hft_group(dvs, object_counters="INVALID_STATS1,INVALID_STATS2") + + # Allow some time for processing; orchagent should not restart + time.sleep(5) + + # Re-check orchagent PID inside the container + (exitcode, out) = dvs.runcmd("pgrep -x orchagent") + assert exitcode == 0 and out.strip(), "orchagent disappeared after applying invalid object_counters" + + new_pid = out.strip().splitlines()[0].strip() + + assert new_pid == original_pid, "orchagent restarted (PID changed) after applying invalid object_counters" + + # Clean up created config + self.delete_hft_group(dvs) + self.delete_hft_profile(dvs) + + def test_hft_empty_group_name_no_orchagent_restart(self, dvs, testlog): + """Test that empty group_name (invalid key format) doesn't cause orchagent to restart.""" + # Get orchagent PID inside the DVS container and record it + (exitcode, out) = dvs.runcmd("pgrep -x orchagent") + if exitcode != 0 or not out.strip(): + # orchagent not found; skip + return + + original_pid = out.strip().splitlines()[0].strip() + + # Create HFT profile and group with empty group_name (invalid key format) + self.create_hft_profile(dvs) + self.create_hft_group_empty_group_name(dvs) + + # Allow some time for processing; orchagent should not restart + time.sleep(5) + + # Re-check orchagent PID inside the container + (exitcode, out) = dvs.runcmd("pgrep -x 
orchagent") + assert exitcode == 0 and out.strip(), "orchagent disappeared after applying empty group_name" + + new_pid = out.strip().splitlines()[0].strip() + + assert new_pid == original_pid, "orchagent restarted (PID changed) after applying empty group_name" + + # Clean up created config + self.delete_hft_group_empty_group_name(dvs) + self.delete_hft_profile(dvs) + def test_hft_empty_default_config_cleanup(self, dvs, testlog): """Test that TAM_TELEMETRY objects are properly cleaned up when deleting a default HFT profile with empty object_names and object_counters.""" diff --git a/tests/test_macsec_gearbox.py b/tests/test_macsec_gearbox.py new file mode 100644 index 0000000000..2d0c4ec71b --- /dev/null +++ b/tests/test_macsec_gearbox.py @@ -0,0 +1,133 @@ +import pytest + +from test_gearbox import Gearbox +from gearbox import TestGearboxHelper +from macsec import TestMacsecHelper + +DVS_ENV = ["HWSKU=brcm_gearbox_vs"] + +@pytest.fixture(scope="module") +def gearbox(dvs): + return Gearbox(dvs) + +@pytest.fixture(scope="module") +def gearbox_config(dvs): + """ + Backup and restore gearbox_config.json once for all tests in the module. + + Backup happens before the first test, restore happens after the last test. + """ + # Resolve symlink to get actual config path + config_path = "/usr/share/sonic/hwsku/gearbox_config.json" + rc, actual_path = dvs.runcmd(f"readlink -f {config_path}") + if rc == 0 and actual_path.strip(): + config_path = actual_path.strip() + + # Backup original config (once at start) + dvs.runcmd(f"cp {config_path} {config_path}.bak") + + yield config_path + + # Restore original config (once at end) + dvs.runcmd(f"mv {config_path}.bak {config_path}") + +class TestMacsecGearbox(object): + + def test_macsec_phy_switch_default(self, dvs, gearbox, gearbox_config): + """ + When macsec_supported field is ABSENT (not specified), the system should: + 1. Default to using PHY switch for MACsec + 2. Create MACsec objects in GB_ASIC_DB (not ASIC_DB) + 3. This preserves backward compatibility with existing platforms + + Args: + dvs: Docker Virtual Switch instance (pytest fixture) + gearbox: Gearbox fixture + gearbox_config: Gearbox config fixture (auto backup/restore) + """ + # Derive port and phy_id from Gearbox object + port_name, phy_id = TestGearboxHelper.get_first_gearbox_port(gearbox) + + try: + TestGearboxHelper.configure_gearbox_macsec_support(dvs, gearbox, phy_id=phy_id, macsec_supported=None) + TestMacsecHelper.enable_macsec_on_port(dvs, port_name=port_name, with_secure_channels=True) + + assert TestMacsecHelper.verify_macsec_in_gb_asic_db(dvs, should_exist=True), ( + "FAILED: MACsec objects should exist in GB_ASIC_DB " + "when macsec_supported is absent" + ) + + assert TestMacsecHelper.verify_macsec_in_asic_db(dvs, should_exist=False), ( + "FAILED: MACsec objects should NOT exist in ASIC_DB " + "when using PHY backend" + ) + + finally: + TestMacsecHelper.cleanup_macsec(dvs, port_name) + + def test_macsec_phy_switch_explicit(self, dvs, gearbox, gearbox_config): + """ + When macsec_supported field is explicitly set to TRUE, the system should: + 1. Use PHY switch for MACsec (same as default) + 2. Create MACsec objects in GB_ASIC_DB (not ASIC_DB) + 3. 
This is the explicit way to declare PHY MACsec support + + Args: + dvs: Docker Virtual Switch instance (pytest fixture) + gearbox: Gearbox fixture + gearbox_config: Gearbox config fixture (auto backup/restore) + """ + # Derive port and phy_id from Gearbox object + port_name, phy_id = TestGearboxHelper.get_first_gearbox_port(gearbox) + + try: + TestGearboxHelper.configure_gearbox_macsec_support(dvs, gearbox, phy_id=phy_id, macsec_supported=True) + TestMacsecHelper.enable_macsec_on_port(dvs, port_name=port_name, with_secure_channels=True) + + assert TestMacsecHelper.verify_macsec_in_gb_asic_db(dvs, should_exist=True), ( + "FAILED: MACsec objects should exist in GB_ASIC_DB " + "when macsec_supported=true" + ) + + assert TestMacsecHelper.verify_macsec_in_asic_db(dvs, should_exist=False), ( + "FAILED: MACsec objects should NOT exist in ASIC_DB " + "when using PHY backend" + ) + + finally: + TestMacsecHelper.cleanup_macsec(dvs, port_name) + + def test_macsec_npu_switch(self, dvs, gearbox, gearbox_config): + """ + Test MACsec NPU backend selection when macsec_supported=false. + + 1. When a gearbox PHY has macsec_supported=false in _GEARBOX_TABLE, + 2. MACsec objects should be created in ASIC_DB (NPU backend), not in + GB_ASIC_DB (PHY backend). + + Args: + dvs: Docker Virtual Switch instance (pytest fixture) + gearbox: Gearbox fixture + gearbox_config: Gearbox config fixture (auto backup/restore) + """ + # Derive port and phy_id from Gearbox object + port_name, phy_id = TestGearboxHelper.get_first_gearbox_port(gearbox) + + try: + # Setup gearbox with macsec_supported=false + TestGearboxHelper.configure_gearbox_macsec_support(dvs, gearbox, phy_id=phy_id, macsec_supported=False) + TestMacsecHelper.enable_macsec_on_port(dvs, port_name=port_name, with_secure_channels=True) + + assert TestMacsecHelper.verify_macsec_in_asic_db(dvs, should_exist=True), ( + "FAILED: MACsec objects should exist in ASIC_DB " + "when macsec_supported=false" + ) + + assert TestMacsecHelper.verify_macsec_in_gb_asic_db(dvs, should_exist=False), ( + "FAILED: MACsec objects should NOT exist in GB_ASIC_DB " + "when macsec_supported=false" + ) + + finally: + TestMacsecHelper.cleanup_macsec(dvs, port_name) +
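Taken together, the three tests above pin down how the MACsec backend is selected from the gearbox configuration. A condensed, illustrative restatement (not part of the patch; it assumes the dvs and gearbox fixtures plus a gearbox port_name are in scope, and it mirrors the assertions made in the individual tests):

    # macsec_supported -> (objects expected in GB_ASIC_DB, objects expected in ASIC_DB)
    expectations = {
        None:  (True,  False),  # field absent: default to the PHY backend
        True:  (True,  False),  # explicit PHY MACsec support
        False: (False, True),   # fall back to the NPU backend
    }
    for supported, (in_gb, in_npu) in expectations.items():
        TestGearboxHelper.configure_gearbox_macsec_support(dvs, gearbox,
                                                           macsec_supported=supported)
        TestMacsecHelper.enable_macsec_on_port(dvs, port_name)
        assert TestMacsecHelper.verify_macsec_in_gb_asic_db(dvs, should_exist=in_gb)
        assert TestMacsecHelper.verify_macsec_in_asic_db(dvs, should_exist=in_npu)
        TestMacsecHelper.cleanup_macsec(dvs, port_name)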