|
8 | 8 | # |
9 | 9 | # See the LICENSE file in the source distribution for further information. |
10 | 10 |
|
| 11 | +import json |
11 | 12 | import os |
12 | 13 | from sos.report.plugins import (Plugin, RedHatPlugin, DebianPlugin, |
13 | 14 | UbuntuPlugin, PluginOpt) |
@@ -151,6 +152,123 @@ def collect_per_resource_details(self): |
151 | 152 | subdir=f'cluster-info/{nspace}/{res}' |
152 | 153 | ) |
153 | 154 |
|
def _discover_dpu_clusters(self):
    """Discover all dpucluster objects in the host cluster.

    Runs ``<kube_cmd> get dpucluster -A -o json``; the raw output is also
    saved in the report under ``cluster-info``.

    Returns a list of dicts with 'name' and 'namespace' keys. An empty
    list is returned when the command fails or its output is not a JSON
    object. Entries lacking ``metadata.name`` or ``metadata.namespace``
    are skipped with a warning so that one malformed entry does not hide
    the remaining clusters.
    """
    result = self.collect_cmd_output(
        f"{self.kube_cmd} get dpucluster -A -o json",
        subdir='cluster-info'
    )

    if result['status'] != 0:
        self._log_warn("Failed to discover DPU clusters")
        return []

    # Only the parsing step is guarded here; per-item problems are
    # handled individually below.
    try:
        data = json.loads(result['output'])
    except (json.JSONDecodeError, TypeError) as e:
        self._log_warn(f"Failed to parse DPU clusters: {e}")
        return []

    if not isinstance(data, dict):
        self._log_warn(
            "Failed to parse DPU clusters: unexpected JSON document"
        )
        return []

    clusters = []
    # 'items' may be absent or null; both mean "nothing found"
    for item in data.get('items') or []:
        try:
            metadata = item['metadata']
            clusters.append({
                'name': metadata['name'],
                'namespace': metadata['namespace']
            })
        except (KeyError, TypeError) as e:
            self._log_warn(f"Skipping malformed DPU cluster entry: {e}")

    self._log_info(f"Discovered {len(clusters)} DPU cluster(s)")
    return clusters
| 185 | + |
def _collect_dpu_cluster_resources(self, cluster_info):
    """Collect resources from a single DPU cluster.

    The admin kubeconfig of the DPU cluster is read from the
    ``<name>-admin-kubeconfig`` secret in the cluster's namespace on the
    host cluster, written to a temporary file, and used to query the DPU
    cluster. Output is stored under ``dpu-clusters/<namespace>/<name>``.
    The temporary kubeconfig is removed before this method returns.

    cluster_info: Dict with 'name' and 'namespace' keys

    Returns None. Failures are logged as warnings and stop collection
    for this cluster only.
    """
    # Imported locally so the method carries its own dependencies.
    import base64
    import tempfile

    cluster_name = cluster_info['name']
    namespace = cluster_info['namespace']
    # expected secret name to be found in the namespace
    secret_name = f"{cluster_name}-admin-kubeconfig"
    subdir_base = f'dpu-clusters/{namespace}/{cluster_name}'

    # sos runs commands without a shell, so a "| base64 -d > file"
    # pipeline would reach kubectl as literal arguments. Fetch only the
    # encoded value here and decode it in Python. stderr is kept out of
    # the output so it cannot corrupt the base64 payload.
    secret_ret = self.exec_cmd(
        f"{self.kube_cmd} get secret {secret_name} -n {namespace} "
        f"-o jsonpath='{{.data.admin\\.conf}}'",
        stderr=False
    )
    kubeconfig = b''
    if secret_ret['status'] == 0:
        try:
            kubeconfig = base64.b64decode(
                (secret_ret['output'] or '').strip()
            )
        except ValueError:
            # binascii.Error (bad padding) is a ValueError subclass
            kubeconfig = b''
    if not kubeconfig:
        self._log_warn(
            f"Failed to retrieve kubeconfig for DPU cluster "
            f"{cluster_name} in namespace {namespace}"
        )
        return

    # Create unique temp kubeconfig path; mkstemp makes the file
    # readable and writable only by the current user.
    # NOTE(review): the file is created by the sos process itself; this
    # assumes commands are not run against an alternate sysroot - confirm.
    try:
        kc_fd, kc_path = tempfile.mkstemp(prefix='sos-dpu-kc.')
    except OSError:
        self._log_warn(
            f"Failed to create temporary kubeconfig for {cluster_name}"
        )
        return

    try:
        with os.fdopen(kc_fd, 'wb') as kc_file:
            kc_file.write(kubeconfig)

        dpu_kube_cmd = (
            f"kubectl --kubeconfig={kc_path} --request-timeout=10s"
        )
        kns_result = self.collect_cmd_output(
            f"{dpu_kube_cmd} get namespaces -o json",
            subdir=subdir_base
        )
        if kns_result['status'] != 0:
            self._log_warn(f"Failed to access DPU cluster {cluster_name}")
            return

        try:
            ns_data = json.loads(kns_result['output'])
            namespaces = [
                n['metadata']['name'] for n in ns_data.get('items', [])
            ]
        except (json.JSONDecodeError, KeyError) as e:
            self._log_warn(
                f"Failed to parse namespaces for {cluster_name}: {e}"
            )
            return

        # Collect resources from each namespace. Every command is run
        # immediately (collect_cmd_output) rather than queued
        # (add_cmd_output): the kubeconfig they depend on is deleted
        # when this method finishes.
        for nspace in namespaces:
            nspace_arg = f'--namespace={nspace}'
            nspace_subdir = f"{subdir_base}/{nspace}"

            if self.get_option('all'):
                k_cmd = (
                    f"{dpu_kube_cmd} get -o json {nspace_arg} "
                    f"--ignore-not-found"
                )
                for res in self.resources:
                    self.collect_cmd_output(
                        f"{k_cmd} {res}",
                        subdir=nspace_subdir
                    )

            if not self.get_option('describe'):
                continue

            k_base = f"{dpu_kube_cmd} {nspace_arg} --ignore-not-found"
            for res in self.resources:
                ret = self.exec_cmd(f"{k_base} get {res}")
                if ret['status'] != 0:
                    continue
                # First line is the table header; the first column of
                # each remaining non-blank line is the object name.
                names = [
                    line.split()[0]
                    for line in ret['output'].splitlines()[1:]
                    if line.strip()
                ]
                for item in names:
                    self.collect_cmd_output(
                        f"{k_base} describe {res} {item}",
                        subdir=f"{nspace_subdir}/{res}"
                    )

        self._log_info(
            f"Collected resources from DPU cluster {cluster_name}"
        )
    finally:
        # Clean up temporary kubeconfig on every exit path, including
        # unexpected exceptions.
        try:
            os.remove(kc_path)
        except OSError as e:
            self._log_warn(
                f"Failed to remove temporary kubeconfig {kc_path}: {e}"
            )
| 271 | + |
154 | 272 |
|
155 | 273 | class RedHatKubernetes(DocaDpf, RedHatPlugin): |
156 | 274 |
|
|
0 commit comments