Skip to content

Commit 8774140

Browse files
soulebfilanov
authored and committed
Add methods to retrieve dpuCluster resources
We want to collect all DPF custom resources during a single sos report run. We add here the needed functions to retrieve kubeconfigs and collect resources for a dpu cluster. Signed-off-by: Soule BA <[email protected]>
1 parent 6a4d4b4 commit 8774140

File tree

1 file changed

+118
-0
lines changed

1 file changed

+118
-0
lines changed

sos/report/plugins/doca_dpf.py

Lines changed: 118 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#
99
# See the LICENSE file in the source distribution for further information.
1010

11+
import json
1112
import os
1213
from sos.report.plugins import (Plugin, RedHatPlugin, DebianPlugin,
1314
UbuntuPlugin, PluginOpt)
@@ -151,6 +152,123 @@ def collect_per_resource_details(self):
151152
subdir=f'cluster-info/{nspace}/{res}'
152153
)
153154

155+
def _discover_dpu_clusters(self):
    """Discover all dpucluster objects in the host cluster.

    Runs ``<kube_cmd> get dpucluster -A -o json`` through
    ``collect_cmd_output`` (with ``subdir='cluster-info'``) and parses
    the JSON it returns.

    Returns a list of dicts with 'name' and 'namespace' keys, one per
    item found. An empty list is returned when the command exits
    non-zero or when its output cannot be parsed as the expected
    structure.
    """
    result = self.collect_cmd_output(
        f"{self.kube_cmd} get dpucluster -A -o json",
        subdir='cluster-info'
    )

    if result['status'] != 0:
        self._log_warn("Failed to discover DPU clusters")
        return []

    try:
        data = json.loads(result['output'])
        clusters = [
            {
                'name': item['metadata']['name'],
                'namespace': item['metadata']['namespace'],
            }
            # `or []` also covers an explicit `"items": null`
            for item in data.get('items') or []
        ]
    except (json.JSONDecodeError, KeyError, TypeError,
            AttributeError) as e:
        # TypeError/AttributeError: valid JSON of an unexpected shape
        # (top level is not an object, an item is not a dict, ...)
        self._log_warn(f"Failed to parse DPU clusters: {e}")
        return []

    self._log_info(f"Discovered {len(clusters)} DPU cluster(s)")
    return clusters
185+
186+
def _collect_dpu_cluster_resources(self, cluster_info):
    """Collect resources from a single DPU cluster.

    The admin kubeconfig is read from the ``<name>-admin-kubeconfig``
    secret in the cluster's namespace, written to a private temporary
    file and used to list the DPU cluster's namespaces. For every
    namespace, each entry of ``self.resources`` is collected as JSON
    when the 'all' option is set, and described per object when the
    'describe' option is set. Output is stored below
    ``dpu-clusters/<namespace>/<name>``.

    cluster_info: Dict with 'name' and 'namespace' keys

    Returns None. Failures are logged as warnings and end collection
    for this cluster. The temporary kubeconfig is removed on every
    exit path, including unexpected exceptions.
    """
    # Function-scope imports: only this method needs these modules.
    import base64
    import binascii
    import tempfile

    cluster_name = cluster_info['name']
    namespace = cluster_info['namespace']
    # expected secret name to be found in the namespace
    secret_name = f"{cluster_name}-admin-kubeconfig"
    subdir_base = f'dpu-clusters/{namespace}/{cluster_name}'

    # Fetch the base64 payload only and decode it here instead of
    # piping through `base64 -d > file`: `|` and `>` need a shell.
    # NOTE(review): assumes exec_cmd runs the command without a shell
    # - confirm against the sos Plugin API.
    secret_ret = self.exec_cmd(
        f"{self.kube_cmd} get secret {secret_name} -n {namespace} "
        f"-o jsonpath='{{.data.admin\\.conf}}'"
    )
    kubeconfig = b''
    if secret_ret['status'] == 0:
        try:
            kubeconfig = base64.b64decode(
                secret_ret['output'].strip(), validate=True
            )
        except (binascii.Error, ValueError):
            kubeconfig = b''
    if not kubeconfig:
        # Non-zero exit, missing 'admin.conf' key, or undecodable data
        self._log_warn(
            f"Failed to retrieve kubeconfig for DPU cluster "
            f"{cluster_name} in namespace {namespace}"
        )
        return

    # Create unique temp kubeconfig path. mkstemp creates the file
    # readable and writable only by the current user.
    # NOTE(review): assumes collected commands see the same filesystem
    # as this process - confirm for sysroot/container runs.
    try:
        kc_fd, kc_path = tempfile.mkstemp(prefix='sos-dpu-kc.')
    except OSError:
        self._log_warn(
            f"Failed to create temporary kubeconfig for {cluster_name}"
        )
        return

    try:
        with os.fdopen(kc_fd, 'wb') as kc_file:
            kc_file.write(kubeconfig)

        dpu_kube_cmd = (
            f"kubectl --kubeconfig={kc_path} --request-timeout=10s"
        )
        kns_result = self.collect_cmd_output(
            f"{dpu_kube_cmd} get namespaces -o json",
            subdir=subdir_base
        )
        if kns_result['status'] != 0:
            self._log_warn(
                f"Failed to access DPU cluster {cluster_name}"
            )
            return

        try:
            ns_data = json.loads(kns_result['output'])
            namespaces = [
                n['metadata']['name']
                for n in ns_data.get('items') or []
            ]
        except (json.JSONDecodeError, KeyError, TypeError,
                AttributeError) as e:
            self._log_warn(
                f"Failed to parse namespaces for {cluster_name}: {e}"
            )
            return

        # Option values do not change between namespaces
        collect_all = self.get_option('all')
        collect_describe = self.get_option('describe')

        # Collect resources from each namespace. Commands are run with
        # collect_cmd_output so they finish before the kubeconfig is
        # removed below.
        # NOTE(review): assumes add_cmd_output would only queue them
        # for later - confirm against the sos Plugin API.
        for nspace in namespaces:
            nspace_arg = f'--namespace={nspace}'
            if collect_all:
                k_cmd = (
                    f"{dpu_kube_cmd} get -o json {nspace_arg} "
                    f"--ignore-not-found"
                )
                for res in self.resources:
                    self.collect_cmd_output(
                        f"{k_cmd} {res}",
                        subdir=f"{subdir_base}/{nspace}"
                    )

            if collect_describe:
                k_base = (
                    f"{dpu_kube_cmd} {nspace_arg} --ignore-not-found"
                )
                for res in self.resources:
                    ret = self.exec_cmd(f"{k_base} get {res}")
                    if ret['status'] != 0:
                        continue
                    # Skip the header row; the first column of every
                    # remaining non-blank line is the object name.
                    items = [
                        line.split()[0]
                        for line in ret['output'].splitlines()[1:]
                        if line.strip()
                    ]
                    for item in items:
                        self.collect_cmd_output(
                            f"{k_base} describe {res} {item}",
                            subdir=f"{subdir_base}/{nspace}/{res}"
                        )
    finally:
        # Clean up temporary kubeconfig (it holds admin credentials)
        try:
            os.remove(kc_path)
        except OSError as e:
            self._log_warn(
                f"Failed to remove temporary kubeconfig {kc_path}: {e}"
            )

    self._log_info(f"Collected resources from DPU cluster {cluster_name}")
271+
154272

155273
class RedHatKubernetes(DocaDpf, RedHatPlugin):
156274

0 commit comments

Comments
 (0)