Skip to content

Commit ddb34ff

Browse files
committed
migrate CI test steps to containers
Today we use the beni09 static agent to run the test/gtest/valgrind steps, which is unscalable since it can only run one pipeline at a time, causing delays in builds that can be stuck waiting for hours. The idea is to move these steps to containers, allowing them to run in parallel and allowing multiple pipelines to run at the same time (depending on the capacity of the k8s cluster). Issue: HPCINFRA-3250 Signed-off-by: NirWolfer <[email protected]>
1 parent 191dfbd commit ddb34ff

File tree

6 files changed

+171
-115
lines changed

6 files changed

+171
-115
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
ARG ARCH=x86_64
2+
FROM harbor.mellanox.com/hpcx/x86_64/ubuntu22.04/base AS build
3+
FROM build AS tests
4+
RUN apt-get update && \
5+
apt-get install -y \
6+
net-tools unzip iproute2 wget \
7+
&& apt-get clean && rm -rf /var/lib/apt/lists/*
8+
FROM tests AS vg
9+
RUN apt-get update && \
10+
apt-get install -y \
11+
valgrind \
12+
&& apt-get clean && rm -rf /var/lib/apt/lists/*
13+
FROM tests AS test
14+
RUN apt-get update && \
15+
apt-get install -y \
16+
openssh-server psmisc \
17+
&& apt-get clean && rm -rf /var/lib/apt/lists/*
18+
# Set up the SSH server and passwordless root login for the test flows (verifier.pl)
19+
RUN mkdir -p /var/run/sshd ~/.ssh && \
20+
rm -rf ~/.ssh/id_rsa* && ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa && \
21+
cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys && \
22+
sed -i 's|#PermitRootLogin.*|PermitRootLogin without-password|g' /etc/ssh/sshd_config && \
23+
sed -i 's|#PasswordAuthentication.*|PasswordAuthentication no|g' /etc/ssh/sshd_config && \
24+
echo "Host *" >> ~/.ssh/config && \
25+
echo " StrictHostKeyChecking no" >> ~/.ssh/config && \
26+
echo " UserKnownHostsFile /dev/null" >> ~/.ssh/config && \
27+
echo " LogLevel ERROR" >> ~/.ssh/config
28+
FROM tests AS gtest
29+
FROM build AS style
30+
RUN apt-get update \
31+
&& apt-get install -y clang-15 clang-format-15 \
32+
&& update-alternatives --install /usr/bin/clang-format clang-format /usr/bin/clang-format-15 100 \
33+
&& update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 100 \
34+
--slave /usr/bin/clang++ clang++ /usr/bin/clang++-15 \
35+
&& apt-get clean && rm -rf /var/lib/apt/lists/*

.ci/matrix_job.yaml

Lines changed: 83 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ job: LIBVMA
44
step_allow_single_selector: false
55

66
registry_host: harbor.mellanox.com
7-
registry_auth: swx-storage
7+
registry_auth: swx-infra_harbor_credentials
8+
registry_path: /swx-infra/media
89

910
kubernetes:
1011
privileged: true
@@ -43,9 +44,58 @@ runs_on_dockers:
4344
- {name: 'toolbox', url: 'harbor.mellanox.com/hpcx/x86_64/rhel8.6/builder:inbox', category: 'tool', arch: 'x86_64'}
4445
- {name: 'blackduck', url: 'harbor.mellanox.com/toolbox/ngci-centos:7.9.2009.2', category: 'tool', arch: 'x86_64'}
4546
- {name: 'header-check', url: 'harbor.mellanox.com/toolbox/header_check:0.0.51', category: 'tool', arch: 'x86_64', tag: '0.0.51'}
46-
47-
runs_on_agents:
48-
- {nodeLabel: 'beni09', category: 'base'}
47+
# tests
48+
- {
49+
file: '.ci/dockerfiles/Dockerfile.ubuntu22.04',
50+
arch: 'x86_64',
51+
name: 'test',
52+
uri: 'vma/$arch/ubuntu22.04/$name',
53+
tag: '20250219',
54+
build_args: '--no-cache --target test',
55+
category: 'tests',
56+
annotations: [{ key: 'k8s.v1.cni.cncf.io/networks', value: 'sriov-cx6dx-p1' }],
57+
limits: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 10Gi, mellanox.com/sriov_cx6dx_p1: 1}',
58+
requests: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 10Gi, mellanox.com/sriov_cx6dx_p1: 1}',
59+
caps_add: '[ IPC_LOCK, SYS_RESOURCE ]',
60+
runAsUser: '0',
61+
runAsGroup: '0',
62+
cloud: swx-k8s-spray,
63+
namespace: default
64+
}
65+
- {
66+
file: '.ci/dockerfiles/Dockerfile.ubuntu22.04',
67+
arch: 'x86_64',
68+
name: 'vg',
69+
uri: 'vma/$arch/ubuntu22.04/$name',
70+
tag: '20250219',
71+
build_args: '--no-cache --target vg',
72+
category: 'tool',
73+
annotations: [{ key: 'k8s.v1.cni.cncf.io/networks', value: 'sriov-cx6dx-p2' }],
74+
limits: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 10Gi, mellanox.com/sriov_cx6dx_p2: 1}',
75+
requests: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 10Gi, mellanox.com/sriov_cx6dx_p2: 1}',
76+
caps_add: '[ IPC_LOCK, SYS_RESOURCE ]',
77+
runAsUser: '0',
78+
runAsGroup: '0',
79+
cloud: swx-k8s-spray,
80+
namespace: default
81+
}
82+
- {
83+
file: '.ci/dockerfiles/Dockerfile.ubuntu22.04',
84+
arch: 'x86_64',
85+
name: 'gtest',
86+
uri: 'vma/$arch/ubuntu22.04/$name',
87+
tag: '20250219',
88+
build_args: '--no-cache --target gtest',
89+
category: 'tests',
90+
annotations: [{ key: 'k8s.v1.cni.cncf.io/networks', value: 'sriov-cx6dx-p1@net1,sriov-cx6dx-p2@net2' }],
91+
limits: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 8Gi, mellanox.com/sriov_cx6dx_p1: 1, mellanox.com/sriov_cx6dx_p2: 1}',
92+
requests: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 8Gi, mellanox.com/sriov_cx6dx_p1: 1, mellanox.com/sriov_cx6dx_p2: 1}',
93+
caps_add: '[ IPC_LOCK, SYS_RESOURCE ]',
94+
runAsUser: '0',
95+
runAsGroup: '0',
96+
cloud: swx-k8s-spray,
97+
namespace: default
98+
}
4999

50100
matrix:
51101
axes:
@@ -62,6 +112,12 @@ env:
62112

63113
steps:
64114
- name: Setup
115+
containerSelector:
116+
- "{category: 'base'}"
117+
- "{name: 'test', variant:1}"
118+
- "{name: 'gtest', variant:1}"
119+
- "{name: 'vg', variant:1}"
120+
- "{name: 'style'}"
65121
run: |
66122
set +x
67123
echo
@@ -90,27 +146,34 @@ steps:
90146
- name: Install Doca-host
91147
containerSelector:
92148
- "{category: 'base'}"
149+
- "{name: 'test', variant:1}"
150+
- "{name: 'gtest', variant:1}"
151+
- "{name: 'vg', variant:1}"
152+
- "{name: 'style'}"
93153
agentSelector:
94154
- "{nodeLabel: 'skip-agent'}"
95155
run: |
96156
echo "Installing DOCA: ${DOCA_VERSION} ..."
97157
.ci/scripts/doca_install.sh
98158
99-
- name: Install Doca-host on Tools
100-
run: |
101-
echo "Installing DOCA: ${DOCA_VERSION} ..."
102-
.ci/scripts/doca_install.sh
103-
containerSelector:
104-
- "{name: 'style', category: 'tool'}"
105-
agentSelector:
106-
- "{nodeLabel: 'skip-agent'}"
107-
108159
- name: Autogen
160+
containerSelector:
161+
- "{category: 'base'}"
162+
- "{name: 'test', variant:1}"
163+
- "{name: 'gtest', variant:1}"
164+
- "{name: 'vg', variant:1}"
165+
- "{name: 'style'}"
109166
run: |
110167
./autogen.sh -s
111168
parallel: false
112169

113170
- name: Build
171+
containerSelector:
172+
- "{category: 'base'}"
173+
- "{name: 'test', variant:1}"
174+
- "{name: 'gtest', variant:1}"
175+
- "{name: 'vg', variant:1}"
176+
- "{name: 'style'}"
114177
enable: ${do_build}
115178
run: |
116179
[ "x${do_build}" == "xtrue" ] && action=yes || action=no
@@ -227,12 +290,13 @@ steps:
227290
- name: Test
228291
enable: ${do_test}
229292
containerSelector:
230-
- "{name: 'skip-container'}"
293+
- "{name: 'test', variant:1}"
231294
agentSelector:
232-
- "{nodeLabel: 'beni09', variant:1}"
295+
- "{nodeLabel: 'skip-agent'}"
233296
# Invoke the Jenkins test driver; jenkins_test_run is "yes" only when do_test is "true".
# The step must end as soon as the driver returns so the pod and its
# SR-IOV/hugepage resources are released for other pipelines.
run: |
  [ "x${do_test}" == "xtrue" ] && action=yes || action=no
  env WORKSPACE=$PWD TARGET=${flags} jenkins_test_run=${action} ./contrib/test_jenkins.sh
236300
parallel: false
237301
onfail: |
238302
./.ci/artifacts.sh
@@ -242,9 +306,9 @@ steps:
242306
- name: Gtest
243307
enable: ${do_gtest}
244308
containerSelector:
245-
- "{name: 'skip-container'}"
309+
- "{name: 'gtest', variant:1}"
246310
agentSelector:
247-
- "{nodeLabel: 'beni09', variant:1}"
311+
- "{nodeLabel: 'skip-agent'}"
248312
run: |
249313
[ "x${do_gtest}" == "xtrue" ] && action=yes || action=no
250314
env WORKSPACE=$PWD TARGET=${flags} jenkins_test_gtest=${action} ./contrib/test_jenkins.sh
@@ -259,9 +323,9 @@ steps:
259323
- name: Valgrind
260324
enable: ${do_valgrind}
261325
containerSelector:
262-
- "{name: 'skip-container'}"
326+
- "{name: 'vg', variant:1}"
263327
agentSelector:
264-
- "{nodeLabel: 'beni09', variant:1}"
328+
- "{nodeLabel: 'skip-agent'}"
265329
run: |
266330
[ "x${do_valgrind}" == "xtrue" ] && action=yes || action=no
267331
env WORKSPACE=$PWD TARGET=${flags} jenkins_test_vg=${action} ./contrib/test_jenkins.sh

contrib/jenkins_tests/globals.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,16 @@ function do_archive()
7575
set -e
7676
}
7777

78+
# Ensure a hugetlbfs mount exists when running inside a Docker container.
# Does nothing when /.dockerenv is absent or when /proc/mounts already lists
# a hugetlbfs entry. Otherwise creates /mnt/huge, mounts hugetlbfs there and
# prints the resulting /proc/mounts entry.
# Returns: 0 when nothing had to be done; the failing command's status if
#          mkdir or mount fails; otherwise the status of the final grep
#          (non-zero when no hugetlbfs entry is visible after mounting).
function do_hugepages()
{
    [[ -f /.dockerenv ]] || return 0

    # Branch on grep's exit status. Expanding $(grep -q ...) inside [[ ]]
    # always yields an empty string, so a negated string test on it is
    # always true and hugetlbfs would be mounted again on every call.
    if grep -q hugetlbfs /proc/mounts; then
        return 0
    fi

    mkdir -p /mnt/huge || return
    mount -t hugetlbfs nodev /mnt/huge || return

    # Print the new mount entry so the job log shows the mount took effect.
    grep hugetlbfs /proc/mounts
}
87+
7888
# Test if an environment module exists and load it if yes.
7989
# Otherwise, return error code.
8090
# $1 - module name

contrib/jenkins_tests/gtest.sh

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
source $(dirname $0)/globals.sh
44

5+
# Fix hugepages for docker environments
6+
do_hugepages
7+
58
echo "Checking for gtest ..."
69

710
# Check dependencies
@@ -10,11 +13,6 @@ if [ $(test -d ${install_dir} >/dev/null 2>&1 || echo $?) ]; then
1013
exit 1
1114
fi
1215

13-
if [ $(command -v ibdev2netdev >/dev/null 2>&1 || echo $?) ]; then
14-
echo "[SKIP] ibdev2netdev tool does not exist"
15-
exit 1
16-
fi
17-
1816
cd $WORKSPACE
1917

2018
rm -rf $gtest_dir
@@ -25,16 +23,21 @@ gtest_app="$PWD/tests/gtest/gtest"
2523
gtest_lib=$install_dir/lib/${prj_lib}
2624

2725
gtest_ip_list=""
28-
if [ ! -z $(do_get_ip 'eth') ]; then
29-
gtest_ip_list="$(do_get_ip 'eth')"
30-
fi
31-
if [ ! -z $(do_get_ip 'eth' '' $gtest_ip_list) ]; then
32-
gtest_ip_list="${gtest_ip_list}:$(do_get_ip 'eth' '' $gtest_ip_list)"
26+
27+
if [[ -f /.dockerenv ]] || [[ -f /run/.containerenv ]] || [[ -n "${KUBERNETES_SERVICE_HOST}" ]]; then
28+
gtest_opt="--addr=$(ip -f inet addr show net1 | awk '/inet / {print $2}' | cut -d/ -f1):$(ip -f inet addr show net2 | awk '/inet / {print $2}' | cut -d/ -f1)"
3329
else
34-
echo "[SKIP] two eth interfaces are required. found: ${gtest_ip_list}"
35-
exit 0
30+
if [ ! -z $(do_get_ip 'eth') ]; then
31+
gtest_ip_list="$(do_get_ip 'eth')"
32+
fi
33+
if [ ! -z $(do_get_ip 'eth' '' $gtest_ip_list) ]; then
34+
gtest_ip_list="${gtest_ip_list}:$(do_get_ip 'eth' '' $gtest_ip_list)"
35+
else
36+
echo "[SKIP] two eth interfaces are required. found: ${gtest_ip_list}"
37+
exit 0
38+
fi
39+
gtest_opt="--addr=${gtest_ip_list}"
3640
fi
37-
gtest_opt="--addr=${gtest_ip_list}"
3841

3942
set +eE
4043

0 commit comments

Comments
 (0)