Skip to content

Commit f70196b

Browse files
committed
migrate CI test steps to containers
Today we use the static agent beni09 to run the test/gtest/valgrind steps. This does not scale: the agent can run only one pipeline at a time, so builds can be stuck waiting for hours. The idea is to move these steps to containers, which allows running them in parallel as well as running multiple pipelines at the same time (depending on the capacity of the k8s cluster). Issue: HPCINFRA-3250 Signed-off-by: NirWolfer <[email protected]>
1 parent 191dfbd commit f70196b

File tree

6 files changed

+176
-115
lines changed

6 files changed

+176
-115
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Multi-stage image definition for the containerized CI steps.
# Each named stage (test, vg, gtest, style) is meant to be built on its own
# with `--target <stage>`.
# Architecture component of the base image path; defaults to x86_64.
ARG ARCH=x86_64
2+
# Common base stage that every other stage derives from.
FROM harbor.mellanox.com/hpcx/$ARCH/ubuntu22.04/base AS build
3+
# Shared layer for the test-running stages: network utilities plus unzip and wget.
FROM build AS tests
4+
RUN apt-get update && \
5+
apt-get install -y \
6+
net-tools unzip iproute2 wget \
7+
&& apt-get clean && rm -rf /var/lib/apt/lists/*
8+
# Valgrind stage: the shared test layer plus valgrind.
FROM tests AS vg
9+
RUN apt-get update && \
10+
apt-get install -y \
11+
valgrind \
12+
&& apt-get clean && rm -rf /var/lib/apt/lists/*
13+
# Functional-test stage: the shared test layer plus an SSH server and psmisc.
FROM tests AS test
14+
RUN apt-get update && \
15+
apt-get install -y \
16+
openssh-server psmisc \
17+
&& apt-get clean && rm -rf /var/lib/apt/lists/*
18+
# Set up an SSH server and key-based (passwordless) root login for the test flows (verifier.pl).
# A fresh RSA key pair is generated and authorized for root; sshd is switched to
# key-only root login (the sed edits only touch the commented-out defaults), and
# the SSH client config disables host-key checking and known_hosts recording.
19+
RUN mkdir -p /var/run/sshd ~/.ssh && \
20+
rm -rf ~/.ssh/id_rsa* && ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa && \
21+
cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys && \
22+
sed -i 's|#PermitRootLogin.*|PermitRootLogin without-password|g' /etc/ssh/sshd_config && \
23+
sed -i 's|#PasswordAuthentication.*|PasswordAuthentication no|g' /etc/ssh/sshd_config && \
24+
echo "Host *" >> ~/.ssh/config && \
25+
echo " StrictHostKeyChecking no" >> ~/.ssh/config && \
26+
echo " UserKnownHostsFile /dev/null" >> ~/.ssh/config && \
27+
echo " LogLevel ERROR" >> ~/.ssh/config
28+
# The gtest stage adds nothing on top of the shared test layer.
FROM tests AS gtest
29+
# Style-check stage: clang 15 and clang-format 15, registered through
# update-alternatives as the default clang, clang++ and clang-format.
FROM build AS style
30+
RUN apt-get update \
31+
&& apt-get install -y clang-15 clang-format-15 \
32+
&& update-alternatives --install /usr/bin/clang-format clang-format /usr/bin/clang-format-15 100 \
33+
&& update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 100 \
34+
--slave /usr/bin/clang++ clang++ /usr/bin/clang++-15 \
35+
&& apt-get clean && rm -rf /var/lib/apt/lists/*

.ci/matrix_job.yaml

Lines changed: 88 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ job: LIBVMA
44
step_allow_single_selector: false
55

66
registry_host: harbor.mellanox.com
7-
registry_auth: swx-storage
7+
registry_auth: swx-infra_harbor_credentials
8+
registry_path: /swx-infra/media
89

910
kubernetes:
1011
privileged: true
@@ -43,9 +44,58 @@ runs_on_dockers:
4344
- {name: 'toolbox', url: 'harbor.mellanox.com/hpcx/x86_64/rhel8.6/builder:inbox', category: 'tool', arch: 'x86_64'}
4445
- {name: 'blackduck', url: 'harbor.mellanox.com/toolbox/ngci-centos:7.9.2009.2', category: 'tool', arch: 'x86_64'}
4546
- {name: 'header-check', url: 'harbor.mellanox.com/toolbox/header_check:0.0.51', category: 'tool', arch: 'x86_64', tag: '0.0.51'}
46-
47-
runs_on_agents:
48-
- {nodeLabel: 'beni09', category: 'base'}
47+
# tests
48+
- {
49+
file: '.ci/dockerfiles/Dockerfile.ubuntu22.04',
50+
arch: 'x86_64',
51+
name: 'test',
52+
uri: 'vma/$arch/ubuntu22.04/$name',
53+
tag: '20250219',
54+
build_args: '--no-cache --target test',
55+
category: 'tests',
56+
annotations: [{ key: 'k8s.v1.cni.cncf.io/networks', value: 'sriov-cx6dx-p1' }],
57+
limits: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 10Gi, mellanox.com/sriov_cx6dx_p1: 1}',
58+
requests: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 10Gi, mellanox.com/sriov_cx6dx_p1: 1}',
59+
caps_add: '[ IPC_LOCK, SYS_RESOURCE ]',
60+
runAsUser: '0',
61+
runAsGroup: '0',
62+
cloud: swx-k8s-spray,
63+
namespace: default
64+
}
65+
- {
66+
file: '.ci/dockerfiles/Dockerfile.ubuntu22.04',
67+
arch: 'x86_64',
68+
name: 'vg',
69+
uri: 'vma/$arch/ubuntu22.04/$name',
70+
tag: '20250219',
71+
build_args: '--no-cache --target vg',
72+
category: 'tool',
73+
annotations: [{ key: 'k8s.v1.cni.cncf.io/networks', value: 'sriov-cx6dx-p2' }],
74+
limits: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 10Gi, mellanox.com/sriov_cx6dx_p2: 1}',
75+
requests: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 10Gi, mellanox.com/sriov_cx6dx_p2: 1}',
76+
caps_add: '[ IPC_LOCK, SYS_RESOURCE ]',
77+
runAsUser: '0',
78+
runAsGroup: '0',
79+
cloud: swx-k8s-spray,
80+
namespace: default
81+
}
82+
- {
83+
file: '.ci/dockerfiles/Dockerfile.ubuntu22.04',
84+
arch: 'x86_64',
85+
name: 'gtest',
86+
uri: 'vma/$arch/ubuntu22.04/$name',
87+
tag: '20250219',
88+
build_args: '--no-cache --target gtest',
89+
category: 'tests',
90+
annotations: [{ key: 'k8s.v1.cni.cncf.io/networks', value: 'sriov-cx6dx-p1@net1,sriov-cx6dx-p2@net2' }],
91+
limits: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 8Gi, mellanox.com/sriov_cx6dx_p1: 1, mellanox.com/sriov_cx6dx_p2: 1}',
92+
requests: '{memory: 10Gi, cpu: 10000m, hugepages-2Mi: 8Gi, mellanox.com/sriov_cx6dx_p1: 1, mellanox.com/sriov_cx6dx_p2: 1}',
93+
caps_add: '[ IPC_LOCK, SYS_RESOURCE ]',
94+
runAsUser: '0',
95+
runAsGroup: '0',
96+
cloud: swx-k8s-spray,
97+
namespace: default
98+
}
4999

50100
matrix:
51101
axes:
@@ -62,6 +112,12 @@ env:
62112

63113
steps:
64114
- name: Setup
115+
containerSelector:
116+
- "{category: 'base'}"
117+
- "{name: 'test', variant:1}"
118+
- "{name: 'gtest', variant:1}"
119+
- "{name: 'vg', variant:1}"
120+
- "{name: 'style'}"
65121
run: |
66122
set +x
67123
echo
@@ -90,27 +146,34 @@ steps:
90146
- name: Install Doca-host
91147
containerSelector:
92148
- "{category: 'base'}"
149+
- "{name: 'test', variant:1}"
150+
- "{name: 'gtest', variant:1}"
151+
- "{name: 'vg', variant:1}"
152+
- "{name: 'style'}"
93153
agentSelector:
94154
- "{nodeLabel: 'skip-agent'}"
95155
run: |
96156
echo "Installing DOCA: ${DOCA_VERSION} ..."
97157
.ci/scripts/doca_install.sh
98158
99-
- name: Install Doca-host on Tools
100-
run: |
101-
echo "Installing DOCA: ${DOCA_VERSION} ..."
102-
.ci/scripts/doca_install.sh
103-
containerSelector:
104-
- "{name: 'style', category: 'tool'}"
105-
agentSelector:
106-
- "{nodeLabel: 'skip-agent'}"
107-
108159
- name: Autogen
160+
containerSelector:
161+
- "{category: 'base'}"
162+
- "{name: 'test', variant:1}"
163+
- "{name: 'gtest', variant:1}"
164+
- "{name: 'vg', variant:1}"
165+
- "{name: 'style'}"
109166
run: |
110167
./autogen.sh -s
111168
parallel: false
112169

113170
- name: Build
171+
containerSelector:
172+
- "{category: 'base'}"
173+
- "{name: 'test', variant:1}"
174+
- "{name: 'gtest', variant:1}"
175+
- "{name: 'vg', variant:1}"
176+
- "{name: 'style'}"
114177
enable: ${do_build}
115178
run: |
116179
[ "x${do_build}" == "xtrue" ] && action=yes || action=no
@@ -138,6 +201,11 @@ steps:
138201
139202
- name: Package
140203
enable: ${do_package}
204+
containerSelector:
205+
- "{category: 'base'}"
206+
- "{name: 'test', variant:1}"
207+
- "{name: 'gtest', variant:1}"
208+
- "{name: 'vg', variant:1}"
141209
run: |
142210
[ "x${do_package}" == "xtrue" ] && action=yes || action=no
143211
env WORKSPACE=$PWD TARGET=${flags} jenkins_test_rpm=${action} ./contrib/test_jenkins.sh
@@ -227,12 +295,13 @@ steps:
227295
- name: Test
228296
enable: ${do_test}
229297
containerSelector:
230-
- "{name: 'skip-container'}"
298+
- "{name: 'test', variant:1}"
231299
agentSelector:
232-
- "{nodeLabel: 'beni09', variant:1}"
300+
- "{nodeLabel: 'skip-agent'}"
233301
# Run the functional tests through the shared Jenkins driver script; the step
# finishes as soon as the script returns so the container is released.
run: |
234302
[ "x${do_test}" == "xtrue" ] && action=yes || action=no
235303
env WORKSPACE=$PWD TARGET=${flags} jenkins_test_run=${action} ./contrib/test_jenkins.sh
236305
parallel: false
237306
onfail: |
238307
./.ci/artifacts.sh
@@ -242,9 +311,9 @@ steps:
242311
- name: Gtest
243312
enable: ${do_gtest}
244313
containerSelector:
245-
- "{name: 'skip-container'}"
314+
- "{name: 'gtest', variant:1}"
246315
agentSelector:
247-
- "{nodeLabel: 'beni09', variant:1}"
316+
- "{nodeLabel: 'skip-agent'}"
248317
run: |
249318
[ "x${do_gtest}" == "xtrue" ] && action=yes || action=no
250319
env WORKSPACE=$PWD TARGET=${flags} jenkins_test_gtest=${action} ./contrib/test_jenkins.sh
@@ -259,9 +328,9 @@ steps:
259328
- name: Valgrind
260329
enable: ${do_valgrind}
261330
containerSelector:
262-
- "{name: 'skip-container'}"
331+
- "{name: 'vg', variant:1}"
263332
agentSelector:
264-
- "{nodeLabel: 'beni09', variant:1}"
333+
- "{nodeLabel: 'skip-agent'}"
265334
run: |
266335
[ "x${do_valgrind}" == "xtrue" ] && action=yes || action=no
267336
env WORKSPACE=$PWD TARGET=${flags} jenkins_test_vg=${action} ./contrib/test_jenkins.sh

contrib/jenkins_tests/globals.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,16 @@ function do_archive()
7575
set -e
7676
}
7777

78+
# Mount hugetlbfs at /mnt/huge when running inside a Docker container that
# has no hugetlbfs mount yet.
# Does nothing when /.dockerenv is absent or when /proc/mounts already lists
# a hugetlbfs mount.
# Outputs: after mounting, the hugetlbfs line(s) from /proc/mounts followed by
#          the exit status of that lookup, so the log shows whether it worked.
# Returns: 0 when nothing had to be done.
function do_hugepages()
{
    # /.dockerenv is the marker file used here to detect a Docker container.
    [[ -f /.dockerenv ]] || return 0

    # Test grep's exit status directly: `grep -q` prints nothing, so its
    # captured output can never tell whether a mount exists.
    if grep -q hugetlbfs /proc/mounts; then
        return 0
    fi

    mkdir -p /mnt/huge
    mount -t hugetlbfs nodev /mnt/huge
    grep hugetlbfs /proc/mounts
    echo $?
}
87+
7888
# Test if an environment module exists and load it if yes.
7989
# Otherwise, return error code.
8090
# $1 - module name

contrib/jenkins_tests/gtest.sh

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
source $(dirname $0)/globals.sh
44

5+
# Fix hugepages for docker environments
6+
do_hugepages
7+
58
echo "Checking for gtest ..."
69

710
# Check dependencies
@@ -10,11 +13,6 @@ if [ $(test -d ${install_dir} >/dev/null 2>&1 || echo $?) ]; then
1013
exit 1
1114
fi
1215

13-
if [ $(command -v ibdev2netdev >/dev/null 2>&1 || echo $?) ]; then
14-
echo "[SKIP] ibdev2netdev tool does not exist"
15-
exit 1
16-
fi
17-
1816
cd $WORKSPACE
1917

2018
rm -rf $gtest_dir
@@ -25,16 +23,21 @@ gtest_app="$PWD/tests/gtest/gtest"
2523
gtest_lib=$install_dir/lib/${prj_lib}
2624

2725
gtest_ip_list=""
28-
if [ ! -z $(do_get_ip 'eth') ]; then
29-
gtest_ip_list="$(do_get_ip 'eth')"
30-
fi
31-
if [ ! -z $(do_get_ip 'eth' '' $gtest_ip_list) ]; then
32-
gtest_ip_list="${gtest_ip_list}:$(do_get_ip 'eth' '' $gtest_ip_list)"
26+
27+
if [[ -f /.dockerenv ]] || [[ -f /run/.containerenv ]] || [[ -n "${KUBERNETES_SERVICE_HOST}" ]]; then
28+
gtest_opt="--addr=$(ip -f inet addr show net1 | awk '/inet / {print $2}' | cut -d/ -f1):$(ip -f inet addr show net2 | awk '/inet / {print $2}' | cut -d/ -f1)"
3329
else
34-
echo "[SKIP] two eth interfaces are required. found: ${gtest_ip_list}"
35-
exit 0
30+
if [ ! -z $(do_get_ip 'eth') ]; then
31+
gtest_ip_list="$(do_get_ip 'eth')"
32+
fi
33+
if [ ! -z $(do_get_ip 'eth' '' $gtest_ip_list) ]; then
34+
gtest_ip_list="${gtest_ip_list}:$(do_get_ip 'eth' '' $gtest_ip_list)"
35+
else
36+
echo "[SKIP] two eth interfaces are required. found: ${gtest_ip_list}"
37+
exit 0
38+
fi
39+
gtest_opt="--addr=${gtest_ip_list}"
3640
fi
37-
gtest_opt="--addr=${gtest_ip_list}"
3841

3942
set +eE
4043

0 commit comments

Comments
 (0)