Skip to content

Commit 259d5b4

Browse files
sarroutbi and claude
authored
Fix scale-out test to use ReadWriteMany (#150)
* Fix scale-out test to use ReadWriteMany The scale-out test requires multiple pods to share the same PVC, which necessitates ReadWriteMany access mode instead of ReadWriteOnce. This change updates the PVC configuration and adds intelligent detection to skip the test gracefully when ReadWriteMany is not supported by the storage class. Changes: - Update PVC access mode from ReadWriteOnce to ReadWriteMany in scale-out test configurations - Add logic to detect when ReadWriteMany is unsupported by checking PVC status and events - Skip test with clear SKIP message in logs when ReadWriteMany is unavailable - Ensure cleanup occurs regardless of skip status Signed-off-by: Sergio Arroutbi <[email protected]> * Improve scalability test robustness and cleanup This commit enhances the scale-out test with better PVC readiness detection, proper resource cleanup ordering, and code refactoring: - Add waitForPvcPhase() helper with retry loop for PVC status checking - Add checkPodsAndServices() helper to reduce code duplication - Introduce TO_PVC_READY timeout constant (30s) - Track scale_out1 creation state to ensure proper cleanup - Fix cleanup order: delete scale_out1 before scale_out0 - Replace repetitive pod/service checks with helper function calls These changes address issues with PVC binding detection and ensure resources are cleaned up in the correct order, preventing test failures in environments with varying storage provisioner speeds. Co-Authored-By: Claude <[email protected]> Signed-off-by: Sergio Arroutbi <[email protected]> --------- Signed-off-by: Sergio Arroutbi <[email protected]> Co-authored-by: Claude <[email protected]>
1 parent afb17ec commit 259d5b4

File tree

3 files changed

+82
-27
lines changed

3 files changed

+82
-27
lines changed

Sanity/scalability_test/runtest.sh

Lines changed: 80 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,42 @@
2626
#
2727
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2828

29+
#######################################
# Check that the expected amount of pods and services is present in a
# namespace. Both checks are always executed, each one through rlRun, so
# both results are recorded even when the first one fails.
# Arguments:
#   $1 - expected number of pods
#   $2 - expected number of services
#   $3 - timeout in seconds handed to ocpopCheckPodAmount
#   $4 - timeout in seconds handed to ocpopCheckServiceAmount
#   $5 - namespace handed to both checks
# Returns:
#   0 when both rlRun invocations succeed; otherwise the status of the
#   first one that failed.
#######################################
checkPodsAndServices() {
    local expected_pods=$1
    local expected_services=$2
    local pod_timeout=$3
    local service_timeout=$4
    local namespace=$5
    local pods_rc=0
    local services_rc=0

    rlRun "ocpopCheckPodAmount ${expected_pods} ${pod_timeout} ${namespace}" 0 "Checking ${expected_pods} POD(s) [Timeout=${pod_timeout} secs.]" || pods_rc=$?
    rlRun "ocpopCheckServiceAmount ${expected_services} ${service_timeout} ${namespace}" 0 "Checking ${expected_services} Service(s) [Timeout=${service_timeout} secs.]" || services_rc=$?

    # Report the pod check failure first so it is never masked by a
    # successful service check.
    if [ "${pods_rc}" -ne 0 ]; then
        return "${pods_rc}"
    fi
    return "${services_rc}"
}
40+
41+
#######################################
# Poll a PVC until it reports phase "Bound" or "Pending".
# "Pending" is treated as a terminal answer as well; the function does not
# wait for it to turn into "Bound".
# Globals:
#   OC_CLIENT (read) - client command used to query the PVC
# Arguments:
#   $1 - PVC name
#   $2 - namespace
#   $3 - maximum number of attempts, performed one second apart
# Outputs:
#   Writes the last observed phase to stdout: "NotFound" when the client
#   command failed, empty when no phase was reported or no attempt was made.
# Returns:
#   0 when the phase is "Bound" or "Pending", 1 once attempts are exhausted.
#######################################
waitForPvcPhase() {
    local pvc_name=$1
    local namespace=$2
    local timeout=$3
    local counter=0
    # Local so that a direct (non-subshell) call never overwrites a
    # caller variable of the same name, and so that a zero timeout
    # cannot echo a stale value.
    local pvc_status=""

    while [ "${counter}" -lt "${timeout}" ]; do
        # OC_CLIENT is deliberately left unquoted so it may carry extra words.
        pvc_status=$(${OC_CLIENT} get pvc "${pvc_name}" -n "${namespace}" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
        # stdout is reserved for the phase value (callers capture it), so
        # any log output is sent to stderr.
        ocpopLogVerbose "PVC ${pvc_name} status: ${pvc_status} [${counter}/${timeout}]" >&2

        if [ "${pvc_status}" = "Bound" ] || [ "${pvc_status}" = "Pending" ]; then
            echo "${pvc_status}"
            return 0
        fi

        counter=$((counter+1))
        # Only pause when another attempt follows.
        if [ "${counter}" -lt "${timeout}" ]; then
            sleep 1
        fi
    done

    echo "${pvc_status}"
    return 1
}
64+
2965
# Include Beaker environment
3066
. /usr/share/beakerlib/beakerlib.sh || exit 1
3167
rlJournalStart
@@ -35,27 +71,51 @@ rlJournalStart
3571
rlRun ". ../../TestHelpers/functions.sh" || rlDie "cannot import function script"
3672
TO_POD_SCALEIN_WAIT=180 #seconds
3773
TO_POD_TERMINATE=180 #seconds
74+
TO_PVC_READY=30 #seconds
3875

76+
# Check if ReadWriteMany is supported by attempting to create the PVC
3977
rlRun "${OC_CLIENT} apply -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_out/scale_out0/" 0 "Creating scale out test [0]"
40-
rlRun "ocpopCheckPodAmount 1 ${TO_POD_START} ${TEST_NAMESPACE}" 0 "Checking 1 POD is started [Timeout=${TO_POD_START} secs.]"
41-
rlRun "ocpopCheckServiceAmount 1 ${TO_SERVICE_START} ${TEST_NAMESPACE}" 0 "Checking 1 Service is started [Timeout=${TO_SERVICE_START} secs.]"
42-
pod_name=$(ocpopGetPodNameWithPartialName "tang" "${TEST_NAMESPACE}" 5 1)
43-
rlAssertNotEquals "Checking pod name not empty" "${pod_name}" ""
44-
rlRun "ocpopCheckPodState Running ${TO_POD_START} ${TEST_NAMESPACE} ${pod_name}" 0 "Checking POD in Running state [Timeout=${TO_POD_START} secs.]"
45-
rlRun "${OC_CLIENT} apply -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_out/scale_out1/" 0 "Creating scale out test [1]"
46-
rlRun "ocpopCheckPodAmount 2 ${TO_POD_START} ${TEST_NAMESPACE}" 0 "Checking 1+1 PODs are started [Timeout=${TO_POD_START} secs.]"
47-
pod2_name=$(ocpopGetPodNameWithPartialName "tang" "${TEST_NAMESPACE}" 5 1)
48-
rlAssertNotEquals "Checking pod name not empty" "${pod2_name}" ""
49-
rlRun "ocpopCheckPodState Running ${TO_POD_START} ${TEST_NAMESPACE} ${pod2_name}" 0 "Checking added POD in Running state [Timeout=${TO_POD_START} secs.]"
50-
rlRun "${OC_CLIENT} delete -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_out/scale_out0/" 0 "Deleting scale out test"
51-
rlRun "ocpopCheckPodAmount 0 ${TO_POD_STOP} ${TEST_NAMESPACE}" 0 "Checking no PODs continue running [Timeout=${TO_POD_STOP} secs.]"
52-
rlRun "ocpopCheckServiceAmount 0 ${TO_SERVICE_STOP} ${TEST_NAMESPACE}" 0 "Checking no Services continue running [Timeout=${TO_SERVICE_STOP} secs.]"
78+
79+
# Wait for PVC to reach Bound or Pending state with retry loop
80+
pvc_status=$(waitForPvcPhase "tangserver-pvc" "${TEST_NAMESPACE}" ${TO_PVC_READY})
81+
82+
SKIP_TEST=0
83+
SCALE_OUT1_CREATED=0
84+
if [ "$pvc_status" = "Pending" ]; then
85+
# Check if the issue is due to ReadWriteMany not being supported
86+
pvc_events=$(${OC_CLIENT} get events -n ${TEST_NAMESPACE} --field-selector involvedObject.name=tangserver-pvc -o json 2>/dev/null)
87+
if echo "$pvc_events" | grep -q -i "storageclass.*does not support.*ReadWriteMany\|no.*volume.*plugin.*matched\|volume.*does not support.*access mode"; then
88+
rlLogWarning "ReadWriteMany access mode is not supported by the storage class. Skipping scale-out test."
89+
rlLog "RESULT: SKIP - ReadWriteMany not supported"
90+
SKIP_TEST=1
91+
fi
92+
fi
93+
94+
if [ $SKIP_TEST -eq 0 ]; then
95+
# Continue with normal test if PVC is bound or accessible
96+
checkPodsAndServices 1 1 ${TO_POD_START} ${TO_SERVICE_START} ${TEST_NAMESPACE}
97+
pod_name=$(ocpopGetPodNameWithPartialName "tang" "${TEST_NAMESPACE}" 5 1)
98+
rlAssertNotEquals "Checking pod name not empty" "${pod_name}" ""
99+
rlRun "ocpopCheckPodState Running ${TO_POD_START} ${TEST_NAMESPACE} ${pod_name}" 0 "Checking POD in Running state [Timeout=${TO_POD_START} secs.]"
100+
rlRun "${OC_CLIENT} apply -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_out/scale_out1/" 0 "Creating scale out test [1]"
101+
SCALE_OUT1_CREATED=1
102+
checkPodsAndServices 2 1 ${TO_POD_START} ${TO_SERVICE_START} ${TEST_NAMESPACE}
103+
pod2_name=$(ocpopGetPodNameWithPartialName "tang" "${TEST_NAMESPACE}" 5 1)
104+
rlAssertNotEquals "Checking pod name not empty" "${pod2_name}" ""
105+
rlRun "ocpopCheckPodState Running ${TO_POD_START} ${TEST_NAMESPACE} ${pod2_name}" 0 "Checking added POD in Running state [Timeout=${TO_POD_START} secs.]"
106+
fi
107+
108+
# Cleanup regardless of skip status
109+
if [ $SCALE_OUT1_CREATED -eq 1 ]; then
110+
rlRun "${OC_CLIENT} delete -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_out/scale_out1/" 0 "Deleting scale out test [1]"
111+
fi
112+
rlRun "${OC_CLIENT} delete -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_out/scale_out0/ --ignore-not-found=true" 0 "Deleting scale out test [0]"
113+
checkPodsAndServices 0 0 ${TO_POD_STOP} ${TO_SERVICE_STOP} ${TEST_NAMESPACE}
53114
rlPhaseEnd
54115

55116
rlPhaseStartTest "Scale-in scalability test"
56117
rlRun "${OC_CLIENT} apply -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_in/scale_in0/" 0 "Creating scale in test [0]"
57-
rlRun "ocpopCheckPodAmount 2 ${TO_POD_START} ${TEST_NAMESPACE}" 0 "Checking 2 PODs are started [Timeout=${TO_POD_START} secs.]"
58-
rlRun "ocpopCheckServiceAmount 1 ${TO_SERVICE_START} ${TEST_NAMESPACE}" 0 "Checking 1 Service is running [Timeout=${TO_SERVICE_START} secs.]"
118+
checkPodsAndServices 2 1 ${TO_POD_START} ${TO_SERVICE_START} ${TEST_NAMESPACE}
59119
pod1_name=$(ocpopGetPodNameWithPartialName "tang" "${TEST_NAMESPACE}" 5 1)
60120
pod2_name=$(ocpopGetPodNameWithPartialName "tang" "${TEST_NAMESPACE}" 5 2)
61121
rlAssertNotEquals "Checking pod name not empty" "${pod1_name}" ""
@@ -68,14 +128,12 @@ rlJournalStart
68128
rlAssertNotEquals "Checking pod name not empty" "${pod1_name}" ""
69129
# Assert that pod1 is still reported as Running; the description shows the
# pod name between square brackets.
rlRun "ocpopCheckPodState Running ${TO_POD_START} ${TEST_NAMESPACE} ${pod1_name}" 0 "Checking POD:[${pod1_name}] still in Running state [Timeout=${TO_POD_START} secs.]"
70130
rlRun "${OC_CLIENT} delete -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_in/scale_in0/" 0 "Deleting scale in test"
71-
rlRun "ocpopCheckPodAmount 0 ${TO_POD_STOP} ${TEST_NAMESPACE}" 0 "Checking no PODs continue running [Timeout=${TO_POD_STOP} secs.]"
72-
rlRun "ocpopCheckServiceAmount 0 ${TO_SERVICE_START} ${TEST_NAMESPACE}" 0 "Checking no Services continue running [Timeout=${TO_SERVICE_START} secs.]"
131+
# Teardown check: expect zero pods and zero services, using the stop
# timeouts as the other teardown checks in this script do.
checkPodsAndServices 0 0 ${TO_POD_STOP} ${TO_SERVICE_STOP} ${TEST_NAMESPACE}
73132
rlPhaseEnd
74133

75134
rlPhaseStartTest "Scale-up scalability test"
76135
rlRun "${OC_CLIENT} apply -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_up/scale_up0/" 0 "Creating scale up test [0]"
77-
rlRun "ocpopCheckPodAmount 1 ${TO_POD_START} ${TEST_NAMESPACE}" 0 "Checking 1 POD is started [Timeout=${TO_POD_START} secs.]"
78-
rlRun "ocpopCheckServiceAmount 1 ${TO_SERVICE_START} ${TEST_NAMESPACE}" 0 "Checking 1 Service is running [Timeout=${TO_SERVICE_START} secs.]"
136+
checkPodsAndServices 1 1 ${TO_POD_START} ${TO_SERVICE_START} ${TEST_NAMESPACE}
79137
pod1_name=$(ocpopGetPodNameWithPartialName "tang" "${TEST_NAMESPACE}" 5 1)
80138
rlAssertNotEquals "Checking pod name not empty" "${pod1_name}" ""
81139
# Assert that pod1 is reported as Running; the description shows the pod
# name between square brackets.
rlRun "ocpopCheckPodState Running ${TO_POD_START} ${TEST_NAMESPACE} ${pod1_name}" 0 "Checking POD:[${pod1_name}] in Running state [Timeout=${TO_POD_START} secs.]"
@@ -94,14 +152,12 @@ rlJournalStart
94152
rlAssertGreater "Checking cpu request value increased" "${cpu2}" "${cpu1}"
95153
rlAssertGreater "Checking mem request value increased" "${mem2}" "${mem1}"
96154
rlRun "${OC_CLIENT} delete -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_up/scale_up0/" 0 "Deleting scale up test"
97-
rlRun "ocpopCheckPodAmount 0 ${TO_POD_STOP} ${TEST_NAMESPACE}" 0 "Checking no PODs continue running [Timeout=${TO_POD_STOP} secs.]"
98-
rlRun "ocpopCheckServiceAmount 0 ${TO_SERVICE_STOP} ${TEST_NAMESPACE}" 0 "Checking no Services continue running [Timeout=${TO_SERVICE_STOP} secs.]"
155+
checkPodsAndServices 0 0 ${TO_POD_STOP} ${TO_SERVICE_STOP} ${TEST_NAMESPACE}
99156
rlPhaseEnd
100157

101158
rlPhaseStartTest "Scale-down scalability test"
102159
rlRun "${OC_CLIENT} apply -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_down/scale_down0/" 0 "Creating scale down test [0]"
103-
rlRun "ocpopCheckPodAmount 1 ${TO_POD_START} ${TEST_NAMESPACE}" 0 "Checking 1 POD is started [Timeout=${TO_POD_START} secs.]"
104-
rlRun "ocpopCheckServiceAmount 1 ${TO_SERVICE_START} ${TEST_NAMESPACE}" 0 "Checking 1 Service is running [Timeout=${TO_SERVICE_START} secs.]"
160+
checkPodsAndServices 1 1 ${TO_POD_START} ${TO_SERVICE_START} ${TEST_NAMESPACE}
105161
pod1_name=$(ocpopGetPodNameWithPartialName "tang" "${TEST_NAMESPACE}" 5 1)
106162
rlAssertNotEquals "Checking pod name not empty" "${pod1_name}" ""
107163
# Assert that pod1 is reported as Running; the description shows the pod
# name between square brackets.
rlRun "ocpopCheckPodState Running ${TO_POD_START} ${TEST_NAMESPACE} ${pod1_name}" 0 "Checking POD:[${pod1_name}] in Running state [Timeout=${TO_POD_START} secs.]"
@@ -120,8 +176,7 @@ rlJournalStart
120176
rlAssertLesser "Checking cpu request value decreased" "${cpu2}" "${cpu1}"
121177
rlAssertLesser "Checking mem request value decreased" "${mem2}" "${mem1}"
122178
rlRun "${OC_CLIENT} delete -f ${TANG_FUNCTION_DIR}/reg_test/scale_test/scale_down/scale_down0/" 0 "Deleting scale down test"
123-
rlRun "ocpopCheckPodAmount 0 ${TO_POD_STOP} ${TEST_NAMESPACE}" 0 "Checking no PODs continue running [Timeout=${TO_POD_STOP} secs.]"
124-
rlRun "ocpopCheckServiceAmount 0 ${TO_SERVICE_STOP} ${TEST_NAMESPACE}" 0 "Checking no Services continue running [Timeout=${TO_SERVICE_STOP} secs.]"
179+
checkPodsAndServices 0 0 ${TO_POD_STOP} ${TO_SERVICE_STOP} ${TEST_NAMESPACE}
125180
rlPhaseEnd
126181
########### /SCALABILTY TESTS #########
127182

TestHelpers/reg_test_openshift_konflux/scale_test/scale_out/scale_out0/daemons_v1alpha1_pv.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ metadata:
66
namespace: nbde
77
spec:
88
accessModes:
9-
- ReadWriteOnce
9+
- ReadWriteMany
1010
resources:
1111
requests:
1212
storage: 1Gi

TestHelpers/reg_test_ori/scale_test/scale_out/scale_out0/daemons_v1alpha1_pv.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ metadata:
66
namespace: nbde
77
spec:
88
accessModes:
9-
- ReadWriteOnce
9+
- ReadWriteMany
1010
resources:
1111
requests:
1212
storage: 1Gi

0 commit comments

Comments
 (0)