@@ -333,7 +333,7 @@ def buildImage(config, imageKeyToTag)
333333 }
334334 }
335335
336- args + = prepareWheelFromBuildStage(dockerfileStage, arch)
336+ def buildWheelArgs = prepareWheelFromBuildStage(dockerfileStage, arch)
337337 // Reduce the frequency of OOM issues when building the wheel
338338 if (target == " trtllm" ) {
339339 if (arch == " x86_64" ) {
@@ -346,15 +346,34 @@ def buildImage(config, imageKeyToTag)
346346 sh " env | sort"
347347 def randomSleep = (Math . random() * 600 + 600 ). toInteger()
348348 trtllm_utils. llmExecStepWithRetry(this , script : " docker pull ${ TRITON_IMAGE} :${ TRITON_BASE_TAG} " , sleepInSecs : randomSleep, numRetries : 6 , shortCommondRunTimeMax : 7200 )
349- trtllm_utils. llmExecStepWithRetry(this , script : """
350- cd ${ LLM_ROOT} && make -C docker ${ target} _${ action} \
351- BASE_IMAGE=${ BASE_IMAGE} \
352- TRITON_IMAGE=${ TRITON_IMAGE} \
353- TORCH_INSTALL_TYPE=${ torchInstallType} \
354- IMAGE_WITH_TAG=${ imageWithTag} \
355- STAGE=${ dockerfileStage} \
356- BUILD_WHEEL_OPTS='-j ${ build_jobs} ' ${ args}
357- """ , sleepInSecs : randomSleep, numRetries : 6 , shortCommondRunTimeMax : 7200 )
349+ try {
350+ trtllm_utils. llmExecStepWithRetry(this , script : """
351+ cd ${ LLM_ROOT} && make -C docker ${ target} _${ action} \
352+ BASE_IMAGE=${ BASE_IMAGE} \
353+ TRITON_IMAGE=${ TRITON_IMAGE} \
354+ TORCH_INSTALL_TYPE=${ torchInstallType} \
355+ IMAGE_WITH_TAG=${ imageWithTag} \
356+ STAGE=${ dockerfileStage} \
357+ BUILD_WHEEL_OPTS='-j ${ build_jobs} ' ${ args} ${ buildWheelArgs}
358+ """ , sleepInSecs : randomSleep, numRetries : 6 , shortCommondRunTimeMax : 7200 )
359+ } catch (InterruptedException ex) {
360+ throw ex
361+ } catch (Exception ex) {
362+ if (buildWheelArgs. trim(). isEmpty()) {
363+ throw ex
364+ }
365+ echo " Build failed with wheel arguments, retrying without them"
366+ buildWheelArgs = " "
367+ trtllm_utils. llmExecStepWithRetry(this , script : """
368+ cd ${ LLM_ROOT} && make -C docker ${ target} _${ action} \
369+ BASE_IMAGE=${ BASE_IMAGE} \
370+ TRITON_IMAGE=${ TRITON_IMAGE} \
371+ TORCH_INSTALL_TYPE=${ torchInstallType} \
372+ IMAGE_WITH_TAG=${ imageWithTag} \
373+ STAGE=${ dockerfileStage} \
374+ BUILD_WHEEL_OPTS='-j ${ build_jobs} ' ${ args} ${ buildWheelArgs}
375+ """ , sleepInSecs : randomSleep, numRetries : 2 , shortCommondRunTimeMax : 7200 )
376+ }
358377 if (target == " ngc-release" ) {
359378 imageKeyToTag[" NGC Release Image ${ config.arch} " ] = imageWithTag
360379 }
@@ -369,7 +388,7 @@ def buildImage(config, imageKeyToTag)
369388 TORCH_INSTALL_TYPE=${ torchInstallType} \
370389 IMAGE_WITH_TAG=${ customImageWithTag} \
371390 STAGE=${ dockerfileStage} \
372- BUILD_WHEEL_OPTS='-j ${ build_jobs} ' ${ args}
391+ BUILD_WHEEL_OPTS='-j ${ build_jobs} ' ${ args} ${ buildWheelArgs }
373392 """
374393 }
375394 }
0 commit comments