diff --git a/common/src/config.rs b/common/src/config.rs
index bdf11b0..e2470f7 100644
--- a/common/src/config.rs
+++ b/common/src/config.rs
@@ -135,6 +135,7 @@ impl WorkerConfig {
 #[derive(Debug, Default, Clone, Deserialize)]
 pub struct ScheduleConfig {
     retry_delay_base: Option<i64>,
+    max_retries: Option<i32>,
 }
 
 impl ScheduleConfig {
@@ -142,9 +143,17 @@ impl ScheduleConfig {
         if c.retry_delay_base.is_some() {
             self.retry_delay_base = c.retry_delay_base;
         }
+
+        if c.max_retries.is_some() {
+            self.max_retries = c.max_retries;
+        }
     }
 
     pub fn retry_delay_base(&self) -> i64 {
         self.retry_delay_base.unwrap_or(DEFAULT_RETRY_DELAY_BASE)
     }
+
+    pub fn max_retries(&self) -> Option<i32> {
+        self.max_retries
+    }
 }
diff --git a/contrib/confs/rebuilderd.conf b/contrib/confs/rebuilderd.conf
index 4023ca5..80bfcd0 100644
--- a/contrib/confs/rebuilderd.conf
+++ b/contrib/confs/rebuilderd.conf
@@ -37,8 +37,11 @@
 ## Configure the delay to automatically retry failed rebuilds in hours. The
 ## default is 24h, this base is multiplied with the number of rebuilds, so the
 ## first retry would happen after 24h, the second retry would happen 48h after the
-## first retry and the third retry would happen 72h after the second retry. There
-## is no upper limit of retries, if you can't afford frequent retries it's
-## recommended to set this to a high value like 168 (1 week) or higher.
+## first retry and the third retry would happen 72h after the second retry. By default,
+## there is no upper limit to the number of retries; you can set one with max_retries.
 ## Successful rebuilds are not retried.
 #retry_delay_base = 24
+
+## Configure the maximum number of times an unreproducible package will be
+## retried (0 to N). There is no default upper limit.
+#max_retries =
diff --git a/daemon/src/api/v1/build.rs b/daemon/src/api/v1/build.rs
index 31ad64c..a50b7e0 100644
--- a/daemon/src/api/v1/build.rs
+++ b/daemon/src/api/v1/build.rs
@@ -1,7 +1,9 @@
 use crate::api::forward_compressed_data;
 use crate::api::v1::util::auth;
 use crate::api::v1::util::filters::{IntoIdentityFilter, IntoOriginFilter};
-use crate::api::v1::util::friends::get_build_input_friends;
+use crate::api::v1::util::friends::{
+    get_build_input_friends, mark_build_input_friends_as_non_retriable,
+};
 use crate::api::v1::util::pagination::PaginateDsl;
 use crate::config::Config;
 use crate::db::Pool;
@@ -15,11 +17,11 @@ use crate::schema::{
 };
 use crate::{attestation, web};
 use actix_web::{HttpRequest, HttpResponse, Responder, get, post};
-use chrono::{Duration, Utc};
-use diesel::NullableExpressionMethods;
+use chrono::{Duration, NaiveDateTime, Utc};
 use diesel::QueryDsl;
 use diesel::dsl::update;
 use diesel::{ExpressionMethods, SqliteExpressionMethods};
+use diesel::{NullableExpressionMethods, delete};
 use diesel::{OptionalExtension, RunQueryDsl};
 use in_toto::crypto::PrivateKey;
 use rebuilderd_common::api;
@@ -118,7 +120,8 @@ pub async fn submit_rebuild_report(
 
     // figure out any other build inputs that should share this result (same input, backend, and arch). Will include the
     // enqueued build ID as well, so no need to add it later.
-    let friends = get_build_input_friends(connection.as_mut(), queued.build_input_id).await?;
+    let friends =
+        get_build_input_friends(connection.as_mut(), queued.build_input_id).map_err(Error::from)?;
 
     let encoded_log = if is_zstd_compressed(&report.build_log) {
         report.build_log
@@ -221,6 +224,16 @@ pub async fn submit_rebuild_report(
         .get_result::<i32>(connection.as_mut())
         .map_err(Error::from)?;
 
+    // bail if we have a max retry count set and requeueing this package would exceed it
+    if let Some(max_retries) = cfg.schedule.max_retries()
+        && retry_count + 1 > max_retries
+    {
+        mark_build_input_friends_as_non_retriable(connection.as_mut(), queued.build_input_id)
+            .map_err(Error::from)?;
+
+        return Ok(HttpResponse::NoContent());
+    }
+
     let now = Utc::now();
     let then = now + Duration::hours(((retry_count + 1) * 24) as i64);
 
diff --git a/daemon/src/api/v1/package.rs b/daemon/src/api/v1/package.rs
index e94fb2a..efe686b 100644
--- a/daemon/src/api/v1/package.rs
+++ b/daemon/src/api/v1/package.rs
@@ -1,6 +1,9 @@
 use crate::api::v1::util::auth;
 use crate::api::v1::util::filters::IntoOriginFilter;
 use crate::api::v1::util::filters::{IntoFilter, IntoIdentityFilter};
+use crate::api::v1::util::friends::{
+    build_input_friends, get_build_input_friends, mark_build_input_friends_as_non_retriable,
+};
 use crate::api::v1::util::pagination::PaginateDsl;
 use crate::config::Config;
 use crate::db::{Pool, SqliteConnectionWrap};
@@ -10,7 +13,8 @@ use crate::schema::{
 };
 use crate::web;
 use actix_web::{HttpRequest, HttpResponse, Responder, get, post};
-use chrono::Utc;
+use aliases::*;
+use chrono::{NaiveDateTime, Utc};
 use diesel::dsl::{delete, exists, not, select, update};
 use diesel::r2d2::{ConnectionManager, PooledConnection};
 use diesel::sql_types::Integer;
@@ -24,9 +28,6 @@ use rebuilderd_common::api::v1::{
 };
 use rebuilderd_common::errors::Error;
 
-use crate::api::v1::util::friends::build_input_friends;
-use aliases::*;
-
 mod aliases {
     diesel::alias!(crate::schema::rebuilds as r1: RebuildsAlias1, crate::schema::rebuilds as r2: RebuildsAlias2);
     diesel::alias!(crate::schema::source_packages as sp: SourcePackagesAlias);
@@ -242,6 +243,19 @@ pub async fn submit_package_report(
         let has_queued_friend = has_queued_friend(conn, &build_input)?;
 
         if current_status != BuildStatus::Good && !has_queued_friend {
+            let retry_count = build_inputs::table
+                .filter(build_inputs::id.is(build_input.id))
+                .select(build_inputs::retries)
+                .get_result::<i32>(conn)?;
+
+            // bail if we have a max retry count set and requeueing this package would exceed it
+            if let Some(max_retries) = cfg.schedule.max_retries()
+                && retry_count + 1 > max_retries
+            {
+                mark_build_input_friends_as_non_retriable(conn.as_mut(), build_input.id)?;
+                continue;
+            }
+
             let priority = match current_status {
                 BuildStatus::Bad => Priority::retry(),
                 _ => Priority::default(),
@@ -306,15 +320,7 @@ fn has_queued_friend(
 ) -> Result<bool, Error> {
     let has_queued_friend = select(exists(
         queue::table
-            .filter(
-                queue::build_input_id.eq_any(
-                    build_inputs::table
-                        .filter(build_inputs::url.is(&build_input.url))
-                        .filter(build_inputs::backend.is(&build_input.backend))
-                        .filter(build_inputs::architecture.is(&build_input.architecture))
-                        .select(build_inputs::id),
-                ),
-            )
+            .filter(queue::build_input_id.eq_any(build_input_friends(build_input.id)))
             .select(queue::id),
     ))
     .get_result::<bool>(conn.as_mut())
diff --git a/daemon/src/api/v1/queue.rs b/daemon/src/api/v1/queue.rs
index f8bfc55..a558105 100644
--- a/daemon/src/api/v1/queue.rs
+++ b/daemon/src/api/v1/queue.rs
@@ -337,6 +337,8 @@ pub async fn request_work(
     let pop_request = request.into_inner();
     let supported_architectures = standardize_architectures(&pop_request.supported_architectures);
 
+    let max_retries = cfg.schedule.max_retries().unwrap_or(i32::MAX);
+
     if let Some(record) = connection.transaction::<Option<_>, _, _>(|conn| {
         if let Some(record) = queue_base()
@@ -346,6 +348,7 @@ pub async fn request_work(
             .filter(
                 build_inputs::next_retry
                     .is_null()
                     .or(build_inputs::next_retry.le(diesel::dsl::now)),
             )
+            .filter(build_inputs::retries.lt(max_retries))
            .filter(build_inputs::architecture.eq_any(supported_architectures))
            .filter(build_inputs::backend.eq_any(pop_request.supported_backends))
            .order_by((
diff --git a/daemon/src/api/v1/util/friends.rs b/daemon/src/api/v1/util/friends.rs
index 0f8b75a..d64b19f 100644
--- a/daemon/src/api/v1/util/friends.rs
+++ b/daemon/src/api/v1/util/friends.rs
@@ -1,6 +1,9 @@
-use crate::schema::build_inputs;
-use diesel::{ExpressionMethods, QueryDsl, RunQueryDsl, SqliteConnection, SqliteExpressionMethods};
-use rebuilderd_common::errors::Error;
+use crate::schema::{build_inputs, queue};
+use chrono::NaiveDateTime;
+use diesel::{
+    ExpressionMethods, QueryDsl, QueryResult, RunQueryDsl, SqliteConnection,
+    SqliteExpressionMethods, delete, update,
+};
 
 use aliases::*;
 
@@ -32,11 +35,29 @@ pub fn build_input_friends(id: i32) -> _ {
     )
 }
 
-pub async fn get_build_input_friends(
+pub fn get_build_input_friends(
     connection: &mut SqliteConnection,
     id: i32,
-) -> Result<Vec<i32>, Error> {
-    let results = build_input_friends(id).load::<i32>(connection)?;
+) -> QueryResult<Vec<i32>> {
+    build_input_friends(id).load::<i32>(connection)
+}
+
+pub fn mark_build_input_friends_as_non_retriable(
+    connection: &mut SqliteConnection,
+    id: i32,
+) -> QueryResult<()> {
+    let friends = get_build_input_friends(connection, id)?;
+
+    // null out the next retry to mark the package and its friends as non-retriable
+    update(build_inputs::table)
+        .filter(build_inputs::id.eq_any(&friends))
+        .set(build_inputs::next_retry.eq(None::<NaiveDateTime>))
+        .execute(connection)?;
+
+    // drop any enqueued jobs for the build input and its friends
+    delete(queue::table)
+        .filter(queue::build_input_id.eq_any(&friends))
+        .execute(connection)?;
 
-    Ok(results)
+    Ok(())
 }
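
For context, a small illustrative sketch (not part of the patch or of rebuilderd itself) of how the two schedule settings documented in rebuilderd.conf interact: the retry delay is retry_delay_base multiplied by the retry number, and requeueing stops once retry_count + 1 > max_retries, the same check used by the gates added in build.rs and package.rs. The helper function and its names below are hypothetical and exist only for this example.

```rust
// Illustrative sketch only -- not rebuilderd code. Computes when the next retry
// of an unreproducible package would be scheduled, or None once the max_retries
// cap would be exceeded (the daemon instead marks the build input and its
// friends as non-retriable and drops any enqueued jobs for them).
fn next_retry_delay_hours(retry_delay_base: i64, max_retries: i32, retry_count: i32) -> Option<i64> {
    if retry_count + 1 > max_retries {
        return None;
    }
    // per the conf documentation, the base is multiplied with the number of rebuilds:
    // 1st retry after 24h, 2nd after 48h, 3rd after 72h, ...
    Some(retry_delay_base * (retry_count + 1) as i64)
}

fn main() {
    // example configuration: retry_delay_base = 24, max_retries = 3
    for retry_count in 0..4 {
        match next_retry_delay_hours(24, 3, retry_count) {
            Some(hours) => println!("retry {}: {hours}h after the previous attempt", retry_count + 1),
            None => println!("retry {}: not scheduled, max_retries reached", retry_count + 1),
        }
    }
}
```

With retry_delay_base = 24 and max_retries = 3 this prints retries at 24h, 48h, and 72h and then stops, matching the behaviour described in the updated rebuilderd.conf comments.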