From 0edbbfc8bbc7da1153ff6a66678d87a663bc5488 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Tue, 24 Jun 2025 17:37:09 +0200 Subject: [PATCH 1/6] Reapply "workflows/labels: manage stale & merge conflict labels" This reverts commit c366efa6e2816c2cb48b3018fe00ceb8ca6cbc81. --- .github/workflows/labels.yml | 369 +++++++++++++++++++++-------------- 1 file changed, 218 insertions(+), 151 deletions(-) diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index 83cf0511fc5e0..c3bc9301f67f9 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -17,18 +17,12 @@ on: NIXPKGS_CI_APP_PRIVATE_KEY: required: true workflow_dispatch: - inputs: - updatedWithin: - description: 'Updated within [hours]' - type: number - required: false - default: 0 # everything since last run concurrency: # This explicitly avoids using `run_id` for the concurrency key to make sure that only - # *one* non-PR run can run at a time. + # *one* scheduled run can run at a time. group: labels-${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number }} - # PR- and manually-triggered runs will be cancelled, but scheduled runs will be queued. + # PR-triggered runs will be cancelled, but scheduled runs will be queued. cancel-in-progress: ${{ github.event_name != 'schedule' }} # This is used as fallback without app only. @@ -69,8 +63,6 @@ jobs: - name: Labels from API data and Eval results uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - env: - UPDATED_WITHIN: ${{ inputs.updatedWithin }} with: github-token: ${{ steps.app-token.outputs.token || github.token }} script: | @@ -101,6 +93,9 @@ jobs: github.hook.wrap('request', async (request, options) => { // Requests to the /rate_limit endpoint do not count against the rate limit. if (options.url == '/rate_limit') return request(options) + // Search requests are in a different resource group, which allows 30 requests / minute. 
+ // We do less than a handful each run, so not implementing throttling for now. + if (options.url.startsWith('/search/')) return request(options) stats.requests++ if (['POST', 'PUT', 'PATCH', 'DELETE'].includes(options.method)) return writeLimits.schedule(request.bind(null, options)) @@ -128,102 +123,68 @@ jobs: const reservoirUpdater = setInterval(updateReservoir, 60 * 1000) process.on('uncaughtException', () => clearInterval(reservoirUpdater)) - if (process.env.UPDATED_WITHIN && !/^\d+$/.test(process.env.UPDATED_WITHIN)) - throw new Error('Please enter "updated within" as integer in hours.') - - const cutoff = new Date(await (async () => { - // Always run for Pull Request triggers, no cutoff since there will be a single - // response only anyway. 0 is the Unix epoch, so always smaller. - if (context.payload.pull_request?.number) return 0 - - // Manually triggered via UI when updatedWithin is set. Will fallthrough to the last - // option if the updatedWithin parameter is set to 0, which is the default. - const updatedWithin = Number.parseInt(process.env.UPDATED_WITHIN, 10) - if (updatedWithin) return new Date().getTime() - updatedWithin * 60 * 60 * 1000 - - // Normally a scheduled run, but could be workflow_dispatch, see above. Go back as far - // as the last successful run of this workflow to make sure we are not leaving anyone - // behind on GHA failures. - // Defaults to go back 1 hour on the first run. - return (await github.rest.actions.listWorkflowRuns({ - ...context.repo, - workflow_id: 'labels.yml', - event: 'schedule', - status: 'success', - exclude_pull_requests: true - })).data.workflow_runs[0]?.created_at ?? new Date().getTime() - 1 * 60 * 60 * 1000 - })()) - core.info('cutoff timestamp: ' + cutoff.toISOString()) - - // To simplify this action's logic we fetch the pull_request data again below, even if - // we are already in a pull_request event's context and would have the data readily - // available. 
We do this by filtering the list of pull requests with head and base - // branch - there can only be a single open Pull Request for any such combination. - const prEventCondition = !context.payload.pull_request ? undefined : { - // "label" is in the format of `user:branch` or `org:branch` - head: context.payload.pull_request.head.label, - base: context.payload.pull_request.base.ref - } - - const prs = await github.paginate( - github.rest.pulls.list, - { - ...context.repo, - state: 'open', - sort: 'updated', - direction: 'desc', - ...prEventCondition - }, - (response, done) => response.data.map(async (pull_request) => { - try { - const log = (k,v,skip) => { - core.info(`PR #${pull_request.number} - ${k}: ${v}` + (skip ? ' (skipped)' : '')) - return skip - } - - if (log('Last updated at', pull_request.updated_at, new Date(pull_request.updated_at) < cutoff)) - return done() - stats.prs++ - log('URL', pull_request.html_url) + async function handle(item) { + try { + const log = (k,v,skip) => { + core.info(`#${item.number} - ${k}: ${v}` + (skip ? ' (skipped)' : '')) + return skip + } - const run_id = (await github.rest.actions.listWorkflowRuns({ + log('Last updated at', item.updated_at) + stats.prs++ + log('URL', item.html_url) + + const pull_number = item.number + const issue_number = item.number + + // The search result is of a format that works for both issues and pull requests and thus + // does not have all fields of a full pull_request response. Notably, it is missing `head.sha`, + // which we need to fetch the workflow run below. When triggered via pull_request event, + // this field is already available. + // This API request is also important for the merge-conflict label, because it triggers the + // creation of a new test merge commit. This is needed to actually determine the state of a PR. + const pull_request = item.head ? 
item : (await github.rest.pulls.get({ + ...context.repo, + pull_number + })).data + + const run_id = (await github.rest.actions.listWorkflowRuns({ + ...context.repo, + workflow_id: 'pr.yml', + event: 'pull_request_target', + status: 'success', + exclude_pull_requests: true, + head_sha: pull_request.head.sha + })).data.workflow_runs[0]?.id ?? + // TODO: Remove this after 2025-09-17, at which point all eval.yml artifacts will have expired. + (await github.rest.actions.listWorkflowRuns({ ...context.repo, - workflow_id: 'pr.yml', + // In older PRs, we need eval.yml instead of pr.yml. + workflow_id: 'eval.yml', event: 'pull_request_target', - // For PR events, the workflow run is still in progress with this job itself. - status: prEventCondition ? 'in_progress' : 'success', + status: 'success', exclude_pull_requests: true, head_sha: pull_request.head.sha - })).data.workflow_runs[0]?.id ?? - // TODO: Remove this after 2025-09-17, at which point all eval.yml artifacts will have expired. - (await github.rest.actions.listWorkflowRuns({ - ...context.repo, - // In older PRs, we need eval.yml instead of pr.yml. - workflow_id: 'eval.yml', - event: 'pull_request_target', - status: 'success', - exclude_pull_requests: true, - head_sha: pull_request.head.sha - })).data.workflow_runs[0]?.id - - // Newer PRs might not have run Eval to completion, yet. We can skip them, because this - // job will be run as part of that Eval run anyway. - if (log('Last eval run', run_id ?? '', !run_id)) - return; - - const artifact = (await github.rest.actions.listWorkflowRunArtifacts({ - ...context.repo, - run_id, - name: 'comparison' - })).data.artifacts[0] - - // Instead of checking the boolean artifact.expired, we will give us a minute to - // actually download the artifact in the next step and avoid that race condition. - // Older PRs, where the workflow run was already eval.yml, but the artifact was not - // called "comparison", yet, will be skipped as well. 
- const expired = new Date(artifact?.expires_at ?? 0) < new Date(new Date().getTime() + 60 * 1000) - if (log('Artifact expires at', artifact?.expires_at ?? '', expired)) - return; + })).data.workflow_runs[0]?.id + + // Newer PRs might not have run Eval to completion, yet. + // Older PRs might not have an eval.yml workflow, yet. + // In either case we continue without fetching an artifact on a best-effort basis. + log('Last eval run', run_id ?? '') + + const artifact = run_id && (await github.rest.actions.listWorkflowRunArtifacts({ + ...context.repo, + run_id, + name: 'comparison' + })).data.artifacts[0] + + // Instead of checking the boolean artifact.expired, we will give us a minute to + // actually download the artifact in the next step and avoid that race condition. + // Older PRs, where the workflow run was already eval.yml, but the artifact was not + // called "comparison", yet, will skip the download. + const expired = !artifact || new Date(artifact?.expires_at ?? 0) < new Date(new Date().getTime() + 60 * 1000) + log('Artifact expires at', artifact?.expires_at ?? '') + if (!expired) { stats.artifacts++ await artifactClient.downloadArtifact(artifact.id, { @@ -232,39 +193,82 @@ jobs: repositoryOwner: context.repo.owner, token: core.getInput('github-token') }, - path: path.resolve(pull_request.number.toString()), + path: path.resolve(pull_number.toString()), expectedHash: artifact.digest }) + } - // Create a map (Label -> Boolean) of all currently set labels. - // Each label is set to True and can be disabled later. - const before = Object.fromEntries( - (await github.paginate(github.rest.issues.listLabelsOnIssue, { - ...context.repo, - issue_number: pull_request.number - })) - .map(({ name }) => [name, true]) - ) + // Create a map (Label -> Boolean) of all currently set labels. + // Each label is set to True and can be disabled later. 
+ const before = Object.fromEntries( + (await github.paginate(github.rest.issues.listLabelsOnIssue, { + ...context.repo, + issue_number + })) + .map(({ name }) => [name, true]) + ) - const approvals = new Set( - (await github.paginate(github.rest.pulls.listReviews, { + const approvals = new Set( + (await github.paginate(github.rest.pulls.listReviews, { + ...context.repo, + pull_number + })) + .filter(review => review.state == 'APPROVED') + .map(review => review.user?.id) + ) + + const latest_event_at = new Date( + (await github.paginate( + github.rest.issues.listEventsForTimeline, + { ...context.repo, - pull_number: pull_request.number - })) - .filter(review => review.state == 'APPROVED') - .map(review => review.user?.id) - ) + issue_number, + per_page: 100 + } + )) + // We also ignore base_ref_force_pushed, which will not happen in nixpkgs, but + // is very useful for testing in forks. + .findLast(({ event }) => !['labeled', 'unlabeled', 'base_ref_force_pushed'].includes(event)) + ?.created_at ?? item.created_at + ) + const stale_at = new Date(new Date().setDate(new Date().getDate() - 180)) + + // Manage most of the labels, without eval results + const after = Object.assign( + {}, + before, + { + // We intentionally don't use the mergeable or mergeable_state attributes. + // Those have an intermediate state while the test merge commit is created. + // This doesn't work well for us, because we might have just triggered another + // test merge commit creation by requesting the pull request via the API at the start + // of this function. + // The attribute merge_commit_sha keeps the old value of null or the hash *until* + // the new test merge commit has either been created successfully or its creation has failed. + // This essentially means we are updating the merge conflict label in two steps: + // On the first pass of the day, we just fetch the pull request, which triggers + // the creation. At this stage, the label is likely not updated, yet.
+ // The second pass will then read the result from the first pass and set the label. + '2.status: merge conflict': !pull_request.merge_commit_sha, + '2.status: stale': !before['1.severity: security'] && latest_event_at < stale_at, + '12.approvals: 1': approvals.size == 1, + '12.approvals: 2': approvals.size == 2, + '12.approvals: 3+': approvals.size >= 3, + '12.first-time contribution': + [ 'NONE', 'FIRST_TIMER', 'FIRST_TIME_CONTRIBUTOR' ].includes(pull_request.author_association), + } + ) + // Manage labels based on eval results + if (!expired) { const maintainers = new Set(Object.keys( - JSON.parse(await readFile(`${pull_request.number}/maintainers.json`, 'utf-8')) + JSON.parse(await readFile(`${pull_number}/maintainers.json`, 'utf-8')) ).map(m => Number.parseInt(m, 10))) - const evalLabels = JSON.parse(await readFile(`${pull_request.number}/changed-paths.json`, 'utf-8')).labels + const evalLabels = JSON.parse(await readFile(`${pull_number}/changed-paths.json`, 'utf-8')).labels - // Manage the labels - const after = Object.assign( - {}, - before, + Object.assign( + after, // Ignore `evalLabels` if it's an array. // This can happen for older eval runs, before we switched to objects. // The old eval labels would have been set by the eval run, @@ -272,41 +276,104 @@ jobs: // TODO: Simplify once old eval results have expired (~2025-10) (Array.isArray(evalLabels) ? undefined : evalLabels), { - '12.approvals: 1': approvals.size == 1, - '12.approvals: 2': approvals.size == 2, - '12.approvals: 3+': approvals.size >= 3, '12.approved-by: package-maintainer': Array.from(maintainers).some(m => approvals.has(m)), - '12.first-time contribution': - [ 'NONE', 'FIRST_TIMER', 'FIRST_TIME_CONTRIBUTOR' ].includes(pull_request.author_association), } ) + } - // No need for an API request, if all labels are the same. - const hasChanges = Object.keys(after).some(name => (before[name] ?? 
false) != after[name]) - if (log('Has changes', hasChanges, !hasChanges)) - return; + // No need for an API request, if all labels are the same. + const hasChanges = Object.keys(after).some(name => (before[name] ?? false) != after[name]) + if (log('Has changes', hasChanges, !hasChanges)) + return; + + // Skipping labeling on a pull_request event, because we have no privileges. + const labels = Object.entries(after).filter(([,value]) => value).map(([name]) => name) + if (log('Set labels', labels, context.eventName == 'pull_request')) + return; + + await github.rest.issues.setLabels({ + ...context.repo, + issue_number, + labels + }) + } catch (cause) { + throw new Error(`Labeling #${item.number} failed.`, { cause }) + } + } - // Skipping labeling on a pull_request event, because we have no privileges. - const labels = Object.entries(after).filter(([,value]) => value).map(([name]) => name) - if (log('Set labels', labels, context.eventName == 'pull_request')) - return; + if (context.payload.pull_request) { + await handle(context.payload.pull_request) + } else { + const workflowData = (await github.rest.actions.listWorkflowRuns({ + ...context.repo, + workflow_id: 'labels.yml', + event: 'schedule', + status: 'success', + exclude_pull_requests: true, + per_page: 1 + })).data - await github.rest.issues.setLabels({ - ...context.repo, - issue_number: pull_request.number, - labels - }) - } catch (cause) { - throw new Error(`Labeling PR #${pull_request.number} failed.`, { cause }) + // Go back as far as the last successful run of this workflow to make sure + // we are not leaving anyone behind on GHA failures. + // Defaults to go back 1 hour on the first run. + const cutoff = new Date(workflowData.workflow_runs[0]?.created_at ?? 
new Date().getTime() - 1 * 60 * 60 * 1000) + core.info('cutoff timestamp: ' + cutoff.toISOString()) + + const updatedItems = await github.paginate( + github.rest.search.issuesAndPullRequests, + { + q: [ + `repo:"${process.env.GITHUB_REPOSITORY}"`, + 'type:pr', + 'is:open', + `updated:>=${cutoff.toISOString()}` + ].join(' AND '), + // TODO: Remove in 2025-10, when it becomes the default. + advanced_search: true } - }) - ); - - (await Promise.allSettled(prs.flat())) - .filter(({ status }) => status == 'rejected') - .map(({ reason }) => core.setFailed(`${reason.message}\n${reason.cause.stack}`)) + ) + + const allOptions = { + q: [ + `repo:"${process.env.GITHUB_REPOSITORY}"`, + 'type:pr', + 'is:open' + ].join(' AND '), + sort: 'created', + direction: 'asc', + // TODO: Remove in 2025-10, when it becomes the default. + advanced_search: true + } - core.notice(`Processed ${stats.prs} PRs, made ${stats.requests + stats.artifacts} API requests and downloaded ${stats.artifacts} artifacts.`) + const { total_count: total_pulls } = (await github.rest.search.issuesAndPullRequests({ + ...allOptions, + per_page: 1 + })).data + const { total_count: total_runs } = workflowData + const allItems = (await github.rest.search.issuesAndPullRequests({ + ...allOptions, + per_page: 100, + // We iterate through pages of 100 items across scheduled runs. With currently ~7000 open PRs and + // up to 6*24=144 scheduled runs per day, we hit every PR twice each day. + // We might not hit every PR on one iteration, because the pages will shift slightly when + // PRs are closed or merged. We assume this to be OK on the bigger scale, because a PR which was + // missed once, would have to move through the whole page to be missed again. This is very unlikely, + // so it should certainly be hit on the next iteration. + // TODO: Evaluate after a while, whether the above holds still true and potentially implement + // an overlap between runs. 
+ page: total_runs % Math.ceil(total_pulls / 100) + })).data.items + + // Some items might be in both search results, so filtering out duplicates as well. + const items = [].concat(updatedItems, allItems) + .filter((thisItem, idx, arr) => idx == arr.findIndex(firstItem => firstItem.number == thisItem.number)) + + ;(await Promise.allSettled(items.map(handle))) + .filter(({ status }) => status == 'rejected') + .map(({ reason }) => core.setFailed(`${reason.message}\n${reason.cause.stack}`)) + + core.notice(`Processed ${stats.prs} PRs, made ${stats.requests + stats.artifacts} API requests and downloaded ${stats.artifacts} artifacts.`) + } clearInterval(reservoirUpdater) - name: Log current API rate limits From d5072dd3444b7da967eecd6c6b35687cfde444c6 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Tue, 24 Jun 2025 19:31:21 +0200 Subject: [PATCH 2/6] workflows/labels: fix stale label To set the stale label properly, we need to consider the right timeline events only - and their respective relevant timestamps. --- .github/workflows/labels.yml | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index c3bc9301f67f9..3abf5011a97f6 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -226,11 +226,32 @@ jobs: per_page: 100 } )) - // We also ignore base_ref_force_pushed, which will not happen in nixpkgs, but - // is very useful for testing in forks. - .findLast(({ event }) => !['labeled', 'unlabeled', 'base_ref_force_pushed'].includes(event)) - ?.created_at ?? item.created_at + .filter(({ event }) => [ + // These events are hand-picked from: + // https://docs.github.com/en/rest/using-the-rest-api/issue-event-types?apiVersion=2022-11-28 + // Each of those causes a PR/issue to *not* be considered as stale anymore. + // Most of these use created_at. 
+ 'assigned', + 'commented', // uses updated_at, because that could be > created_at + 'committed', // uses committer.date + 'head_ref_force_pushed', + 'milestoned', + 'pinned', + 'ready_for_review', + 'renamed', + 'reopened', + 'review_dismissed', + 'review_requested', + 'reviewed', // uses submitted_at + 'unlocked', + 'unmarked_as_duplicate', + ].includes(event)) + .map(({ created_at, updated_at, committer, submitted_at }) => new Date(updated_at ?? created_at ?? submitted_at ?? committer.date)) + .sort((a, b) => a - b) // numeric comparator: the default sort would compare the Dates as strings + .reverse() + .at(0) ?? item.created_at ) + const stale_at = new Date(new Date().setDate(new Date().getDate() - 180)) // Manage most of the labels, without eval results From ed1fc4c6b3561fcef919ac289aaeddba37d112cc Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Tue, 24 Jun 2025 19:34:21 +0200 Subject: [PATCH 3/6] workflows/labels: fix running in pull_request context When running in a pull_request context, the labels job is part of the currently running workflow - which will never have succeeded, yet. Apparently it could be failed already, so in this case we take *any* workflow run, no matter its state. --- .github/workflows/labels.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index 3abf5011a97f6..c2b33ef8adbaf 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -152,7 +152,8 @@ jobs: ...context.repo, workflow_id: 'pr.yml', event: 'pull_request_target', - status: 'success', + // In pull_request contexts the workflow is still running. + status: context.payload.pull_request ? undefined : 'success', exclude_pull_requests: true, head_sha: pull_request.head.sha })).data.workflow_runs[0]?.id ??
From 39dc87db4bf75ad01e1f56d47e187861fbe8790a Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Tue, 24 Jun 2025 19:43:57 +0200 Subject: [PATCH 4/6] workflows/labels: handle PR-creation-edge-case for merge conflict label Explained very well by the code comment. --- .github/workflows/labels.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index c2b33ef8adbaf..38f5071efebd1 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -255,6 +255,13 @@ jobs: const stale_at = new Date(new Date().setDate(new Date().getDate() - 180)) + // After creation of a Pull Request, `merge_commit_sha` will be null initially: + // The very first merge commit will only be calculated after a little while. + // To avoid labeling the PR as conflicted before that, we wait a few minutes. + // This is intentionally less than the time that Eval takes, so that the label job + // running after Eval can indeed label the PR as conflicted if that is the case. + const merge_commit_sha_valid = new Date() - new Date(pull_request.created_at) > 3 * 60 * 1000 + // Manage most of the labels, without eval results const after = Object.assign( {}, @@ -271,7 +278,7 @@ jobs: // On the first pass of the day, we just fetch the pull request, which triggers // the creation. At this stage, the label is likely not updated, yet. // The second pass will then read the result from the first pass and set the label. 
- '2.status: merge conflict': !pull_request.merge_commit_sha, + '2.status: merge conflict': merge_commit_sha_valid && !pull_request.merge_commit_sha, '2.status: stale': !before['1.severity: security'] && latest_event_at < stale_at, '12.approvals: 1': approvals.size == 1, '12.approvals: 2': approvals.size == 2, From ddf3480d49ba35240e324644015ccb2422379679 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Tue, 24 Jun 2025 19:51:53 +0200 Subject: [PATCH 5/6] workflows/labels: improve cleanup of reservoir timer This should make sure that the timer is cleaned up, no matter what. This didn't seem to be the case before, where it would still be stuck sometimes, when throwing an error somewhere. --- .github/workflows/labels.yml | 134 ++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 66 deletions(-) diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index 38f5071efebd1..efc8556b15568 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -121,7 +121,6 @@ jobs: await updateReservoir() // Update remaining requests every minute to account for other jobs running in parallel. const reservoirUpdater = setInterval(updateReservoir, 60 * 1000) - process.on('uncaughtException', () => clearInterval(reservoirUpdater)) async function handle(item) { try { @@ -330,80 +329,83 @@ jobs: } } - if (context.payload.pull_request) { - await handle(context.payload.pull_request) - } else { - const workflowData = (await github.rest.actions.listWorkflowRuns({ - ...context.repo, - workflow_id: 'labels.yml', - event: 'schedule', - status: 'success', - exclude_pull_requests: true, - per_page: 1 - })).data - - // Go back as far as the last successful run of this workflow to make sure - // we are not leaving anyone behind on GHA failures. - // Defaults to go back 1 hour on the first run. - const cutoff = new Date(workflowData.workflow_runs[0]?.created_at ?? 
new Date().getTime() - 1 * 60 * 60 * 1000) - core.info('cutoff timestamp: ' + cutoff.toISOString()) - - const updatedItems = await github.paginate( - github.rest.search.issuesAndPullRequests, - { + try { + if (context.payload.pull_request) { + await handle(context.payload.pull_request) + } else { + const workflowData = (await github.rest.actions.listWorkflowRuns({ + ...context.repo, + workflow_id: 'labels.yml', + event: 'schedule', + status: 'success', + exclude_pull_requests: true, + per_page: 1 + })).data + + // Go back as far as the last successful run of this workflow to make sure + // we are not leaving anyone behind on GHA failures. + // Defaults to go back 1 hour on the first run. + const cutoff = new Date(workflowData.workflow_runs[0]?.created_at ?? new Date().getTime() - 1 * 60 * 60 * 1000) + core.info('cutoff timestamp: ' + cutoff.toISOString()) + + const updatedItems = await github.paginate( + github.rest.search.issuesAndPullRequests, + { + q: [ + `repo:"${process.env.GITHUB_REPOSITORY}"`, + 'type:pr', + 'is:open', + `updated:>=${cutoff.toISOString()}` + ].join(' AND '), + // TODO: Remove in 2025-10, when it becomes the default. + advanced_search: true + } + ) + + const allOptions = { q: [ `repo:"${process.env.GITHUB_REPOSITORY}"`, 'type:pr', - 'is:open', - `updated:>=${cutoff.toISOString()}` + 'is:open' ].join(' AND '), + sort: 'created', + direction: 'asc', // TODO: Remove in 2025-10, when it becomes the default. advanced_search: true } - ) - - const allOptions = { - q: [ - `repo:"${process.env.GITHUB_REPOSITORY}"`, - 'type:pr', - 'is:open' - ].join(' AND '), - sort: 'created', - direction: 'asc', - // TODO: Remove in 2025-10, when it becomes the default. 
- advanced_search: true - } - const { total_count: total_pulls } = (await github.rest.search.issuesAndPullRequests({ - ...allOptions, - per_page: 1 - })).data - const { total_count: total_runs } = workflowData - const allItems = (await github.rest.search.issuesAndPullRequests({ - ...allOptions, - per_page: 100, - // We iterate through pages of 100 items across scheduled runs. With currently ~7000 open PRs and - // up to 6*24=144 scheduled runs per day, we hit every PR twice each day. - // We might not hit every PR on one iteration, because the pages will shift slightly when - // PRs are closed or merged. We assume this to be OK on the bigger scale, because a PR which was - // missed once, would have to move through the whole page to be missed again. This is very unlikely, - // so it should certainly be hit on the next iteration. - // TODO: Evaluate after a while, whether the above holds still true and potentially implement - // an overlap between runs. - page: total_runs % Math.ceil(total_pulls / 100) - })).data.items - - // Some items might be in both search results, so filtering out duplicates as well. - const items = [].concat(updatedItems, allItems) - .filter((thisItem, idx, arr) => idx == arr.findIndex(firstItem => firstItem.number == thisItem.number)) - - ;(await Promise.allSettled(items.map(handle))) - .filter(({ status }) => status == 'rejected') - .map(({ reason }) => core.setFailed(`${reason.message}\n${reason.cause.stack}`)) - - core.notice(`Processed ${stats.prs} PRs, made ${stats.requests + stats.artifacts} API requests and downloaded ${stats.artifacts} artifacts.`) + const { total_count: total_pulls } = (await github.rest.search.issuesAndPullRequests({ + ...allOptions, + per_page: 1 + })).data + const { total_count: total_runs } = workflowData + const allItems = (await github.rest.search.issuesAndPullRequests({ + ...allOptions, + per_page: 100, + // We iterate through pages of 100 items across scheduled runs. 
With currently ~7000 open PRs and + // up to 6*24=144 scheduled runs per day, we hit every PR twice each day. + // We might not hit every PR on one iteration, because the pages will shift slightly when + // PRs are closed or merged. We assume this to be OK on the bigger scale, because a PR which was + // missed once, would have to move through the whole page to be missed again. This is very unlikely, + // so it should certainly be hit on the next iteration. + // TODO: Evaluate after a while, whether the above holds still true and potentially implement + // an overlap between runs. + page: total_runs % Math.ceil(total_pulls / 100) + })).data.items + + // Some items might be in both search results, so filtering out duplicates as well. + const items = [].concat(updatedItems, allItems) + .filter((thisItem, idx, arr) => idx == arr.findIndex(firstItem => firstItem.number == thisItem.number)) + + ;(await Promise.allSettled(items.map(handle))) + .filter(({ status }) => status == 'rejected') + .map(({ reason }) => core.setFailed(`${reason.message}\n${reason.cause.stack}`)) + + core.notice(`Processed ${stats.prs} PRs, made ${stats.requests + stats.artifacts} API requests and downloaded ${stats.artifacts} artifacts.`) + } + } finally { + clearInterval(reservoirUpdater) } - clearInterval(reservoirUpdater) - name: Log current API rate limits env: From 579bfd48daf875aea88171c1fa6abccf4b21a956 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Tue, 24 Jun 2025 20:52:50 +0200 Subject: [PATCH 6/6] workflows/labels: use /pulls endpoint instead of search for "all" pull requests It's necessary to use a combination of different endpoints here, because the /search endpoint only allows fetching the first 1000 items and will fail with a higher page number (11+). On the flip side, the /pulls endpoint doesn't allow counting the total number of results, so we can't calculate the required page number with its response. Putting both together should work, though. 
--- .github/workflows/labels.yml | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index efc8556b15568..9c664d9c11f9e 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -138,8 +138,7 @@ jobs: // The search result is of a format that works for both issues and pull requests and thus // does not have all fields of a full pull_request response. Notably, it is missing `head.sha`, - // which we need to fetch the workflow run below. When triggered via pull_request event, - // this field is already available. + // which we need to fetch the workflow run below. This field is already available from non-search sources. // This API request is also important for the merge-conflict label, because it triggers the // creation of a new test merge commit. This is needed to actually determine the state of a PR. const pull_request = item.head ? item : (await github.rest.pulls.get({ @@ -362,7 +361,11 @@ jobs: } ) - const allOptions = { + // The search endpoint only allows fetching the first 1000 records, but the + // pull request list endpoint does not support counting the total number + // of results. + // Thus, we use /search for counting and /pulls for reading the response. + const { total_count: total_pulls } = (await github.rest.search.issuesAndPullRequests({ q: [ `repo:"${process.env.GITHUB_REPOSITORY}"`, 'type:pr', @@ -371,16 +374,16 @@ jobs: sort: 'created', direction: 'asc', // TODO: Remove in 2025-10, when it becomes the default.
- advanced_search: true - } - - const { total_count: total_pulls } = (await github.rest.search.issuesAndPullRequests({ - ...allOptions, + advanced_search: true, per_page: 1 })).data const { total_count: total_runs } = workflowData - const allItems = (await github.rest.search.issuesAndPullRequests({ - ...allOptions, + + const allPulls = (await github.rest.pulls.list({ + ...context.repo, + state: 'open', + sort: 'created', + direction: 'asc', per_page: 100, // We iterate through pages of 100 items across scheduled runs. With currently ~7000 open PRs and // up to 6*24=144 scheduled runs per day, we hit every PR twice each day. @@ -391,10 +394,10 @@ jobs: // TODO: Evaluate after a while, whether the above holds still true and potentially implement // an overlap between runs. page: total_runs % Math.ceil(total_pulls / 100) - })).data.items + })).data // Some items might be in both search results, so filtering out duplicates as well. - const items = [].concat(updatedItems, allItems) + const items = [].concat(updatedItems, allPulls) .filter((thisItem, idx, arr) => idx == arr.findIndex(firstItem => firstItem.number == thisItem.number)) ;(await Promise.allSettled(items.map(handle)))