workflow/labels: switch to a scheduled trigger

Instead of relying on workflow_run events, we now trigger the labeling
workflow by schedule. This avoids all permission/secrets problems of
running in the pull_request_review context - and also gets rid of the
"waiting for approval to run workflows" button for new contributors that
sometimes comes up right now.

Also, it's more efficient. Previously, the labeling workflow would run
on *any* pull_request_review event, which happens for all comments, too.
So quite a few runs.

This will cause a delay of up to 1 hour with the current settings until
approval labels are set. Depending on how long the job normally runs we
can adjust the frequency. The workflow is written in a way that will
work no matter what the frequency ends up being, even when it's
interrupted by transient GHA failures: It will always look at all PRs
which were updated since the last time the workflow ran successfully.

We also add the ability to run it manually via UI. This is useful:
- When fixing bugs in the labeler workflow to run it all the way back to
where the bug was introduced.
- When introducing new labels, to get a head start for a reasonable
amount of PRs immediately.

Of course, the workflow is also still run directly after Eval itself.
This is also the only case that the actions/labeler steps will run,
since they depend on the `pull_request` context.
This commit is contained in:
Wolfgang Walther
2025-06-14 19:42:55 +02:00
parent 9ead5b1cc7
commit 6f12f662ae
2 changed files with 114 additions and 70 deletions

View File

@@ -6,16 +6,23 @@
name: "Label PR"
on:
schedule:
- cron: '37 * * * *'
workflow_call:
workflow_run:
workflows:
- Review dismissed
- Review submitted
types: [completed]
workflow_dispatch:
inputs:
updatedWithin:
description: 'Updated within [hours]'
type: number
required: false
default: 0 # everything since last run
concurrency:
group: labels-${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number || github.run_id }}
cancel-in-progress: true
# This explicitly avoids using `run_id` for the concurrency key to make sure that only
# *one* non-PR run can run at a time.
group: labels-${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number }}
# PR- and manually-triggered runs will be cancelled, but scheduled runs will be queued.
cancel-in-progress: ${{ github.event_name != 'schedule' }}
permissions:
issues: write # needed to create *new* labels
@@ -31,57 +38,111 @@ jobs:
runs-on: ubuntu-24.04-arm
if: "!contains(github.event.pull_request.title, '[skip treewide]')"
steps:
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
id: eval
with:
script: |
const run_id = (await github.rest.actions.listWorkflowRuns({
owner: context.repo.owner,
repo: context.repo.repo,
workflow_id: 'eval.yml',
event: 'pull_request_target',
head_sha: context.payload.pull_request?.head.sha ?? context.payload.workflow_run.head_sha
})).data.workflow_runs[0]?.id
core.setOutput('run-id', run_id)
- name: Install dependencies
run: npm install @actions/artifact
- name: Download the comparison results
if: steps.eval.outputs.run-id
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
run-id: ${{ steps.eval.outputs.run-id }}
github-token: ${{ github.token }}
pattern: comparison
path: comparison
merge-multiple: true
- name: Labels from eval
if: steps.eval.outputs.run-id && github.event_name != 'pull_request'
- name: Labels from API data and Eval results
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
env:
UPDATED_WITHIN: ${{ inputs.updatedWithin }}
with:
script: |
const path = require('node:path')
const { DefaultArtifactClient } = require('@actions/artifact')
const { readFile } = require('node:fs/promises')
let pull_requests
if (context.payload.workflow_run) {
// PRs from forks don't have any PRs associated by default.
// Thus, we request the PR number with an API call *to* the fork's repo.
// Multiple pull requests can be open from the same head commit, either via
// different base branches or head branches.
const { head_repository, head_sha, repository } = context.payload.workflow_run
pull_requests = (await github.paginate(github.rest.repos.listPullRequestsAssociatedWithCommit, {
owner: head_repository.owner.login,
repo: head_repository.name,
commit_sha: head_sha
})).filter(pull_request => pull_request.base.repo.id == repository.id)
} else {
pull_requests = [ context.payload.pull_request ]
const artifactClient = new DefaultArtifactClient()
if (process.env.UPDATED_WITHIN && !/^\d+$/.test(process.env.UPDATED_WITHIN))
throw new Error('Please enter "updated within" as integer in hours.')
const cutoff = new Date(await (async () => {
// Always run for Pull Request triggers, no cutoff since there will be a single
// response only anyway. 0 is the Unix epoch, so always smaller.
if (context.payload.pull_request?.number) return 0
// Manually triggered via UI when updatedWithin is set. Will fallthrough to the last
// option if the updatedWithin parameter is set to 0, which is the default.
const updatedWithin = Number.parseInt(process.env.UPDATED_WITHIN, 10)
if (updatedWithin) return new Date().getTime() - updatedWithin * 60 * 60 * 1000
// Normally a scheduled run, but could be workflow_dispatch, see above. Go back as far
// as the last successful run of this workflow to make sure we are not leaving anyone
// behind on GHA failures.
return (await github.rest.actions.listWorkflowRuns({
...context.repo,
workflow_id: 'labels.yml',
event: 'schedule',
status: 'success',
exclude_pull_requests: true
})).data.workflow_runs[0]?.created_at
})())
core.info('cutoff timestamp: ' + cutoff.toISOString())
// To simplify this action's logic we fetch the pull_request data again below, even if
// we are already in a pull_request event's context and would have the data readily
// available. We do this by filtering the list of pull requests with head and base
// branch - there can only be a single open Pull Request for any such combination.
const prEventCondition = !context.payload.pull_request ? undefined : {
// "label" is in the format of `user:branch` or `org:branch`
head: context.payload.pull_request.head.label,
base: context.payload.pull_request.base.ref
}
await Promise.all(
pull_requests.map(async (pull_request) => {
await github.paginate(
github.rest.pulls.list,
{
...context.repo,
state: 'open',
sort: 'updated',
direction: 'desc',
...prEventCondition
},
async (response, done) => await Promise.all(response.data.map(async (pull_request) => {
const log = (k,v) => core.info(`PR #${pull_request.number} - ${k}: ${v}`)
log('Last updated at', pull_request.updated_at)
if (new Date(pull_request.updated_at) < cutoff) return done()
const run_id = (await github.rest.actions.listWorkflowRuns({
...context.repo,
workflow_id: 'eval.yml',
event: 'pull_request_target',
// For PR events, the workflow run is still in progress with this job itself.
status: prEventCondition ? 'in_progress' : 'success',
exclude_pull_requests: true,
head_sha: pull_request.head.sha
})).data.workflow_runs[0]?.id
// Newer PRs might not have run Eval to completion, yet. We can skip them, because this
// job will be run as part of that Eval run anyway.
log('Last eval run', run_id)
if (!run_id) return;
const artifact = (await github.rest.actions.listWorkflowRunArtifacts({
...context.repo,
run_id,
name: 'comparison'
})).data.artifacts[0]
// Instead of checking the boolean artifact.expired, we will give us a minute to
// actually download the artifact in the next step and avoid that race condition.
log('Artifact expires at', artifact.expires_at)
if (new Date(artifact.expires_at) < new Date(new Date().getTime() + 60 * 1000)) return;
await artifactClient.downloadArtifact(artifact.id, {
findBy: {
repositoryName: context.repo.repo,
repositoryOwner: context.repo.owner,
token: core.getInput('github-token')
},
path: path.resolve('comparison'),
expectedHash: artifact.digest
})
// Shortcut for all issue endpoints related to labels
const pr = {
owner: context.repo.owner,
repo: context.repo.repo,
...context.repo,
issue_number: pull_request.number
}
@@ -132,13 +193,13 @@ jobs:
labels: added
})
}
})
}))
)
- uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
name: Labels from touched files
if: |
github.event_name != 'workflow_run' &&
github.event_name == 'pull_request_target' &&
github.event.pull_request.head.repo.owner.login != 'NixOS' || !(
github.head_ref == 'haskell-updates' ||
github.head_ref == 'python-updates' ||
@@ -153,7 +214,7 @@ jobs:
- uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
name: Labels from touched files (no sync)
if: |
github.event_name != 'workflow_run' &&
github.event_name == 'pull_request_target' &&
github.event.pull_request.head.repo.owner.login != 'NixOS' || !(
github.head_ref == 'haskell-updates' ||
github.head_ref == 'python-updates' ||
@@ -171,7 +232,7 @@ jobs:
# This is to avoid the mass of labels there, which is mostly useless - and really annoying for
# the backport labels.
if: |
github.event_name != 'workflow_run' &&
github.event_name == 'pull_request_target' &&
github.event.pull_request.head.repo.owner.login == 'NixOS' && (
github.head_ref == 'haskell-updates' ||
github.head_ref == 'python-updates' ||

View File

@@ -1,17 +0,0 @@
name: Review submitted
on:
pull_request_review:
types: [submitted]
permissions: {}
defaults:
run:
shell: bash
jobs:
trigger:
runs-on: ubuntu-24.04-arm
steps:
- run: echo This is a no-op only used as a trigger for workflow_run.