workflow/labels: switch to a scheduled trigger

Instead of relying on workflow_run events, we now trigger the labeling workflow by schedule. This avoids all permission/secrets problems of running in the pull_request_review context - and also gets rid of the "waiting for approval to run workflows" button for new contributors that sometimes comes up right now. Also, it's more efficient. Previously, the labeling workflow would run on *any* pull_request_review event, which happens for all comments, too. So quite a few runs. This will cause a delay of up to 1 hour with the current settings until approval labels are set. Depending on how long the job normally runs we can adjust the frequency. The workflow is written in a way that will work no matter what the frequency ends up being, even when it's interrupted by transient GHA failures: It will always look at all PRs which were updated since the last time the workflow ran successfully. We also add the ability to run it manually via UI. This is useful: - When fixing bugs in the labeler workflow to run it all the way back to where the bug was introduced. - When introducing new labels, to get a head start for a reasonable amount of PRs immediately. Of course, the workflow is also still run directly after Eval itself. This is also the only case that the actions/labeler steps will run, since they depend on the `pull_request` context.
2025-06-14 19:42:55 +02:00
parent 9ead5b1cc7
commit 6f12f662ae
2 changed files with 114 additions and 70 deletions
--- a/.github/workflows/labels.yml
+++ b/.github/workflows/labels.yml
@@ -6,16 +6,23 @@
 name: "Label PR"

 on:
+  schedule:
+    - cron: '37 * * * *'
  workflow_call:
-  workflow_run:
-    workflows:
-      - Review dismissed
-      - Review submitted
-    types: [completed]
+  workflow_dispatch:
+    inputs:
+      updatedWithin:
+        description: 'Updated within [hours]'
+        type: number
+        required: false
+        default: 0 # everything since last run

 concurrency:
-  group: labels-${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number || github.run_id }}
-  cancel-in-progress: true
+  # This explicitly avoids using `run_id` for the concurrency key to make sure that only
+  # *one* non-PR run can run at a time.
+  group: labels-${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number }}
+  # PR- and manually-triggered runs will be cancelled, but scheduled runs will be queued.
+  cancel-in-progress: ${{ github.event_name != 'schedule' }}

 permissions:
  issues: write # needed to create *new* labels
@@ -31,57 +38,111 @@ jobs:
    runs-on: ubuntu-24.04-arm
    if: "!contains(github.event.pull_request.title, '[skip treewide]')"
    steps:
-      - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
-        id: eval
-        with:
-          script: |
-            const run_id = (await github.rest.actions.listWorkflowRuns({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              workflow_id: 'eval.yml',
-              event: 'pull_request_target',
-              head_sha: context.payload.pull_request?.head.sha ?? context.payload.workflow_run.head_sha
-            })).data.workflow_runs[0]?.id
-            core.setOutput('run-id', run_id)
+      - name: Install dependencies
+        run: npm install @actions/artifact

-      - name: Download the comparison results
-        if: steps.eval.outputs.run-id
-        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
-        with:
-          run-id: ${{ steps.eval.outputs.run-id }}
-          github-token: ${{ github.token }}
-          pattern: comparison
-          path: comparison
-          merge-multiple: true
-
-      - name: Labels from eval
-        if: steps.eval.outputs.run-id && github.event_name != 'pull_request'
+      - name: Labels from API data and Eval results
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        env:
+          UPDATED_WITHIN: ${{ inputs.updatedWithin }}
        with:
          script: |
+            const path = require('node:path')
+            const { DefaultArtifactClient } = require('@actions/artifact')
            const { readFile } = require('node:fs/promises')

-            let pull_requests
-            if (context.payload.workflow_run) {
-              // PRs from forks don't have any PRs associated by default.
-              // Thus, we request the PR number with an API call *to* the fork's repo.
-              // Multiple pull requests can be open from the same head commit, either via
-              // different base branches or head branches.
-              const { head_repository, head_sha, repository } = context.payload.workflow_run
-              pull_requests = (await github.paginate(github.rest.repos.listPullRequestsAssociatedWithCommit, {
-                owner: head_repository.owner.login,
-                repo: head_repository.name,
-                commit_sha: head_sha
-              })).filter(pull_request => pull_request.base.repo.id == repository.id)
-            } else {
-              pull_requests = [ context.payload.pull_request ]
+            const artifactClient = new DefaultArtifactClient()
+
+            if (process.env.UPDATED_WITHIN && !/^\d+$/.test(process.env.UPDATED_WITHIN))
+              throw new Error('Please enter "updated within" as integer in hours.')
+
+            const cutoff = new Date(await (async () => {
+              // Always run for Pull Request triggers, no cutoff since there will be a single
+              // response only anyway. 0 is the Unix epoch, so always smaller.
+              if (context.payload.pull_request?.number) return 0
+
+              // Manually triggered via UI when updatedWithin is set. Will fallthrough to the last
+              // option if the updatedWithin parameter is set to 0, which is the default.
+              const updatedWithin = Number.parseInt(process.env.UPDATED_WITHIN, 10)
+              if (updatedWithin) return new Date().getTime() - updatedWithin * 60 * 60 * 1000
+
+              // Normally a scheduled run, but could be workflow_dispatch, see above. Go back as far
+              // as the last successful run of this workflow to make sure we are not leaving anyone
+              // behind on GHA failures.
+              return (await github.rest.actions.listWorkflowRuns({
+                ...context.repo,
+                workflow_id: 'labels.yml',
+                event: 'schedule',
+                status: 'success',
+                exclude_pull_requests: true
+              })).data.workflow_runs[0]?.created_at
+            })())
+            core.info('cutoff timestamp: ' + cutoff.toISOString())
+
+            // To simplify this action's logic we fetch the pull_request data again below, even if
+            // we are already in a pull_request event's context and would have the data readily
+            // available. We do this by filtering the list of pull requests with head and base
+            // branch - there can only be a single open Pull Request for any such combination.
+            const prEventCondition = !context.payload.pull_request ? undefined : {
+              // "label" is in the format of `user:branch` or `org:branch`
+              head: context.payload.pull_request.head.label,
+              base: context.payload.pull_request.base.ref
            }

-            await Promise.all(
-              pull_requests.map(async (pull_request) => {
+            await github.paginate(
+              github.rest.pulls.list,
+              {
+                ...context.repo,
+                state: 'open',
+                sort: 'updated',
+                direction: 'desc',
+                ...prEventCondition
+              },
+              async (response, done) => await Promise.all(response.data.map(async (pull_request) => {
+                const log = (k,v) => core.info(`PR #${pull_request.number} - ${k}: ${v}`)
+
+                log('Last updated at', pull_request.updated_at)
+                if (new Date(pull_request.updated_at) < cutoff) return done()
+
+                const run_id = (await github.rest.actions.listWorkflowRuns({
+                  ...context.repo,
+                  workflow_id: 'eval.yml',
+                  event: 'pull_request_target',
+                  // For PR events, the workflow run is still in progress with this job itself.
+                  status: prEventCondition ? 'in_progress' : 'success',
+                  exclude_pull_requests: true,
+                  head_sha: pull_request.head.sha
+                })).data.workflow_runs[0]?.id
+
+                // Newer PRs might not have run Eval to completion, yet. We can skip them, because this
+                // job will be run as part of that Eval run anyway.
+                log('Last eval run', run_id)
+                if (!run_id) return;
+
+                const artifact = (await github.rest.actions.listWorkflowRunArtifacts({
+                  ...context.repo,
+                  run_id,
+                  name: 'comparison'
+                })).data.artifacts[0]
+
+                // Instead of checking the boolean artifact.expired, we will give us a minute to
+                // actually download the artifact in the next step and avoid that race condition.
+                log('Artifact expires at', artifact.expires_at)
+                if (new Date(artifact.expires_at) < new Date(new Date().getTime() + 60 * 1000)) return;
+
+                await artifactClient.downloadArtifact(artifact.id, {
+                  findBy: {
+                    repositoryName: context.repo.repo,
+                    repositoryOwner: context.repo.owner,
+                    token: core.getInput('github-token')
+                  },
+                  path: path.resolve('comparison'),
+                  expectedHash: artifact.digest
+                })
+
+                // Shortcut for all issue endpoints related to labels
                const pr = {
-                  owner: context.repo.owner,
-                  repo: context.repo.repo,
+                  ...context.repo,
                  issue_number: pull_request.number
                }

@@ -132,13 +193,13 @@ jobs:
                    labels: added
                  })
                }
-              })
+              }))
            )

      - uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
        name: Labels from touched files
        if: |
-          github.event_name != 'workflow_run' &&
+          github.event_name == 'pull_request_target' &&
          github.event.pull_request.head.repo.owner.login != 'NixOS' || !(
            github.head_ref == 'haskell-updates' ||
            github.head_ref == 'python-updates' ||
@@ -153,7 +214,7 @@ jobs:
      - uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
        name: Labels from touched files (no sync)
        if: |
-          github.event_name != 'workflow_run' &&
+          github.event_name == 'pull_request_target' &&
          github.event.pull_request.head.repo.owner.login != 'NixOS' || !(
            github.head_ref == 'haskell-updates' ||
            github.head_ref == 'python-updates' ||
@@ -171,7 +232,7 @@ jobs:
        # This is to avoid the mass of labels there, which is mostly useless - and really annoying for
        # the backport labels.
        if: |
-          github.event_name != 'workflow_run' &&
+          github.event_name == 'pull_request_target' &&
          github.event.pull_request.head.repo.owner.login == 'NixOS' && (
            github.head_ref == 'haskell-updates' ||
            github.head_ref == 'python-updates' ||
--- a/.github/workflows/review-submitted.yml
+++ b/.github/workflows/review-submitted.yml
@@ -1,17 +0,0 @@
-name: Review submitted
-
-on:
-  pull_request_review:
-    types: [submitted]
-
-permissions: {}
-
-defaults:
-  run:
-    shell: bash
-
-jobs:
-  trigger:
-    runs-on: ubuntu-24.04-arm
-    steps:
-      - run: echo This is a no-op only used as a trigger for workflow_run.