From b2d1946eb28740b5817cc49ff8abd0b8b69bc28f Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Sat, 1 Nov 2025 10:52:38 +0100 Subject: [PATCH 1/4] workflows/reviewers: request owners and maintainers at once Instead of requesting owners and maintainer separately, each with their own limit of 10 review requests, we now run this together. This unties the logic and allows easier refactoring. Also, it gives us a consistent threshold of when not to request reviews anymore, which I set to 15. Before, this could have been anything between 10 and 20, depending on how the reviewers distributed over owners and maintainers. --- .github/workflows/reviewers.yml | 10 +++++----- ci/request-reviews/request-code-owner-reviews.sh | 5 +---- ci/request-reviews/request-reviewers.sh | 2 +- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/.github/workflows/reviewers.yml b/.github/workflows/reviewers.yml index f22e44d7cbff..4b49972e0732 100644 --- a/.github/workflows/reviewers.yml +++ b/.github/workflows/reviewers.yml @@ -69,15 +69,14 @@ jobs: GH_TOKEN: ${{ steps.app-token.outputs.token }} run: gh api /rate_limit | jq - - name: Requesting code owner reviews + - name: Determining code owner reviews if: steps.app-token.outputs.token env: GH_TOKEN: ${{ steps.app-token.outputs.token }} REPOSITORY: ${{ github.repository }} NUMBER: ${{ github.event.number }} - # Don't do anything on draft PRs - DRY_MODE: ${{ github.event.pull_request.draft && '1' || '' }} - run: result/bin/request-code-owner-reviews.sh "$REPOSITORY" "$NUMBER" ci/OWNERS + run: | + result/bin/request-code-owner-reviews.sh "$REPOSITORY" "$NUMBER" ci/OWNERS > owners.txt - name: Log current API rate limits (app-token) if: ${{ steps.app-token.outputs.token }} @@ -149,7 +148,7 @@ jobs: GH_TOKEN: ${{ github.token }} run: gh api /rate_limit | jq - - name: Requesting maintainer reviews + - name: Requesting reviews if: ${{ steps.app-token.outputs.token }} env: GH_TOKEN: ${{ github.token }} @@ -164,6 +163,7 @@ jobs: # There appears to be no API to request reviews based on GitHub IDs jq -r 'keys[]' comparison/maintainers.json \ | while read -r id; do gh api /user/"$id" --jq .login; done \ + | cat owners.txt - \ | GH_TOKEN="$APP_GH_TOKEN" result/bin/request-reviewers.sh "$REPOSITORY" "$NUMBER" "$AUTHOR" - name: Log current API rate limits (app-token) diff --git a/ci/request-reviews/request-code-owner-reviews.sh b/ci/request-reviews/request-code-owner-reviews.sh index 663285ae03fe..0ec61ab9f04e 100755 --- a/ci/request-reviews/request-code-owner-reviews.sh +++ b/ci/request-reviews/request-code-owner-reviews.sh @@ -31,8 +31,6 @@ prRepo=$(jq -r .head.repo.full_name <<< "$prInfo") log "PR repo: $prRepo" prBranch=$(jq -r .head.ref <<< "$prInfo") log "PR branch: $prBranch" -prAuthor=$(jq -r .user.login <<< "$prInfo") -log "PR author: $prAuthor" extraArgs=() if pwdRepo=$(git rev-parse --show-toplevel 2>/dev/null); then @@ -56,5 +54,4 @@ git -C "$tmp/nixpkgs.git" fetch --no-tags fork "$prBranch" headRef=$(git -C "$tmp/nixpkgs.git" rev-parse refs/remotes/fork/"$prBranch") log "Requesting reviews from code owners" -"$SCRIPT_DIR"/get-code-owners.sh "$tmp/nixpkgs.git" "$ownersFile" "$baseBranch" "$headRef" | \ - "$SCRIPT_DIR"/request-reviewers.sh "$baseRepo" "$prNumber" "$prAuthor" +"$SCRIPT_DIR"/get-code-owners.sh "$tmp/nixpkgs.git" "$ownersFile" "$baseBranch" "$headRef" diff --git a/ci/request-reviews/request-reviewers.sh b/ci/request-reviews/request-reviewers.sh index 1c105e385d28..6782b621b70b 100755 --- a/ci/request-reviews/request-reviewers.sh +++ b/ci/request-reviews/request-reviewers.sh @@ -68,7 +68,7 @@ for user in "${!users[@]}"; do fi done -if [[ "${#users[@]}" -gt 10 ]]; then +if [[ "${#users[@]}" -gt 15 ]]; then log "Too many reviewers (${!users[*]}), skipping review requests" exit 0 fi From d177d6057df8446f21297279047387edb350cf83 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Sat, 1 Nov 2025 11:04:46 +0100 Subject: [PATCH 2/4] ci/request-reviews: move git calls out of get-code-owners This is just a refactor, no functional change. It is a preparation for a future change, where `get-code-owners.sh` can be moved entirely into eval/compare. This can only happen once we removed the remaining `gh api` calls from it. --- ci/request-reviews/get-code-owners.sh | 15 ++++----------- ci/request-reviews/request-code-owner-reviews.sh | 8 +++++++- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/ci/request-reviews/get-code-owners.sh b/ci/request-reviews/get-code-owners.sh index 13a377429b92..5d121f69a74c 100755 --- a/ci/request-reviews/get-code-owners.sh +++ b/ci/request-reviews/get-code-owners.sh @@ -9,32 +9,25 @@ log() { } if (( "$#" < 4 )); then - log "Usage: $0 GIT_REPO OWNERS_FILE BASE_REF HEAD_REF" + log "Usage: $0 TOUCHED_FILES_FILE OWNERS_FILE" exit 1 fi -gitRepo=$1 +touchedFilesFile=$1 ownersFile=$2 -baseRef=$3 -headRef=$4 tmp=$(mktemp -d) trap 'rm -rf "$tmp"' exit -git -C "$gitRepo" diff --name-only --merge-base "$baseRef" "$headRef" > "$tmp/touched-files" -readarray -t touchedFiles < "$tmp/touched-files" +readarray -t touchedFiles < "$touchedFilesFile" log "This PR touches ${#touchedFiles[@]} files" -# Get the owners file from the base, because we don't want to allow PRs to -# remove code owners to avoid pinging them -git -C "$gitRepo" show "$baseRef":"$ownersFile" > "$tmp"/codeowners - # Associative array with the user as the key for easy de-duplication # Make sure to always lowercase keys to avoid duplicates with different casings declare -A users=() for file in "${touchedFiles[@]}"; do - result=$(codeowners --file "$tmp"/codeowners "$file") + result=$(codeowners --file "$ownersFile" "$file") # Remove the file prefix and trim the surrounding spaces read -r owners <<< "${result#"$file"}" diff --git a/ci/request-reviews/request-code-owner-reviews.sh b/ci/request-reviews/request-code-owner-reviews.sh index 0ec61ab9f04e..09df38304854 100755 --- a/ci/request-reviews/request-code-owner-reviews.sh +++ b/ci/request-reviews/request-code-owner-reviews.sh @@ -53,5 +53,11 @@ git -C "$tmp/nixpkgs.git" config remote.fork.promisor true git -C "$tmp/nixpkgs.git" fetch --no-tags fork "$prBranch" headRef=$(git -C "$tmp/nixpkgs.git" rev-parse refs/remotes/fork/"$prBranch") +git -C "$tmp/nixpkgs.git" diff --name-only --merge-base "$baseBranch" "$headRef" > "$tmp/touched-files" + +# Get the owners file from the base, because we don't want to allow PRs to +# remove code owners to avoid pinging them +git -C "$tmp/nixpkgs.git" show "$baseBranch":"$ownersFile" > "$tmp"/codeowners + log "Requesting reviews from code owners" -"$SCRIPT_DIR"/get-code-owners.sh "$tmp/nixpkgs.git" "$ownersFile" "$baseBranch" "$headRef" +"$SCRIPT_DIR"/get-code-owners.sh "$tmp/touched-files" "$tmp"/codeowners From 18ab6b721e288ca9b6f9c0000ea62bebd070d5a1 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Sat, 1 Nov 2025 12:02:44 +0100 Subject: [PATCH 3/4] ci/request-reviews: move gh api calls out of get-code-owners All the github related logic is now bundled in `request-reviewers.sh`. This allows moving the `get-code-owners.sh` file into the eval/compare step in the next commit. --- .github/workflows/reviewers.yml | 14 -------- ci/request-reviews/get-code-owners.sh | 43 ++----------------------- ci/request-reviews/request-reviewers.sh | 34 ++++++++++++++++++- 3 files changed, 35 insertions(+), 56 deletions(-) diff --git a/.github/workflows/reviewers.yml b/.github/workflows/reviewers.yml index 4b49972e0732..a6822aa9af0e 100644 --- a/.github/workflows/reviewers.yml +++ b/.github/workflows/reviewers.yml @@ -63,27 +63,13 @@ jobs: permission-members: read permission-pull-requests: write - - name: Log current API rate limits (app-token) - if: ${{ steps.app-token.outputs.token }} - env: - GH_TOKEN: ${{ steps.app-token.outputs.token }} - run: gh api /rate_limit | jq - - name: Determining code owner reviews - if: steps.app-token.outputs.token env: - GH_TOKEN: ${{ steps.app-token.outputs.token }} REPOSITORY: ${{ github.repository }} NUMBER: ${{ github.event.number }} run: | result/bin/request-code-owner-reviews.sh "$REPOSITORY" "$NUMBER" ci/OWNERS > owners.txt - - name: Log current API rate limits (app-token) - if: ${{ steps.app-token.outputs.token }} - env: - GH_TOKEN: ${{ steps.app-token.outputs.token }} - run: gh api /rate_limit | jq - - name: Log current API rate limits (github.token) env: GH_TOKEN: ${{ github.token }} diff --git a/ci/request-reviews/get-code-owners.sh b/ci/request-reviews/get-code-owners.sh index 5d121f69a74c..33963bce7f30 100755 --- a/ci/request-reviews/get-code-owners.sh +++ b/ci/request-reviews/get-code-owners.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Get the code owners of the files changed by a PR, returning one username per line +# Get the code owners of the files changed by a PR, returning one username or team per line set -euo pipefail @@ -16,16 +16,9 @@ fi touchedFilesFile=$1 ownersFile=$2 -tmp=$(mktemp -d) -trap 'rm -rf "$tmp"' exit - readarray -t touchedFiles < "$touchedFilesFile" log "This PR touches ${#touchedFiles[@]} files" -# Associative array with the user as the key for easy de-duplication -# Make sure to always lowercase keys to avoid duplicates with different casings -declare -A users=() - for file in "${touchedFiles[@]}"; do result=$(codeowners --file "$ownersFile" "$file") @@ -52,39 +45,7 @@ for file in "${touchedFiles[@]}"; do # The first regex match is everything after the @ entry=${BASH_REMATCH[1]} - if [[ "$entry" =~ (.*)/(.*) ]]; then - # Teams look like $org/$team - org=${BASH_REMATCH[1]} - team=${BASH_REMATCH[2]} - - # Instead of requesting a review from the team itself, - # we request reviews from the individual users. - # This is because once somebody from a team reviewed the PR, - # the API doesn't expose that the team was already requested for a review, - # so we wouldn't be able to avoid rerequesting reviews - # without saving some some extra state somewhere - - # We could also consider implementing a more advanced heuristic - # in the future that e.g. only pings one team member, - # but escalates to somebody else if that member doesn't respond in time. - gh api \ - --cache=1h \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "/orgs/$org/teams/$team/members" \ - --jq '.[].login' > "$tmp/team-members" - readarray -t members < "$tmp/team-members" - log "Team $entry has these members: ${members[*]}" - - for user in "${members[@]}"; do - users[${user,,}]= - done - else - # Everything else is a user - users[${entry,,}]= - fi + echo "$entry" done done - -printf "%s\n" "${!users[@]}" diff --git a/ci/request-reviews/request-reviewers.sh b/ci/request-reviews/request-reviewers.sh index 6782b621b70b..b4288669db93 100755 --- a/ci/request-reviews/request-reviewers.sh +++ b/ci/request-reviews/request-reviewers.sh @@ -33,9 +33,41 @@ prAuthor=$3 tmp=$(mktemp -d) trap 'rm -rf "$tmp"' exit +# Associative array with the user as the key for easy de-duplication +# Make sure to always lowercase keys to avoid duplicates with different casings declare -A users=() while read -r handle && [[ -n "$handle" ]]; do - users[${handle,,}]= + if [[ "$handle" =~ (.*)/(.*) ]]; then + # Teams look like $org/$team + org=${BASH_REMATCH[1]} + team=${BASH_REMATCH[2]} + + # Instead of requesting a review from the team itself, + # we request reviews from the individual users. + # This is because once somebody from a team reviewed the PR, + # the API doesn't expose that the team was already requested for a review, + # so we wouldn't be able to avoid rerequesting reviews + # without saving some some extra state somewhere + + # We could also consider implementing a more advanced heuristic + # in the future that e.g. only pings one team member, + # but escalates to somebody else if that member doesn't respond in time. + gh api \ + --cache=1h \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "/orgs/$org/teams/$team/members" \ + --jq '.[].login' > "$tmp/team-members" + readarray -t members < "$tmp/team-members" + log "Team $entry has these members: ${members[*]}" + + for user in "${members[@]}"; do + users[${user,,}]= + done + else + # Everything else is a user + users[${handle,,}]= + fi done # Cannot request a review from the author From 3bef0dcf6588c23c95ce882e17a592d6ec66aa21 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Sat, 1 Nov 2025 14:27:15 +0100 Subject: [PATCH 4/4] ci/request-reviews: move get-code-owners to eval/compare This moves the parsing of ci/OWNERS into the Nix sandbox. We also get rid of checking out the nixpkgs repo another time in the reviewers workflow - we already have everything we need in the eval/compare job. The creation of owners.txt in this way is only temporary, it should eventually be moved further, similar to how maintainers.json is currently migrating to a maintainer map for the whole repo stored on the target branch as artifact. --- .github/workflows/reviewers.yml | 9 +-- ci/eval/compare/default.nix | 39 ++++++++++++ ci/request-reviews/default.nix | 8 --- ci/request-reviews/get-code-owners.sh | 51 --------------- .../request-code-owner-reviews.sh | 63 ------------------- 5 files changed, 40 insertions(+), 130 deletions(-) delete mode 100755 ci/request-reviews/get-code-owners.sh delete mode 100755 ci/request-reviews/request-code-owner-reviews.sh diff --git a/.github/workflows/reviewers.yml b/.github/workflows/reviewers.yml index a6822aa9af0e..03d8caa2661b 100644 --- a/.github/workflows/reviewers.yml +++ b/.github/workflows/reviewers.yml @@ -63,13 +63,6 @@ jobs: permission-members: read permission-pull-requests: write - - name: Determining code owner reviews - env: - REPOSITORY: ${{ github.repository }} - NUMBER: ${{ github.event.number }} - run: | - result/bin/request-code-owner-reviews.sh "$REPOSITORY" "$NUMBER" ci/OWNERS > owners.txt - - name: Log current API rate limits (github.token) env: GH_TOKEN: ${{ github.token }} @@ -149,7 +142,7 @@ jobs: # There appears to be no API to request reviews based on GitHub IDs jq -r 'keys[]' comparison/maintainers.json \ | while read -r id; do gh api /user/"$id" --jq .login; done \ - | cat owners.txt - \ + | cat comparison/owners.txt - \ | GH_TOKEN="$APP_GH_TOKEN" result/bin/request-reviewers.sh "$REPOSITORY" "$NUMBER" "$AUTHOR" - name: Log current API rate limits (app-token) diff --git a/ci/eval/compare/default.nix b/ci/eval/compare/default.nix index eabc28af4d36..571e6ba41068 100644 --- a/ci/eval/compare/default.nix +++ b/ci/eval/compare/default.nix @@ -7,6 +7,7 @@ python3, stdenvNoCC, makeWrapper, + codeowners, }: let python = python3.withPackages (ps: [ @@ -48,6 +49,7 @@ in { combinedDir, touchedFilesJson, + ownersFile ? ../../OWNERS, }: let # Usually we expect a derivation, but when evaluating in multiple separate steps, we pass @@ -174,6 +176,7 @@ runCommand "compare" nativeBuildInputs = map lib.getBin [ jq cmp-stats + codeowners ]; maintainers = builtins.toJSON maintainers; packages = builtins.toJSON packages; @@ -222,6 +225,42 @@ runCommand "compare" } >> $out/step-summary.md fi + jq -r '.[]' "${touchedFilesJson}" > ./touched-files + readarray -t touchedFiles < ./touched-files + echo "This PR touches ''${#touchedFiles[@]} files" + + # TODO: Move ci/OWNERS to Nix and produce owners.json instead of owners.txt. + for file in "''${touchedFiles[@]}"; do + result=$(codeowners --file "${ownersFile}" "$file") + + # Remove the file prefix and trim the surrounding spaces + read -r owners <<< "''${result#"$file"}" + if [[ "$owners" == "(unowned)" ]]; then + echo "File $file is unowned" + continue + fi + echo "File $file is owned by $owners" + + # Split up multiple owners, separated by arbitrary amounts of spaces + IFS=" " read -r -a entries <<< "$owners" + + for entry in "''${entries[@]}"; do + # GitHub technically also supports Emails as code owners, + # but we can't easily support that, so let's not + if [[ ! "$entry" =~ @(.*) ]]; then + echo -e "\e[33mCodeowner \"$entry\" for file $file is not valid: Must start with \"@\"\e[0m" + # Don't fail, because the PR for which this script runs can't fix it, + # it has to be fixed in the base branch + continue + fi + # The first regex match is everything after the @ + entry=''${BASH_REMATCH[1]} + + echo "$entry" >> "$out/owners.txt" + done + + done + cp "$maintainersPath" "$out/maintainers.json" cp "$packagesPath" "$out/packages.json" '' diff --git a/ci/request-reviews/default.nix b/ci/request-reviews/default.nix index 075ff52fd564..55a1889fe89f 100644 --- a/ci/request-reviews/default.nix +++ b/ci/request-reviews/default.nix @@ -3,20 +3,15 @@ stdenvNoCC, makeWrapper, coreutils, - codeowners, jq, - curl, github-cli, - gitMinimal, }: stdenvNoCC.mkDerivation { name = "request-reviews"; src = lib.fileset.toSource { root = ./.; fileset = lib.fileset.unions [ - ./get-code-owners.sh ./request-reviewers.sh - ./request-code-owner-reviews.sh ]; }; nativeBuildInputs = [ makeWrapper ]; @@ -29,11 +24,8 @@ stdenvNoCC.mkDerivation { --set PATH ${ lib.makeBinPath [ coreutils - codeowners jq - curl github-cli - gitMinimal ] } done diff --git a/ci/request-reviews/get-code-owners.sh b/ci/request-reviews/get-code-owners.sh deleted file mode 100755 index 33963bce7f30..000000000000 --- a/ci/request-reviews/get-code-owners.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env bash - -# Get the code owners of the files changed by a PR, returning one username or team per line - -set -euo pipefail - -log() { - echo "$@" >&2 -} - -if (( "$#" < 4 )); then - log "Usage: $0 TOUCHED_FILES_FILE OWNERS_FILE" - exit 1 -fi - -touchedFilesFile=$1 -ownersFile=$2 - -readarray -t touchedFiles < "$touchedFilesFile" -log "This PR touches ${#touchedFiles[@]} files" - -for file in "${touchedFiles[@]}"; do - result=$(codeowners --file "$ownersFile" "$file") - - # Remove the file prefix and trim the surrounding spaces - read -r owners <<< "${result#"$file"}" - if [[ "$owners" == "(unowned)" ]]; then - log "File $file is unowned" - continue - fi - log "File $file is owned by $owners" - - # Split up multiple owners, separated by arbitrary amounts of spaces - IFS=" " read -r -a entries <<< "$owners" - - for entry in "${entries[@]}"; do - # GitHub technically also supports Emails as code owners, - # but we can't easily support that, so let's not - if [[ ! "$entry" =~ @(.*) ]]; then - warn -e "\e[33mCodeowner \"$entry\" for file $file is not valid: Must start with \"@\"\e[0m" >&2 - # Don't fail, because the PR for which this script runs can't fix it, - # it has to be fixed in the base branch - continue - fi - # The first regex match is everything after the @ - entry=${BASH_REMATCH[1]} - - echo "$entry" - done - -done diff --git a/ci/request-reviews/request-code-owner-reviews.sh b/ci/request-reviews/request-code-owner-reviews.sh deleted file mode 100755 index 09df38304854..000000000000 --- a/ci/request-reviews/request-code-owner-reviews.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env bash - -# Requests reviews for a PR - -set -euo pipefail -tmp=$(mktemp -d) -trap 'rm -rf "$tmp"' exit -SCRIPT_DIR=$(dirname "$0") - -log() { - echo "$@" >&2 -} - -if (( $# < 3 )); then - log "Usage: $0 GITHUB_REPO PR_NUMBER OWNERS_FILE" - exit 1 -fi -baseRepo=$1 -prNumber=$2 -ownersFile=$3 - -log "Fetching PR info" -prInfo=$(gh api \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "/repos/$baseRepo/pulls/$prNumber") - -baseBranch=$(jq -r .base.ref <<< "$prInfo") -log "Base branch: $baseBranch" -prRepo=$(jq -r .head.repo.full_name <<< "$prInfo") -log "PR repo: $prRepo" -prBranch=$(jq -r .head.ref <<< "$prInfo") -log "PR branch: $prBranch" - -extraArgs=() -if pwdRepo=$(git rev-parse --show-toplevel 2>/dev/null); then - # Speedup for local runs - extraArgs+=(--reference-if-able "$pwdRepo") -fi - -log "Fetching Nixpkgs commit history" -# We only need the commit history, not the contents, so we can do a tree-less clone using tree:0 -# https://github.blog/open-source/git/get-up-to-speed-with-partial-clone-and-shallow-clone/#user-content-quick-summary -git clone --bare --filter=tree:0 --no-tags --origin upstream "${extraArgs[@]}" https://github.com/"$baseRepo".git "$tmp"/nixpkgs.git - -log "Fetching the PR commit history" -# Fetch the PR -git -C "$tmp/nixpkgs.git" remote add fork https://github.com/"$prRepo".git -# This remote config is the same as --filter=tree:0 when cloning -git -C "$tmp/nixpkgs.git" config remote.fork.partialclonefilter tree:0 -git -C "$tmp/nixpkgs.git" config remote.fork.promisor true - -git -C "$tmp/nixpkgs.git" fetch --no-tags fork "$prBranch" -headRef=$(git -C "$tmp/nixpkgs.git" rev-parse refs/remotes/fork/"$prBranch") - -git -C "$tmp/nixpkgs.git" diff --name-only --merge-base "$baseBranch" "$headRef" > "$tmp/touched-files" - -# Get the owners file from the base, because we don't want to allow PRs to -# remove code owners to avoid pinging them -git -C "$tmp/nixpkgs.git" show "$baseBranch":"$ownersFile" > "$tmp"/codeowners - -log "Requesting reviews from code owners" -"$SCRIPT_DIR"/get-code-owners.sh "$tmp/touched-files" "$tmp"/codeowners