ci/request-reviews: untangle owner-related bash code (#457503)

This commit is contained in:
Wolfgang Walther
2025-11-02 15:41:16 +00:00
committed by GitHub
6 changed files with 75 additions and 190 deletions

View File

@@ -63,28 +63,6 @@ jobs:
permission-members: read permission-members: read
permission-pull-requests: write permission-pull-requests: write
- name: Log current API rate limits (app-token)
if: ${{ steps.app-token.outputs.token }}
env:
GH_TOKEN: ${{ steps.app-token.outputs.token }}
run: gh api /rate_limit | jq
- name: Requesting code owner reviews
if: steps.app-token.outputs.token
env:
GH_TOKEN: ${{ steps.app-token.outputs.token }}
REPOSITORY: ${{ github.repository }}
NUMBER: ${{ github.event.number }}
# Don't do anything on draft PRs
DRY_MODE: ${{ github.event.pull_request.draft && '1' || '' }}
run: result/bin/request-code-owner-reviews.sh "$REPOSITORY" "$NUMBER" ci/OWNERS
- name: Log current API rate limits (app-token)
if: ${{ steps.app-token.outputs.token }}
env:
GH_TOKEN: ${{ steps.app-token.outputs.token }}
run: gh api /rate_limit | jq
- name: Log current API rate limits (github.token) - name: Log current API rate limits (github.token)
env: env:
GH_TOKEN: ${{ github.token }} GH_TOKEN: ${{ github.token }}
@@ -149,7 +127,7 @@ jobs:
GH_TOKEN: ${{ github.token }} GH_TOKEN: ${{ github.token }}
run: gh api /rate_limit | jq run: gh api /rate_limit | jq
- name: Requesting maintainer reviews - name: Requesting reviews
if: ${{ steps.app-token.outputs.token }} if: ${{ steps.app-token.outputs.token }}
env: env:
GH_TOKEN: ${{ github.token }} GH_TOKEN: ${{ github.token }}
@@ -164,6 +142,7 @@ jobs:
# There appears to be no API to request reviews based on GitHub IDs # There appears to be no API to request reviews based on GitHub IDs
jq -r 'keys[]' comparison/maintainers.json \ jq -r 'keys[]' comparison/maintainers.json \
| while read -r id; do gh api /user/"$id" --jq .login; done \ | while read -r id; do gh api /user/"$id" --jq .login; done \
| cat comparison/owners.txt - \
| GH_TOKEN="$APP_GH_TOKEN" result/bin/request-reviewers.sh "$REPOSITORY" "$NUMBER" "$AUTHOR" | GH_TOKEN="$APP_GH_TOKEN" result/bin/request-reviewers.sh "$REPOSITORY" "$NUMBER" "$AUTHOR"
- name: Log current API rate limits (app-token) - name: Log current API rate limits (app-token)

View File

@@ -7,6 +7,7 @@
python3, python3,
stdenvNoCC, stdenvNoCC,
makeWrapper, makeWrapper,
codeowners,
}: }:
let let
python = python3.withPackages (ps: [ python = python3.withPackages (ps: [
@@ -48,6 +49,7 @@ in
{ {
combinedDir, combinedDir,
touchedFilesJson, touchedFilesJson,
ownersFile ? ../../OWNERS,
}: }:
let let
# Usually we expect a derivation, but when evaluating in multiple separate steps, we pass # Usually we expect a derivation, but when evaluating in multiple separate steps, we pass
@@ -174,6 +176,7 @@ runCommand "compare"
nativeBuildInputs = map lib.getBin [ nativeBuildInputs = map lib.getBin [
jq jq
cmp-stats cmp-stats
codeowners
]; ];
maintainers = builtins.toJSON maintainers; maintainers = builtins.toJSON maintainers;
packages = builtins.toJSON packages; packages = builtins.toJSON packages;
@@ -222,6 +225,42 @@ runCommand "compare"
} >> $out/step-summary.md } >> $out/step-summary.md
fi fi
jq -r '.[]' "${touchedFilesJson}" > ./touched-files
readarray -t touchedFiles < ./touched-files
echo "This PR touches ''${#touchedFiles[@]} files"
# TODO: Move ci/OWNERS to Nix and produce owners.json instead of owners.txt.
for file in "''${touchedFiles[@]}"; do
result=$(codeowners --file "${ownersFile}" "$file")
# Remove the file prefix and trim the surrounding spaces
read -r owners <<< "''${result#"$file"}"
if [[ "$owners" == "(unowned)" ]]; then
echo "File $file is unowned"
continue
fi
echo "File $file is owned by $owners"
# Split up multiple owners, separated by arbitrary amounts of spaces
IFS=" " read -r -a entries <<< "$owners"
for entry in "''${entries[@]}"; do
# GitHub technically also supports Emails as code owners,
# but we can't easily support that, so let's not
if [[ ! "$entry" =~ @(.*) ]]; then
echo -e "\e[33mCodeowner \"$entry\" for file $file is not valid: Must start with \"@\"\e[0m"
# Don't fail, because the PR for which this script runs can't fix it,
# it has to be fixed in the base branch
continue
fi
# The first regex match is everything after the @
entry=''${BASH_REMATCH[1]}
echo "$entry" >> "$out/owners.txt"
done
done
cp "$maintainersPath" "$out/maintainers.json" cp "$maintainersPath" "$out/maintainers.json"
cp "$packagesPath" "$out/packages.json" cp "$packagesPath" "$out/packages.json"
'' ''

View File

@@ -3,20 +3,15 @@
stdenvNoCC, stdenvNoCC,
makeWrapper, makeWrapper,
coreutils, coreutils,
codeowners,
jq, jq,
curl,
github-cli, github-cli,
gitMinimal,
}: }:
stdenvNoCC.mkDerivation { stdenvNoCC.mkDerivation {
name = "request-reviews"; name = "request-reviews";
src = lib.fileset.toSource { src = lib.fileset.toSource {
root = ./.; root = ./.;
fileset = lib.fileset.unions [ fileset = lib.fileset.unions [
./get-code-owners.sh
./request-reviewers.sh ./request-reviewers.sh
./request-code-owner-reviews.sh
]; ];
}; };
nativeBuildInputs = [ makeWrapper ]; nativeBuildInputs = [ makeWrapper ];
@@ -29,11 +24,8 @@ stdenvNoCC.mkDerivation {
--set PATH ${ --set PATH ${
lib.makeBinPath [ lib.makeBinPath [
coreutils coreutils
codeowners
jq jq
curl
github-cli github-cli
gitMinimal
] ]
} }
done done

View File

@@ -1,97 +0,0 @@
#!/usr/bin/env bash
# Get the code owners of the files changed by a PR, returning one username per line
set -euo pipefail
log() {
echo "$@" >&2
}
if (( "$#" < 4 )); then
log "Usage: $0 GIT_REPO OWNERS_FILE BASE_REF HEAD_REF"
exit 1
fi
gitRepo=$1
ownersFile=$2
baseRef=$3
headRef=$4
tmp=$(mktemp -d)
trap 'rm -rf "$tmp"' exit
git -C "$gitRepo" diff --name-only --merge-base "$baseRef" "$headRef" > "$tmp/touched-files"
readarray -t touchedFiles < "$tmp/touched-files"
log "This PR touches ${#touchedFiles[@]} files"
# Get the owners file from the base, because we don't want to allow PRs to
# remove code owners to avoid pinging them
git -C "$gitRepo" show "$baseRef":"$ownersFile" > "$tmp"/codeowners
# Associative array with the user as the key for easy de-duplication
# Make sure to always lowercase keys to avoid duplicates with different casings
declare -A users=()
for file in "${touchedFiles[@]}"; do
result=$(codeowners --file "$tmp"/codeowners "$file")
# Remove the file prefix and trim the surrounding spaces
read -r owners <<< "${result#"$file"}"
if [[ "$owners" == "(unowned)" ]]; then
log "File $file is unowned"
continue
fi
log "File $file is owned by $owners"
# Split up multiple owners, separated by arbitrary amounts of spaces
IFS=" " read -r -a entries <<< "$owners"
for entry in "${entries[@]}"; do
# GitHub technically also supports Emails as code owners,
# but we can't easily support that, so let's not
if [[ ! "$entry" =~ @(.*) ]]; then
warn -e "\e[33mCodeowner \"$entry\" for file $file is not valid: Must start with \"@\"\e[0m" >&2
# Don't fail, because the PR for which this script runs can't fix it,
# it has to be fixed in the base branch
continue
fi
# The first regex match is everything after the @
entry=${BASH_REMATCH[1]}
if [[ "$entry" =~ (.*)/(.*) ]]; then
# Teams look like $org/$team
org=${BASH_REMATCH[1]}
team=${BASH_REMATCH[2]}
# Instead of requesting a review from the team itself,
# we request reviews from the individual users.
# This is because once somebody from a team reviewed the PR,
# the API doesn't expose that the team was already requested for a review,
# so we wouldn't be able to avoid rerequesting reviews
# without saving some some extra state somewhere
# We could also consider implementing a more advanced heuristic
# in the future that e.g. only pings one team member,
# but escalates to somebody else if that member doesn't respond in time.
gh api \
--cache=1h \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/orgs/$org/teams/$team/members" \
--jq '.[].login' > "$tmp/team-members"
readarray -t members < "$tmp/team-members"
log "Team $entry has these members: ${members[*]}"
for user in "${members[@]}"; do
users[${user,,}]=
done
else
# Everything else is a user
users[${entry,,}]=
fi
done
done
printf "%s\n" "${!users[@]}"

View File

@@ -1,60 +0,0 @@
#!/usr/bin/env bash
# Requests reviews for a PR
set -euo pipefail
tmp=$(mktemp -d)
trap 'rm -rf "$tmp"' exit
SCRIPT_DIR=$(dirname "$0")
log() {
echo "$@" >&2
}
if (( $# < 3 )); then
log "Usage: $0 GITHUB_REPO PR_NUMBER OWNERS_FILE"
exit 1
fi
baseRepo=$1
prNumber=$2
ownersFile=$3
log "Fetching PR info"
prInfo=$(gh api \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/$baseRepo/pulls/$prNumber")
baseBranch=$(jq -r .base.ref <<< "$prInfo")
log "Base branch: $baseBranch"
prRepo=$(jq -r .head.repo.full_name <<< "$prInfo")
log "PR repo: $prRepo"
prBranch=$(jq -r .head.ref <<< "$prInfo")
log "PR branch: $prBranch"
prAuthor=$(jq -r .user.login <<< "$prInfo")
log "PR author: $prAuthor"
extraArgs=()
if pwdRepo=$(git rev-parse --show-toplevel 2>/dev/null); then
# Speedup for local runs
extraArgs+=(--reference-if-able "$pwdRepo")
fi
log "Fetching Nixpkgs commit history"
# We only need the commit history, not the contents, so we can do a tree-less clone using tree:0
# https://github.blog/open-source/git/get-up-to-speed-with-partial-clone-and-shallow-clone/#user-content-quick-summary
git clone --bare --filter=tree:0 --no-tags --origin upstream "${extraArgs[@]}" https://github.com/"$baseRepo".git "$tmp"/nixpkgs.git
log "Fetching the PR commit history"
# Fetch the PR
git -C "$tmp/nixpkgs.git" remote add fork https://github.com/"$prRepo".git
# This remote config is the same as --filter=tree:0 when cloning
git -C "$tmp/nixpkgs.git" config remote.fork.partialclonefilter tree:0
git -C "$tmp/nixpkgs.git" config remote.fork.promisor true
git -C "$tmp/nixpkgs.git" fetch --no-tags fork "$prBranch"
headRef=$(git -C "$tmp/nixpkgs.git" rev-parse refs/remotes/fork/"$prBranch")
log "Requesting reviews from code owners"
"$SCRIPT_DIR"/get-code-owners.sh "$tmp/nixpkgs.git" "$ownersFile" "$baseBranch" "$headRef" | \
"$SCRIPT_DIR"/request-reviewers.sh "$baseRepo" "$prNumber" "$prAuthor"

View File

@@ -33,9 +33,41 @@ prAuthor=$3
tmp=$(mktemp -d) tmp=$(mktemp -d)
trap 'rm -rf "$tmp"' exit trap 'rm -rf "$tmp"' exit
# Associative array with the user as the key for easy de-duplication
# Make sure to always lowercase keys to avoid duplicates with different casings
declare -A users=() declare -A users=()
while read -r handle && [[ -n "$handle" ]]; do while read -r handle && [[ -n "$handle" ]]; do
users[${handle,,}]= if [[ "$handle" =~ (.*)/(.*) ]]; then
# Teams look like $org/$team
org=${BASH_REMATCH[1]}
team=${BASH_REMATCH[2]}
# Instead of requesting a review from the team itself,
# we request reviews from the individual users.
# This is because once somebody from a team reviewed the PR,
# the API doesn't expose that the team was already requested for a review,
# so we wouldn't be able to avoid rerequesting reviews
# without saving some some extra state somewhere
# We could also consider implementing a more advanced heuristic
# in the future that e.g. only pings one team member,
# but escalates to somebody else if that member doesn't respond in time.
gh api \
--cache=1h \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/orgs/$org/teams/$team/members" \
--jq '.[].login' > "$tmp/team-members"
readarray -t members < "$tmp/team-members"
log "Team $entry has these members: ${members[*]}"
for user in "${members[@]}"; do
users[${user,,}]=
done
else
# Everything else is a user
users[${handle,,}]=
fi
done done
# Cannot request a review from the author # Cannot request a review from the author
@@ -68,7 +100,7 @@ for user in "${!users[@]}"; do
fi fi
done done
if [[ "${#users[@]}" -gt 10 ]]; then if [[ "${#users[@]}" -gt 15 ]]; then
log "Too many reviewers (${!users[*]}), skipping review requests" log "Too many reviewers (${!users[*]}), skipping review requests"
exit 0 exit 0
fi fi