From 0d584f7c8fe87a8ddd8ce013b8b19b47fc3834aa Mon Sep 17 00:00:00 2001
From: Johannes Kirschbauer
Date: Tue, 1 Apr 2025 10:27:06 +0200
Subject: [PATCH] ci/compare: nix stats comparison

Displays stats table in the step-summary if there are no added/removed
packages
---
 ci/eval/compare/cmp-stats.py | 153 +++++++++++++++++++++++++++++++++++
 ci/eval/compare/default.nix  |  50 +++++++++++-
 ci/eval/default.nix          |   2 +
 3 files changed, 201 insertions(+), 4 deletions(-)
 create mode 100644 ci/eval/compare/cmp-stats.py

diff --git a/ci/eval/compare/cmp-stats.py b/ci/eval/compare/cmp-stats.py
new file mode 100644
index 000000000000..e3dc8bbd8e64
--- /dev/null
+++ b/ci/eval/compare/cmp-stats.py
@@ -0,0 +1,153 @@
+import json
+import os
+from scipy.stats import ttest_rel
+import pandas as pd
+import numpy as np
+from pathlib import Path
+
+# Define metrics of interest (can be expanded as needed)
+METRIC_PREFIXES = ("nr", "gc")
+
+def flatten_data(json_data: dict) -> dict:
+    """
+    Extracts and flattens metrics from JSON data.
+    This is needed because the JSON data can be nested.
+    For example, the JSON data entry might look like this:
+
+    "gc":{"cycles":13,"heapSize":5404549120,"totalBytes":9545876464}
+
+    Flattened:
+
+    "gc.cycles": 13
+    "gc.heapSize": 5404549120
+    ...
+
+    Args:
+        json_data (dict): JSON data containing metrics.
+    Returns:
+        dict: Flattened metrics with keys as metric names.
+    """
+    flat_metrics = {}
+    for k, v in json_data.items():
+        if isinstance(v, (int, float)):
+            flat_metrics[k] = v
+        elif isinstance(v, dict):
+            for sub_k, sub_v in v.items():
+                flat_metrics[f"{k}.{sub_k}"] = sub_v
+    return flat_metrics
+
+
+def load_all_metrics(directory: Path) -> dict:
+    """
+    Loads all stats JSON files in the specified directory and extracts metrics.
+
+    Args:
+        directory (Path): Directory containing JSON files.
+    Returns:
+        dict: Dictionary with filenames as keys and extracted metrics as values.
+    """
+    metrics = {}
+    for system_dir in directory.iterdir():
+        assert system_dir.is_dir()
+
+        for chunk_output in system_dir.iterdir():
+            with chunk_output.open() as f:
+                data = json.load(f)
+                metrics[f"{system_dir.name}/{chunk_output.name}"] = flatten_data(data)
+
+    return metrics
+
+def dataframe_to_markdown(df: pd.DataFrame) -> str:
+    markdown_lines = []
+
+    # Header (get column names and format them)
+    header = '\n| ' + ' | '.join(df.columns) + ' |'
+    markdown_lines.append(header)
+    markdown_lines.append("| - " * (len(df.columns)) + "|")  # Separator line
+
+    # Iterate over rows to build Markdown rows
+    for _, row in df.iterrows():
+        # TODO: define threshold for highlighting
+        highlight = False
+
+        fmt = lambda x: f"**{x}**" if highlight else f"{x}"
+
+        # Check for no change and NaN in p_value/t_stat
+        row_values = []
+        for val in row:
+            if isinstance(val, float) and np.isnan(val):  # For NaN values in p-value or t-stat
+                row_values.append("-")  # Custom symbol for NaN
+            elif isinstance(val, float) and val == 0:  # For no change (mean_diff == 0)
+                row_values.append("-")  # Custom symbol for no change
+            else:
+                row_values.append(fmt(f"{val:.4f}" if isinstance(val, float) else str(val)))
+
+        markdown_lines.append('| ' + ' | '.join(row_values) + ' |')
+
+    return '\n'.join(markdown_lines)
+
+
+def perform_pairwise_tests(before_metrics: dict, after_metrics: dict) -> pd.DataFrame:
+    common_files = sorted(set(before_metrics) & set(after_metrics))
+    all_keys = sorted({ metric_keys for file_metrics in before_metrics.values() for metric_keys in file_metrics.keys() })
+
+    results = []
+
+    for key in all_keys:
+        before_vals, after_vals = [], []
+
+        for fname in common_files:
+            if key in before_metrics[fname] and key in after_metrics[fname]:
+                before_vals.append(before_metrics[fname][key])
+                after_vals.append(after_metrics[fname][key])
+
+        if len(before_vals) >= 2:
+            before_arr = np.array(before_vals)
+            after_arr = np.array(after_vals)
+
+            diff = after_arr - before_arr
+            pct_change = 100 * diff / before_arr
+            t_stat, p_val = ttest_rel(after_arr, before_arr)
+
+            results.append({
+                "metric": key,
+                "mean_before": np.mean(before_arr),
+                "mean_after": np.mean(after_arr),
+                "mean_diff": np.mean(diff),
+                "mean_%_change": np.mean(pct_change),
+                "p_value": p_val,
+                "t_stat": t_stat
+            })
+
+    df = pd.DataFrame(results).sort_values("p_value")
+    return df
+
+
+if __name__ == "__main__":
+    before_dir = os.environ.get("BEFORE_DIR")
+    after_dir = os.environ.get("AFTER_DIR")
+
+    if not before_dir or not after_dir:
+        print("Error: Environment variables 'BEFORE_DIR' and 'AFTER_DIR' must be set.")
+        exit(1)
+
+    before_stats = Path(before_dir) / "stats"
+    after_stats = Path(after_dir) / "stats"
+
+    # This may happen if the pull request target does not include PR#399720 yet.
+    if not before_stats.exists():
+        print("⚠️ Skipping comparison: stats directory is missing in the target commit.")
+        exit(0)
+
+    # This should never happen, but we're exiting gracefully anyway
+    if not after_stats.exists():
+        print("⚠️ Skipping comparison: stats directory missing in current PR evaluation.")
+        exit(0)
+
+    before_metrics = load_all_metrics(before_stats)
+    after_metrics = load_all_metrics(after_stats)
+    df1 = perform_pairwise_tests(before_metrics, after_metrics)
+    markdown_table = dataframe_to_markdown(df1)
+    print(markdown_table)
diff --git a/ci/eval/compare/default.nix b/ci/eval/compare/default.nix
index 04676476f4b2..7b677c6d01f3 100644
--- a/ci/eval/compare/default.nix
+++ b/ci/eval/compare/default.nix
@@ -3,6 +3,7 @@
   jq,
   runCommand,
   writeText,
+  python3,
   ...
 }:
 {
@@ -125,18 +126,59 @@ let
 in
 runCommand "compare"
   {
-    nativeBuildInputs = [ jq ];
+    nativeBuildInputs = [
+      jq
+      (python3.withPackages (
+        ps: with ps; [
+          numpy
+          pandas
+          scipy
+        ]
+      ))
+    ];
     maintainers = builtins.toJSON maintainers;
     passAsFile = [ "maintainers" ];
+    env = {
+      BEFORE_DIR = "${beforeResultDir}";
+      AFTER_DIR = "${afterResultDir}";
+    };
   }
   ''
     mkdir $out
     cp ${changed-paths} $out/changed-paths.json
-    jq -r -f ${./generate-step-summary.jq} < ${changed-paths} > $out/step-summary.md
+
+    if jq -e '(.attrdiff.added | length == 0) and (.attrdiff.removed | length == 0)' "${changed-paths}" > /dev/null; then
+      # Package chunks are the same in both revisions
+      # We can generate a performance comparison
+      {
+        echo
+        echo "# Performance comparison"
+        echo
+        echo "This compares the performance of this branch against its pull request base branch (e.g., 'master')."
+        echo
+        echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"
+        echo
+      } >> $out/step-summary.md
+
+      python3 ${./cmp-stats.py} >> $out/step-summary.md
+
+    else
+      # Chunks have changed between revisions
+      # We cannot generate a performance comparison
+      {
+        echo
+        echo "# Performance comparison"
+        echo
+        echo "Performance stats were skipped because the package sets differ between the two revisions."
+        echo
+        echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"
+      } >> $out/step-summary.md
+    fi
+
+    jq -r -f ${./generate-step-summary.jq} < ${changed-paths} >> $out/step-summary.md
     cp "$maintainersPath" "$out/maintainers.json"
-
-    # TODO: Compare eval stats
   ''
diff --git a/ci/eval/default.nix b/ci/eval/default.nix
index 8537084b1bd5..639e75ec4211 100644
--- a/ci/eval/default.nix
+++ b/ci/eval/default.nix
@@ -9,6 +9,7 @@
   nixVersions,
   jq,
   sta,
+  python3,
 }:
 
 let
@@ -270,6 +271,7 @@ let
       runCommand
       writeText
       supportedSystems
+      python3
       ;
   };
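
A minimal sketch of running the comparison by hand, outside the runCommand
derivation above: it assumes two hypothetical result directories ./before and
./after, each containing a stats/<system>/<chunk output> tree of Nix eval
stats JSON files as produced by the eval step, and a Python interpreter with
numpy, pandas and scipy available (for example the python3.withPackages
environment declared above):

    # Hypothetical local run; the directory names are placeholders.
    BEFORE_DIR=./before AFTER_DIR=./after python3 ci/eval/compare/cmp-stats.py

The script prints the Markdown stats table to stdout; in CI, the runCommand
derivation appends that output to $out/step-summary.md.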