From 338b5bde5de829a4d50ba1e94e821ec7d787272c Mon Sep 17 00:00:00 2001
From: Tom McLaughlin
Date: Wed, 13 Aug 2025 02:34:38 -0700
Subject: [PATCH] julia.withPackages: improve weak dependency handling

Generate Project.toml/Manifest.toml up front with a new python/project.py
(fed by the package closure and a new stdlib-infos.nix derivation) and have
depot.nix run Pkg.instantiate()/Pkg.build() on that project instead of
calling Pkg.add() on a list of package names. The weak-dependency
re-resolve loop in resolve_packages.jl is removed; minimal_registry.py now
copies Package.toml for dependencies that are referenced but were not
resolved. Artifact fixups additionally put gcc.cc.lib on the rpath.

---
 pkgs/development/julia-modules/default.nix    |  42 +++++--
 pkgs/development/julia-modules/depot.nix      |  39 +++----
 .../julia-modules/package-closure.nix         |   9 ++
 .../julia-modules/python/extract_artifacts.py |   9 +-
 .../julia-modules/python/minimal_registry.py  |  49 +++++++--
 .../julia-modules/python/project.py           | 104 ++++++++++++++++++
 .../julia-modules/python/sources_nix.py       |   8 +-
 .../julia-modules/resolve_packages.jl         |  50 +--------
 .../julia-modules/stdlib-infos.nix            |  36 ++++++
 9 files changed, 252 insertions(+), 94 deletions(-)
 create mode 100755 pkgs/development/julia-modules/python/project.py
 create mode 100644 pkgs/development/julia-modules/stdlib-infos.nix

diff --git a/pkgs/development/julia-modules/default.nix b/pkgs/development/julia-modules/default.nix
index 8568c70e4c52..1c6d33326f53 100644
--- a/pkgs/development/julia-modules/default.nix
+++ b/pkgs/development/julia-modules/default.nix
@@ -11,6 +11,7 @@
 
   # Artifacts dependencies
   fetchurl,
+  gcc,
   glibc,
   pkgs,
   stdenv,
@@ -79,7 +80,9 @@ let
     PythonCall = [ "PyCall" ];
   };
 
-  # Invoke Julia resolution logic to determine the full dependency closure
+  # Invoke Julia resolution logic to determine the full dependency closure. Also
+  # gather information on the Julia standard libraries, which we'll need to
+  # generate a Manifest.toml.
   packageOverridesRepoified = lib.mapAttrs util.repoifySimple packageOverrides;
   closureYaml = callPackage ./package-closure.nix {
     inherit
@@ -90,6 +93,9 @@ let
       ;
     packageOverrides = packageOverridesRepoified;
   };
+  stdlibInfos = callPackage ./stdlib-infos.nix {
+    inherit julia;
+  };
 
   # Generate a Nix file consisting of a map from dependency UUID --> package info with fetchgit call:
   # {
@@ -181,6 +187,27 @@ let
           "${dependencyUuidToRepoYaml}" \
           "$out"
       '';
+  project =
+    runCommand "julia-project"
+      {
+        buildInputs = [
+          (python3.withPackages (
+            ps: with ps; [
+              toml
+              pyyaml
+            ]
+          ))
+          git
+        ];
+      }
+      ''
+        python ${./python}/project.py \
+          "${closureYaml}" \
+          "${stdlibInfos}" \
+          '${lib.generators.toJSON { } overridesOnly}' \
+          "${dependencyUuidToRepoYaml}" \
+          "$out"
+      '';
 
   # Next, deal with artifacts. Scan each artifacts file individually and generate a Nix file that
   # produces the desired Overrides.toml.
@@ -220,7 +247,7 @@ let
         ;
     }
     // lib.optionalAttrs (!stdenv.targetPlatform.isDarwin) {
-      inherit glibc;
+      inherit gcc glibc;
     }
   );
   overridesJson = writeTextFile {
@@ -235,8 +262,7 @@ let
       "$out"
   '';
 
-  # Build a Julia project and depot. The project contains Project.toml/Manifest.toml, while the
-  # depot contains package build products (including the precompiled libraries, if precompile=true)
+  # Build a Julia project and depot under $out/project and $out/depot respectively
   projectAndDepot = callPackage ./depot.nix {
     inherit
       closureYaml
@@ -247,12 +273,8 @@ let
       precompile
       ;
     julia = juliaWrapped;
+    inherit project;
     registry = minimalRegistry;
-    packageNames =
-      if makeTransitiveDependenciesImportable then
-        lib.mapAttrsToList (uuid: info: info.name) dependencyUuidToInfo
-      else
-        packageNames;
   };
 
 in
@@ -276,7 +298,9 @@ runCommand "julia-${julia.version}-env"
       inherit artifactsNix;
       inherit overridesJson;
       inherit overridesToml;
+      inherit project;
       inherit projectAndDepot;
+      inherit stdlibInfos;
     };
   }
   (
diff --git a/pkgs/development/julia-modules/depot.nix b/pkgs/development/julia-modules/depot.nix
index be5693d2b5d9..bf461f3e3759 100644
--- a/pkgs/development/julia-modules/depot.nix
+++ b/pkgs/development/julia-modules/depot.nix
@@ -14,7 +14,7 @@
   juliaCpuTarget,
   overridesToml,
   packageImplications,
-  packageNames,
+  project,
   precompile,
   registry,
 }:
@@ -44,7 +44,7 @@ runCommand "julia-depot"
       (python3.withPackages (ps: with ps; [ pyyaml ]))
     ]
     ++ extraLibs;
-    inherit precompile registry;
+    inherit precompile project registry;
   }
   (
     ''
@@ -52,19 +52,21 @@ runCommand "julia-depot"
 
       echo "Building Julia depot and project with the following inputs"
       echo "Julia: ${julia}"
+      echo "Project: $project"
       echo "Registry: $registry"
       echo "Overrides ${overridesToml}"
 
       mkdir -p $out/project
       export JULIA_PROJECT="$out/project"
+      cp "$project/Manifest.toml" "$JULIA_PROJECT/Manifest.toml"
+      cp "$project/Project.toml" "$JULIA_PROJECT/Project.toml"
 
       mkdir -p $out/depot/artifacts
       export JULIA_DEPOT_PATH="$out/depot"
       cp ${overridesToml} $out/depot/artifacts/Overrides.toml
 
       # These can be useful to debug problems
-      # export JULIA_DEBUG=Pkg
-      # export JULIA_DEBUG=loading
+      # export JULIA_DEBUG=Pkg,loading
 
       ${setJuliaSslCaRootsPath}
 
@@ -104,26 +106,21 @@ runCommand "julia-depot"
 
         Pkg.Registry.add(Pkg.RegistrySpec(path="${registry}"))
 
-        input = ${lib.generators.toJSON { } packageNames} ::Vector{String}
+        # No need to Pkg.activate() since we set JULIA_PROJECT above
+        println("Running Pkg.instantiate()")
+        Pkg.instantiate()
 
-        if isfile("extra_package_names.txt")
-          append!(input, readlines("extra_package_names.txt"))
-        end
+        # Build is a separate step from instantiate.
+        # Needed for packages like Conda.jl to set themselves up.
+        println("Running Pkg.build()")
+        Pkg.build()
 
-        input = unique(input)
-
-        if !isempty(input)
-          println("Adding packages: " * join(input, " "))
-          Pkg.add(input; preserve=PRESERVE_NONE)
-          Pkg.instantiate()
-
-          if "precompile" in keys(ENV) && ENV["precompile"] != "0" && ENV["precompile"] != ""
-            if isdefined(Sys, :CPU_NAME)
-              println("Precompiling with CPU_NAME = " * Sys.CPU_NAME)
-            end
-
-            Pkg.precompile()
+        if "precompile" in keys(ENV) && ENV["precompile"] != "0" && ENV["precompile"] != ""
+          if isdefined(Sys, :CPU_NAME)
+            println("Precompiling with CPU_NAME = " * Sys.CPU_NAME)
           end
+
+          Pkg.precompile()
         end
 
         # Remove the registry to save space
diff --git a/pkgs/development/julia-modules/package-closure.nix b/pkgs/development/julia-modules/package-closure.nix
index a393a2f49427..f0095366455f 100644
--- a/pkgs/development/julia-modules/package-closure.nix
+++ b/pkgs/development/julia-modules/package-closure.nix
@@ -43,12 +43,21 @@ let
         println(io, "- name: " * spec.name)
         println(io, "  uuid: " * string(spec.uuid))
         println(io, "  version: " * string(spec.version))
+        println(io, "  tree_hash: " * string(spec.tree_hash))
         if endswith(spec.name, "_jll") && haskey(deps_map, spec.uuid)
           println(io, "  depends_on: ")
           for (dep_name, dep_uuid) in pairs(deps_map[spec.uuid])
             println(io, "    \"$(dep_name)\": \"$(dep_uuid)\"")
           end
         end
+        println(io, "  deps: ")
+        for (dep_name, dep_uuid) in pairs(get(deps_map, spec.uuid, Dict()))
+          println(io, "    - name: \"$(dep_name)\"")
+          println(io, "      uuid: \"$(dep_uuid)\"")
+        end
+        if spec.name in input
+          println(io, "  is_input: true")
+        end
       end
     end
   '';
diff --git a/pkgs/development/julia-modules/python/extract_artifacts.py b/pkgs/development/julia-modules/python/extract_artifacts.py
index 134294321f26..642611e029c4 100755
--- a/pkgs/development/julia-modules/python/extract_artifacts.py
+++ b/pkgs/development/julia-modules/python/extract_artifacts.py
@@ -47,14 +47,17 @@ def get_archive_derivation(uuid, artifact_name, url, sha256, closure_dependencie
         ''"""
 
     else:
+        # We provide gcc.cc.lib by default in order to get some common libraries
+        # like libquadmath.so. A number of packages expect this to be available and
+        # will give linker errors if it isn't.
         fixup = f"""fixupPhase = let
           libs = lib.concatMap (lib.mapAttrsToList (k: v: v.path))
                                [{" ".join(["uuid-" + x for x in depends_on])}];
           in ''
             find $out -type f -executable -exec \
-              patchelf --set-rpath \$ORIGIN:\$ORIGIN/../lib:${{lib.makeLibraryPath (["$out" glibc] ++ libs ++ (with pkgs; [{" ".join(other_libs)}]))}} {{}} \;
+              patchelf --set-rpath \\$ORIGIN:\\$ORIGIN/../lib:${{lib.makeLibraryPath (["$out" glibc gcc.cc.lib] ++ libs ++ (with pkgs; [{" ".join(other_libs)}]))}} {{}} \\;
             find $out -type f -executable -exec \
-              patchelf --set-interpreter ${{glibc}}/lib/ld-linux-x86-64.so.2 {{}} \;
+              patchelf --set-interpreter ${{glibc}}/lib/ld-linux-x86-64.so.2 {{}} \\;
           ''"""
 
     return f"""stdenv.mkDerivation {{
@@ -145,7 +148,7 @@ def main():
         if is_darwin:
             f.write("{ lib, fetchurl, pkgs, stdenv }:\n\n")
         else:
-            f.write("{ lib, fetchurl, glibc, pkgs, stdenv }:\n\n")
+            f.write("{ lib, fetchurl, gcc, glibc, pkgs, stdenv }:\n\n")
 
         f.write("rec {\n")
 
diff --git a/pkgs/development/julia-modules/python/minimal_registry.py b/pkgs/development/julia-modules/python/minimal_registry.py
index bdab0716ef89..ab33ac366ca8 100755
--- a/pkgs/development/julia-modules/python/minimal_registry.py
+++ b/pkgs/development/julia-modules/python/minimal_registry.py
@@ -24,14 +24,15 @@ with open(desired_packages_path, "r") as f:
 
 uuid_to_versions = defaultdict(list)
 for pkg in desired_packages:
-    uuid_to_versions[pkg["uuid"]].append(pkg["version"])
+  uuid_to_versions[pkg["uuid"]].append(pkg["version"])
 
 with open(dependencies_path, "r") as f:
     uuid_to_store_path = yaml.safe_load(f)
 
 os.makedirs(out_path)
 
-registry = toml.load(registry_path / "Registry.toml")
+full_registry = toml.load(registry_path / "Registry.toml")
+registry = full_registry.copy()
 registry["packages"] = {k: v for k, v in registry["packages"].items() if k in uuid_to_versions}
 
 for (uuid, versions) in uuid_to_versions.items():
@@ -80,20 +81,48 @@ for (uuid, versions) in uuid_to_versions.items():
         if (registry_path / path / f).exists():
             shutil.copy2(registry_path / path / f, out_path / path)
 
-    # Copy the Versions.toml file, trimming down to the versions we care about
+    # Copy the Versions.toml file, trimming down to the versions we care about.
+    # In the case where versions=None, this is a weak dep, and we keep all versions.
     all_versions = toml.load(registry_path / path / "Versions.toml")
-    versions_to_keep = {k: v for k, v in all_versions.items() if k in versions}
+    versions_to_keep = {k: v for k, v in all_versions.items() if k in versions} if versions is not None else all_versions
     for k, v in versions_to_keep.items():
         del v["nix-sha256"]
     with open(out_path / path / "Versions.toml", "w") as f:
         toml.dump(versions_to_keep, f)
 
-    # Fill in the local store path for the repo
-    if not uuid in uuid_to_store_path: continue
-    package_toml = toml.load(registry_path / path / "Package.toml")
-    package_toml["repo"] = "file://" + uuid_to_store_path[uuid]
-    with open(out_path / path / "Package.toml", "w") as f:
-        toml.dump(package_toml, f)
+    if versions is None:
+        # This is a weak dep; just grab the whole Package.toml
+        shutil.copy2(registry_path / path / "Package.toml", out_path / path / "Package.toml")
+    elif uuid in uuid_to_store_path:
+        # Fill in the local store path for the repo
+        package_toml = toml.load(registry_path / path / "Package.toml")
+        package_toml["repo"] = "file://" + uuid_to_store_path[uuid]
+        with open(out_path / path / "Package.toml", "w") as f:
+            toml.dump(package_toml, f)
 
+# Look for missing weak deps and include them. This can happen when our initial
+# resolve step finds dependencies, but we fail to resolve them at the project.py
+# stage. Usually this happens because the package that depends on them does so
+# as a weak dep, but doesn't have a Package.toml in its repo making this clear.
+for pkg in desired_packages:
+    for dep in (pkg.get("deps", []) or []):
+        uuid = dep["uuid"]
+        if not uuid in uuid_to_versions:
+            entry = full_registry["packages"].get(uuid)
+            if not entry:
+                print(f"""WARNING: found missing UUID but couldn't resolve it: {uuid}""")
+                continue
+
+            # Add this entry back to the minimal Registry.toml
+            registry["packages"][uuid] = entry
+
+            # Bring over the Package.toml
+            path = Path(entry["path"])
+            if (out_path / path / "Package.toml").exists():
+                continue
+            Path(out_path / path).mkdir(parents=True, exist_ok=True)
+            shutil.copy2(registry_path / path / "Package.toml", out_path / path / "Package.toml")
+
+# Finally, dump the Registry.toml
 with open(out_path / "Registry.toml", "w") as f:
     toml.dump(registry, f)
diff --git a/pkgs/development/julia-modules/python/project.py b/pkgs/development/julia-modules/python/project.py
new file mode 100755
index 000000000000..4a5f2ae20719
--- /dev/null
+++ b/pkgs/development/julia-modules/python/project.py
@@ -0,0 +1,104 @@
+
+from collections import defaultdict
+import json
+import os
+from pathlib import Path
+import sys
+import toml
+import yaml
+
+
+desired_packages_path = Path(sys.argv[1])
+stdlib_infos_path = Path(sys.argv[2])
+package_overrides = json.loads(sys.argv[3])
+dependencies_path = Path(sys.argv[4])
+out_path = Path(sys.argv[5])
+
+with open(desired_packages_path, "r") as f:
+    desired_packages = yaml.safe_load(f) or []
+
+with open(stdlib_infos_path, "r") as f:
+    stdlib_infos = yaml.safe_load(f) or []
+
+with open(dependencies_path, "r") as f:
+    uuid_to_store_path = yaml.safe_load(f)
+
+result = {
+    "deps": defaultdict(list)
+}
+
+for pkg in desired_packages:
+    if pkg["uuid"] in package_overrides:
+        info = package_overrides[pkg["uuid"]]
+        result["deps"][info["name"]].append({
+            "uuid": pkg["uuid"],
+            "path": info["src"],
+        })
+        continue
+
+    path = uuid_to_store_path.get(pkg["uuid"], None)
+    isStdLib = False
+    if pkg["uuid"] in stdlib_infos["stdlibs"]:
+        path = stdlib_infos["stdlib_root"] + "/" + stdlib_infos["stdlibs"][pkg["uuid"]]["name"]
+        isStdLib = True
+
+    if path:
+        if (Path(path) / "Project.toml").exists():
+            project_toml = toml.load(Path(path) / "Project.toml")
+
+            deps = []
+            weak_deps = project_toml.get("weakdeps", {})
+            extensions = project_toml.get("extensions", {})
+
+            if "deps" in project_toml:
+                # Build up deps for the manifest, excluding weak deps
+                weak_deps_uuids = weak_deps.values()
+                for (dep_name, dep_uuid) in project_toml["deps"].items():
+                    if not (dep_uuid in weak_deps_uuids):
+                        deps.append(dep_name)
+        else:
+            # Not all projects have a Project.toml. In this case, use the deps we
+            # calculated from the package resolve step. This isn't perfect since it
+            # will fail to properly split out weak deps, but it's better than nothing.
+            print(f"""WARNING: package {pkg["name"]} didn't have a Project.toml in {path}""")
+            deps = [x["name"] for x in (pkg.get("deps") or [])]
+            weak_deps = {}
+            extensions = {}
+
+        tree_hash = pkg.get("tree_hash", "")
+
+        result["deps"][pkg["name"]].append({
+            "version": pkg["version"],
+            "uuid": pkg["uuid"],
+            "git-tree-sha1": (tree_hash if tree_hash != "nothing" else None) or None,
+            "deps": deps or None,
+            "weakdeps": weak_deps or None,
+            "extensions": extensions or None,
+
+            # We *don't* set "path" here, because then Julia will try to use the
+            # read-only Nix store path instead of cloning to the depot. This will
+            # cause packages like Conda.jl to fail during the Pkg.build() step.
+            #
+            # "path": None if isStdLib else path ,
+        })
+    else:
+        print("WARNING: adding a package that we didn't have a path for, and it doesn't seem to be a stdlib", pkg)
+        result["deps"][pkg["name"]].append({
+            "version": pkg["version"],
+            "uuid": pkg["uuid"],
+            "deps": [x["name"] for x in (pkg.get("deps") or [])]
+        })
+
+os.makedirs(out_path)
+
+with open(out_path / "Manifest.toml", "w") as f:
+    f.write(f'julia_version = "{stdlib_infos["julia_version"]}"\n')
+    f.write('manifest_format = "2.0"\n\n')
+    toml.dump(result, f)
+
+with open(out_path / "Project.toml", "w") as f:
+    f.write('[deps]\n')
+
+    for pkg in desired_packages:
+        if pkg.get("is_input", False):
+            f.write(f'''{pkg["name"]} = "{pkg["uuid"]}"\n''')
diff --git a/pkgs/development/julia-modules/python/sources_nix.py b/pkgs/development/julia-modules/python/sources_nix.py
index 989bf6bf186f..b0f0a21e3b22 100755
--- a/pkgs/development/julia-modules/python/sources_nix.py
+++ b/pkgs/development/julia-modules/python/sources_nix.py
@@ -24,7 +24,7 @@ def ensure_version_valid(version):
     Ensure a version string is a valid Julia-parsable version.
     It doesn't really matter what it looks like as it's just used for overrides.
     """
-    return re.sub('[^0-9\.]','', version)
+    return re.sub('[^0-9.]','', version)
 
 with open(out_path, "w") as f:
     f.write("{fetchgit}:\n")
@@ -41,6 +41,9 @@ with open(out_path, "w") as f:
       treehash = "{treehash}";
     }};\n""")
         elif uuid in registry["packages"]:
+            # The treehash is missing for stdlib packages. Don't bother downloading these.
+            if (not ("tree_hash" in pkg)) or pkg["tree_hash"] == "nothing": continue
+
             registry_info = registry["packages"][uuid]
             path = registry_info["path"]
             packageToml = toml.load(registry_path / path / "Package.toml")
@@ -65,7 +68,8 @@ with open(out_path, "w") as f:
       treehash = "{version_to_use["git-tree-sha1"]}";
     }};\n""")
         else:
-            # print("Warning: couldn't figure out what to do with pkg in sources_nix.py", pkg)
+            # This is probably a stdlib
+            # print("WARNING: couldn't figure out what to do with pkg in sources_nix.py", pkg)
             pass
 
     f.write("}")
diff --git a/pkgs/development/julia-modules/resolve_packages.jl b/pkgs/development/julia-modules/resolve_packages.jl
index fce60035d5f5..c53763827aab 100644
--- a/pkgs/development/julia-modules/resolve_packages.jl
+++ b/pkgs/development/julia-modules/resolve_packages.jl
@@ -46,54 +46,6 @@ end
 foreach(pkg -> ctx.env.project.deps[pkg.name] = pkg.uuid, pkgs)
 
 # Save the original pkgs for later. We might need to augment it with the weak dependencies
-orig_pkgs = pkgs
+orig_pkgs = deepcopy(pkgs)
 
 pkgs, deps_map = _resolve(ctx.io, ctx.env, ctx.registries, pkgs, PRESERVE_NONE, ctx.julia_version)
-
-if VERSION >= VersionNumber("1.9")
-    while true
-        # Check for weak dependencies, which appear on the RHS of the deps_map but not in pkgs.
-        # Build up weak_name_to_uuid
-        uuid_to_name = Dict()
-        for pkg in pkgs
-            uuid_to_name[pkg.uuid] = pkg.name
-        end
-        weak_name_to_uuid = Dict()
-        for (uuid, deps) in pairs(deps_map)
-            for (dep_name, dep_uuid) in pairs(deps)
-                if !haskey(uuid_to_name, dep_uuid)
-                    weak_name_to_uuid[dep_name] = dep_uuid
-                end
-            end
-        end
-
-        if isempty(weak_name_to_uuid)
-            break
-        end
-
-        # We have nontrivial weak dependencies, so add each one to the initial pkgs and then re-run _resolve
-        println("Found weak dependencies: $(keys(weak_name_to_uuid))")
-
-        orig_uuids = Set([pkg.uuid for pkg in orig_pkgs])
-
-        for (name, uuid) in pairs(weak_name_to_uuid)
-            if uuid in orig_uuids
-                continue
-            end
-
-            pkg = PackageSpec(name, uuid)
-
-            push!(orig_uuids, uuid)
-            push!(orig_pkgs, pkg)
-            ctx.env.project.deps[name] = uuid
-            entry = Pkg.Types.manifest_info(ctx.env.manifest, uuid)
-            if VERSION >= VersionNumber("1.11")
-                orig_pkgs[length(orig_pkgs)] = update_package_add(ctx, pkg, entry, nothing, nothing, false)
-            else
-                orig_pkgs[length(orig_pkgs)] = update_package_add(ctx, pkg, entry, false)
-            end
-        end
-
-        global pkgs, deps_map = _resolve(ctx.io, ctx.env, ctx.registries, orig_pkgs, PRESERVE_NONE, ctx.julia_version)
-    end
-end
diff --git a/pkgs/development/julia-modules/stdlib-infos.nix b/pkgs/development/julia-modules/stdlib-infos.nix
new file mode 100644
index 000000000000..1b5d10962c2f
--- /dev/null
+++ b/pkgs/development/julia-modules/stdlib-infos.nix
@@ -0,0 +1,36 @@
+{
+  julia,
+  runCommand,
+}:
+
+let
+  juliaExpression = ''
+    using Pkg
+    open(ENV["out"], "w") do io
+      println(io, "stdlib_root: \"$(Sys.STDLIB)\"")
+
+      println(io, "julia_version: \"$(string(VERSION))\"")
+
+      stdlibs = Pkg.Types.stdlibs()
+      println(io, "stdlibs:")
+      for (uuid, (name, version)) in stdlibs
+        println(io, "  \"$(uuid)\": ")
+        println(io, "    name: $name")
+        println(io, "    version: $version")
+      end
+    end
+  '';
+in
+
+runCommand "julia-stdlib-infos.yml"
+  {
+    buildInputs = [
+      julia
+    ];
+  }
+  ''
+    # Prevent a warning where Julia tries to download package server info
+    export JULIA_PKG_SERVER=""
+
+    julia -e '${juliaExpression}';
+  ''