A => find-gh-srcs.nix +19 -0
@@ 1,19 @@
+{ pkgs ? import <nixpkgs> {} }:
+
+let
+  inherit (pkgs) lib;
+
+  # Turn a package into { repo = "owner/repo"; hash = <rev>; }.
+  # Evaluation fails (via throw or a failed assert) unless pkg.src provides
+  # gitRepoUrl, owner, repo and rev, the URL starts with "https://github.com"
+  # and rev consists of exactly 40 lowercase hexadecimal digits.
+  # Each attribute is selected with `or` directly on pkg.src.<attr>, so a
+  # package without a src falls through to the throw instead of raising a
+  # missing-attribute error.
+  getRepoAndHash = pkg:
+    let
+      url = pkg.src.gitRepoUrl or (throw "gitRepoUrl not found");
+      owner = pkg.src.owner or (throw "owner not found");
+      name = pkg.src.repo or (throw "repo not found");
+      commit = pkg.src.rev or (throw "rev not found");
+      repo = owner + "/" + name;
+      isGitHub = lib.hasPrefix "https://github.com" url;
+      isFullCommitId = builtins.match "[0-9a-f]{40}" commit != null;
+    in
+      assert isGitHub;
+      assert isFullCommitId;
+      # Force both strings here so that any throw happens while the caller's
+      # tryEval is still evaluating this value, not later when it is printed.
+      builtins.seq repo (builtins.seq commit { inherit repo; hash = commit; });
+
+  # TODO check all packages, not just top-level.
+  # Is it possible to find all fetchFromGitHub calls? I vaguely remember someone
+  # mentioning a script/expression that collects all sources for archiving purposes.
+
+  # One tryEval result per top-level attribute of pkgs.
+  tried = builtins.mapAttrs (_: pkg: builtins.tryEval (getRepoAndHash pkg)) pkgs;
+  # Only the attributes for which getRepoAndHash evaluated successfully.
+  succeeded = lib.filterAttrs (_: attempt: attempt.success) tried;
+in
+  # Attribute name -> { repo, hash } for every successful attribute.
+  builtins.mapAttrs (_: attempt: attempt.value) succeeded
A => main.tcl +64 -0
@@ 1,64 @@
+#!/usr/bin/env tclsh
+
+# SPDX-FileCopyrightText: Francesco Gazzetta <fgaz@fgaz.me>
+# SPDX-License-Identifier: MIT
+
+# Requires tcllib
+# Usage: in the nixpkgs directory, run the script.
+# Outputs all errors and mismatching sources to stdout
+# and logs what is happening to stderr.
+# You likely want to redirect stdout to a file and `tail --follow` it from
+# another console.
+
+package require json
+
+# Extra arguments passed to every curl invocation: a bearer-token header built
+# from $GITHUB_TOKEN when that environment variable is set, otherwise nothing
+# (a warning is logged to stderr in that case).
+set curl_args {}
+if {![info exists env(GITHUB_TOKEN)]} {
+    puts stderr "WARNING: \$GITHUB_TOKEN not found, will perform unauthenticated requests."
+} else {
+    lappend curl_args -H "Authorization: Bearer $env(GITHUB_TOKEN)"
+}
+
+# Check that commit `hash` belongs to the GitHub repository `repo`
+# (given as "owner/name").
+#
+# Runs curl against GitHub's commit search endpoint, restricted to `repo` and
+# `hash`, and reports on stdout:
+#   * "<hash> ∉ <repo>" when the search returns no items;
+#   * "<hash> ∈ <actual> ≠<repo>" when the first item belongs to a repository
+#     whose full name differs from `repo` (ignoring letter case);
+#   * "SKIPPING <repo> <hash>. Error: ..." when the request or the handling of
+#     its response raises an error (this line is also written to stderr).
+# Nothing is written to stdout when the commit is found in `repo`.
+# Progress is logged to stderr. Reads the global `curl_args` for extra curl
+# options.
+proc validate {repo hash} {
+    global curl_args
+    puts stderr "Validating $hash ∈ $repo"
+    try {
+        # Failed attempts:
+        # * https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#get-a-commit
+        #   Does not distinguish between commits in the upstream repo and forks
+        set url "https://api.github.com/search/commits?q=repo:$repo+hash:$hash"
+        set api_res [json::json2dict [exec -ignorestderr curl {*}$curl_args --fail -s --retry 5 --retry-max-time 30 --retry-all-errors $url]]
+        #puts stderr $api_res
+        set first_item [lindex [dict get $api_res items] 0]
+        if {$first_item eq ""} {
+            puts "$hash ∉ $repo"
+        } else {
+            set actual_repo [dict get $first_item repository full_name]
+            # GitHub owner and repository names are case-insensitive, while
+            # full_name comes back in GitHub's own casing. Compare ignoring
+            # case so that a spelling difference alone is not reported as a
+            # mismatch.
+            if {![string equal -nocase $repo $actual_repo]} {
+                puts "$hash ∈ $actual_repo ≠$repo"
+            }
+        }
+    } on error err {
+        # Report on stdout (the results stream) and on stderr (the log).
+        puts "SKIPPING $repo $hash. Error: $err"
+        puts stderr "SKIPPING $repo $hash. Error: $err"
+    }
+}
+
+puts stderr "VALIDATING RAW URLS"
+
+# Collect every github.com/<owner>/<repo>/commit/<40 hex digits> occurrence
+# reported by git grep in the current directory (-o prints only the matched
+# text, -h suppresses file names).
+set raw_urls [exec git grep -Eoh {github.com/[^/]+/[^/]+/commit/[a-f0-9]{40}}]
+
+# git grep prints one match per line. Split on newlines instead of letting
+# foreach parse the output as a Tcl list: the owner and repo parts match any
+# character except a slash, so a match may contain whitespace, braces or
+# backslashes that list parsing would mis-split or reject.
+# seen_urls remembers matches already handled, so a URL that occurs several
+# times costs only one API request; first-seen order is preserved.
+set seen_urls [dict create]
+foreach url [split $raw_urls \n] {
+    if {[dict exists $seen_urls $url]} continue
+    dict set seen_urls $url 1
+    # Skip anything that does not parse, rather than reusing the repo and hash
+    # left over from the previous iteration.
+    if {![regexp {github.com/([^/]+/[^/]+)/commit/([a-f0-9]{40})} $url _ repo hash]} continue
+    validate $repo $hash
+    # Pause one second between requests.
+    after 1000
+}
+
+puts stderr "VALIDATING SRCS"
+
+# Directory containing this script; find-gh-srcs.nix is expected next to it.
+set scriptdir [file normalize [file dirname [info script]]]
+# Somehow nix-instantiate --eval doesn't work
+# Evaluate find-gh-srcs.nix against the nixpkgs tree in the current directory
+# and parse its JSON output into a dict keyed by attribute name; each value
+# holds a repo and a hash entry.
+# Tcl substitutes the output of pwd into the Nix expression text, where it is
+# read as a Nix path literal.
+# NOTE(review): presumably breaks if the current directory path contains
+# whitespace or other characters not valid in a Nix path literal - confirm.
+set srcs [json::json2dict [exec -ignorestderr nix eval --file $scriptdir/find-gh-srcs.nix --apply "f: f { pkgs = import [pwd] {}; }" --json]]
+
+# Several attributes can share the same source, and validate only reports the
+# repo and hash, so validate each distinct pair once instead of repeating the
+# API request and the one-second pause.
+set seen_srcs [dict create]
+dict for {name src} $srcs {
+    set repo [dict get $src repo]
+    set hash [dict get $src hash]
+    set key [list $repo $hash]
+    if {[dict exists $seen_srcs $key]} continue
+    dict set seen_srcs $key 1
+    validate $repo $hash
+    # Pause one second between requests.
+    after 1000
+}