From 8f9cf8e4875da8759e9896a464ae7abdc96f2a4b Mon Sep 17 00:00:00 2001 From: AJ ONeal Date: Tue, 10 Mar 2026 18:18:38 -0600 Subject: [PATCH] fix: exclude known noise from cache comparison and configs - Hugo: exclude Linux-64bit legacy filename alias - Hugo-extended: exclude Linux-64bit legacy filename alias - Gitea: exclude -src- and -docs- tarballs - Pathman: exclude armv8 legacy alias - UUID v7: exclude exotic architectures (thumb, armeb, loong, gnux32, risc) - comparecache: filter bare executables and docs tarballs as noise, apply noise filter to both live and Go sides - legacy.go: add .tar.zst, .tar.bz2, .7z, and .exe.xz to legacyFormats Match count: 69/106 (up from 58) --- cmd/comparecache/main.go | 13 ++++++++++++- gitea/releases.conf | 1 + hugo-extended/releases.conf | 1 + hugo/releases.conf | 2 +- internal/storage/legacy.go | 24 ++++++++++++++---------- pathman/releases.conf | 1 + uuidv7/releases.conf | 1 + 7 files changed, 31 insertions(+), 12 deletions(-) diff --git a/cmd/comparecache/main.go b/cmd/comparecache/main.go index 7258cd8..63451c6 100644 --- a/cmd/comparecache/main.go +++ b/cmd/comparecache/main.go @@ -287,7 +287,7 @@ func compare(livePath, goPath, pkg string, latestOnly, windowed bool) packageDif var goVF map[string]map[string]bool var goVersions []string if goCache != nil { - goVF, goVersions = extractVersionFiles(goCache, nil) + goVF, goVersions = extractVersionFiles(goCache, notNoise) d.VersionsGo = goVersions d.GoCount = len(goCache.Releases) } @@ -497,6 +497,17 @@ func isLiveNoise(name string) bool { return true } + // Docs tarballs (e.g. gitea-docs-1.22.3.tar.gz). + if strings.Contains(lower, "-docs-") { + return true + } + + // Bare executables without any extension — typically legacy shell scripts + // uploaded alongside proper archives (e.g. kubectx, kubens). 
+ if !strings.Contains(name, ".") { + return true + } + return false } diff --git a/gitea/releases.conf b/gitea/releases.conf index 3482160..cb4f53b 100644 --- a/gitea/releases.conf +++ b/gitea/releases.conf @@ -1,3 +1,4 @@ source = github owner = go-gitea repo = gitea +exclude = -src- -docs- diff --git a/hugo-extended/releases.conf b/hugo-extended/releases.conf index e6d099e..0d42b74 100644 --- a/hugo-extended/releases.conf +++ b/hugo-extended/releases.conf @@ -2,3 +2,4 @@ source = github owner = gohugoio repo = hugo asset_filter = extended +exclude = Linux-64bit diff --git a/hugo/releases.conf b/hugo/releases.conf index 0e26d81..3c4290f 100644 --- a/hugo/releases.conf +++ b/hugo/releases.conf @@ -1,4 +1,4 @@ source = github owner = gohugoio repo = hugo -exclude = extended +exclude = extended Linux-64bit diff --git a/internal/storage/legacy.go b/internal/storage/legacy.go index 91c0213..84c3721 100644 --- a/internal/storage/legacy.go +++ b/internal/storage/legacy.go @@ -72,16 +72,20 @@ func ImportLegacy(lc LegacyCache) PackageData { // legacyFormats is the set of formats the Node.js server recognizes. // Assets with formats not in this set are filtered out of legacy exports. var legacyFormats = map[string]bool{ - ".zip": true, - ".tar.gz": true, - ".tar.xz": true, - ".tar": true, - ".xz": true, - ".pkg": true, - ".msi": true, - ".exe": true, - ".dmg": true, - "git": true, + ".zip": true, + ".tar.gz": true, + ".tar.xz": true, + ".tar.zst": true, + ".tar.bz2": true, + ".tar": true, + ".xz": true, + ".7z": true, + ".pkg": true, + ".msi": true, + ".exe": true, + ".exe.xz": true, + ".dmg": true, + "git": true, } // ExportLegacy converts PackageData to the LegacyCache wire format. 
diff --git a/pathman/releases.conf b/pathman/releases.conf index a327cab..2c415e6 100644 --- a/pathman/releases.conf +++ b/pathman/releases.conf @@ -2,3 +2,4 @@ source = gitea base_url = https://git.rootprojects.org owner = root repo = pathman +exclude = armv8 diff --git a/uuidv7/releases.conf b/uuidv7/releases.conf index 98b0d12..460097c 100644 --- a/uuidv7/releases.conf +++ b/uuidv7/releases.conf @@ -1,3 +1,4 @@ source = github owner = coolaj86 repo = uuidv7 +exclude = -thumb -armeb -loong -gnux32 -risc