From 4f09649d30f45d82dd1d758e1bd3bd5530a4fa77 Mon Sep 17 00:00:00 2001 From: AJ ONeal Date: Wed, 11 Mar 2026 17:27:37 -0600 Subject: [PATCH] fix(legacy): fix PACKAGE FORMAT CHANGE warnings in legacy cache export MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce PACKAGE FORMAT CHANGE warnings from 6,149 to ~3,200 by aligning the legacy export field values with what the Node build-classifier extracts from filenames. classify.go: - Split solaris/illumos/sunos into three separate OS patterns (Node triplet.js treats them as distinct values; lumping all to OSSunOS caused 1,483 drops) - Add mips64r6/mips64r6el arch patterns before mips64 to prevent prefix match - Leave the existing mips64le/mips64el pattern in place ahead of the mips64 baseline (already distinct; no change needed) - Fix amd64[_-]?v2/v3/v4 regex to match underscore form (e.g. pathman amd64_v2) buildmeta.go: - Add ArchMIPS64R6 and ArchMIPS64R6EL constants; re-align the existing ArchMIPS64LE, ArchMIPS64, ArchMIPSLE, ArchMIPS declarations (gofmt only) legacy.go legacyFieldBackport: - Remove x86_64_v2/v3/v4 → x86_64 translation (classifier knows these values) - Remove mips64r6/mips64r6el → mips64 translation (same reason) - Add mipsle → mipsel translation (tpm['mipsle']={arch:'mipsel'}) - Add mips64le → mips64el translation (tpm['mips64le']={arch:'mips64el'}) legacy.go legacyARMArchFromFilename: - Check "armv7" before "gnueabihf" so armv7-unknown-linux-gnueabihf → armv7 - Add armv6hf → armhf (shellcheck naming, tpm['armv6hf']=ARMHF) - Add arm-5 → armel (Gitea naming: patternToTerms converts arm-5 → armv5 → armel) - Add arm-7 → armv7 (Gitea naming: patternToTerms converts arm-7 → armv7) - Add armv5 → armel (tpm['armv5']=T.ARMEL) --- internal/buildmeta/buildmeta.go | 10 +-- internal/classify/classify.go | 15 ++-- internal/storage/legacy.go | 67 +++++++++++++----- internal/storage/legacy_test.go | 121 +++++++++++++++++++++++++++++--- 4 files changed, 178 insertions(+), 35 deletions(-) diff --git a/internal/buildmeta/buildmeta.go b/internal/buildmeta/buildmeta.go index 8309e00..c5be5da 100644 
--- a/internal/buildmeta/buildmeta.go +++ b/internal/buildmeta/buildmeta.go @@ -50,10 +50,12 @@ const ( ArchRISCV64 Arch = "riscv64" ArchS390X Arch = "s390x" ArchLoong64 Arch = "loong64" - ArchMIPS64LE Arch = "mips64le" - ArchMIPS64 Arch = "mips64" - ArchMIPSLE Arch = "mipsle" - ArchMIPS Arch = "mips" + ArchMIPS64LE Arch = "mips64le" + ArchMIPS64 Arch = "mips64" + ArchMIPS64R6EL Arch = "mips64r6el" + ArchMIPS64R6 Arch = "mips64r6" + ArchMIPSLE Arch = "mipsle" + ArchMIPS Arch = "mips" // Universal (fat) binary architectures for macOS. ArchUniversal1 Arch = "universal1" // PPC + x86 (Rosetta 1 era) diff --git a/internal/classify/classify.go b/internal/classify/classify.go index 22e52c2..8458155 100644 --- a/internal/classify/classify.go +++ b/internal/classify/classify.go @@ -77,7 +77,11 @@ var osPatterns = []struct { {buildmeta.OSOpenBSD, regexp.MustCompile(`(?i)` + b + `openbsd` + bEnd)}, {buildmeta.OSNetBSD, regexp.MustCompile(`(?i)` + b + `netbsd` + bEnd)}, {buildmeta.OSDragonFly, regexp.MustCompile(`(?i)` + b + `dragonfly(?:bsd)?` + bEnd)}, - {buildmeta.OSSunOS, regexp.MustCompile(`(?i)` + b + `(?:sunos|solaris|illumos)` + bEnd)}, + // solaris, illumos, and sunos are distinct OS values in the Node build-classifier. + // Keep them separate so the legacy cache matches what the classifier extracts. + {buildmeta.OSSolaris, regexp.MustCompile(`(?i)` + b + `solaris` + bEnd)}, + {buildmeta.OSIllumos, regexp.MustCompile(`(?i)` + b + `illumos` + bEnd)}, + {buildmeta.OSSunOS, regexp.MustCompile(`(?i)` + b + `sunos` + bEnd)}, {buildmeta.OSAIX, regexp.MustCompile(`(?i)` + b + `aix` + bEnd)}, {buildmeta.OSAndroid, regexp.MustCompile(`(?i)` + b + `android` + bEnd)}, {buildmeta.OSPlan9, regexp.MustCompile(`(?i)` + b + `plan9` + bEnd)}, @@ -102,9 +106,9 @@ var archPatterns = []struct { // Universal/fat binaries before specific arches. 
{buildmeta.ArchUniversal2, regexp.MustCompile(`(?i)` + b + `(?:universal2?|fat)` + bEnd)}, // amd64 micro-levels before baseline — "amd64v3" must not fall through to amd64. - {buildmeta.ArchAMD64v4, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v4|amd64v4|v4-amd64)`)}, - {buildmeta.ArchAMD64v3, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v3|amd64v3|v3-amd64)`)}, - {buildmeta.ArchAMD64v2, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v2|amd64v2|v2-amd64)`)}, + {buildmeta.ArchAMD64v4, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v4|amd64[_-]?v4|v4-amd64)`)}, + {buildmeta.ArchAMD64v3, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v3|amd64[_-]?v3|v3-amd64)`)}, + {buildmeta.ArchAMD64v2, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v2|amd64[_-]?v2|v2-amd64)`)}, // amd64 baseline before x86 — "x86_64" must not match as x86. {buildmeta.ArchAMD64, regexp.MustCompile(`(?i)(?:x86[_-]64|amd64|x64|64-?bit)`)}, // arm64 before armv7/armv6 — "aarch64" must not match as arm. @@ -119,6 +123,9 @@ var archPatterns = []struct { {buildmeta.ArchRISCV64, regexp.MustCompile(`(?i)riscv64`)}, {buildmeta.ArchS390X, regexp.MustCompile(`(?i)s390x`)}, {buildmeta.ArchLoong64, regexp.MustCompile(`(?i)loong(?:arch)?64`)}, + // mips64r6 before mips64 — "mips64r6" contains "mips64" as a prefix. The little-endian R6 pattern accepts both the "el" and "le" suffix spellings, like the mips64/mips patterns below. + {buildmeta.ArchMIPS64R6EL, regexp.MustCompile(`(?i)mips64r6(?:el|le)`)}, + {buildmeta.ArchMIPS64R6, regexp.MustCompile(`(?i)mips64r6`)}, {buildmeta.ArchMIPS64LE, regexp.MustCompile(`(?i)mips64(?:el|le)`)}, {buildmeta.ArchMIPS64, regexp.MustCompile(`(?i)mips64`)}, {buildmeta.ArchMIPSLE, regexp.MustCompile(`(?i)mips(?:el|le)`)}, diff --git a/internal/storage/legacy.go b/internal/storage/legacy.go index 0f6f74f..aaa5fe2 100644 --- a/internal/storage/legacy.go +++ b/internal/storage/legacy.go @@ -82,15 +82,19 @@ func (a Asset) toLegacy() LegacyAsset { // - universal2/universal1 → x86_64: classifier maps "universal" in filename // to x86_64. The darwin WATERFALL falls back aarch64→x86_64, so arm64 // users still receive these builds. 
-// - x86_64_v2 → x86_64: classifier doesn't recognize micro-arch level suffixes. -// - mips64r6/mips64r6el → mips64: MIPS Release 6 variants map to the base arch. -// - ARM (filename-based): gnueabihf/armhf→armhf, armel→armel, armv5→armel, -// armv7a→armv7a. Go normalizes these; Node classifier preserves the -// original Debian/Rust naming. See legacyARMArchFromFilename. +// - mipsle → mipsel: classifier normalizes both spellings to "mipsel". +// - mips64le → mips64el: classifier normalizes both spellings to "mips64el". +// - ARM (filename-based): explicit armvN takes priority over ABI tags; +// gnueabihf/armhf→armhf, armel→armel, armv5→armel, armv7a→armv7a. +// Go normalizes these; Node classifier preserves the original naming. +// See legacyARMArchFromFilename. // // Note: solaris/illumos/sunos are kept as-is. The build-classifier (triplet.js) // recognizes all three as distinct values, and the live cache uses them directly. // +// Note: x86_64_v2/v3/v4 and mips64r6/mips64r6el are kept as-is. The +// build-classifier knows these exact values and expects them to match. +// // Package-specific rules replicate per-package overrides in production's releases.js: // - ffmpeg: Windows .gz → .exe (prod releases.js: rel.ext = 'exe') func legacyFieldBackport(pkg string, a Asset) Asset { @@ -99,14 +103,12 @@ func legacyFieldBackport(pkg string, a Asset) Asset { a.Arch = "x86_64" } - // x86_64 micro-arch levels: classifier doesn't know these suffixes. - if a.Arch == "x86_64_v2" || a.Arch == "x86_64_v3" || a.Arch == "x86_64_v4" { - a.Arch = "x86_64" + // MIPS spelling normalization: classifier maps both spellings to the "el" form. + if a.Arch == "mipsle" { + a.Arch = "mipsel" } - - // MIPS Release 6 variants: map to the base mips64 arch. 
- if a.Arch == "mips64r6" || a.Arch == "mips64r6el" { - a.Arch = "mips64" + if a.Arch == "mips64le" { + a.Arch = "mips64el" } // ARM arch: the Node classifier re-parses filenames and expects the cache @@ -136,18 +138,47 @@ func legacyFieldBackport(pkg string, a Asset) Asset { // Go canonical arch value already matches what the classifier would extract. // // The Node classifier's extraction rules differ from Go's normalization: -// - gnueabihf (Rust triplet) / armhf (Debian) → "armhf" (not "armv6" or "armv7") +// - armv7a (explicit) → "armv7a" (not "armv7") +// - armv7 (explicit, e.g. "armv7-unknown-linux-gnueabihf") → "armv7" +// The explicit version number takes priority over the ABI suffix. +// - arm-5 / arm-7 (Gitea naming: "linux-arm-5", "linux-arm-7") → "armel" / "armv7" +// patternToTerms converts "arm-5" → "armv5" and "arm-7" → "armv7". +// - armv6hf (shellcheck naming) → "armhf" (tpm['armv6hf'] = ARMHF) +// - gnueabihf (Rust triplet, no explicit armvN) → "armhf" +// - armhf (Debian armhf) → "armhf" // - armel (Debian soft-float ABI) → "armel" (not "armv6") -// - armv5 → "armel" (Node tiered map: armv5 falls back to armel) -// - armv7a → "armv7a" (not "armv7") +// - armv5 (explicit) → "armel" (Node tiered map: armv5 falls back to armel) func legacyARMArchFromFilename(filename string) string { lower := strings.ToLower(filename) - if strings.Contains(lower, "gnueabihf") || strings.Contains(lower, "armhf") { - return "armhf" - } + // armv7a before armv7 — "armv7a" contains "armv7" as a prefix. if strings.Contains(lower, "armv7a") { return "armv7a" } + // Explicit armv7 in filename: takes priority over ABI suffix (gnueabihf). + // e.g. "armv7-unknown-linux-gnueabihf" → classifier extracts "armv7". + if strings.Contains(lower, "armv7") { + return "armv7" + } + // armv6hf (shellcheck naming): tpm['armv6hf'] = ARMHF → "armhf". + if strings.Contains(lower, "armv6hf") { + return "armhf" + } + // Gitea arm-N naming: "linux-arm-5" → patternToTerms → "armv5" → armel. 
+ if strings.Contains(lower, "arm-5") { + return "armel" + } + // Gitea arm-N naming: "linux-arm-7" → patternToTerms → "armv7" → armv7. + if strings.Contains(lower, "arm-7") { + return "armv7" + } + // Rust gnueabihf triplet (no explicit armvN): classifier → "armhf". + if strings.Contains(lower, "gnueabihf") { + return "armhf" + } + // Debian armhf (hard-float ABI): classifier → "armhf". + if strings.Contains(lower, "armhf") { + return "armhf" + } if strings.Contains(lower, "armel") { return "armel" } diff --git a/internal/storage/legacy_test.go b/internal/storage/legacy_test.go index 73e8175..b06d315 100644 --- a/internal/storage/legacy_test.go +++ b/internal/storage/legacy_test.go @@ -231,8 +231,9 @@ func TestExportLegacyTranslations(t *testing.T) { } }) - t.Run("x86_64_v2_translated_to_x86_64", func(t *testing.T) { - // Micro-arch level suffixes (v2/v3/v4) are not recognized by the classifier. + t.Run("x86_64_v2_kept_as_is", func(t *testing.T) { + // Micro-arch level suffixes (v2/v3/v4): classifier has tpm entries for + // amd64_v2 → x86_64_v2, so the cache must match with the canonical value. pd := storage.PackageData{ Assets: []storage.Asset{ {Filename: "tool-linux-x86_64_v2.tar.gz", OS: "linux", Arch: "x86_64_v2", Format: ".tar.gz"}, @@ -242,12 +243,13 @@ func TestExportLegacyTranslations(t *testing.T) { if len(lc.Releases) != 1 { t.Fatalf("releases = %d, want 1", len(lc.Releases)) } - if lc.Releases[0].Arch != "x86_64" { - t.Errorf("arch = %q, want x86_64", lc.Releases[0].Arch) + if lc.Releases[0].Arch != "x86_64_v2" { + t.Errorf("arch = %q, want x86_64_v2 (kept as-is, classifier knows this value)", lc.Releases[0].Arch) } }) - t.Run("mips64r6_translated_to_mips64", func(t *testing.T) { + t.Run("mips64r6_kept_as_is", func(t *testing.T) { + // mips64r6/mips64r6el: classifier has tpm entries for these exact values. 
pd := storage.PackageData{ Assets: []storage.Asset{ {Filename: "tool-linux-mips64r6.tar.gz", OS: "linux", Arch: "mips64r6", Format: ".tar.gz"}, @@ -258,10 +260,43 @@ func TestExportLegacyTranslations(t *testing.T) { if len(lc.Releases) != 2 { t.Fatalf("releases = %d, want 2", len(lc.Releases)) } - for _, r := range lc.Releases { - if r.Arch != "mips64" { - t.Errorf("arch = %q, want mips64 (mips64r6* → mips64)", r.Arch) - } + if lc.Releases[0].Arch != "mips64r6" { + t.Errorf("arch = %q, want mips64r6 (kept as-is, classifier knows this value)", lc.Releases[0].Arch) + } + if lc.Releases[1].Arch != "mips64r6el" { + t.Errorf("arch = %q, want mips64r6el (kept as-is, classifier knows this value)", lc.Releases[1].Arch) + } + }) + + t.Run("mipsle_to_mipsel", func(t *testing.T) { + // mipsle: Go uses mipsle (GOARCH), Node classifier tpm['mipsle'] = {arch:'mipsel'}. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "caddy_linux_mipsle.tar.gz", OS: "linux", Arch: "mipsle", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("caddy", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "mipsel" { + t.Errorf("arch = %q, want mipsel (mipsle → mipsel)", lc.Releases[0].Arch) + } + }) + + t.Run("mips64le_to_mips64el", func(t *testing.T) { + // mips64le: Go uses mips64le (GOARCH), Node classifier tpm['mips64le'] = {arch:'mips64el'}. 
+ pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "gitea-linux-mips64le.tar.gz", OS: "linux", Arch: "mips64le", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("gitea", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "mips64el" { + t.Errorf("arch = %q, want mips64el (mips64le → mips64el)", lc.Releases[0].Arch) } }) @@ -425,6 +460,74 @@ func TestExportLegacyTranslations(t *testing.T) { t.Errorf("arch = %q, want armv6 (no translation for armv6l)", lc.Releases[0].Arch) } }) + + t.Run("arm_armv7_gnueabihf_to_armv7", func(t *testing.T) { + // Files like "ripgrep-14.1.0-armv7-unknown-linux-gnueabihf.tar.gz": + // Go classifies as armv7; the "armv7" term in filename takes priority + // over the gnueabihf ABI suffix. Classifier sees "armv7" → extracts armv7. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "ripgrep-14.1.0-armv7-unknown-linux-gnueabihf.tar.gz", OS: "linux", Arch: "armv7", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("ripgrep", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armv7" { + t.Errorf("arch = %q, want armv7 (armv7 in filename takes priority over gnueabihf)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_armv6hf_to_armhf", func(t *testing.T) { + // shellcheck uses "armv6hf" naming; classifier tpm['armv6hf'] = ARMHF → "armhf". 
+ pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "shellcheck-v0.9.0.linux.armv6hf.tar.xz", OS: "linux", Arch: "armv6", Format: ".tar.xz"}, + }, + } + lc, _ := storage.ExportLegacy("shellcheck", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armhf" { + t.Errorf("arch = %q, want armhf (armv6hf → armhf)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_gitea_arm5_to_armel", func(t *testing.T) { + // Gitea uses "arm-5" naming; patternToTerms converts to "armv5" → tpm → "armel". + // Go sees \barm\b → classifies as armv6. Legacy export must correct to armel. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "gitea-1.20.0-linux-arm-5", OS: "linux", Arch: "armv6", Format: ""}, + }, + } + lc, _ := storage.ExportLegacy("gitea", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armel" { + t.Errorf("arch = %q, want armel (arm-5 → armel)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_gitea_arm7_to_armv7", func(t *testing.T) { + // Gitea uses "arm-7" naming; patternToTerms converts to "armv7" → tpm → "armv7". + // Go sees \barm\b → classifies as armv6. Legacy export must correct to armv7. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "gitea-1.20.0-linux-arm-7", OS: "linux", Arch: "armv6", Format: ""}, + }, + } + lc, _ := storage.ExportLegacy("gitea", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armv7" { + t.Errorf("arch = %q, want armv7 (arm-7 → armv7)", lc.Releases[0].Arch) + } + }) } // TestExportLegacyMixed verifies correct counting when multiple drop categories