Files
vim-ale/internal/classify/classify.go
AJ ONeal 4f09649d30 fix(legacy): fix PACKAGE FORMAT CHANGE warnings in legacy cache export
Reduce PACKAGE FORMAT CHANGE warnings from 6,149 to ~3,200 by aligning
the legacy export field values with what the Node build-classifier extracts
from filenames.

classify.go:
- Split solaris/illumos/sunos into three separate OS patterns (Node triplet.js
  treats them as distinct values; lumping all to OSSunOS caused 1,483 drops)
- Add mips64r6/mips64r6el arch patterns before mips64 to prevent prefix match
- Add mips64le/mips64el distinct patterns before mips64 baseline
- Fix amd64[_-]?v2/v3/v4 regex to match underscore form (e.g. pathman amd64_v2)

buildmeta.go:
- Add ArchMIPS64R6, ArchMIPS64R6EL, ArchMIPS64LE, ArchMIPSLE constants

legacy.go legacyFieldBackport:
- Remove x86_64_v2/v3/v4 → x86_64 translation (classifier knows these values)
- Remove mips64r6/mips64r6el → mips64 translation (same reason)
- Add mipsle → mipsel translation (tpm['mipsle']={arch:'mipsel'})
- Add mips64le → mips64el translation (tpm['mips64le']={arch:'mips64el'})

legacy.go legacyARMArchFromFilename:
- Check "armv7" before "gnueabihf" so armv7-unknown-linux-gnueabihf → armv7
- Add armv6hf → armhf (shellcheck naming, tpm['armv6hf']=ARMHF)
- Add arm-5 → armel (Gitea naming: patternToTerms converts arm-5 → armv5 → armel)
- Add arm-7 → armv7 (Gitea naming: patternToTerms converts arm-7 → armv7)
- Add armv5 → armel (tpm['armv5']=T.ARMEL)
2026-03-11 17:27:37 -06:00

269 lines
8.7 KiB
Go

// Package classify extracts build targets from release asset filenames.
//
// Standard toolchains (goreleaser, cargo-dist, zig build) produce predictable
// filenames like "tool_0.1.0_linux_amd64.tar.gz" or
// "tool-0.1.0-x86_64-unknown-linux-musl.tar.gz". This package matches those
// patterns directly using regex, avoiding heuristic guessing.
//
// Detection order matters: architectures are checked longest-first to prevent
// "x86" from matching inside "x86_64", and OS checks use word boundaries.
package classify
import (
"path"
"regexp"
"strings"
"github.com/webinstall/webi-installers/internal/buildmeta"
)
// Result holds the classification of an asset filename.
type Result struct {
OS buildmeta.OS
Arch buildmeta.Arch
Libc buildmeta.Libc
Format buildmeta.Format
}
// Target returns the build target (OS + Arch + Libc).
func (r Result) Target() buildmeta.Target {
return buildmeta.Target{OS: r.OS, Arch: r.Arch, Libc: r.Libc}
}
// Filename classifies a release asset filename, returning the detected
// OS, architecture, libc, and archive format. Undetected fields are empty.
//
// OS is detected first because it can influence arch interpretation.
// For example, "windows-arm" in modern releases means ARM64, while
// bare "arm" on Linux historically means ARMv6.
func Filename(name string) Result {
lower := strings.ToLower(name)
os := detectOS(lower)
arch := detectArch(lower)
format := detectFormat(lower)
// .deb, .rpm, .snap are Linux-only package formats.
if os == "" && (format == buildmeta.FormatDeb || format == buildmeta.FormatRPM || format == buildmeta.FormatSnap) {
os = buildmeta.OSLinux
}
// .app.zip and .dmg are macOS-only formats.
if os == "" && (format == buildmeta.FormatAppZip || format == buildmeta.FormatDMG) {
os = buildmeta.OSDarwin
}
return Result{
OS: os,
Arch: arch,
Libc: detectLibc(lower),
Format: format,
}
}
// b is a boundary: start/end of string or a non-alphanumeric separator.
// Go's RE2 doesn't support \b, so we use this instead.
const b = `(?:^|[^a-zA-Z0-9])`
const bEnd = `(?:[^a-zA-Z0-9]|$)`
// --- OS detection ---
var osPatterns = []struct {
os buildmeta.OS
pattern *regexp.Regexp
}{
{buildmeta.OSDarwin, regexp.MustCompile(`(?i)(?:` + b + `(?:darwin|macos|macosx|osx|os-x|apple)` + bEnd + `|` + b + `mac` + bEnd + `)`)},
{buildmeta.OSLinux, regexp.MustCompile(`(?i)` + b + `linux` + bEnd)},
{buildmeta.OSWindows, regexp.MustCompile(`(?i)` + b + `(?:windows|win(?:32|64|x64|dows)?)` + bEnd + `|\.exe(?:\.xz)?$|\.msi$`)},
{buildmeta.OSFreeBSD, regexp.MustCompile(`(?i)` + b + `freebsd` + bEnd)},
{buildmeta.OSOpenBSD, regexp.MustCompile(`(?i)` + b + `openbsd` + bEnd)},
{buildmeta.OSNetBSD, regexp.MustCompile(`(?i)` + b + `netbsd` + bEnd)},
{buildmeta.OSDragonFly, regexp.MustCompile(`(?i)` + b + `dragonfly(?:bsd)?` + bEnd)},
// solaris, illumos, and sunos are distinct OS values in the Node build-classifier.
// Keep them separate so the legacy cache matches what the classifier extracts.
{buildmeta.OSSolaris, regexp.MustCompile(`(?i)` + b + `solaris` + bEnd)},
{buildmeta.OSIllumos, regexp.MustCompile(`(?i)` + b + `illumos` + bEnd)},
{buildmeta.OSSunOS, regexp.MustCompile(`(?i)` + b + `sunos` + bEnd)},
{buildmeta.OSAIX, regexp.MustCompile(`(?i)` + b + `aix` + bEnd)},
{buildmeta.OSAndroid, regexp.MustCompile(`(?i)` + b + `android` + bEnd)},
{buildmeta.OSPlan9, regexp.MustCompile(`(?i)` + b + `plan9` + bEnd)},
}
func detectOS(lower string) buildmeta.OS {
for _, p := range osPatterns {
if p.pattern.MatchString(lower) {
return p.os
}
}
return ""
}
// --- Arch detection ---
// Order matters: check longer/more-specific patterns first.
var archPatterns = []struct {
arch buildmeta.Arch
pattern *regexp.Regexp
}{
// Universal/fat binaries before specific arches.
{buildmeta.ArchUniversal2, regexp.MustCompile(`(?i)` + b + `(?:universal2?|fat)` + bEnd)},
// amd64 micro-levels before baseline — "amd64v3" must not fall through to amd64.
{buildmeta.ArchAMD64v4, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v4|amd64[_-]?v4|v4-amd64)`)},
{buildmeta.ArchAMD64v3, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v3|amd64[_-]?v3|v3-amd64)`)},
{buildmeta.ArchAMD64v2, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v2|amd64[_-]?v2|v2-amd64)`)},
// amd64 baseline before x86 — "x86_64" must not match as x86.
{buildmeta.ArchAMD64, regexp.MustCompile(`(?i)(?:x86[_-]64|amd64|x64|64-?bit)`)},
// arm64 before armv7/armv6 — "aarch64" must not match as arm.
{buildmeta.ArchARM64, regexp.MustCompile(`(?i)(?:aarch64|arm64|armv8)`)},
{buildmeta.ArchARMv7, regexp.MustCompile(`(?i)(?:armv7l?|arm-?v7|arm7|arm32|armhf)`)},
// armel and gnueabihf are ARMv6 soft/hard-float ABI names used in Debian and Rust triplets.
{buildmeta.ArchARMv6, regexp.MustCompile(`(?i)(?:armv6l?|arm-?v6|aarch32|armel|gnueabihf|` + b + `arm` + bEnd + `)`)},
{buildmeta.ArchARMv5, regexp.MustCompile(`(?i)(?:armv5)`)},
// ppc64le before ppc64. ppc64el is an alternative spelling used in Debian/Ubuntu.
{buildmeta.ArchPPC64LE, regexp.MustCompile(`(?i)(?:ppc64le|ppc64el)`)},
{buildmeta.ArchPPC64, regexp.MustCompile(`(?i)ppc64`)},
{buildmeta.ArchRISCV64, regexp.MustCompile(`(?i)riscv64`)},
{buildmeta.ArchS390X, regexp.MustCompile(`(?i)s390x`)},
{buildmeta.ArchLoong64, regexp.MustCompile(`(?i)loong(?:arch)?64`)},
// mips64r6 before mips64 — "mips64r6" contains "mips64" as a prefix.
{buildmeta.ArchMIPS64R6EL, regexp.MustCompile(`(?i)mips64r6e(?:l|le)`)},
{buildmeta.ArchMIPS64R6, regexp.MustCompile(`(?i)mips64r6`)},
{buildmeta.ArchMIPS64LE, regexp.MustCompile(`(?i)mips64(?:el|le)`)},
{buildmeta.ArchMIPS64, regexp.MustCompile(`(?i)mips64`)},
{buildmeta.ArchMIPSLE, regexp.MustCompile(`(?i)mips(?:el|le)`)},
{buildmeta.ArchMIPS, regexp.MustCompile(`(?i)` + b + `mips` + bEnd)},
// x86 last — must not steal x86_64.
{buildmeta.ArchX86, regexp.MustCompile(`(?i)(?:` + b + `x86` + bEnd + `|i[3-6]86|ia32|` + b + `386` + bEnd + `|32-?bit)`)},
}
func detectArch(lower string) buildmeta.Arch {
for _, p := range archPatterns {
if p.pattern.MatchString(lower) {
return p.arch
}
}
return ""
}
// --- Libc detection ---
var (
reMusl = regexp.MustCompile(`(?i)` + b + `musl` + bEnd)
reGNU = regexp.MustCompile(`(?i)` + b + `(?:gnu|glibc)` + bEnd)
reMSVC = regexp.MustCompile(`(?i)` + b + `msvc` + bEnd)
reStatic = regexp.MustCompile(`(?i)` + b + `static` + bEnd)
)
func detectLibc(lower string) buildmeta.Libc {
switch {
case reMusl.MatchString(lower):
return buildmeta.LibcMusl
case reGNU.MatchString(lower):
return buildmeta.LibcGNU
case reMSVC.MatchString(lower):
return buildmeta.LibcMSVC
case reStatic.MatchString(lower):
return buildmeta.LibcNone
}
return ""
}
// --- Format detection ---
// formatSuffixes maps file extensions to formats, longest first.
var formatSuffixes = []struct {
suffix string
format buildmeta.Format
}{
{".tar.gz", buildmeta.FormatTarGz},
{".tar.xz", buildmeta.FormatTarXz},
{".tar.zst", buildmeta.FormatTarZst},
{".tar.bz2", buildmeta.FormatTarBz2},
{".exe.xz", buildmeta.FormatExeXz},
{".app.zip", buildmeta.FormatAppZip},
{".tgz", buildmeta.FormatTarGz},
{".zip", buildmeta.FormatZip},
{".gz", buildmeta.FormatGz},
{".xz", buildmeta.FormatXz},
{".zst", buildmeta.FormatZst},
{".7z", buildmeta.Format7z},
{".exe", buildmeta.FormatExe},
{".msi", buildmeta.FormatMSI},
{".dmg", buildmeta.FormatDMG},
{".deb", buildmeta.FormatDeb},
{".rpm", buildmeta.FormatRPM},
{".snap", buildmeta.FormatSnap},
{".appx", buildmeta.FormatAppx},
{".apk", buildmeta.FormatAPK},
{".AppImage", buildmeta.FormatAppImage},
{".pkg", buildmeta.FormatPkg},
}
func detectFormat(lower string) buildmeta.Format {
// Use the base name to avoid directory separators confusing suffix matching.
base := path.Base(lower)
for _, s := range formatSuffixes {
if strings.HasSuffix(base, s.suffix) {
return s.format
}
}
return ""
}
// IsMetaAsset returns true if the filename is a non-installable meta file
// (checksums, signatures, source tarballs, documentation, etc.).
func IsMetaAsset(name string) bool {
lower := strings.ToLower(name)
for _, suffix := range []string{
".txt",
".sha256",
".sha256sum",
".sha512",
".sha512sum",
".md5",
".md5sum",
".sig",
".asc",
".pem",
".sbom",
".spdx",
".json.sig",
".sigstore",
".minisig",
"_src.tar.gz",
"_src.tar.xz",
"_src.zip",
"-src.tar.gz",
"-src.tar.xz",
"-src.zip",
".d.ts",
".pub",
} {
if strings.HasSuffix(lower, suffix) {
return true
}
}
for _, substr := range []string{
"checksums",
"sha256sum",
"sha512sum",
"buildable-artifact",
".LICENSE",
".README",
} {
if strings.Contains(lower, substr) {
return true
}
}
for _, exact := range []string{
"install.sh",
"install.ps1",
"compat.json",
"b3sums",
"dist-manifest.json",
} {
if lower == exact {
return true
}
}
return false
}