Files
vim-ale/internal/classify/classify.go
AJ ONeal 34cfe32492 feat: add arch/libc fallback chains and version waterfall resolution
Prefer latest version over best CPU match. An amd64v4 machine gets
v2.0.0 (baseline only) instead of v1.0.0 (which had a v4 build)
because recency beats specificity.

- buildmeta: add amd64v2/v3/v4 micro-levels, ArchFallbacks, LibcFallbacks
- classify: detect micro-arch levels, treat Windows "arm" as ARM64
- platlatest: add Resolve() that walks fallback chains picking newest
2026-03-09 21:44:06 -06:00

181 lines
5.9 KiB
Go

// Package classify extracts build targets from release asset filenames.
//
// Standard toolchains (goreleaser, cargo-dist, zig build) produce predictable
// filenames like "tool_0.1.0_linux_amd64.tar.gz" or
// "tool-0.1.0-x86_64-unknown-linux-musl.tar.gz". This package matches those
// patterns directly using regex, avoiding heuristic guessing.
//
// Detection order matters: architectures are checked longest-first to prevent
// "x86" from matching inside "x86_64", and OS checks use word boundaries.
package classify
import (
"path"
"regexp"
"strings"
"github.com/webinstall/webi-installers/internal/buildmeta"
)
// Result holds the classification of an asset filename.
//
// Filename fills each field from its own detector; a field is left as the
// empty string when that detector finds no match.
type Result struct {
// OS is the operating system detected from the filename.
OS buildmeta.OS
// Arch is the CPU architecture detected from the filename.
Arch buildmeta.Arch
// Libc is the C library flavor detected from the filename.
Libc buildmeta.Libc
// Format is the archive or package format implied by the file extension.
Format buildmeta.Format
}
// Target projects the classification onto a buildmeta.Target by copying the
// OS, Arch, and Libc fields. Format is not copied.
func (r Result) Target() buildmeta.Target {
	var target buildmeta.Target
	target.OS = r.OS
	target.Arch = r.Arch
	target.Libc = r.Libc
	return target
}
// Filename inspects a release asset filename and reports the OS,
// architecture, libc, and archive format it advertises. Any field that
// cannot be detected is left empty.
//
// All detectors run against a lowercased copy of name. The detected OS is
// then used to reinterpret the architecture: "windows-arm" in modern
// releases means ARM64, while bare "arm" on Linux historically means ARMv6.
func Filename(name string) Result {
	normalized := strings.ToLower(name)

	res := Result{
		OS:     detectOS(normalized),
		Arch:   detectArch(normalized),
		Libc:   detectLibc(normalized),
		Format: detectFormat(normalized),
	}

	// Bare "arm" is reported as ARMv6 by detectArch. On Windows that label
	// almost certainly means ARM64 ("Windows on ARM"); Windows never shipped
	// ARMv6 binaries. Because the check is on the detected value, an explicit
	// "armv6" in a Windows filename is remapped as well.
	if res.Arch == buildmeta.ArchARMv6 && res.OS == buildmeta.OSWindows {
		res.Arch = buildmeta.ArchARM64
	}

	return res
}
// b and bEnd are regexp fragments that stand in for a token boundary.
//
// b matches the start of the string or one non-alphanumeric separator;
// bEnd matches one non-alphanumeric separator or the end of the string.
//
// Go's regexp package does support \b, but \b treats "_" as a word
// character, so it would not match between "_" and a letter in names such as
// "tool_linux_amd64". These fragments treat "_", "-", "." and any other
// non-alphanumeric character as a separator. Unlike \b, they consume the
// separator character when one is present.
const (
	b    = `(?:^|[^a-zA-Z0-9])`
	bEnd = `(?:[^a-zA-Z0-9]|$)`
)
// --- OS detection ---

// osPatterns lists the OS matchers in the order detectOS tries them; the
// first pattern that matches decides the OS, so more specific entries must
// come before entries they overlap with.
var osPatterns = []struct {
	os      buildmeta.OS
	pattern *regexp.Regexp
}{
	{buildmeta.OSDarwin, regexp.MustCompile(`(?i)` + b + `(?:darwin|macos|osx|os-x|apple)` + bEnd)},
	// Android target triples ("aarch64-linux-android",
	// "armv7-linux-androideabi") embed "linux", so they must be recognized
	// before the plain Linux matcher claims them. Only the adjacent
	// "linux-android" form is matched here; a filename that merely mentions
	// android elsewhere (e.g. a tool named "android-…" built for Linux) still
	// resolves through the entries below.
	{buildmeta.OSAndroid, regexp.MustCompile(`(?i)` + b + `linux[_-]android(?:eabi)?` + bEnd)},
	{buildmeta.OSLinux, regexp.MustCompile(`(?i)` + b + `linux` + bEnd)},
	// Windows is also inferred from the .exe, .exe.xz, and .msi extensions.
	{buildmeta.OSWindows, regexp.MustCompile(`(?i)` + b + `(?:windows|win(?:32|64|dows)?)` + bEnd + `|\.exe(?:\.xz)?$|\.msi$`)},
	{buildmeta.OSFreeBSD, regexp.MustCompile(`(?i)` + b + `freebsd` + bEnd)},
	{buildmeta.OSSunOS, regexp.MustCompile(`(?i)` + b + `(?:sunos|solaris|illumos)` + bEnd)},
	{buildmeta.OSAIX, regexp.MustCompile(`(?i)` + b + `aix` + bEnd)},
	// Generic android token, tried last so that an explicit OS name wins.
	{buildmeta.OSAndroid, regexp.MustCompile(`(?i)` + b + `android` + bEnd)},
}
// detectOS walks osPatterns in order and returns the OS of the first entry
// whose pattern matches lower. It returns the empty OS when nothing matches.
//
// Filename passes an already-lowercased name; the patterns themselves are
// case-insensitive as well.
func detectOS(lower string) buildmeta.OS {
	for i := range osPatterns {
		entry := &osPatterns[i]
		if !entry.pattern.MatchString(lower) {
			continue
		}
		return entry.os
	}
	return ""
}
// --- Arch detection ---

// archPatterns lists the architecture matchers in the order detectArch tries
// them; the first match wins.
//
// Order matters: check longer/more-specific patterns first.
var archPatterns = []struct {
	arch    buildmeta.Arch
	pattern *regexp.Regexp
}{
	// amd64 micro-levels before baseline — "amd64v3" must not fall through to amd64.
	{buildmeta.ArchAMD64v4, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v4|amd64v4|v4-amd64)`)},
	{buildmeta.ArchAMD64v3, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v3|amd64v3|v3-amd64)`)},
	{buildmeta.ArchAMD64v2, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v2|amd64v2|v2-amd64)`)},
	// amd64 baseline before x86 — "x86_64" must not match as x86.
	{buildmeta.ArchAMD64, regexp.MustCompile(`(?i)(?:x86[_-]64|amd64|x64|64-bit)`)},
	// arm64 before armv7/armv6 — "aarch64" must not match as arm.
	{buildmeta.ArchARM64, regexp.MustCompile(`(?i)(?:aarch64|arm64|armv8)`)},
	// "arm_v7" / "arm_v6" are accepted alongside the "-" forms, mirroring the
	// x86[_-]64 patterns above. Without this, "arm_v7" would fall through to
	// the bare "arm" alternative and be reported as ARMv6.
	{buildmeta.ArchARMv7, regexp.MustCompile(`(?i)(?:armv7l?|arm[_-]?v7|arm32)`)},
	{buildmeta.ArchARMv6, regexp.MustCompile(`(?i)(?:armv6l?|arm[_-]?v6|aarch32|` + b + `arm` + bEnd + `)`)},
	// ppc64le before ppc64.
	{buildmeta.ArchPPC64LE, regexp.MustCompile(`(?i)ppc64le`)},
	{buildmeta.ArchPPC64, regexp.MustCompile(`(?i)ppc64`)},
	{buildmeta.ArchS390X, regexp.MustCompile(`(?i)s390x`)},
	{buildmeta.ArchMIPS64, regexp.MustCompile(`(?i)mips64`)},
	{buildmeta.ArchMIPS, regexp.MustCompile(`(?i)` + b + `mips` + bEnd)},
	// x86 last — must not steal x86_64. Bare "386" is Go's GOARCH spelling
	// (e.g. "tool_1.0.0_linux_386.zip"); it is boundary-delimited so that it
	// cannot match inside a longer number.
	{buildmeta.ArchX86, regexp.MustCompile(`(?i)(?:` + b + `(?:x86|386)` + bEnd + `|i[3-6]86|32-bit)`)},
}
// detectArch returns the architecture of the first archPatterns entry that
// matches lower, honoring the table's order. The result is the empty Arch
// when no entry matches.
func detectArch(lower string) buildmeta.Arch {
	var found buildmeta.Arch = ""
	for idx := 0; idx < len(archPatterns); idx++ {
		if archPatterns[idx].pattern.MatchString(lower) {
			found = archPatterns[idx].arch
			break
		}
	}
	return found
}
// --- Libc detection ---

// Boundary-delimited libc markers used by detectLibc.
//
// The musl and gnu matchers also accept the "eabi" / "eabihf" ABI suffixes
// that 32-bit ARM target triples append to the libc name (for example
// "armv7-unknown-linux-gnueabihf" or "arm-unknown-linux-musleabihf").
// Without them the trailing boundary rejects the token and no libc is
// reported for those assets.
var (
	reMusl   = regexp.MustCompile(`(?i)` + b + `musl(?:eabi(?:hf)?)?` + bEnd)
	reGNU    = regexp.MustCompile(`(?i)` + b + `(?:gnu(?:eabi(?:hf)?)?|glibc)` + bEnd)
	reMSVC   = regexp.MustCompile(`(?i)` + b + `msvc` + bEnd)
	reStatic = regexp.MustCompile(`(?i)` + b + `static` + bEnd)
)
// detectLibc reports the libc flavor named in lower. The markers are checked
// in a fixed priority — musl, then gnu/glibc, then msvc, then static — and
// the first hit decides the result. A "static" marker maps to
// buildmeta.LibcNone. The empty Libc is returned when no marker is present.
func detectLibc(lower string) buildmeta.Libc {
	if reMusl.MatchString(lower) {
		return buildmeta.LibcMusl
	}
	if reGNU.MatchString(lower) {
		return buildmeta.LibcGNU
	}
	if reMSVC.MatchString(lower) {
		return buildmeta.LibcMSVC
	}
	if reStatic.MatchString(lower) {
		return buildmeta.LibcNone
	}
	return ""
}
// --- Format detection ---

// formatSuffixes maps filename suffixes to formats. detectFormat returns the
// first entry whose suffix matches, so every compound suffix (".tar.gz",
// ".exe.xz", ".app.zip", …) is listed before the simple suffix it ends with
// (".gz", ".xz", ".zip", …).
var formatSuffixes = []struct {
	suffix string
	format buildmeta.Format
}{
	// Compound suffixes.
	{suffix: ".tar.gz", format: buildmeta.FormatTarGz},
	{suffix: ".tar.xz", format: buildmeta.FormatTarXz},
	{suffix: ".tar.zst", format: buildmeta.FormatTarZst},
	{suffix: ".exe.xz", format: buildmeta.FormatExeXz},
	{suffix: ".app.zip", format: buildmeta.FormatAppZip},
	// Simple suffixes.
	{suffix: ".tgz", format: buildmeta.FormatTarGz},
	{suffix: ".zip", format: buildmeta.FormatZip},
	{suffix: ".gz", format: buildmeta.FormatGz},
	{suffix: ".xz", format: buildmeta.FormatXz},
	{suffix: ".zst", format: buildmeta.FormatZst},
	{suffix: ".7z", format: buildmeta.Format7z},
	{suffix: ".exe", format: buildmeta.FormatExe},
	{suffix: ".msi", format: buildmeta.FormatMSI},
	{suffix: ".dmg", format: buildmeta.FormatDMG},
	{suffix: ".pkg", format: buildmeta.FormatPkg},
}
// detectFormat returns the format of the first formatSuffixes entry whose
// suffix ends the final path element of lower, or the empty Format when no
// suffix applies.
func detectFormat(lower string) buildmeta.Format {
	// Only the last path element is examined, so directory separators cannot
	// interfere with suffix matching.
	filename := path.Base(lower)
	for i := range formatSuffixes {
		if candidate := formatSuffixes[i]; strings.HasSuffix(filename, candidate.suffix) {
			return candidate.format
		}
	}
	return ""
}