From 59b2956d60cc4f7615208c0fbb850b9f50027d64 Mon Sep 17 00:00:00 2001
From: AJ ONeal
Date: Sat, 16 May 2026 21:35:18 -0600
Subject: [PATCH] feat(webid): add UA detection and platform resolve packages

---
 internal/resolve/resolve.go   | 341 ++++++++++++++++++++++++++++++++++
 internal/uadetect/uadetect.go | 247 +++++++++++++++++++++++++++
 2 files changed, 588 insertions(+)
 create mode 100644 internal/resolve/resolve.go
 create mode 100644 internal/uadetect/uadetect.go

diff --git a/internal/resolve/resolve.go b/internal/resolve/resolve.go
new file mode 100644
index 0000000..c86566b
--- /dev/null
+++ b/internal/resolve/resolve.go
@@ -0,0 +1,341 @@
+// Package resolve picks the best release for a given platform query.
+//
+// Given a set of classified distributables and a target query (OS, arch,
+// libc, format preferences, version constraint), it returns the single
+// best matching release — or nil if nothing matches.
+package resolve
+
+import (
+	"strings"
+
+	"github.com/webinstall/webi-installers/internal/buildmeta"
+	"github.com/webinstall/webi-installers/internal/lexver"
+)
+
+// Dist is one downloadable distributable — matches the CSV row from classify.
+type Dist struct {
+	Package       string
+	Version       string
+	Channel       string
+	OS            string
+	Arch          string
+	Libc          string
+	Format        string
+	Download      string
+	Filename      string
+	SHA256        string
+	Size          int64
+	LTS           bool
+	Date          string
+	Extra         string   // extra version info for sorting
+	GitTag        string   // original git tag or branch — only for format="git"
+	GitCommitHash string   // short commit hash — only for format="git"
+	Variants      []string // build qualifiers: "installer", "rocm", "fxdependent", etc.
+}
+
+// Query describes what the caller wants.
+type Query struct {
+	OS       buildmeta.OS
+	Arch     buildmeta.Arch
+	Libc     buildmeta.Libc
+	Formats  []string // acceptable formats (e.g. ".tar.gz", ".zip"), in preference order
+	Channel  string   // "stable" (default), "beta", etc.
+	Version  string   // version prefix constraint ("24", "24.14", ""), empty = latest
+	Variants []string // if non-empty, only match assets that carry all of these variants
+}
+
+// Match is the resolved release.
+type Match struct {
+	Version  string
+	OS       string
+	Arch     string
+	Libc     string
+	Format   string
+	Download string
+	Filename string
+	LTS      bool
+	Date     string
+	Channel  string
+}
+
+// Best finds the single best release matching the query.
+//
+// A dist is a candidate only if it passes every filter: channel, OS, arch
+// (native or compatible), libc, variants, format, and version prefix.
+// Candidates are ranked with candidate.betterThan; on a complete tie the
+// dist that appears first in dists wins.
+//
+// Returns nil if nothing matches.
+func Best(dists []Dist, q Query) *Match {
+	channel := q.Channel
+	if channel == "" {
+		channel = "stable"
+	}
+
+	// Rank formats by their position in q.Formats (lower = preferred).
+	// If a format is listed twice, its first position wins.
+	formatRank := make(map[string]int, len(q.Formats))
+	for i, f := range q.Formats {
+		if _, seen := formatRank[f]; !seen {
+			formatRank[f] = i
+		}
+	}
+
+	// Rank the acceptable architectures (native + compat) by position.
+	compatArches := buildmeta.CompatArches(q.OS, q.Arch)
+	archRank := make(map[string]int, len(compatArches))
+	for i, a := range compatArches {
+		archRank[string(a)] = i
+	}
+
+	// Parse version prefix for constraint matching.
+	var versionPrefix lexver.Version
+	hasVersionConstraint := q.Version != ""
+	if hasVersionConstraint {
+		versionPrefix = lexver.Parse(q.Version)
+	}
+
+	// Rank given to a dist's format when the query lists no formats at all.
+	const unlistedFormatRank = 999
+
+	var best *candidate
+	for i := range dists {
+		d := &dists[i]
+
+		// Channel filter: a "stable" query rejects every other named
+		// channel; any other query channel accepts all dists.
+		if channel == "stable" && d.Channel != "stable" && d.Channel != "" {
+			continue
+		}
+
+		// OS filter: exact match, POSIX fallback, or ANYOS.
+		if !osMatches(q.OS, d.OS) {
+			continue
+		}
+
+		// Arch filter (including compat arches).
+		// Empty arch, ANYARCH, or "*" means "universal/platform-agnostic" —
+		// accept it but rank it lower than an exact match.
+		aRank, archOK := archRank[d.Arch]
+		if !archOK && (d.Arch == "" || d.Arch == "*" || d.Arch == string(buildmeta.ArchAny)) {
+			// Universal binary — rank after all specific arches.
+			aRank = len(compatArches)
+			archOK = true
+		}
+		if !archOK {
+			continue
+		}
+
+		// Libc filter.
+		if !libcMatches(q.OS, q.Libc, d.Libc) {
+			continue
+		}
+
+		// Variant filter: when the query names variants, the dist must
+		// carry every one of them.
+		if !hasAllVariants(d.Variants, q.Variants) {
+			continue
+		}
+
+		// Format filter.
+		fRank, listed := formatRank[d.Format]
+		switch {
+		case listed:
+			// Explicitly requested format — keep its preference rank.
+		case d.Format == "":
+			// Bare binary — accept as last resort, after all listed formats.
+			fRank = len(q.Formats)
+		case len(q.Formats) > 0:
+			// The query restricts formats and this one is not among them.
+			continue
+		default:
+			fRank = unlistedFormatRank
+		}
+
+		// Version constraint.
+		ver := lexver.Parse(d.Version)
+		if hasVersionConstraint && !ver.HasPrefix(versionPrefix) {
+			continue
+		}
+
+		c := &candidate{
+			dist:        d,
+			ver:         ver,
+			archRank:    aRank,
+			formatRank:  fRank,
+			hasVariants: len(d.Variants) > 0,
+		}
+
+		if best == nil || c.betterThan(best) {
+			best = c
+		}
+	}
+
+	if best == nil {
+		return nil
+	}
+
+	d := best.dist
+	return &Match{
+		Version:  d.Version,
+		OS:       d.OS,
+		Arch:     d.Arch,
+		Libc:     d.Libc,
+		Format:   d.Format,
+		Download: d.Download,
+		Filename: d.Filename,
+		LTS:      d.LTS,
+		Date:     d.Date,
+		Channel:  d.Channel,
+	}
+}
+
+// hasAllVariants reports whether every variant in want is present in have.
+// An empty want matches any dist.
+func hasAllVariants(have, want []string) bool {
+	for _, w := range want {
+		found := false
+		for _, h := range have {
+			if h == w {
+				found = true
+				break
+			}
+		}
+		if !found {
+			return false
+		}
+	}
+	return true
+}
+
+// Catalog holds aggregate metadata across all dists of a package,
+// regardless of channel.
+type Catalog struct {
+	OSes    []string
+	Arches  []string
+	Libcs   []string
+	Formats []string
+	Latest  string // highest version of any channel
+	Stable  string // highest version with channel "stable" or ""
+}
+
+// Survey scans all dists and returns the catalog.
+func Survey(dists []Dist) Catalog {
+	oses := make(map[string]bool)
+	arches := make(map[string]bool)
+	libcs := make(map[string]bool)
+	formats := make(map[string]bool)
+	add := func(set map[string]bool, v string) { // records non-empty values only
+		if v != "" {
+			set[v] = true
+		}
+	}
+
+	// latestVer and stableVer cache the parsed form of latest and stable, so
+	// the running maximum is not re-parsed for every dist.
+	var latest, stable string
+	var latestVer, stableVer lexver.Version
+	for _, d := range dists {
+		add(oses, d.OS)
+		add(arches, d.Arch)
+		add(libcs, d.Libc)
+		add(formats, d.Format)
+
+		v := lexver.Parse(d.Version)
+		if latest == "" || lexver.Compare(v, latestVer) > 0 {
+			latest, latestVer = d.Version, v
+		}
+		isStable := d.Channel == "stable" || d.Channel == "" // no channel counts as stable
+		if isStable && (stable == "" || lexver.Compare(v, stableVer) > 0) {
+			stable, stableVer = d.Version, v
+		}
+	}
+
+	return Catalog{
+		OSes:    sortedKeys(oses),
+		Arches:  sortedKeys(arches),
+		Libcs:   sortedKeys(libcs),
+		Formats: sortedKeys(formats),
+		Latest:  latest,
+		Stable:  stable,
+	}
+}
+
+// candidate is a dist that passed Best's filters, plus the values used to rank it.
+type candidate struct {
+	dist        *Dist
+	ver         lexver.Version
+	archRank    int
+	formatRank  int
+	hasVariants bool // true if dist has variant qualifiers (GPU, installer, etc.)
+}
+
+// betterThan returns true if c is a better match than other.
+// Priority: version (higher) > base over variant > arch rank (lower=native) > format rank (lower=preferred).
+func (c *candidate) betterThan(other *candidate) bool {
+	cmp := lexver.Compare(c.ver, other.ver)
+	if cmp != 0 {
+		return cmp > 0
+	}
+	// Prefer base build over variant builds (rocm, installer, etc.)
+	if c.hasVariants != other.hasVariants {
+		return !c.hasVariants
+	}
+	if c.archRank != other.archRank {
+		return c.archRank < other.archRank
+	}
+	return c.formatRank < other.formatRank
+}
+
+// osMatches checks whether a dist's OS is acceptable for the query.
+// Matches exact OS, ANYOS (universal), and POSIX compatibility levels
+// (posix_2017 matches any non-Windows OS).
+func osMatches(want buildmeta.OS, have string) bool {
+	// Same OS, or an asset that is tagged as running on any OS.
+	if have == string(want) || have == string(buildmeta.OSAny) {
+		return true
+	}
+
+	// POSIX assets run on any non-Windows system.
+	isPosix := have == string(buildmeta.OSPosix2017) ||
+		have == string(buildmeta.OSPosix2024)
+	return isPosix && want != buildmeta.OSWindows
+}
+
+// libcMatches checks whether a dist's libc is acceptable for the query.
+//
+// os and want describe the query; have is the libc tag on the dist.
+// The first rule below that applies decides the result.
+func libcMatches(os buildmeta.OS, want buildmeta.Libc, have string) bool {
+	switch {
+	case os == buildmeta.OSDarwin || os == buildmeta.OSWindows:
+		// Darwin and Windows don't use libc tagging — accept anything.
+		return true
+	case have == "" || have == "none" || have == string(buildmeta.LibcNone):
+		// The dist has no libc tag (likely statically linked) — accept it.
+		return true
+	case want == "" || want == buildmeta.LibcNone:
+		// The query has no libc preference — accept any.
+		return true
+	default:
+		return have == string(want)
+	}
+}
+
+// sortedKeys returns the keys of m in ascending lexicographic order.
+// The result is non-nil even when m is empty.
+//
+// It uses a simple insertion sort — these are tiny sets.
+func sortedKeys(m map[string]bool) []string {
+	sorted := make([]string, 0, len(m))
+	for key := range m {
+		// Append the key, then shift it left to its sorted position.
+		sorted = append(sorted, key)
+		j := len(sorted) - 1
+		for j > 0 && strings.Compare(sorted[j-1], sorted[j]) > 0 {
+			sorted[j-1], sorted[j] = sorted[j], sorted[j-1]
+			j--
+		}
+	}
+	return sorted
+}
diff --git a/internal/uadetect/uadetect.go b/internal/uadetect/uadetect.go
new file mode 100644
index 0000000..2870795
--- /dev/null
+++ b/internal/uadetect/uadetect.go
@@ -0,0 +1,247 @@
+// Package uadetect identifies the requesting agent's OS, CPU architecture,
+// and libc so the server can select the correct release artifact.
+//
+// An agent identifies itself through multiple signals:
+//   - The User-Agent header: Webi's bootstrap scripts send "$(uname -srm)",
+//     e.g. "Darwin 23.1.0 arm64". Browsers, curl, and PowerShell send their
+//     own UA strings.
+//   - Query parameters: ?os=linux&arch=arm64 are an explicit declaration
+//     that takes precedence over the header.
+//
+// Use [FromRequest] to detect from an HTTP request (preferred).
+// Use [Parse] to detect from a raw UA string.
+package uadetect
+
+import (
+	"net/http"
+	"strings"
+
+	"github.com/webinstall/webi-installers/internal/buildmeta"
+)
+
+// Result holds the detected platform info from a User-Agent string.
+type Result struct {
+	OS   buildmeta.OS
+	Arch buildmeta.Arch
+	Libc buildmeta.Libc
+}
+
+// FromRequest detects the agent's platform from an HTTP request.
+// Query parameters ?os and ?arch override the User-Agent header: when
+// either one is present the header is not consulted at all.
+func FromRequest(r *http.Request) Result {
+	query := r.URL.Query() // parse the query string once
+	qOS, qArch := query.Get("os"), query.Get("arch")
+
+	var ua string
+	switch {
+	case qOS != "" && qArch != "":
+		ua = qOS + " " + qArch
+	case qOS != "":
+		ua = qOS
+	case qArch != "":
+		ua = qArch
+	default:
+		ua = r.Header.Get("User-Agent")
+	}
+
+	return Parse(ua)
+}
+
+// Parse extracts OS, arch, and libc from a User-Agent string.
+func Parse(ua string) Result {
+	if ua == "-" {
+		return Result{}
+	}
+
+	tokens := tokenize(ua)
+
+	return Result{
+		OS:   matchOS(tokens),
+		Arch: matchArch(tokens),
+		Libc: matchLibc(tokens),
+	}
+}
+
+// tokenize splits a User-Agent into lowercase tokens for matching.
+// Splits on spaces, tabs, '/', and ';', since UAs come in various forms:
+//
+//	"Darwin 23.1.0 arm64"                (uname -srm)
+//	"PowerShell/7.3.0"                   (PowerShell)
+//	"MS AMD64"                           (Windows shorthand)
+//	"Macintosh; Intel Mac OS X 10_15_7"  (browser)
+func tokenize(ua string) []string {
+	// Strip xnu kernel info that can mislead arch detection under Rosetta.
+	// "xnu-7195.60.75~1/RELEASE_ARM64_T8101" contains ARM64 even when
+	// running as x86_64. This only appears in verbose uname output.
+	if i := strings.Index(ua, "xnu-"); i >= 0 {
+		end := strings.IndexByte(ua[i:], ' ')
+		if end < 0 {
+			ua = ua[:i]
+		} else {
+			ua = ua[:i] + ua[i+end:]
+		}
+	}
+
+	return strings.FieldsFunc(strings.ToLower(ua), func(r rune) bool {
+		return r == ' ' || r == '/' || r == ';' || r == '\t'
+	})
+}
+
+// matchOS identifies the operating system from tokens.
+// Order matters: Android before Linux, Linux before Windows (for WSL).
+func matchOS(tokens []string) buildmeta.OS {
+	// Tokens from tokenize never contain a space, so joining them with spaces
+	// (padded at both ends) lets every lookup be a single substring search.
+	joined := " " + strings.Join(tokens, " ") + " "
+	// spaced also turns punctuation into spaces, e.g. "(ios)" → " ios ".
+	spaced := strings.Map(func(r rune) rune {
+		if 'a' <= r && r <= 'z' || '0' <= r && r <= '9' {
+			return r
+		}
+		return ' '
+	}, joined)
+	// has: substring of a token. exact: a whole token. word: a whole word.
+	has := func(s string) bool { return strings.Contains(joined, s) }
+	exact := func(s string) bool { return strings.Contains(joined, " "+s+" ") }
+	word := func(s string) bool { return strings.Contains(spaced, " "+s+" ") }
+
+	// Android must be checked before Linux.
+	if has("android") {
+		return buildmeta.OSAndroid
+	}
+
+	// "ios" must be a whole word (a bare substring would also match e.g.
+	// "axios/1.6.0"); "mac" must be a whole token.
+	if has("darwin") || has("macos") || has("macintosh") || has("iphone") || has("ipad") ||
+		word("ios") || exact("mac") {
+		return buildmeta.OSDarwin
+	}
+
+	// FreeBSD before Linux (both are POSIX, but FreeBSD never reports "linux").
+	if has("freebsd") {
+		return buildmeta.OSFreeBSD
+	}
+
+	// Linux before Windows because WSL UAs contain both "linux" and "microsoft".
+	// But exclude Cygwin/Msys/MINGW which report Linux-like strings on Windows.
+	if has("linux") && !has("cygwin") && !has("msys") && !has("mingw") {
+		return buildmeta.OSLinux
+	}
+
+	// Cygwin, Msys, and MINGW are Windows environments.
+	// "ms" and "win" only count as whole tokens.
+	if has("windows") || has("win32") || has("microsoft") || has("powershell") ||
+		has("cygwin") || has("msys") || has("mingw") || exact("ms") || exact("win") {
+		return buildmeta.OSWindows
+	}
+
+	// Fallback: curl and wget imply a POSIX system, almost always Linux.
+	if has("curl") || has("wget") {
+		return buildmeta.OSLinux
+	}
+
+	return ""
+}
+
+// matchArch identifies the CPU architecture from tokens.
+// More specific patterns are checked before less specific ones.
+//
+// Patterns are matched as substrings of a token, except the short names
+// "arm", "x64", and "x86", which must be a whole token. The first pattern
+// that matches decides the result.
+//
+// Returns "" when no pattern matches.
+func matchArch(tokens []string) buildmeta.Arch {
+	// Rules are tried top to bottom and the first match wins, so the order
+	// below is significant.
+	rules := []struct {
+		pattern string
+		whole   bool // true: a token must equal pattern; false: contain it
+		arch    buildmeta.Arch
+	}{
+		// ARM 64-bit (most specific first)
+		{"aarch64", false, buildmeta.ArchARM64},
+		{"arm64", false, buildmeta.ArchARM64},
+		{"armv8", false, buildmeta.ArchARM64},
+
+		// ARM 32-bit variants
+		{"armv7", false, buildmeta.ArchARMv7},
+		{"arm32", false, buildmeta.ArchARMv7},
+		{"armv6", false, buildmeta.ArchARMv6},
+		// Bare "arm" without a version qualifier → armv6 (conservative).
+		{"arm", true, buildmeta.ArchARMv6},
+
+		// POWER (check before generic 64-bit)
+		{"ppc64le", false, buildmeta.ArchPPC64LE},
+		{"ppc64", false, buildmeta.ArchPPC64},
+
+		// s390x (IBM Z)
+		{"s390x", false, buildmeta.ArchS390X},
+
+		// RISC-V
+		{"riscv64", false, buildmeta.ArchRISCV64},
+
+		// MIPS (check before generic 64-bit)
+		{"mips64", false, buildmeta.ArchMIPS64},
+		{"mips", false, buildmeta.ArchMIPS},
+
+		// x86-64
+		{"x86_64", false, buildmeta.ArchAMD64},
+		{"amd64", false, buildmeta.ArchAMD64},
+		{"x64", true, buildmeta.ArchAMD64},
+
+		// x86 32-bit (after x86_64 to avoid false match)
+		{"i386", false, buildmeta.ArchX86},
+		{"i686", false, buildmeta.ArchX86},
+		{"x86", true, buildmeta.ArchX86},
+	}
+
+	for _, rule := range rules {
+		for _, tok := range tokens {
+			hit := tok == rule.pattern
+			if !hit && !rule.whole {
+				hit = strings.Contains(tok, rule.pattern)
+			}
+			if hit {
+				return rule.arch
+			}
+		}
+	}
+	return ""
+}
+
+// matchLibc identifies the C library from tokens.
+//
+// Patterns are matched as substrings of a token. Precedence is musl, then
+// MSVC, then GNU; a token containing "linux" is taken to mean GNU.
+// Returns buildmeta.LibcNone when nothing matches.
+func matchLibc(tokens []string) buildmeta.Libc {
+	// contains reports whether any token contains any of the needles.
+	contains := func(needles ...string) bool {
+		for _, tok := range tokens {
+			for _, needle := range needles {
+				if strings.Contains(tok, needle) {
+					return true
+				}
+			}
+		}
+		return false
+	}
+
+	switch {
+	case contains("musl"):
+		return buildmeta.LibcMusl
+
+	// Don't match "microsoft" — it appears in WSL kernel version strings
+	// (e.g. "5.15.146.1-microsoft-standard-WSL2") and doesn't indicate MSVC.
+	case contains("msvc", "windows"):
+		return buildmeta.LibcMSVC
+
+	case contains("gnu", "glibc", "linux"):
+		return buildmeta.LibcGNU
+
+	default:
+		return buildmeta.LibcNone
+	}
+}