From cf9dd4d2e2acdf3dce09b655a732d5ee0bcc004c Mon Sep 17 00:00:00 2001 From: AJ ONeal Date: Sun, 8 Mar 2026 21:38:43 -0600 Subject: [PATCH] feat: add Phase 0 foundation packages for Go rewrite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - internal/buildmeta: canonical constants for OS, arch, libc, format, channel - internal/lexver: version string → lexicographically sortable string - internal/uadetect: User-Agent → OS/arch/libc detection - internal/httpclient: resilient net/http client with retry and backoff - go.mod: initialize module (stdlib only, no dependencies) --- go.mod | 3 + internal/buildmeta/buildmeta.go | 131 +++++++++++++++++ internal/httpclient/httpclient.go | 203 ++++++++++++++++++++++++++ internal/lexver/lexver.go | 223 +++++++++++++++++++++++++++++ internal/lexver/lexver_test.go | 155 ++++++++++++++++++++ internal/uadetect/uadetect.go | 161 +++++++++++++++++++++ internal/uadetect/uadetect_test.go | 117 +++++++++++++++ 7 files changed, 993 insertions(+) create mode 100644 go.mod create mode 100644 internal/buildmeta/buildmeta.go create mode 100644 internal/httpclient/httpclient.go create mode 100644 internal/lexver/lexver.go create mode 100644 internal/lexver/lexver_test.go create mode 100644 internal/uadetect/uadetect.go create mode 100644 internal/uadetect/uadetect_test.go diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..ffe76fa --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/webinstall/webi-installers + +go 1.24 diff --git a/internal/buildmeta/buildmeta.go b/internal/buildmeta/buildmeta.go new file mode 100644 index 0000000..51c9eef --- /dev/null +++ b/internal/buildmeta/buildmeta.go @@ -0,0 +1,131 @@ +// Package buildmeta defines the canonical constants for OS, architecture, +// libc, archive format, and release channel used throughout Webi. +package buildmeta + +// OS represents a target operating system. 
+type OS string + +const ( + OSAny OS = "ANYOS" + OSDarwin OS = "darwin" + OSLinux OS = "linux" + OSWindows OS = "windows" + OSFreeBSD OS = "freebsd" + OSSunOS OS = "sunos" + OSAIX OS = "aix" + OSAndroid OS = "android" + + // POSIX compatibility levels — used when a package is a shell script + // or otherwise OS-independent for POSIX systems. + OSPosix2017 OS = "posix_2017" + OSPosix2024 OS = "posix_2024" +) + +// Arch represents a target CPU architecture. +type Arch string + +const ( + ArchAny Arch = "ANYARCH" + ArchAMD64 Arch = "x86_64" + ArchARM64 Arch = "aarch64" + ArchARMv7 Arch = "armv7" + ArchARMv6 Arch = "armv6" + ArchX86 Arch = "x86" + ArchPPC64LE Arch = "ppc64le" + ArchPPC64 Arch = "ppc64" + ArchS390X Arch = "s390x" + ArchMIPS64 Arch = "mips64" + ArchMIPS Arch = "mips" +) + +// Libc represents the C library a binary is linked against. +type Libc string + +const ( + LibcNone Libc = "none" // statically linked or no libc dependency (Go, Zig, etc.) + LibcGNU Libc = "gnu" // requires glibc (most Linux distros) + LibcMusl Libc = "musl" // requires musl (Alpine, some Docker images) + LibcMSVC Libc = "msvc" // Microsoft Visual C++ runtime +) + +// Format represents an archive or package format. +type Format string + +const ( + FormatTarGz Format = ".tar.gz" + FormatTarXz Format = ".tar.xz" + FormatTarZst Format = ".tar.zst" + FormatZip Format = ".zip" + FormatGz Format = ".gz" + FormatXz Format = ".xz" + FormatZst Format = ".zst" + FormatExe Format = ".exe" + FormatExeXz Format = ".exe.xz" + FormatMSI Format = ".msi" + FormatDMG Format = ".dmg" + FormatPkg Format = ".pkg" + FormatAppZip Format = ".app.zip" + Format7z Format = ".7z" + FormatSh Format = ".sh" + FormatGit Format = ".git" +) + +// Channel represents a release stability channel. 
+type Channel string + +const ( + ChannelStable Channel = "stable" + ChannelLatest Channel = "latest" + ChannelRC Channel = "rc" + ChannelPreview Channel = "preview" + ChannelBeta Channel = "beta" + ChannelAlpha Channel = "alpha" + ChannelDev Channel = "dev" +) + +// ChannelNames lists recognized channel names in priority order. +var ChannelNames = []Channel{ + ChannelStable, + ChannelLatest, + ChannelRC, + ChannelPreview, + ChannelBeta, + ChannelAlpha, + ChannelDev, +} + +// Target represents a fully resolved build target. +type Target struct { + OS OS + Arch Arch + Libc Libc +} + +// Triplet returns the canonical "os-arch-libc" string. +func (t Target) Triplet() string { + return string(t.OS) + "-" + string(t.Arch) + "-" + string(t.Libc) +} + +// Release represents a single downloadable build artifact. +type Release struct { + Name string `json:"name"` + Version string `json:"version"` + LTS bool `json:"lts"` + Channel Channel `json:"channel"` + Date string `json:"date"` // "2024-01-15" + OS OS `json:"os"` + Arch Arch `json:"arch"` + Libc Libc `json:"libc"` + Ext Format `json:"ext"` + Download string `json:"download"` + Comment string `json:"comment,omitempty"` +} + +// PackageMeta holds aggregate metadata about a package's available releases. +type PackageMeta struct { + Name string `json:"name"` + OSes []OS `json:"oses"` + Arches []Arch `json:"arches"` + Libcs []Libc `json:"libcs"` + Formats []Format `json:"formats"` +} diff --git a/internal/httpclient/httpclient.go b/internal/httpclient/httpclient.go new file mode 100644 index 0000000..b14f5e2 --- /dev/null +++ b/internal/httpclient/httpclient.go @@ -0,0 +1,203 @@ +// Package httpclient provides a resilient HTTP client with best-practice +// defaults for making upstream API calls (GitHub, Gitea, etc.). 
+//
+// Features:
+//   - Sensible timeouts at every level (connect, TLS, headers, overall)
+//   - Connection pooling with reasonable limits
+//   - TLS 1.2+ minimum
+//   - Limited redirect depth, no HTTPS→HTTP downgrade
+//   - Automatic retries with exponential backoff + jitter for transient errors
+//   - Respects Retry-After headers
+//   - Custom User-Agent identifying Webi
+//   - All calls require context.Context
+package httpclient
+
+import (
+	"context"
+	"crypto/tls"
+	"errors"
+	"fmt"
+	"math/rand/v2"
+	"net"
+	"net/http"
+	"strconv"
+	"time"
+)
+
+// Client wraps http.Client with retry logic and resilience defaults.
+type Client struct {
+	inner     *http.Client  // underlying client that performs each attempt
+	userAgent string        // User-Agent applied when a request does not set one
+	retries   int           // maximum number of retries after the first attempt
+	baseDelay time.Duration // backoff delay before the first retry
+	maxDelay  time.Duration // upper bound for the exponential backoff delay
+}
+
+// Option configures a Client.
+type Option func(*Client)
+
+// WithUserAgent sets the User-Agent header for all requests.
+func WithUserAgent(ua string) Option {
+	return func(c *Client) { c.userAgent = ua }
+}
+
+// WithRetries sets the maximum number of retries for transient errors.
+//
+// Negative values are treated as 0 (one attempt, no retries). Without the
+// clamp, Do's attempt loop would never run and it would return a nil
+// response together with a nil error.
+func WithRetries(n int) Option {
+	return func(c *Client) { c.retries = max(n, 0) }
+}
+
+// WithBaseDelay sets the initial delay for exponential backoff.
+// Negative durations are treated as 0.
+func WithBaseDelay(d time.Duration) Option {
+	return func(c *Client) { c.baseDelay = max(d, 0) }
+}
+
+// New creates a Client with secure, resilient defaults.
+func New(opts ...Option) *Client {
+	transport := &http.Transport{
+		// Honor HTTP_PROXY / HTTPS_PROXY / NO_PROXY the way
+		// http.DefaultTransport does; a hand-built Transport ignores them
+		// unless Proxy is set.
+		Proxy: http.ProxyFromEnvironment,
+		DialContext: (&net.Dialer{
+			Timeout:   10 * time.Second,
+			KeepAlive: 30 * time.Second,
+		}).DialContext,
+		TLSClientConfig: &tls.Config{
+			MinVersion: tls.VersionTLS12,
+		},
+		TLSHandshakeTimeout:   10 * time.Second,
+		ResponseHeaderTimeout: 30 * time.Second,
+		MaxIdleConns:          100,
+		MaxIdleConnsPerHost:   10,
+		IdleConnTimeout:       90 * time.Second,
+		ExpectContinueTimeout: 1 * time.Second,
+		ForceAttemptHTTP2:     true,
+	}
+
+	c := &Client{
+		inner: &http.Client{
+			Transport:     transport,
+			Timeout:       60 * time.Second,
+			CheckRedirect: checkRedirect,
+		},
+		userAgent: "Webi/2.0 (+https://webinstall.dev)",
+		retries:   3,
+		baseDelay: 1 * time.Second,
+		maxDelay:  30 * time.Second,
+	}
+
+	// Options run last so they can override any default above.
+	for _, opt := range opts {
+		opt(c)
+	}
+
+	return c
+}
+
+// maxRedirects is the redirect depth limit.
+const maxRedirects = 10
+
+// maxRetryAfter is the exclusive upper bound for a server-supplied
+// Retry-After delay; longer (or non-positive) values are ignored in favor of
+// the computed backoff.
+const maxRetryAfter = 5 * time.Minute
+
+// checkRedirect prevents HTTPS→HTTP downgrades and limits redirect depth.
+//
+// via holds the requests made so far, oldest first. The scheme is compared
+// against the immediately preceding hop rather than the first request, so a
+// chain such as http → https → http is refused as well.
+func checkRedirect(req *http.Request, via []*http.Request) error {
+	if len(via) >= maxRedirects {
+		return fmt.Errorf("stopped after %d redirects", maxRedirects)
+	}
+	if n := len(via); n > 0 && via[n-1].URL.Scheme == "https" && req.URL.Scheme == "http" {
+		return errors.New("refused redirect from https to http")
+	}
+	return nil
+}
+
+// Do executes a request with automatic retries for transient errors.
+// It sets the User-Agent header if not already present.
+//
+// A request is retried when the transport returns an error (and the request
+// context is still live) or when the response status is retryable (see
+// isRetryable). Requests that carry a body are only retried when
+// req.GetBody can supply a fresh copy of it; otherwise a single attempt is
+// made. When retries are exhausted on a retryable status, the last response
+// is returned with its body open and the caller must close it.
+func (c *Client) Do(req *http.Request) (*http.Response, error) {
+	if req.Header.Get("User-Agent") == "" {
+		req.Header.Set("User-Agent", c.userAgent)
+	}
+
+	ctx := req.Context()
+
+	// A negative count would skip the loop below and return (nil, nil).
+	retries := max(c.retries, 0)
+
+	// The first attempt consumes the request body. Re-sending the same
+	// reader would transmit an empty or truncated payload, so without
+	// GetBody the request cannot be retried.
+	hasBody := req.Body != nil && req.Body != http.NoBody
+	if hasBody && req.GetBody == nil {
+		retries = 0
+	}
+
+	var resp *http.Response
+	var err error
+
+	for attempt := 0; attempt <= retries; attempt++ {
+		attemptReq := req
+
+		if attempt > 0 {
+			// Read Retry-After first, then release the previous response
+			// before waiting so it is not leaked if ctx is cancelled.
+			delay := c.backoff(attempt, resp)
+			if resp != nil {
+				resp.Body.Close()
+				resp = nil
+			}
+
+			timer := time.NewTimer(delay)
+			select {
+			case <-ctx.Done():
+				timer.Stop()
+				return nil, ctx.Err()
+			case <-timer.C:
+			}
+
+			if hasBody {
+				body, bodyErr := req.GetBody()
+				if bodyErr != nil {
+					return nil, fmt.Errorf("rewinding request body for retry %d: %w", attempt, bodyErr)
+				}
+				attemptReq = req.Clone(ctx)
+				attemptReq.Body = body
+			}
+		}
+
+		resp, err = c.inner.Do(attemptReq)
+		if err != nil {
+			// http.Client.Do only returns a non-nil response with an error
+			// when CheckRedirect fails, and its body is already closed.
+			resp = nil
+			if ctx.Err() != nil {
+				return nil, ctx.Err()
+			}
+			continue
+		}
+
+		if !isRetryable(resp.StatusCode) {
+			return resp, nil
+		}
+	}
+
+	// Exhausted retries — return whatever we have.
+	if err != nil {
+		return nil, fmt.Errorf("after %d retries: %w", retries, err)
+	}
+	return resp, nil
+}
+
+// Get is a convenience wrapper around Do for GET requests.
+func (c *Client) Get(ctx context.Context, url string) (*http.Response, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return nil, err
+	}
+	return c.Do(req)
+}
+
+// isRetryable returns true for HTTP status codes that indicate a transient error.
+func isRetryable(status int) bool {
+	switch status {
+	case http.StatusTooManyRequests, // 429
+		http.StatusBadGateway,         // 502
+		http.StatusServiceUnavailable, // 503
+		http.StatusGatewayTimeout:     // 504
+		return true
+	}
+	return false
+}
+
+// backoff calculates the delay before the next retry attempt.
+// Uses exponential backoff with jitter, and respects Retry-After headers.
+//
+// attempt is the 1-based retry number. resp is the previous response, or nil
+// if the previous attempt failed without one.
+func (c *Client) backoff(attempt int, resp *http.Response) time.Duration {
+	// A usable Retry-After from the previous response wins over the
+	// computed delay.
+	if resp != nil {
+		if d, ok := retryAfter(resp.Header.Get("Retry-After")); ok {
+			return d
+		}
+	}
+
+	// Exponential backoff: baseDelay * 2^(attempt-1), capped at maxDelay.
+	// The cap also applies to the first retry, and the floor of 0 keeps a
+	// negative base delay from being doubled.
+	delay := max(min(c.baseDelay, c.maxDelay), 0)
+	for i := 1; i < attempt && delay < c.maxDelay; i++ {
+		delay = min(delay*2, c.maxDelay)
+	}
+
+	// Add jitter: ±25%
+	jitter := time.Duration(float64(delay) * 0.5 * rand.Float64())
+	delay = delay + jitter - (delay / 4)
+
+	return delay
+}
+
+// retryAfter parses a Retry-After header value, which is either a number of
+// seconds or an HTTP-date. It reports false when the value is empty,
+// malformed, not in the future, or not below maxRetryAfter.
+func retryAfter(value string) (time.Duration, bool) {
+	if value == "" {
+		return 0, false
+	}
+
+	if seconds, err := strconv.Atoi(value); err == nil {
+		// Bounds-check before multiplying so a huge value cannot overflow.
+		if seconds <= 0 || seconds >= int(maxRetryAfter/time.Second) {
+			return 0, false
+		}
+		return time.Duration(seconds) * time.Second, true
+	}
+
+	when, err := http.ParseTime(value)
+	if err != nil {
+		return 0, false
+	}
+	if d := time.Until(when); d > 0 && d < maxRetryAfter {
+		return d, true
+	}
+	return 0, false
+}
diff --git a/internal/lexver/lexver.go b/internal/lexver/lexver.go
new file mode 100644
index 0000000..2595ae4
--- /dev/null
+++ b/internal/lexver/lexver.go
@@ -0,0 +1,223 @@
+// Package lexver converts version strings into lexicographically sortable
+// representations so that version comparison reduces to string comparison.
+//
+// The core problem: "1.10.0" must sort after "1.9.0", but as raw strings
+// "1.10.0" < "1.9.0" because '1' < '9' in ASCII. Lexver solves this by
+// zero-padding each numeric segment to a fixed width.
+//
+// Sorting rules:
+//   - Numeric segments are zero-padded and compared naturally
+//   - Stable releases sort after pre-releases of the same version
+//   - Pre-release channels sort alphabetically (alpha < beta < rc)
+//   - Numeric suffixes within channels sort numerically (rc2 > rc1)
+//
+// Examples:
+//
+//	"1.20.3"      → "0001.0020.0003.0000~"
+//	"1.0.0-beta1" → "0001.0000.0000.0000-beta.0001"
+//	"1.0.0"       → "0001.0000.0000.0000~"
+//
+// The "~" character sorts after "-" in ASCII, so stable versions always
+// sort after any pre-release of the same numeric version.
+package lexver
+
+import (
+	"strconv"
+	"strings"
+	"unicode"
+)
+
+const (
+	numWidth     = 4 // zero-pad width for version numbers
+	chanNumWidth = 4 // zero-pad width for channel sequence numbers
+	numSegments  = 4 // major.minor.patch.build
+
+	// suffixStable sorts after suffixPre because '~' > '-' in ASCII.
+	suffixStable = "~"
+	suffixPre    = "-"
+)
+
+// Parse converts a version string to its lexicographically sortable form.
+func Parse(version string) string {
+	return format(splitVersion(version), false)
+}
+
+// ParsePrefix converts a partial version to its sortable form for matching.
+// Unlike Parse, it does not pad to the full segment count.
+//
+// The stability suffix is still appended, so trim it (for example with
+// strings.TrimSuffix(p, "~")) before using the result as a prefix for
+// strings.HasPrefix or MatchSorted.
+//
+//	ParsePrefix("1.20")    → "0001.0020~"
+//	ParsePrefix("1.20-rc") → "0001.0020-rc.0000"
+func ParsePrefix(version string) string {
+	return format(splitVersion(version), true)
+}
+
+// versionParts holds the parsed components of a version string.
+type versionParts struct {
+	nums    []int  // numeric segments: [1, 20, 3, 0]
+	channel string // pre-release channel: "beta", "rc", "" for stable
+	chanNum int    // pre-release sequence: 1 in "beta1", 0 if absent
+}
+
+// splitVersion breaks a version string into its semantic components.
+func splitVersion(version string) versionParts {
+	// Strip one leading "v" or "V". strings.TrimLeft takes a cutset and
+	// would strip a whole run of v/V characters instead of a single prefix.
+	if version != "" && (version[0] == 'v' || version[0] == 'V') {
+		version = version[1:]
+	}
+
+	var p versionParts
+
+	// Find where the pre-release suffix begins.
+	// We look for the first letter after the numeric prefix.
+	numStr, prerelease := splitAtPrerelease(version)
+
+	// Parse numeric segments
+	for _, seg := range strings.Split(numStr, ".") {
+		if seg == "" {
+			continue
+		}
+		n, err := strconv.Atoi(seg)
+		if err != nil {
+			// If we hit a non-numeric segment in the numeric part,
+			// treat it as start of prerelease.
+			if prerelease == "" {
+				prerelease = seg
+			} else {
+				prerelease = seg + "-" + prerelease
+			}
+			continue
+		}
+		p.nums = append(p.nums, n)
+	}
+
+	// Parse pre-release: "beta1" → channel="beta", chanNum=1
+	if prerelease != "" {
+		p.channel, p.chanNum = splitChannel(prerelease)
+	}
+
+	return p
+}
+
+// splitAtPrerelease splits "1.20.3-beta1" into ("1.20.3", "beta1").
+// Also handles "1.2beta3" (no separator before channel name).
+//
+// When both '-' and '+' are present the split happens at whichever comes
+// first, so "1.0.0+build-1" yields ("1.0.0", "build-1") instead of leaving
+// "+build" inside the numeric part. Text after '+' is treated like a
+// pre-release channel.
+func splitAtPrerelease(s string) (string, string) {
+	// Try an explicit separator first: the earliest dash or plus.
+	if idx := strings.IndexAny(s, "-+"); idx >= 0 {
+		return s[:idx], s[idx+1:]
+	}
+
+	// Look for a letter following a digit: "1.2beta3"
+	for i := 1; i < len(s); i++ {
+		if unicode.IsLetter(rune(s[i])) && unicode.IsDigit(rune(s[i-1])) {
+			return s[:i], s[i:]
+		}
+	}
+
+	return s, ""
+}
+
+// splitChannel separates "beta1" into ("beta", 1) or "rc" into ("rc", 0).
+// The name is lowercased and the separators '-', '.' and '_' are removed
+// before the trailing digits are split off.
+func splitChannel(s string) (string, int) {
+	s = strings.ToLower(s)
+
+	// Normalize separators: "beta-1", "beta.1" → "beta1"
+	s = strings.NewReplacer("-", "", ".", "", "_", "").Replace(s)
+
+	// Find where trailing digits begin
+	i := len(s)
+	for i > 0 && unicode.IsDigit(rune(s[i-1])) {
+		i--
+	}
+
+	name := s[:i]
+	num := 0
+	if i < len(s) {
+		// s[i:] holds only digits, so Atoi can fail only on overflow; its
+		// result is used as-is in that case.
+		num, _ = strconv.Atoi(s[i:])
+	}
+
+	return name, num
+}
+
+// format renders parsed version parts into a lexver string.
+// With asPrefix set, only the segments that were actually parsed are
+// written; otherwise the numeric part is padded with zero segments up to
+// numSegments. The stability suffix is appended in both modes.
+func format(p versionParts, asPrefix bool) string {
+	// Pad numeric segments
+	count := len(p.nums)
+	if !asPrefix && count < numSegments {
+		count = numSegments
+	}
+
+	var b strings.Builder
+	b.Grow(count*5 + 20) // rough estimate
+
+	for i := 0; i < count; i++ {
+		if i > 0 {
+			b.WriteByte('.')
+		}
+		n := 0
+		if i < len(p.nums) {
+			n = p.nums[i]
+		}
+		b.WriteString(padInt(n, numWidth))
+	}
+
+	// Append stability suffix
+	if p.channel == "" {
+		b.WriteString(suffixStable)
+	} else {
+		b.WriteString(suffixPre)
+		b.WriteString(p.channel)
+		b.WriteByte('.')
+		b.WriteString(padInt(p.chanNum, chanNumWidth))
+	}
+
+	return b.String()
+}
+
+// padInt renders n in decimal, left-padded with zeros to at least width
+// characters. Values that need more than width digits are returned
+// unpadded and untruncated, so they no longer compare correctly as strings
+// against narrower values (with width 4, "10000" sorts before "9999").
+func padInt(n, width int) string {
+	s := strconv.Itoa(n)
+	if pad := width - len(s); pad > 0 {
+		return strings.Repeat("0", pad) + s
+	}
+	return s
+}
+
+// IsPreRelease reports whether version looks like a pre-release.
+func IsPreRelease(version string) bool {
+	p := splitVersion(version)
+	return p.channel != ""
+}
+
+// Match holds the result of searching a sorted lexver list.
+type Match struct { + // Latest is the newest version regardless of channel. + Latest string + // Stable is the newest stable (non-pre-release) version. + Stable string + // Default is Stable if available, otherwise Latest. + Default string + // Matches lists all lexvers matching the prefix, newest first. + Matches []string +} + +// MatchSorted searches a descending-sorted slice of lexvers for entries +// matching the given prefix. If prefix is empty, all versions match. +func MatchSorted(lexvers []string, prefix string) Match { + var m Match + for _, lv := range lexvers { + if prefix != "" && !strings.HasPrefix(lv, prefix) { + continue + } + m.Matches = append(m.Matches, lv) + if m.Latest == "" { + m.Latest = lv + } + if m.Stable == "" && strings.HasSuffix(lv, suffixStable) { + m.Stable = lv + } + } + if m.Stable != "" { + m.Default = m.Stable + } else { + m.Default = m.Latest + } + return m +} diff --git a/internal/lexver/lexver_test.go b/internal/lexver/lexver_test.go new file mode 100644 index 0000000..8919765 --- /dev/null +++ b/internal/lexver/lexver_test.go @@ -0,0 +1,155 @@ +package lexver_test + +import ( + "testing" + + "github.com/webinstall/webi-installers/internal/lexver" +) + +func TestParse(t *testing.T) { + tests := []struct { + input string + want string + }{ + // Basic semver + {"1.0.0", "0001.0000.0000.0000~"}, + {"1.2.3", "0001.0002.0003.0000~"}, + {"0.1.0", "0000.0001.0000.0000~"}, + + // Leading v + {"v1.2.3", "0001.0002.0003.0000~"}, + {"V1.0.0", "0001.0000.0000.0000~"}, + + // Partial versions (padded to 4 segments) + {"1.20", "0001.0020.0000.0000~"}, + {"1", "0001.0000.0000.0000~"}, + + // Large numbers + {"1.20.156", "0001.0020.0156.0000~"}, + + // Pre-release channels + {"1.0.0-beta1", "0001.0000.0000.0000-beta.0001"}, + {"1.0.0-rc2", "0001.0000.0000.0000-rc.0002"}, + {"1.0.0-alpha3", "0001.0000.0000.0000-alpha.0003"}, + {"2.0.0-preview1", "0002.0000.0000.0000-preview.0001"}, + {"1.0.0-dev", "0001.0000.0000.0000-dev.0000"}, + + // 
Channel attached to number (no separator) + {"1.2beta3", "0001.0002.0000.0000-beta.0003"}, + {"1.0rc1", "0001.0000.0000.0000-rc.0001"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got := lexver.Parse(tt.input) + if got != tt.want { + t.Errorf("Parse(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func TestParsePrefix(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"1.20", "0001.0020~"}, + {"1", "0001~"}, + {"v2", "0002~"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got := lexver.ParsePrefix(tt.input) + if got != tt.want { + t.Errorf("ParsePrefix(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func TestSortOrder(t *testing.T) { + // These must produce ascending lexver strings. + ordered := []string{ + "0.1.0", + "1.0.0-alpha1", + "1.0.0-beta1", + "1.0.0-rc1", + "1.0.0-rc2", + "1.0.0", + "1.0.1", + "1.1.0", + "1.2.0", + "1.20.0", + "2.0.0-beta1", + "2.0.0", + } + + for i := 1; i < len(ordered); i++ { + prev := lexver.Parse(ordered[i-1]) + curr := lexver.Parse(ordered[i]) + if prev >= curr { + t.Errorf("expected Parse(%q) < Parse(%q)\n got %q >= %q", + ordered[i-1], ordered[i], prev, curr) + } + } +} + +func TestIsPreRelease(t *testing.T) { + tests := []struct { + input string + want bool + }{ + {"1.0.0", false}, + {"1.0.0-beta1", true}, + {"1.0.0-rc2", true}, + {"1.0.0-alpha", true}, + {"1.0.0-dev", true}, + {"v2.0.0-preview1", true}, + {"1.0.0-pre1", true}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got := lexver.IsPreRelease(tt.input) + if got != tt.want { + t.Errorf("IsPreRelease(%q) = %v, want %v", tt.input, got, tt.want) + } + }) + } +} + +func TestMatchSorted(t *testing.T) { + // Descending order (as stored) + lexvers := []string{ + lexver.Parse("2.0.0"), + lexver.Parse("2.0.0-rc1"), + lexver.Parse("1.20.3"), + lexver.Parse("1.20.2"), + lexver.Parse("1.19.5"), + } + + t.Run("empty prefix matches all", func(t 
*testing.T) { + m := lexver.MatchSorted(lexvers, "") + if len(m.Matches) != len(lexvers) { + t.Errorf("expected %d matches, got %d", len(lexvers), len(m.Matches)) + } + if m.Latest != lexvers[0] { + t.Errorf("Latest = %q, want %q", m.Latest, lexvers[0]) + } + if m.Stable != lexvers[0] { + t.Errorf("Stable = %q, want %q", m.Stable, lexvers[0]) + } + }) + + t.Run("prefix filters versions", func(t *testing.T) { + prefix := lexver.ParsePrefix("1.20") + // Strip the "~" suffix for prefix matching + prefix = prefix[:len(prefix)-1] + m := lexver.MatchSorted(lexvers, prefix) + if len(m.Matches) != 2 { + t.Errorf("expected 2 matches for 1.20.x, got %d: %v", len(m.Matches), m.Matches) + } + }) +} diff --git a/internal/uadetect/uadetect.go b/internal/uadetect/uadetect.go new file mode 100644 index 0000000..f2157db --- /dev/null +++ b/internal/uadetect/uadetect.go @@ -0,0 +1,161 @@ +// Package uadetect identifies OS, architecture, and libc from a User-Agent +// string. The input is typically from curl's -A flag: +// +// curl -fsSA "$(uname -srm)" https://webi.sh/node +// +// Which produces something like: +// +// "Darwin 23.1.0 arm64" +// "Linux 6.1.0 x86_64" +// "CYGWIN_NT-10.0-19045 3.5.3 x86_64" +package uadetect + +import ( + "regexp" + "strings" + + "github.com/webinstall/webi-installers/internal/buildmeta" +) + +// Result holds the detected platform info from a User-Agent string. +type Result struct { + OS buildmeta.OS + Arch buildmeta.Arch + Libc buildmeta.Libc +} + +// Parse extracts OS, arch, and libc from a User-Agent string. +func Parse(ua string) Result { + return Result{ + OS: DetectOS(ua), + Arch: DetectArch(ua), + Libc: DetectLibc(ua), + } +} + +// DetectOS returns the OS from a User-Agent string. +func DetectOS(ua string) buildmeta.OS { + if ua == "-" { + return "" + } + + // Android must be tested before Linux. 
+ if reAndroid.MatchString(ua) { + return buildmeta.OSAndroid + } + + // macOS/Darwin must be tested before Linux (for edge cases) and before + // "win" (because "darwin" contains no "win", but ordering matters). + if reDarwin.MatchString(ua) { + return buildmeta.OSDarwin + } + + // Linux must be tested before Windows because WSL User-Agents contain + // both "Linux" and sometimes "Microsoft". + if reLinux.MatchString(ua) && !reCygwin.MatchString(ua) { + return buildmeta.OSLinux + } + + if reWindows.MatchString(ua) { + return buildmeta.OSWindows + } + + // Try Linux again after Windows (for plain "curl" or "wget"). + if reLinuxLoose.MatchString(ua) { + return buildmeta.OSLinux + } + + return "" +} + +// DetectArch returns the CPU architecture from a User-Agent string. +func DetectArch(ua string) buildmeta.Arch { + if ua == "-" { + return "" + } + + // Strip macOS kernel release arch info that can mislead detection. + // e.g. "xnu-7195.60.75~1/RELEASE_ARM64_T8101 x86_64" under Rosetta + ua = reXNU.ReplaceAllString(ua, "") + + // Order matters — more specific patterns first. + if reARM64.MatchString(ua) { + return buildmeta.ArchARM64 + } + if reARMv7.MatchString(ua) { + return buildmeta.ArchARMv7 + } + if reARMv6.MatchString(ua) { + return buildmeta.ArchARMv6 + } + if rePPC64LE.MatchString(ua) { + return buildmeta.ArchPPC64LE + } + if rePPC64.MatchString(ua) { + return buildmeta.ArchPPC64 + } + if reMIPS64.MatchString(ua) { + return buildmeta.ArchMIPS64 + } + if reMIPS.MatchString(ua) { + return buildmeta.ArchMIPS + } + // amd64 must come after ppc64/mips64 (both contain "64"). + if reAMD64.MatchString(ua) { + return buildmeta.ArchAMD64 + } + // x86 must come after x86_64/amd64. + if reX86.MatchString(ua) { + return buildmeta.ArchX86 + } + + return "" +} + +// DetectLibc returns the C library variant from a User-Agent string. 
+//
+// Linux takes precedence over Windows tokens, mirroring DetectOS: a WSL
+// kernel release such as "5.15.146.1-microsoft-standard-WSL2" contains
+// "microsoft" but is a Linux userland, so it must not be reported as msvc.
+// A User-Agent of "-" yields the empty Libc; anything unrecognized yields
+// buildmeta.LibcNone.
+func DetectLibc(ua string) buildmeta.Libc {
+	if ua == "-" {
+		return ""
+	}
+
+	// The libc patterns below are case-sensitive, so match on a lowercased copy.
+	lower := strings.ToLower(ua)
+
+	switch {
+	case reMusl.MatchString(lower):
+		return buildmeta.LibcMusl
+	case reMSVC.MatchString(lower) && !reLinux.MatchString(lower):
+		return buildmeta.LibcMSVC
+	case reGNU.MatchString(lower):
+		return buildmeta.LibcGNU
+	}
+
+	// Default: no specific libc requirement detected.
+	return buildmeta.LibcNone
+}
+
+// Compiled regexes — allocated once.
+var (
+	reAndroid    = regexp.MustCompile(`(?i)Android`)
+	reDarwin     = regexp.MustCompile(`(?i)iOS|iPhone|Macintosh|Darwin|OS\s*X|macOS|mac`)
+	reLinux      = regexp.MustCompile(`(?i)Linux`)
+	reCygwin     = regexp.MustCompile(`(?i)cygwin|msysgit`)
+	reWindows    = regexp.MustCompile(`(?i)(\b|^)ms(\b|$)|Microsoft|Windows|win32|win|PowerShell`)
+	reLinuxLoose = regexp.MustCompile(`(?i)Linux|curl|wget`)
+
+	reXNU = regexp.MustCompile(`xnu-\S*RELEASE_\S*`)
+
+	reARM64   = regexp.MustCompile(`(?i)(\b|_)(aarch64|arm64|arm8|armv8)`)
+	reARMv7   = regexp.MustCompile(`(?i)(\b|_)(aarch|arm7|armv7|arm32)`)
+	reARMv6   = regexp.MustCompile(`(?i)(\b|_)(arm6|armv6|arm(\b|_))`)
+	rePPC64LE = regexp.MustCompile(`(?i)ppc64le`)
+	rePPC64   = regexp.MustCompile(`(?i)ppc64`)
+	reMIPS64  = regexp.MustCompile(`(?i)mips64`)
+	reMIPS    = regexp.MustCompile(`(?i)mips`)
+	reAMD64   = regexp.MustCompile(`(?i)(amd64|x86_64|x64|_64)\b`)
+	reX86     = regexp.MustCompile(`(?i)(\b|_)(3|6|x|_)86\b`)
+
+	// The libc patterns have no (?i) flag; DetectLibc lowercases its input.
+	reMusl = regexp.MustCompile(`(\b|_)musl(\b|_)`)
+	reMSVC = regexp.MustCompile(`(\b|_)(msvc|windows|microsoft)(\b|_)`)
+	reGNU  = regexp.MustCompile(`(\b|_)(gnu|glibc|linux)(\b|_)`)
+)
diff --git a/internal/uadetect/uadetect_test.go b/internal/uadetect/uadetect_test.go
new file mode 100644
index 0000000..961ce09
--- /dev/null
+++ b/internal/uadetect/uadetect_test.go
@@ -0,0 +1,117 @@
+package uadetect_test
+
+import (
+	"testing"
+
+	"github.com/webinstall/webi-installers/internal/buildmeta"
+	"github.com/webinstall/webi-installers/internal/uadetect"
+)
+
+func TestDetectOS(t *testing.T) {
+	tests := []struct {
+		ua   string
+		want buildmeta.OS
+	}{
+		// macOS / Darwin
+		{"Darwin 23.1.0 arm64", buildmeta.OSDarwin},
+		{"Darwin 20.2.0 x86_64", buildmeta.OSDarwin},
+		{"Macintosh; Intel Mac OS X 10_15_7", buildmeta.OSDarwin},
+
+		// Linux
+		{"Linux 6.1.0-18-amd64 x86_64", buildmeta.OSLinux},
+		{"Linux 5.15.0 aarch64", buildmeta.OSLinux},
+
+		// WSL (Linux, not Windows)
+		{"Linux 5.15.146.1-microsoft-standard-WSL2 x86_64", buildmeta.OSLinux},
+
+		// Windows
+		{"MS AMD64", buildmeta.OSWindows},
+		{"PowerShell/7.3.0", buildmeta.OSWindows},
+		{"Microsoft Windows 10.0.19045", buildmeta.OSWindows},
+
+		// Android
+		{"Android 13 aarch64", buildmeta.OSAndroid},
+
+		// Minimal agents
+		{"curl/8.1.2", buildmeta.OSLinux},
+		{"wget/1.21", buildmeta.OSLinux},
+
+		// Dash means unknown
+		{"-", ""},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.ua, func(t *testing.T) {
+			got := uadetect.DetectOS(tt.ua)
+			if got != tt.want {
+				t.Errorf("DetectOS(%q) = %q, want %q", tt.ua, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestDetectArch(t *testing.T) {
+	tests := []struct {
+		ua   string
+		want buildmeta.Arch
+	}{
+		{"Darwin 23.1.0 arm64", buildmeta.ArchARM64},
+		{"Linux 6.1.0 aarch64", buildmeta.ArchARM64},
+		{"Linux 5.4.0 x86_64", buildmeta.ArchAMD64},
+		{"MS AMD64", buildmeta.ArchAMD64},
+		{"Linux 5.10.0 armv7l", buildmeta.ArchARMv7},
+		{"Linux 5.10.0 armv6l", buildmeta.ArchARMv6},
+		{"Linux 5.4.0 ppc64le", buildmeta.ArchPPC64LE},
+
+		// Rosetta: kernel says ARM64 but uname reports x86_64
+		{"Darwin 20.2.0 Darwin Kernel Version 20.2.0; root:xnu-7195.60.75~1/RELEASE_ARM64_T8101 x86_64", buildmeta.ArchAMD64},
+
+		{"-", ""},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.ua, func(t *testing.T) {
+			got := uadetect.DetectArch(tt.ua)
+			if got != tt.want {
+				t.Errorf("DetectArch(%q) = %q, want %q", tt.ua, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestDetectLibc(t *testing.T) {
+	tests := []struct {
+		ua   string
+		want buildmeta.Libc
+	}{
+		{"Linux 6.1.0 x86_64 musl", buildmeta.LibcMusl},
+		{"Linux 6.1.0 x86_64 gnu", buildmeta.LibcGNU},
+		{"Linux 6.1.0 x86_64 linux", buildmeta.LibcGNU},
+		{"MS AMD64 msvc", buildmeta.LibcMSVC},
+		{"Microsoft Windows", buildmeta.LibcMSVC},
+		{"Darwin 23.1.0 arm64", buildmeta.LibcNone},
+
+		// WSL reports a "microsoft" kernel release but is a Linux userland.
+		{"Linux 5.15.146.1-microsoft-standard-WSL2 x86_64", buildmeta.LibcGNU},
+
+		{"-", ""},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.ua, func(t *testing.T) {
+			got := uadetect.DetectLibc(tt.ua)
+			if got != tt.want {
+				t.Errorf("DetectLibc(%q) = %q, want %q", tt.ua, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestParse(t *testing.T) {
+	r := uadetect.Parse("Darwin 23.1.0 arm64")
+	if r.OS != buildmeta.OSDarwin {
+		t.Errorf("OS = %q, want %q", r.OS, buildmeta.OSDarwin)
+	}
+	if r.Arch != buildmeta.ArchARM64 {
+		t.Errorf("Arch = %q, want %q", r.Arch, buildmeta.ArchARM64)
+	}
+	if r.Libc != buildmeta.LibcNone {
+		t.Errorf("Libc = %q, want %q", r.Libc, buildmeta.LibcNone)
+	}
+}