mirror of
https://github.com/webinstall/webi-installers.git
synced 2026-04-18 07:56:42 +00:00
357 lines
8.8 KiB
Go
357 lines
8.8 KiB
Go
// Command uaparse analyzes User-Agent strings from webi.sh logs.
|
|
//
|
|
// It reads UA strings (one per line) from stdin or a file, parses each
|
|
// through uadetect, and produces summary output showing:
|
|
// - unique platform tuples (os, arch, libc) with counts
|
|
// - platform hints extracted from kernel version strings (cloud provider,
|
|
// container runtime, device info)
|
|
// - detection failures and malformed UAs
|
|
//
|
|
// Usage:
|
|
//
|
|
// uaparse < LIVE-UAS.txt
|
|
// uaparse LIVE-UAS.txt
|
|
// uaparse -json LIVE-UAS.txt
|
|
// uaparse -fixtures LIVE-UAS.txt # output Go test fixtures
|
|
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"os"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
|
|
"github.com/webinstall/webi-installers/internal/uadetect"
|
|
)
|
|
|
|
// PlatformKey is the resolution-relevant tuple — everything else is noise
|
|
// for artifact selection.
|
|
type PlatformKey struct {
|
|
OS string `json:"os"`
|
|
Arch string `json:"arch"`
|
|
Libc string `json:"libc"`
|
|
}
|
|
|
|
func (k PlatformKey) String() string {
|
|
return fmt.Sprintf("%-10s %-10s %s", k.OS, k.Arch, k.Libc)
|
|
}
|
|
|
|
// PlatformEntry holds a unique platform and its metadata.
|
|
type PlatformEntry struct {
|
|
Key PlatformKey `json:"key"`
|
|
Count int `json:"count"`
|
|
Examples []string `json:"examples"` // up to 3 representative UAs
|
|
Hints []string `json:"hints"` // unique platform hints seen
|
|
}
|
|
|
|
// UAIssue records a malformed or undetectable UA.
|
|
type UAIssue struct {
|
|
Line int `json:"line"`
|
|
UA string `json:"ua"`
|
|
Reason string `json:"reason"`
|
|
}
|
|
|
|
// Hint is a platform detail extracted from the kernel version string.
|
|
type Hint struct {
|
|
Tag string // short label: "amzn", "azure", "gcp", "wsl", etc.
|
|
Count int
|
|
}
|
|
|
|
func main() {
|
|
jsonOut := flag.Bool("json", false, "output as JSON")
|
|
fixtures := flag.Bool("fixtures", false, "output Go test fixture table")
|
|
flag.Parse()
|
|
|
|
var scanner *bufio.Scanner
|
|
if flag.NArg() > 0 {
|
|
f, err := os.Open(flag.Arg(0))
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "uaparse: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
defer f.Close()
|
|
scanner = bufio.NewScanner(f)
|
|
} else {
|
|
scanner = bufio.NewScanner(os.Stdin)
|
|
}
|
|
|
|
platforms := make(map[PlatformKey]*PlatformEntry)
|
|
hints := make(map[string]int)
|
|
var issues []UAIssue
|
|
lineNum := 0
|
|
|
|
for scanner.Scan() {
|
|
lineNum++
|
|
ua := strings.TrimSpace(scanner.Text())
|
|
if ua == "" {
|
|
continue
|
|
}
|
|
|
|
// Detect corruption: truncated/double-pasted lines.
|
|
if isMalformed(ua) {
|
|
issues = append(issues, UAIssue{
|
|
Line: lineNum,
|
|
UA: ua,
|
|
Reason: "malformed (truncated or corrupted)",
|
|
})
|
|
continue
|
|
}
|
|
|
|
// Parse through uadetect.
|
|
result := uadetect.Parse(ua)
|
|
|
|
// Check for detection failures.
|
|
if result.OS == "" {
|
|
issues = append(issues, UAIssue{
|
|
Line: lineNum,
|
|
UA: ua,
|
|
Reason: "OS not detected",
|
|
})
|
|
}
|
|
if result.Arch == "" {
|
|
issues = append(issues, UAIssue{
|
|
Line: lineNum,
|
|
UA: ua,
|
|
Reason: "arch not detected",
|
|
})
|
|
}
|
|
|
|
key := PlatformKey{
|
|
OS: string(result.OS),
|
|
Arch: string(result.Arch),
|
|
Libc: string(result.Libc),
|
|
}
|
|
|
|
entry, ok := platforms[key]
|
|
if !ok {
|
|
entry = &PlatformEntry{Key: key}
|
|
platforms[key] = entry
|
|
}
|
|
entry.Count++
|
|
if len(entry.Examples) < 3 {
|
|
entry.Examples = append(entry.Examples, ua)
|
|
}
|
|
|
|
// Extract platform hints from kernel version.
|
|
for _, h := range extractHints(ua) {
|
|
if !containsStr(entry.Hints, h) {
|
|
entry.Hints = append(entry.Hints, h)
|
|
}
|
|
hints[h]++
|
|
}
|
|
}
|
|
|
|
if err := scanner.Err(); err != nil {
|
|
fmt.Fprintf(os.Stderr, "uaparse: read error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Sort platforms by count descending.
|
|
entries := make([]*PlatformEntry, 0, len(platforms))
|
|
for _, e := range platforms {
|
|
entries = append(entries, e)
|
|
}
|
|
sort.Slice(entries, func(i, j int) bool {
|
|
return entries[i].Count > entries[j].Count
|
|
})
|
|
|
|
if *jsonOut {
|
|
outputJSON(entries, issues, hints)
|
|
} else if *fixtures {
|
|
outputFixtures(entries)
|
|
} else {
|
|
outputTable(entries, issues, hints, lineNum)
|
|
}
|
|
}
|
|
|
|
func outputTable(entries []*PlatformEntry, issues []UAIssue, hints map[string]int, total int) {
|
|
fmt.Printf("=== UA Analysis: %d lines → %d unique platforms ===\n\n", total, len(entries))
|
|
|
|
fmt.Printf("%-10s %-10s %-6s %6s %s\n", "OS", "ARCH", "LIBC", "COUNT", "HINTS")
|
|
fmt.Println(strings.Repeat("-", 72))
|
|
for _, e := range entries {
|
|
hintStr := ""
|
|
if len(e.Hints) > 0 {
|
|
hintStr = strings.Join(e.Hints, ", ")
|
|
}
|
|
fmt.Printf("%-10s %-10s %-6s %6d %s\n",
|
|
displayOS(e.Key.OS), e.Key.Arch, displayLibc(e.Key.Libc),
|
|
e.Count, hintStr)
|
|
}
|
|
|
|
if len(hints) > 0 {
|
|
fmt.Printf("\n=== Platform Hints (environment signals from kernel strings) ===\n\n")
|
|
sortedHints := make([]Hint, 0, len(hints))
|
|
for tag, count := range hints {
|
|
sortedHints = append(sortedHints, Hint{tag, count})
|
|
}
|
|
sort.Slice(sortedHints, func(i, j int) bool {
|
|
return sortedHints[i].Count > sortedHints[j].Count
|
|
})
|
|
for _, h := range sortedHints {
|
|
fmt.Printf(" %-20s %d\n", h.Tag, h.Count)
|
|
}
|
|
}
|
|
|
|
if len(issues) > 0 {
|
|
fmt.Printf("\n=== Issues (%d) ===\n\n", len(issues))
|
|
for _, iss := range issues {
|
|
fmt.Printf(" line %d: %s\n %s\n", iss.Line, iss.Reason, iss.UA)
|
|
}
|
|
}
|
|
}
|
|
|
|
func outputJSON(entries []*PlatformEntry, issues []UAIssue, hints map[string]int) {
|
|
out := struct {
|
|
Platforms []*PlatformEntry `json:"platforms"`
|
|
Issues []UAIssue `json:"issues"`
|
|
Hints map[string]int `json:"hints"`
|
|
}{entries, issues, hints}
|
|
|
|
enc := json.NewEncoder(os.Stdout)
|
|
enc.SetIndent("", " ")
|
|
_ = enc.Encode(out)
|
|
}
|
|
|
|
func outputFixtures(entries []*PlatformEntry) {
|
|
fmt.Println("// Generated by cmd/uaparse from live UA data.")
|
|
fmt.Println("// Each entry represents a unique (os, arch, libc) platform seen in production.")
|
|
fmt.Println("var liveUAPlatforms = []struct {")
|
|
fmt.Println("\tua string")
|
|
fmt.Println("\tos buildmeta.OS")
|
|
fmt.Println("\tarch buildmeta.Arch")
|
|
fmt.Println("\tlibc buildmeta.Libc")
|
|
fmt.Println("}{")
|
|
|
|
for _, e := range entries {
|
|
if e.Key.OS == "" || e.Key.Arch == "" {
|
|
continue // skip undetectable
|
|
}
|
|
ua := e.Examples[0]
|
|
fmt.Printf("\t{%q, %s, %s, %s},\n",
|
|
ua, goConst("OS", e.Key.OS), goConst("Arch", e.Key.Arch), goConst("Libc", e.Key.Libc))
|
|
}
|
|
|
|
fmt.Println("}")
|
|
}
|
|
|
|
// isMalformed checks for genuinely corrupted UA strings (network truncation).
|
|
func isMalformed(ua string) bool {
|
|
// Extremely short (less than 10 chars) suggests truncation.
|
|
if len(ua) < 10 {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// extractHints finds environment signals in a UA string.
|
|
func extractHints(ua string) []string {
|
|
lower := strings.ToLower(ua)
|
|
var out []string
|
|
|
|
patterns := []struct {
|
|
substr string
|
|
tag string
|
|
}{
|
|
{"amzn", "amzn"}, // Amazon Linux
|
|
{"-azure", "azure"}, // Azure VM
|
|
{"-gcp", "gcp"}, // Google Cloud
|
|
{"-aws", "aws"}, // AWS kernel
|
|
{"-oracle", "oracle"}, // Oracle Cloud
|
|
{"el7", "rhel7"}, // RHEL/CentOS 7
|
|
{"el8", "rhel8"}, // RHEL/CentOS 8
|
|
{"el9", "rhel9"}, // RHEL/CentOS 9
|
|
{".fc", "fedora"}, // Fedora
|
|
{"+deb", "debian"}, // Debian
|
|
{"-generic", "ubuntu"}, // Ubuntu generic kernel
|
|
{"-pve", "proxmox"}, // Proxmox VE
|
|
{"linuxkit", "docker"}, // Docker Desktop / linuxkit
|
|
{"orbstack", "orbstack"},
|
|
{"microsoft-standard-wsl", "wsl"},
|
|
{"android", "android"},
|
|
{"+rpt-rpi", "rpi"}, // Raspberry Pi
|
|
{"cygwin", "cygwin"},
|
|
{"mingw", "mingw"},
|
|
{"msys", "msys"},
|
|
{"freebsd", "freebsd"},
|
|
{"-nvidia", "nvidia"},
|
|
{"gentoo", "gentoo"},
|
|
{"coreweave", "coreweave"},
|
|
}
|
|
|
|
for _, p := range patterns {
|
|
if strings.Contains(lower, p.substr) {
|
|
out = append(out, p.tag)
|
|
}
|
|
}
|
|
|
|
return out
|
|
}
|
|
|
|
// androidDeviceRe extracts device/build info from Android kernel strings.
|
|
var androidDeviceRe = regexp.MustCompile(`ab[A-Z0-9]+`)
|
|
|
|
func displayOS(os string) string {
|
|
if os == "" {
|
|
return "(none)"
|
|
}
|
|
return os
|
|
}
|
|
|
|
func displayLibc(libc string) string {
|
|
if libc == "" {
|
|
return "(none)"
|
|
}
|
|
return libc
|
|
}
|
|
|
|
func goConst(prefix, val string) string {
|
|
m := map[string]map[string]string{
|
|
"OS": {
|
|
"darwin": "buildmeta.OSDarwin",
|
|
"linux": "buildmeta.OSLinux",
|
|
"windows": "buildmeta.OSWindows",
|
|
"freebsd": "buildmeta.OSFreeBSD",
|
|
"android": "buildmeta.OSAndroid",
|
|
"": `""`,
|
|
},
|
|
"Arch": {
|
|
"aarch64": "buildmeta.ArchARM64",
|
|
"x86_64": "buildmeta.ArchAMD64",
|
|
"armv7": "buildmeta.ArchARMv7",
|
|
"armv6": "buildmeta.ArchARMv6",
|
|
"x86": "buildmeta.ArchX86",
|
|
"ppc64le": "buildmeta.ArchPPC64LE",
|
|
"ppc64": "buildmeta.ArchPPC64",
|
|
"s390x": "buildmeta.ArchS390X",
|
|
"riscv64": "buildmeta.ArchRISCV64",
|
|
"": `""`,
|
|
},
|
|
"Libc": {
|
|
"gnu": "buildmeta.LibcGNU",
|
|
"musl": "buildmeta.LibcMusl",
|
|
"msvc": "buildmeta.LibcMSVC",
|
|
"none": "buildmeta.LibcNone",
|
|
"": `""`,
|
|
},
|
|
}
|
|
if v, ok := m[prefix][val]; ok {
|
|
return v
|
|
}
|
|
return fmt.Sprintf("%q /* unmapped */", val)
|
|
}
|
|
|
|
func containsStr(ss []string, s string) bool {
|
|
for _, v := range ss {
|
|
if v == s {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|