mirror of
https://github.com/webinstall/webi-installers.git
synced 2026-05-18 06:36:37 +00:00
Rewrites the Node.js release classification pipeline in Go. webicached fetches upstream releases (GitHub, Gitea, GitLab, HashiCorp, custom sources), classifies assets by OS/arch/variant, and writes legacy-format JSON caches compatible with the existing webinstall.dev API. Git-clone packages emit git_tag and git_commit_hash from real repo clones — no fabricated refs.
445 lines
15 KiB
Go
445 lines
15 KiB
Go
package storage
|
|
|
|
import (
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
// Legacy types for reading/writing the Node.js _cache/ JSON format.
|
|
//
|
|
// The Node.js server calls assets "releases" and uses "name" for the
|
|
// filename and "ext" for the format. These types preserve that wire
|
|
// format for backward compatibility during migration.
|
|
//
|
|
// Internal Go code uses [Asset] and [PackageData] directly.
|
|
|
|
// LegacyAsset matches the JSON shape the Node.js server writes and reads.
|
|
type LegacyAsset struct {
|
|
Name string `json:"name"`
|
|
Version string `json:"version"`
|
|
GitTag string `json:"git_tag,omitempty"`
|
|
GitCommitHash string `json:"git_commit_hash,omitempty"`
|
|
LTS bool `json:"lts"`
|
|
Channel string `json:"channel"`
|
|
Date string `json:"date"`
|
|
OS string `json:"os"`
|
|
Arch string `json:"arch"`
|
|
Libc string `json:"libc"`
|
|
Ext string `json:"ext"`
|
|
Download string `json:"download"`
|
|
}
|
|
|
|
// LegacyCache matches the top-level JSON shape in _cache/{pkg}.json.
|
|
type LegacyCache struct {
|
|
OSes []string `json:"oses,omitempty"`
|
|
Arches []string `json:"arches,omitempty"`
|
|
Libcs []string `json:"libcs,omitempty"`
|
|
Formats []string `json:"formats,omitempty"`
|
|
Releases []LegacyAsset `json:"releases"`
|
|
Download string `json:"download"`
|
|
}
|
|
|
|
// LegacyDropStats reports how many assets were excluded during ExportLegacy.
|
|
type LegacyDropStats struct {
|
|
Variants int // dropped: has build variant tags (e.g. rocm, installer, fxdependent)
|
|
Formats int // dropped: format not recognized by the Node.js server
|
|
Android int // dropped: android OS — classifier maps android filenames to linux
|
|
NoTarget int // dropped: no OS and no arch — unclassifiable source tarballs
|
|
}
|
|
|
|
// ToAsset converts a LegacyAsset to the internal Asset type.
|
|
// It reverses the key vocabulary translations applied by toLegacy so that
|
|
// the internal (Go canonical) representation is preserved.
|
|
func (la LegacyAsset) ToAsset() Asset {
|
|
// Reverse-translate legacy Node.js vocabulary to Go canonical names.
|
|
// toLegacy writes macos/amd64/arm64; internal code uses darwin/x86_64/aarch64.
|
|
// "none" libc is buildmeta.LibcNone — preserve it (don't collapse to "").
|
|
os := la.OS
|
|
switch os {
|
|
case "macos":
|
|
os = "darwin"
|
|
case "*":
|
|
os = ""
|
|
}
|
|
arch := la.Arch
|
|
switch arch {
|
|
case "amd64":
|
|
arch = "x86_64"
|
|
case "arm64":
|
|
arch = "aarch64"
|
|
case "*":
|
|
arch = ""
|
|
}
|
|
// Restore the dot-prefix convention used throughout internal Go code.
|
|
// The cache stores ext without a leading dot (e.g. "tar.gz", "zip", "exe"),
|
|
// but Asset.Format uses dotted strings (e.g. ".tar.gz", ".zip", ".exe").
|
|
// "exe" is ambiguous: bare binary (no .exe suffix) vs Windows .exe file.
|
|
// Disambiguate by checking whether the filename ends with ".exe".
|
|
format := la.Ext
|
|
switch {
|
|
case format == "exe" && !strings.HasSuffix(strings.ToLower(la.Name), ".exe"):
|
|
format = "" // bare binary — internal convention is empty string
|
|
case format != "":
|
|
format = "." + format // restore dot prefix for internal use
|
|
}
|
|
return Asset{
|
|
Filename: la.Name,
|
|
Version: la.Version,
|
|
LTS: la.LTS,
|
|
Channel: la.Channel,
|
|
Date: la.Date,
|
|
OS: os,
|
|
Arch: arch,
|
|
Libc: la.Libc,
|
|
Format: format,
|
|
Download: la.Download,
|
|
GitTag: la.GitTag,
|
|
GitCommitHash: la.GitCommitHash,
|
|
}
|
|
}
|
|
|
|
// toLegacy converts an Asset to the LegacyAsset wire format.
|
|
// Callers must have already applied legacyFieldBackport before calling this.
|
|
func (a Asset) toLegacy() LegacyAsset {
|
|
libc := a.Libc
|
|
if libc == "" {
|
|
libc = "none" // API expects "none" rather than empty string
|
|
}
|
|
// Strip leading dot: API expects "tar.gz" not ".tar.gz".
|
|
ext := strings.TrimPrefix(a.Format, ".")
|
|
// Bare binaries: API expects "exe". Internal convention is Format=""
|
|
// for bare binaries (no archive extension). By the time we reach
|
|
// toLegacy, source tarballs and git-clone entries have been filtered
|
|
// or tagged, so Format="" reliably means bare binary.
|
|
if ext == "" {
|
|
ext = "exe"
|
|
}
|
|
return LegacyAsset{
|
|
Name: a.Filename,
|
|
Version: strings.TrimPrefix(a.Version, "v"), // API expects no v-prefix
|
|
GitTag: a.GitTag,
|
|
GitCommitHash: a.GitCommitHash,
|
|
LTS: a.LTS,
|
|
Channel: a.Channel,
|
|
Date: a.Date,
|
|
OS: a.OS,
|
|
Arch: a.Arch,
|
|
Libc: libc,
|
|
Ext: ext,
|
|
Download: a.Download,
|
|
}
|
|
}
|
|
|
|
// legacyFieldBackport translates canonical classifier field values to the
|
|
// values the legacy Node.js resolver expects. This is called at export time
|
|
// only — the canonical values are preserved in Go-native storage (pgstore).
|
|
//
|
|
// The Node build-classifier re-parses each asset's download filename and drops
|
|
// any entry where the cache field doesn't match what it extracts from the name.
|
|
// These translations ensure the cache matches the classifier's extraction.
|
|
//
|
|
// Global OS translations:
|
|
// - sunos → solaris: Node's classifier maps "sunos" filenames to "solaris".
|
|
// LIVE_cache has "solaris" and "illumos" but never "sunos".
|
|
//
|
|
// Global arch translations (all packages):
|
|
// - universal2/universal1 → x86_64: classifier maps "universal" in filename
|
|
// to x86_64. The darwin WATERFALL falls back aarch64→x86_64, so arm64
|
|
// users still receive these builds.
|
|
// - x86_64_v2/v3/v4 → x86_64: AMD64 microarch levels not in LIVE_cache;
|
|
// fold to baseline x86_64.
|
|
// - mips64r6 → mips64: exotic MIPS64R6, not in LIVE_cache.
|
|
// - mips64r6el → mips64le: exotic MIPS64R6 little-endian, not in LIVE_cache.
|
|
// - ARM (filename-based): explicit armvN takes priority over ABI tags.
|
|
// Go normalizes these; see legacyARMArchFromFilename for filename extraction.
|
|
// Final ARM vocab mapping to LIVE_cache values:
|
|
// armv6→armv6l, armv7a→armv7l, armhf→armv7l, armel→arm.
|
|
// - powerpc (32-bit): not in LIVE_cache; entry is dropped.
|
|
//
|
|
// Note: mipsle and mips64le are kept as-is — LIVE_cache uses these exact values.
|
|
// Note: solaris and illumos are kept as-is — both exist in LIVE_cache.
|
|
//
|
|
// Package-specific rules replicate per-package overrides in production's releases.js:
|
|
// - ffmpeg: Windows .gz → .exe (prod releases.js: rel.ext = 'exe')
|
|
//
|
|
// Git-clone entries:
|
|
// - format="git" with empty OS/arch → os="*", arch="*"
|
|
// The legacy cache uses "*" for ANYOS/ANYARCH (builds-cacher LEGACY_OS_MAP['*']='ANYOS').
|
|
// vim plugins, aliasman, serviceman, and other POSIX packages use this format.
|
|
func legacyFieldBackport(pkg string, a Asset) Asset {
|
|
// Git-clone entries are ANYOS/ANYARCH — legacy cache uses "*" for these.
|
|
// This matches production LIVE_cache for vim-commentary, aliasman, etc.
|
|
if a.Format == "git" {
|
|
if a.OS == "" {
|
|
a.OS = "*"
|
|
}
|
|
if a.Arch == "" {
|
|
a.Arch = "*"
|
|
}
|
|
}
|
|
|
|
// sunos → solaris: Node's classifier maps "sunos" filenames to "solaris".
|
|
// LIVE_cache has "solaris" and "illumos" but never "sunos".
|
|
if a.OS == "sunos" {
|
|
a.OS = "solaris"
|
|
}
|
|
|
|
// darwin → macos: LIVE_cache pre-classified packages (go, node, zig, fish, etc.)
|
|
// use "macos". Julia is the sole exception — LIVE julia.json uses "darwin".
|
|
if a.OS == "darwin" && pkg != "julia" {
|
|
a.OS = "macos"
|
|
}
|
|
|
|
// Universal fat binaries: expandUniversal splits these into per-arch
|
|
// entries earlier in the pipeline. This is a safety fallback in case
|
|
// any universal entries reach the legacy export unexpectedly.
|
|
if a.Arch == "universal2" || a.Arch == "universal1" {
|
|
a.Arch = "x86_64"
|
|
}
|
|
|
|
// AMD64 microarch levels: not in LIVE_cache; fold to baseline x86_64.
|
|
switch a.Arch {
|
|
case "x86_64_v2", "x86_64_v3", "x86_64_v4":
|
|
a.Arch = "x86_64"
|
|
}
|
|
|
|
// x86_64 → amd64, aarch64 → arm64: LIVE_cache pre-classified packages use
|
|
// "amd64" and "arm64". Go's classifier uses "x86_64" and "aarch64".
|
|
// These come after universal2→x86_64 and x86_64_v*/→x86_64 so the chains work.
|
|
if a.Arch == "x86_64" {
|
|
a.Arch = "amd64"
|
|
}
|
|
if a.Arch == "aarch64" {
|
|
a.Arch = "arm64"
|
|
}
|
|
|
|
// MIPS variants not in LIVE_cache: fold to nearest supported value.
|
|
// mipsle and mips64le are kept as-is — LIVE_cache uses these exact spellings.
|
|
switch a.Arch {
|
|
case "mips64r6":
|
|
a.Arch = "mips64"
|
|
case "mips64r6el":
|
|
a.Arch = "mips64le"
|
|
}
|
|
|
|
// powerpc (32-bit): not in LIVE_cache; mark for drop by clearing both fields.
|
|
// Per-package taggers (uuidv7, watchexec) handle this via variant tags, but
|
|
// for any package without a tagger, clear here so the NoTarget filter drops it.
|
|
if a.Arch == "powerpc" {
|
|
a.OS = ""
|
|
a.Arch = ""
|
|
}
|
|
|
|
// ARM arch: the Node classifier re-parses filenames and expects the cache
|
|
// arch to match what it extracts. Go normalizes arch values; use filename
|
|
// heuristics to match what Node would extract.
|
|
switch a.Arch {
|
|
case "armv5", "armv6", "armv7":
|
|
if leg := legacyARMArchFromFilename(a.Filename); leg != "" {
|
|
a.Arch = leg
|
|
}
|
|
}
|
|
// Translate ARM arch values to LIVE_cache vocabulary.
|
|
// legacyARMArchFromFilename can produce armhf/armel/armv7a which aren't
|
|
// in LIVE_cache; also translate raw armv6/armv7 (when no filename override).
|
|
switch a.Arch {
|
|
case "armv6":
|
|
a.Arch = "armv6l"
|
|
case "armv7":
|
|
a.Arch = "armv7l"
|
|
case "armhf":
|
|
a.Arch = "armv7l"
|
|
case "armel":
|
|
a.Arch = "arm"
|
|
case "armv7a":
|
|
a.Arch = "armv7l"
|
|
}
|
|
|
|
switch pkg {
|
|
case "ffmpeg":
|
|
if a.OS == "windows" {
|
|
switch a.Format {
|
|
case ".gz", "":
|
|
a.Format = ".exe"
|
|
}
|
|
}
|
|
}
|
|
|
|
return a
|
|
}
|
|
|
|
// legacyARMArchFromFilename returns the arch string the Node build-classifier
|
|
// would extract from a filename for ARM-family builds. Returns "" when the
|
|
// Go canonical arch value already matches what the classifier would extract.
|
|
//
|
|
// The Node classifier's extraction rules differ from Go's normalization:
|
|
// - armv7a (explicit) → "armv7a" (not "armv7")
|
|
// - armv7 (explicit, e.g. "armv7-unknown-linux-gnueabihf") → "armv7"
|
|
// The explicit version number takes priority over the ABI suffix.
|
|
// - arm-5 / arm-7 (Gitea naming: "linux-arm-5", "linux-arm-7") → "armel" / "armv7"
|
|
// patternToTerms converts "arm-5" → "armv5" and "arm-7" → "armv7".
|
|
// - armv6hf (shellcheck naming) → "armhf" (tpm['armv6hf'] = ARMHF)
|
|
// - gnueabihf (Rust triplet, no explicit armvN) → "armhf"
|
|
// - armhf (Debian armhf) → "armhf"
|
|
// - armel (Debian soft-float ABI) → "armel" (not "armv6")
|
|
// - armv5 (explicit) → "armel" (Node tiered map: armv5 falls back to armel)
|
|
func legacyARMArchFromFilename(filename string) string {
|
|
lower := strings.ToLower(filename)
|
|
// armv7a before armv7 — "armv7a" contains "armv7" as a prefix.
|
|
if strings.Contains(lower, "armv7a") {
|
|
return "armv7a"
|
|
}
|
|
// Explicit armv7 in filename: takes priority over ABI suffix (gnueabihf).
|
|
// e.g. "armv7-unknown-linux-gnueabihf" → classifier extracts "armv7".
|
|
if strings.Contains(lower, "armv7") {
|
|
return "armv7"
|
|
}
|
|
// armv6hf (shellcheck naming): tpm['armv6hf'] = ARMHF → "armhf".
|
|
if strings.Contains(lower, "armv6hf") {
|
|
return "armhf"
|
|
}
|
|
// Gitea arm-N naming: "linux-arm-5" → patternToTerms → "armv5" → armel.
|
|
if strings.Contains(lower, "arm-5") {
|
|
return "armel"
|
|
}
|
|
// Gitea arm-N naming: "linux-arm-7" → patternToTerms → "armv7" → armv7.
|
|
if strings.Contains(lower, "arm-7") {
|
|
return "armv7"
|
|
}
|
|
// Rust gnueabihf triplet (no explicit armvN): classifier → "armhf".
|
|
if strings.Contains(lower, "gnueabihf") {
|
|
return "armhf"
|
|
}
|
|
// Debian armhf (hard-float ABI): classifier → "armhf".
|
|
if strings.Contains(lower, "armhf") {
|
|
return "armhf"
|
|
}
|
|
if strings.Contains(lower, "armel") {
|
|
return "armel"
|
|
}
|
|
if strings.Contains(lower, "armv5") {
|
|
return "armel"
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// ImportLegacy converts a LegacyCache to PackageData.
|
|
func ImportLegacy(lc LegacyCache) PackageData {
|
|
assets := make([]Asset, len(lc.Releases))
|
|
for i, la := range lc.Releases {
|
|
assets[i] = la.ToAsset()
|
|
}
|
|
return PackageData{Assets: assets}
|
|
}
|
|
|
|
// legacyFormats is the set of formats the Node.js server recognizes.
|
|
// Assets with formats not in this set are filtered out of legacy exports.
|
|
var legacyFormats = map[string]bool{
|
|
".zip": true,
|
|
".tar.gz": true,
|
|
".tar.xz": true,
|
|
".tar.zst": true,
|
|
".tar.bz2": true,
|
|
".tar": true,
|
|
".xz": true,
|
|
".7z": true,
|
|
".pkg": true,
|
|
".msi": true,
|
|
".exe": true,
|
|
".exe.xz": true,
|
|
".dmg": true,
|
|
".app.zip": true,
|
|
".gz": true,
|
|
"git": true,
|
|
}
|
|
|
|
// ExportLegacy converts canonical PackageData to the LegacyCache wire format.
|
|
//
|
|
// The pkg name is used to apply per-package field translations (see legacyFieldBackport).
|
|
// Assets are excluded when:
|
|
// - Variants is non-empty (Node.js has no variant logic)
|
|
// - OS is android (classifier maps android filenames to linux)
|
|
// - OS and arch are both empty (unclassifiable source tarballs)
|
|
// - Format is non-empty and not in the Node.js recognized set
|
|
//
|
|
// Dropped counts are returned in LegacyDropStats for logging.
|
|
func ExportLegacy(pkg string, pd PackageData) (LegacyCache, LegacyDropStats) {
|
|
var releases []LegacyAsset
|
|
var stats LegacyDropStats
|
|
|
|
for _, a := range pd.Assets {
|
|
// Skip variant builds — Node.js doesn't have variant logic.
|
|
if len(a.Variants) > 0 {
|
|
stats.Variants++
|
|
continue
|
|
}
|
|
// Skip android — classifier maps android filenames to linux OS,
|
|
// which mismatches cache entries tagged android.
|
|
if a.OS == "android" {
|
|
stats.Android++
|
|
continue
|
|
}
|
|
// Skip entries with no OS and no arch, unless they're git-clone packages.
|
|
// Source tarballs (cmake, dashcore, bun npm) have format != "git".
|
|
// Git-clone packages (vim plugins, aliasman) legitimately have no OS/arch —
|
|
// legacyFieldBackport will translate them to os="*", arch="*".
|
|
if a.OS == "" && a.Arch == "" && a.Format != "git" {
|
|
stats.NoTarget++
|
|
continue
|
|
}
|
|
// Apply per-package and global legacy field translations.
|
|
a = legacyFieldBackport(pkg, a)
|
|
// Skip formats Node.js doesn't recognize.
|
|
if a.Format != "" && !legacyFormats[a.Format] {
|
|
stats.Formats++
|
|
continue
|
|
}
|
|
releases = append(releases, a.toLegacy())
|
|
}
|
|
if releases == nil {
|
|
releases = []LegacyAsset{}
|
|
}
|
|
|
|
// Build sorted summary arrays from the included releases.
|
|
// These let the API skip normalize.js vocabulary filtering entirely.
|
|
oSet := map[string]bool{}
|
|
aSet := map[string]bool{}
|
|
lSet := map[string]bool{}
|
|
fSet := map[string]bool{}
|
|
for _, r := range releases {
|
|
if r.OS != "" && r.OS != "*" {
|
|
oSet[r.OS] = true
|
|
}
|
|
if r.Arch != "" && r.Arch != "*" {
|
|
aSet[r.Arch] = true
|
|
}
|
|
if r.Libc != "" {
|
|
lSet[r.Libc] = true
|
|
}
|
|
if r.Ext != "" {
|
|
fSet[strings.TrimPrefix(r.Ext, ".")] = true
|
|
}
|
|
}
|
|
lc := LegacyCache{
|
|
OSes: sortedKeys(oSet),
|
|
Arches: sortedKeys(aSet),
|
|
Libcs: sortedKeys(lSet),
|
|
Formats: sortedKeys(fSet),
|
|
Releases: releases,
|
|
}
|
|
return lc, stats
|
|
}
|
|
|
|
// sortedKeys returns the keys of a string set in sorted order.
|
|
func sortedKeys(m map[string]bool) []string {
|
|
if len(m) == 0 {
|
|
return nil
|
|
}
|
|
out := make([]string, 0, len(m))
|
|
for k := range m {
|
|
out = append(out, k)
|
|
}
|
|
sort.Strings(out)
|
|
return out
|
|
}
|