fix: zig platform data lost in cache, expand classifier coverage

- Fix zig Platform.Size type: upstream JSON encodes size as a string, so decode it as json.Number
- Fix zig Platforms json tag: was "-" (so platforms were dropped from the cache); now serialized as "platforms"
- Add riscv64, loong64, armv5 archs to buildmeta and classifier
- Add mipsle, mips64le arch detection patterns
- Add plan9 OS detection
- Add "mac" (word boundary) → darwin OS detection
- Add armhf → armv7, arm7 → armv7 patterns
- Infer Linux from .deb/.rpm format when OS absent
- Filter source archives and buildable-artifact meta-assets

Batch 2 tested: zig (246), flutter (2082), chromedriver (10300),
terraform (5550), julia (1783), iterm2 (262), mariadb (207), gpg (45)
serviceman/aliasman: 0 (source-only, no binary assets)
This commit is contained in:
AJ ONeal
2026-03-10 00:22:33 -06:00
parent d398625f5d
commit f7a6db53b3
3 changed files with 47 additions and 30 deletions

View File

@@ -57,6 +57,28 @@ source archives (`_src.tar.gz`), and `buildable-artifact`.
- Node "odd major = beta" heuristic — v15, v17, v19, v21, v23 are "current" not LTS
- Go version prefix: stripped `go` from `go1.23.6` → `1.23.6` for clean parsing
## Batch 2 (zig, flutter, chromedriver, terraform, julia, iterm2, mariadb, gpg, serviceman, aliasman)
### Zig Fetcher Fix
The zig upstream API returns `"size"` as a JSON string, not a number.
Changed `Platform.Size` from `int64` to `json.Number` to avoid unmarshal failures.
Also changed `Platforms` tag from `json:"-"` to `json:"platforms,omitempty"` so
platform data is preserved in cache.
### Source-Only Packages
serviceman and aliasman have GitHub releases with empty `assets:[]`. These are
source-only repos that install via `go install` or script download, not binary
releases. The classifier correctly produces 0 distributables for them — they
don't belong in the binary CSV.
### Flutter Arch Detection
Early Flutter releases (pre-2020) had no arch-specific builds — single
platform SDK. No arch in filename → empty arch in CSV. This is correct;
the installer would default to x86_64 on supported platforms.
### TODO for Next Batches
- Hugo "extended" variant should be captured in `extra` column

View File

@@ -537,9 +537,9 @@ type zigRelease struct {
}
type zigPlatform struct {
Tarball string `json:"tarball"`
Shasum string `json:"shasum"`
Size int64 `json:"size"`
Tarball string `json:"tarball"`
Shasum string `json:"shasum"`
Size json.Number `json:"size"`
}
func classifyZigDist(pkg string, d *rawcache.Dir) ([]Dist, error) {
@@ -550,33 +550,23 @@ func classifyZigDist(pkg string, d *rawcache.Dir) ([]Dist, error) {
var dists []Dist
for _, data := range releases {
// Parse the raw JSON to get version, date, and platform entries.
var raw map[string]json.RawMessage
if err := json.Unmarshal(data, &raw); err != nil {
// Parse the cached JSON which has version, date, platforms.
var rel struct {
Version string `json:"version"`
Date string `json:"date"`
Platforms map[string]zigPlatform `json:"platforms"`
}
if err := json.Unmarshal(data, &rel); err != nil {
continue
}
var version, date string
if v, ok := raw["version"]; ok {
json.Unmarshal(v, &version)
}
if d, ok := raw["date"]; ok {
json.Unmarshal(d, &date)
}
channel := "stable"
if strings.Contains(version, "+") || strings.Contains(version, "-") || !strings.Contains(version, ".") {
if strings.Contains(rel.Version, "+") || strings.Contains(rel.Version, "-") || !strings.Contains(rel.Version, ".") {
channel = "beta"
}
for key, val := range raw {
switch key {
case "version", "date", "notes", "src":
continue
}
var plat zigPlatform
if err := json.Unmarshal(val, &plat); err != nil || plat.Tarball == "" {
for key, plat := range rel.Platforms {
if plat.Tarball == "" {
continue
}
@@ -592,7 +582,7 @@ func classifyZigDist(pkg string, d *rawcache.Dir) ([]Dist, error) {
dists = append(dists, Dist{
Package: pkg,
Version: version,
Version: rel.Version,
Channel: channel,
OS: os_,
Arch: arch,
@@ -600,8 +590,8 @@ func classifyZigDist(pkg string, d *rawcache.Dir) ([]Dist, error) {
Download: plat.Tarball,
Filename: filepath.Base(plat.Tarball),
SHA256: plat.Shasum,
Size: plat.Size,
Date: date,
Size: zigSize(plat.Size),
Date: rel.Date,
})
}
}
@@ -633,6 +623,11 @@ func normalizeZigArch(a string) string {
}
}
// zigSize converts the size reported for a zig platform entry into an int64.
//
// It returns 0 when n is empty, is not a base-10 integer, or does not fit in
// an int64. json.Number.Int64 yields a clamped value alongside its error on
// overflow, so the error is checked explicitly rather than discarded; a
// malformed size must not surface as a bogus byte count.
func zigSize(n json.Number) int64 {
	v, err := n.Int64()
	if err != nil {
		// Size is informational; treat an unparseable value as unknown (0).
		return 0
	}
	return v
}
func normalizeZigOS(o string) string {
switch o {
case "macos":

View File

@@ -22,14 +22,14 @@ type Release struct {
Version string `json:"version"` // set by us from the key or inner "version" field
Date string `json:"date"`
Notes string `json:"notes,omitempty"`
Platforms map[string]Platform `json:"-"` // "x86_64-linux" → Platform
Platforms map[string]Platform `json:"platforms,omitempty"` // "x86_64-linux" → Platform
}
// Platform is one downloadable artifact for a specific arch-os combo.
type Platform struct {
Tarball string `json:"tarball"`
Shasum string `json:"shasum"`
Size int64 `json:"size"`
Tarball string `json:"tarball"`
Shasum string `json:"shasum"`
Size json.Number `json:"size"` // upstream sends as string
}
// Fetch retrieves the Zig release index.