feat: complete classification of all 116 packages (169,867 rows)

- Add asset_filter/asset_exclude conf keys for shared-repo packages
- Split hugo/hugo-extended: exclude/require "extended" in asset name
- Add macosx, ia32, .snap, .appx classifier patterns
- Fix zig Platform.Size JSON string type (was int64, upstream sends string)
- Filter install scripts, cosign keys, compat.json as meta-assets
- Add riscv64, loong64, armv5, mipsle, mips64le to buildmeta

Full classification produces 169,867 distributable rows across 116 packages.
This commit is contained in:
AJ ONeal
2026-03-10 00:27:57 -06:00
parent e78a721b51
commit 28dab7dade
6 changed files with 77 additions and 9 deletions

View File

@@ -79,10 +79,52 @@ Early Flutter releases (pre-2020) had no arch-specific builds — single
platform SDK. No arch in filename → empty arch in CSV. This is correct;
the installer would default to x86_64 on supported platforms.
### TODO for Next Batches
## Batch 3 (25 packages: arc through gitdeploy)
### New Classifier Patterns
- `macosx` → darwin (syncthing uses `macosx`)
- `ia32` → x86 (dart-sass uses `ia32`)
- `.snap` format → Linux-only
- `.appx` format added for PowerShell
### New Meta-Asset Filters
- `.pub` (cosign keys)
- `install.sh`, `install.ps1` (install scripts)
- `compat.json` (syncthing metadata)
## Batch 4 (62 remaining packages) + Full Run
### Hugo/Hugo-Extended Split
hugo-extended shares the same GitHub repo as hugo. Added `asset_filter` and
`asset_exclude` conf keys to split them:
- `hugo/releases.conf`: `asset_exclude = extended` (6,354 assets)
- `hugo-extended/releases.conf`: `asset_filter = extended` (2,193 assets)
User direction: "hugo-extended should be a separate release. I believe the
README covered this. I think it should have been the default."
### Remaining Empty-Field Patterns (Per-Installer Territory)
The generic classifier leaves the OS or arch field empty for the following
assets; resolving them requires per-installer config:
- Git-for-Windows: `Git-2.x.x-32-bit.tar.bz2` — no OS in filename, always Windows
- CMake: HP-UX, IRIX targets — exotic/dead platforms
- Dashcore: old naming conventions
- Old PowerShell `.msi` files — no arch in filename
- Bare binaries (ollama-darwin, caddy2_beta12_macos) — no arch info
### Full Results
169,867 distributable rows across 116 packages.
Three packages produce 0 rows: serviceman, aliasman (source-only), and duckdns.sh.
### TODO
- Hugo "extended" variant should be captured in `extra` column
- Consider whether bare binaries (no format extension) should get a format marker
- Track `_extended` suffix detection more broadly
- Per-installer configs for packages with known-but-undetectable OS/arch
- `arm32` classification: `arm32` is vague — it may mean armv6 or armv7. Leave it as
a per-installer responsibility unless a distinct pattern emerges (user direction 2026-03-10)

View File

@@ -237,6 +237,8 @@ type ghAsset struct {
func classifyGitHub(pkg string, conf *installerconf.Conf, d *rawcache.Dir) ([]Dist, error) {
tagPrefix := conf.Get("tag_prefix")
assetFilter := strings.ToLower(conf.Get("asset_filter")) // asset must contain this
assetExclude := strings.ToLower(conf.Get("asset_exclude")) // asset must NOT contain this
releases, err := readAllReleases(d)
if err != nil {
return nil, err
@@ -269,12 +271,21 @@ func classifyGitHub(pkg string, conf *installerconf.Conf, d *rawcache.Dir) ([]Di
for _, asset := range rel.Assets {
name := asset.Name
lower := strings.ToLower(name)
// Skip checksums, signatures, SBOMs, etc.
if isMetaAsset(name) {
continue
}
// Per-package asset filters.
if assetFilter != "" && !strings.Contains(lower, assetFilter) {
continue
}
if assetExclude != "" && strings.Contains(lower, assetExclude) {
continue
}
os_, arch, libc, format := classifyFilename(name)
dists = append(dists, Dist{
@@ -1206,8 +1217,9 @@ func isMetaAsset(name string) bool {
"checksums.txt", "sha256sums", "sha512sums",
".sbom", ".spdx", ".json.sig", ".sigstore",
"_src.tar.gz", "_src.tar.xz", "_src.zip",
".d.ts", // TypeScript definitions
".tgz", // npm packages (not binary distributables)
".d.ts", // TypeScript definitions
".tgz", // npm packages (not binary distributables)
".pub", // cosign/SSH public keys
} {
if strings.HasSuffix(lower, suffix) {
return true
@@ -1221,6 +1233,14 @@ func isMetaAsset(name string) bool {
return true
}
}
// Exact name matches for known non-distributable files.
for _, exact := range []string{
"install.sh", "install.ps1", "compat.json",
} {
if lower == exact {
return true
}
}
return false
}

View File

@@ -1,3 +1,4 @@
source = github
owner = gohugoio
repo = hugo
asset_filter = extended

View File

@@ -1,3 +1,4 @@
source = github
owner = gohugoio
repo = hugo
asset_exclude = extended

View File

@@ -88,6 +88,8 @@ const (
Format7z Format = ".7z"
FormatDeb Format = ".deb"
FormatRPM Format = ".rpm"
FormatSnap Format = ".snap"
FormatAppx Format = ".appx"
FormatSh Format = ".sh"
FormatGit Format = ".git"
)

View File

@@ -50,8 +50,8 @@ func Filename(name string) Result {
format := detectFormat(lower)
// .deb and .rpm are Linux-only package formats.
if os == "" && (format == buildmeta.FormatDeb || format == buildmeta.FormatRPM) {
// .deb, .rpm, .snap are Linux-only package formats.
if os == "" && (format == buildmeta.FormatDeb || format == buildmeta.FormatRPM || format == buildmeta.FormatSnap) {
os = buildmeta.OSLinux
}
// .app.zip and .dmg are macOS-only formats.
@@ -78,7 +78,7 @@ var osPatterns = []struct {
os buildmeta.OS
pattern *regexp.Regexp
}{
{buildmeta.OSDarwin, regexp.MustCompile(`(?i)(?:` + b + `(?:darwin|macos|osx|os-x|apple)` + bEnd + `|` + b + `mac` + bEnd + `)`)},
{buildmeta.OSDarwin, regexp.MustCompile(`(?i)(?:` + b + `(?:darwin|macos|macosx|osx|os-x|apple)` + bEnd + `|` + b + `mac` + bEnd + `)`)},
{buildmeta.OSLinux, regexp.MustCompile(`(?i)` + b + `linux` + bEnd)},
{buildmeta.OSWindows, regexp.MustCompile(`(?i)` + b + `(?:windows|win(?:32|64|dows)?)` + bEnd + `|\.exe(?:\.xz)?$|\.msi$`)},
{buildmeta.OSFreeBSD, regexp.MustCompile(`(?i)` + b + `freebsd` + bEnd)},
@@ -131,7 +131,7 @@ var archPatterns = []struct {
{buildmeta.ArchMIPSLE, regexp.MustCompile(`(?i)mips(?:el|le)`)},
{buildmeta.ArchMIPS, regexp.MustCompile(`(?i)` + b + `mips` + bEnd)},
// x86 last — must not steal x86_64.
{buildmeta.ArchX86, regexp.MustCompile(`(?i)(?:` + b + `x86` + bEnd + `|i[3-6]86|` + b + `386` + bEnd + `|32-?bit)`)},
{buildmeta.ArchX86, regexp.MustCompile(`(?i)(?:` + b + `x86` + bEnd + `|i[3-6]86|ia32|` + b + `386` + bEnd + `|32-?bit)`)},
}
func detectArch(lower string) buildmeta.Arch {
@@ -189,6 +189,8 @@ var formatSuffixes = []struct {
{".dmg", buildmeta.FormatDMG},
{".deb", buildmeta.FormatDeb},
{".rpm", buildmeta.FormatRPM},
{".snap", buildmeta.FormatSnap},
{".appx", buildmeta.FormatAppx},
{".pkg", buildmeta.FormatPkg},
}