mirror of
https://github.com/webinstall/webi-installers.git
synced 2026-04-06 10:26:49 +00:00
feat: complete classification of all 116 packages (169,867 rows)
- Add asset_filter/asset_exclude conf keys for shared-repo packages
- Split hugo/hugo-extended: exclude/require "extended" in asset name
- Add macosx, ia32, .snap, .appx classifier patterns
- Fix zig Platform.Size JSON string type (was int64, upstream sends string)
- Filter install scripts, cosign keys, compat.json as meta-assets
- Add riscv64, loong64, armv5, mipsle, mips64le to buildmeta

Full classification produces 169,867 distributable rows across 116 packages.
This commit is contained in:
48
CAT-RULES.md
48
CAT-RULES.md
@@ -79,10 +79,52 @@ Early Flutter releases (pre-2020) had no arch-specific builds — single
|
||||
platform SDK. No arch in filename → empty arch in CSV. This is correct;
|
||||
the installer would default to x86_64 on supported platforms.
|
||||
|
||||
### TODO for Next Batches
|
||||
## Batch 3 (25 packages: arc through gitdeploy)
|
||||
|
||||
### New Classifier Patterns
|
||||
|
||||
- `macosx` → darwin (syncthing uses `macosx`)
|
||||
- `ia32` → x86 (dart-sass uses `ia32`)
|
||||
- `.snap` format → Linux-only
|
||||
- `.appx` format added for PowerShell
|
||||
|
||||
### New Meta-Asset Filters
|
||||
|
||||
- `.pub` (cosign keys)
|
||||
- `install.sh`, `install.ps1` (install scripts)
|
||||
- `compat.json` (syncthing metadata)
|
||||
|
||||
## Batch 4 (62 remaining packages) + Full Run
|
||||
|
||||
### Hugo/Hugo-Extended Split
|
||||
|
||||
hugo-extended shares the same GitHub repo as hugo. Added `asset_filter` and
|
||||
`asset_exclude` conf keys to split them:
|
||||
- `hugo/releases.conf`: `asset_exclude = extended` (6,354 assets)
|
||||
- `hugo-extended/releases.conf`: `asset_filter = extended` (2,193 assets)
|
||||
|
||||
User direction: "hugo-extended should be a separate release. I believe the
|
||||
README covered this. I think it should have been the default."
|
||||
|
||||
### Remaining Empty-Field Patterns (Per-Installer Territory)
|
||||
|
||||
These have empty OS or arch from the generic classifier and need per-installer
|
||||
config to resolve:
|
||||
- Git-for-Windows: `Git-2.x.x-32-bit.tar.bz2` — no OS in filename, always Windows
|
||||
- CMake: HP-UX, IRIX targets — exotic/dead platforms
|
||||
- Dashcore: old naming conventions
|
||||
- Old PowerShell `.msi` files — no arch in filename
|
||||
- Bare binaries (ollama-darwin, caddy2_beta12_macos) — no arch info
|
||||
|
||||
### Full Results
|
||||
|
||||
169,867 distributable rows across 116 packages.
|
||||
3 packages produce 0 rows: serviceman, aliasman (source-only), duckdns.sh.
|
||||
|
||||
### TODO
|
||||
|
||||
- Hugo "extended" variant should be captured in `extra` column
|
||||
- Consider whether bare binaries (no format extension) should get a format marker
|
||||
- Track `_extended` suffix detection more broadly
|
||||
- Per-installer configs for packages with known-but-undetectable OS/arch
|
||||
- `arm32` classification: leave to per-installer unless pattern emerges
|
||||
- `arm32` is vague — may mean armv6 or armv7. Leave as per-installer responsibility
|
||||
unless a distinct pattern emerges (user direction 2026-03-10)
|
||||
|
||||
@@ -237,6 +237,8 @@ type ghAsset struct {
|
||||
|
||||
func classifyGitHub(pkg string, conf *installerconf.Conf, d *rawcache.Dir) ([]Dist, error) {
|
||||
tagPrefix := conf.Get("tag_prefix")
|
||||
assetFilter := strings.ToLower(conf.Get("asset_filter")) // asset must contain this
|
||||
assetExclude := strings.ToLower(conf.Get("asset_exclude")) // asset must NOT contain this
|
||||
releases, err := readAllReleases(d)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -269,12 +271,21 @@ func classifyGitHub(pkg string, conf *installerconf.Conf, d *rawcache.Dir) ([]Di
|
||||
|
||||
for _, asset := range rel.Assets {
|
||||
name := asset.Name
|
||||
lower := strings.ToLower(name)
|
||||
|
||||
// Skip checksums, signatures, SBOMs, etc.
|
||||
if isMetaAsset(name) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Per-package asset filters.
|
||||
if assetFilter != "" && !strings.Contains(lower, assetFilter) {
|
||||
continue
|
||||
}
|
||||
if assetExclude != "" && strings.Contains(lower, assetExclude) {
|
||||
continue
|
||||
}
|
||||
|
||||
os_, arch, libc, format := classifyFilename(name)
|
||||
|
||||
dists = append(dists, Dist{
|
||||
@@ -1206,8 +1217,9 @@ func isMetaAsset(name string) bool {
|
||||
"checksums.txt", "sha256sums", "sha512sums",
|
||||
".sbom", ".spdx", ".json.sig", ".sigstore",
|
||||
"_src.tar.gz", "_src.tar.xz", "_src.zip",
|
||||
".d.ts", // TypeScript definitions
|
||||
".tgz", // npm packages (not binary distributables)
|
||||
".d.ts", // TypeScript definitions
|
||||
".tgz", // npm packages (not binary distributables)
|
||||
".pub", // cosign/SSH public keys
|
||||
} {
|
||||
if strings.HasSuffix(lower, suffix) {
|
||||
return true
|
||||
@@ -1221,6 +1233,14 @@ func isMetaAsset(name string) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
// Exact name matches for known non-distributable files.
|
||||
for _, exact := range []string{
|
||||
"install.sh", "install.ps1", "compat.json",
|
||||
} {
|
||||
if lower == exact {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
source = github
|
||||
owner = gohugoio
|
||||
repo = hugo
|
||||
asset_filter = extended
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
source = github
|
||||
owner = gohugoio
|
||||
repo = hugo
|
||||
asset_exclude = extended
|
||||
|
||||
@@ -88,6 +88,8 @@ const (
|
||||
Format7z Format = ".7z"
|
||||
FormatDeb Format = ".deb"
|
||||
FormatRPM Format = ".rpm"
|
||||
FormatSnap Format = ".snap"
|
||||
FormatAppx Format = ".appx"
|
||||
FormatSh Format = ".sh"
|
||||
FormatGit Format = ".git"
|
||||
)
|
||||
|
||||
@@ -50,8 +50,8 @@ func Filename(name string) Result {
|
||||
|
||||
format := detectFormat(lower)
|
||||
|
||||
// .deb and .rpm are Linux-only package formats.
|
||||
if os == "" && (format == buildmeta.FormatDeb || format == buildmeta.FormatRPM) {
|
||||
// .deb, .rpm, .snap are Linux-only package formats.
|
||||
if os == "" && (format == buildmeta.FormatDeb || format == buildmeta.FormatRPM || format == buildmeta.FormatSnap) {
|
||||
os = buildmeta.OSLinux
|
||||
}
|
||||
// .app.zip and .dmg are macOS-only formats.
|
||||
@@ -78,7 +78,7 @@ var osPatterns = []struct {
|
||||
os buildmeta.OS
|
||||
pattern *regexp.Regexp
|
||||
}{
|
||||
{buildmeta.OSDarwin, regexp.MustCompile(`(?i)(?:` + b + `(?:darwin|macos|osx|os-x|apple)` + bEnd + `|` + b + `mac` + bEnd + `)`)},
|
||||
{buildmeta.OSDarwin, regexp.MustCompile(`(?i)(?:` + b + `(?:darwin|macos|macosx|osx|os-x|apple)` + bEnd + `|` + b + `mac` + bEnd + `)`)},
|
||||
{buildmeta.OSLinux, regexp.MustCompile(`(?i)` + b + `linux` + bEnd)},
|
||||
{buildmeta.OSWindows, regexp.MustCompile(`(?i)` + b + `(?:windows|win(?:32|64|dows)?)` + bEnd + `|\.exe(?:\.xz)?$|\.msi$`)},
|
||||
{buildmeta.OSFreeBSD, regexp.MustCompile(`(?i)` + b + `freebsd` + bEnd)},
|
||||
@@ -131,7 +131,7 @@ var archPatterns = []struct {
|
||||
{buildmeta.ArchMIPSLE, regexp.MustCompile(`(?i)mips(?:el|le)`)},
|
||||
{buildmeta.ArchMIPS, regexp.MustCompile(`(?i)` + b + `mips` + bEnd)},
|
||||
// x86 last — must not steal x86_64.
|
||||
{buildmeta.ArchX86, regexp.MustCompile(`(?i)(?:` + b + `x86` + bEnd + `|i[3-6]86|` + b + `386` + bEnd + `|32-?bit)`)},
|
||||
{buildmeta.ArchX86, regexp.MustCompile(`(?i)(?:` + b + `x86` + bEnd + `|i[3-6]86|ia32|` + b + `386` + bEnd + `|32-?bit)`)},
|
||||
}
|
||||
|
||||
func detectArch(lower string) buildmeta.Arch {
|
||||
@@ -189,6 +189,8 @@ var formatSuffixes = []struct {
|
||||
{".dmg", buildmeta.FormatDMG},
|
||||
{".deb", buildmeta.FormatDeb},
|
||||
{".rpm", buildmeta.FormatRPM},
|
||||
{".snap", buildmeta.FormatSnap},
|
||||
{".appx", buildmeta.FormatAppx},
|
||||
{".pkg", buildmeta.FormatPkg},
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user