From 631147901af07ed6462057477a0660a38d8aba64 Mon Sep 17 00:00:00 2001 From: AJ ONeal Date: Thu, 7 May 2026 15:39:40 -0600 Subject: [PATCH] feat: add Go release cache daemon (webicached) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewrites the Node.js release classification pipeline in Go. webicached fetches upstream releases (GitHub, Gitea, GitLab, HashiCorp, custom sources), classifies assets by OS/arch/variant, and writes legacy-format JSON caches compatible with the existing webinstall.dev API. Git-clone packages emit git_tag and git_commit_hash from real repo clones — no fabricated refs. --- cmd/webicached/main.go | 880 +++++++++++ ffmpeg/releases.conf | 1 - go.mod | 15 + go.sum | 25 + internal/buildmeta/buildmeta.go | 168 ++ internal/classify/classify.go | 283 ++++ internal/classify/classify_test.go | 352 +++++ internal/classifypkg/classifypkg.go | 1357 +++++++++++++++++ internal/installerconf/installerconf.go | 277 ++++ internal/installerconf/installerconf_test.go | 217 +++ internal/lexver/lexver.go | 189 +++ internal/lexver/lexver_test.go | 270 ++++ internal/rawcache/auditlog.go | 63 + internal/rawcache/rawcache.go | 265 ++++ internal/rawcache/rawcache_test.go | 173 +++ internal/releases/atomicparsley/variants.go | 50 + internal/releases/bun/variants.go | 39 + internal/releases/chromedist/chromedist.go | 72 + internal/releases/cmake/variants.go | 60 + internal/releases/fish/variants.go | 28 + internal/releases/flutterdist/flutterdist.go | 94 ++ internal/releases/flutterdist/variants.go | 16 + internal/releases/git/variants.go | 52 + internal/releases/git/versions.go | 33 + internal/releases/gitea/gitea.go | 120 ++ internal/releases/gitea/gitea_test.go | 107 ++ internal/releases/gitea/variants.go | 25 + internal/releases/giteasrc/giteasrc.go | 25 + internal/releases/github/github.go | 22 + internal/releases/githubish/githubish.go | 112 ++ internal/releases/githubish/githubish_test.go | 201 +++ 
internal/releases/githubsrc/githubsrc.go | 27 + internal/releases/gitlab/gitlab.go | 122 ++ internal/releases/gitlab/gitlab_test.go | 182 +++ internal/releases/gitlabsrc/gitlabsrc.go | 25 + internal/releases/gittag/gittag.go | 178 +++ internal/releases/gittag/gittag_test.go | 56 + internal/releases/golang/golang.go | 72 + internal/releases/gpgdist/gpgdist.go | 70 + internal/releases/hashicorp/hashicorp.go | 79 + internal/releases/iterm2dist/iterm2dist.go | 105 ++ internal/releases/juliadist/juliadist.go | 89 ++ internal/releases/lsd/variants.go | 23 + internal/releases/mariadbdist/mariadbdist.go | 159 ++ internal/releases/node/node.go | 39 + internal/releases/node/node_test.go | 36 + internal/releases/node/variants.go | 20 + internal/releases/nodedist/nodedist.go | 108 ++ internal/releases/nodedist/nodedist_test.go | 143 ++ internal/releases/ollama/variants.go | 32 + internal/releases/postgres/versions.go | 80 + internal/releases/pwsh/variants.go | 37 + internal/releases/sass/variants.go | 19 + .../releases/servicemandist/servicemandist.go | 75 + internal/releases/servicemandist/variants.go | 16 + internal/releases/sttr/variants.go | 36 + internal/releases/uuidv7/variants.go | 18 + internal/releases/watchexec/variants.go | 18 + internal/releases/watchexec/versions.go | 18 + internal/releases/xcaddy/variants.go | 15 + internal/releases/xz/variants.go | 16 + internal/releases/zigdist/zigdist.go | 131 ++ internal/storage/fsstore/fsstore.go | 207 +++ internal/storage/fsstore/fsstore_test.go | 138 ++ internal/storage/legacy.go | 444 ++++++ internal/storage/legacy_test.go | 609 ++++++++ internal/storage/storage.go | 71 + 67 files changed, 9103 insertions(+), 1 deletion(-) create mode 100644 cmd/webicached/main.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/buildmeta/buildmeta.go create mode 100644 internal/classify/classify.go create mode 100644 internal/classify/classify_test.go create mode 100644 internal/classifypkg/classifypkg.go 
create mode 100644 internal/installerconf/installerconf.go create mode 100644 internal/installerconf/installerconf_test.go create mode 100644 internal/lexver/lexver.go create mode 100644 internal/lexver/lexver_test.go create mode 100644 internal/rawcache/auditlog.go create mode 100644 internal/rawcache/rawcache.go create mode 100644 internal/rawcache/rawcache_test.go create mode 100644 internal/releases/atomicparsley/variants.go create mode 100644 internal/releases/bun/variants.go create mode 100644 internal/releases/chromedist/chromedist.go create mode 100644 internal/releases/cmake/variants.go create mode 100644 internal/releases/fish/variants.go create mode 100644 internal/releases/flutterdist/flutterdist.go create mode 100644 internal/releases/flutterdist/variants.go create mode 100644 internal/releases/git/variants.go create mode 100644 internal/releases/git/versions.go create mode 100644 internal/releases/gitea/gitea.go create mode 100644 internal/releases/gitea/gitea_test.go create mode 100644 internal/releases/gitea/variants.go create mode 100644 internal/releases/giteasrc/giteasrc.go create mode 100644 internal/releases/github/github.go create mode 100644 internal/releases/githubish/githubish.go create mode 100644 internal/releases/githubish/githubish_test.go create mode 100644 internal/releases/githubsrc/githubsrc.go create mode 100644 internal/releases/gitlab/gitlab.go create mode 100644 internal/releases/gitlab/gitlab_test.go create mode 100644 internal/releases/gitlabsrc/gitlabsrc.go create mode 100644 internal/releases/gittag/gittag.go create mode 100644 internal/releases/gittag/gittag_test.go create mode 100644 internal/releases/golang/golang.go create mode 100644 internal/releases/gpgdist/gpgdist.go create mode 100644 internal/releases/hashicorp/hashicorp.go create mode 100644 internal/releases/iterm2dist/iterm2dist.go create mode 100644 internal/releases/juliadist/juliadist.go create mode 100644 internal/releases/lsd/variants.go create mode 100644 
internal/releases/mariadbdist/mariadbdist.go create mode 100644 internal/releases/node/node.go create mode 100644 internal/releases/node/node_test.go create mode 100644 internal/releases/node/variants.go create mode 100644 internal/releases/nodedist/nodedist.go create mode 100644 internal/releases/nodedist/nodedist_test.go create mode 100644 internal/releases/ollama/variants.go create mode 100644 internal/releases/postgres/versions.go create mode 100644 internal/releases/pwsh/variants.go create mode 100644 internal/releases/sass/variants.go create mode 100644 internal/releases/servicemandist/servicemandist.go create mode 100644 internal/releases/servicemandist/variants.go create mode 100644 internal/releases/sttr/variants.go create mode 100644 internal/releases/uuidv7/variants.go create mode 100644 internal/releases/watchexec/variants.go create mode 100644 internal/releases/watchexec/versions.go create mode 100644 internal/releases/xcaddy/variants.go create mode 100644 internal/releases/xz/variants.go create mode 100644 internal/releases/zigdist/zigdist.go create mode 100644 internal/storage/fsstore/fsstore.go create mode 100644 internal/storage/fsstore/fsstore_test.go create mode 100644 internal/storage/legacy.go create mode 100644 internal/storage/legacy_test.go create mode 100644 internal/storage/storage.go diff --git a/cmd/webicached/main.go b/cmd/webicached/main.go new file mode 100644 index 0000000..4b9fc58 --- /dev/null +++ b/cmd/webicached/main.go @@ -0,0 +1,880 @@ +// Command webicached is the release cache daemon. It fetches releases +// from upstream sources, classifies build assets, and writes them to +// the _cache/ directory in the format the Node.js server expects. +// +// This is the Go replacement for the Node.js release-fetching pipeline. +// It reads releases.conf files to discover packages, fetches from the +// configured source, classifies assets, and writes to fsstore. 
+// +// Default mode: classify all from existing rawcache on startup, then +// fetch+refresh one package per tick (round-robin, 15m default). +// +// Usage: +// +// go run ./cmd/webicached # default: round-robin, one per tick +// go run ./cmd/webicached -eager # fetch all packages on startup +// go run ./cmd/webicached -once -no-fetch # classify from rawcache and exit +// go run ./cmd/webicached bat goreleaser # only these packages +package main + +import ( + "context" + "encoding/json" + "errors" + "flag" + "fmt" + "io" + "log" + "math/rand/v2" + "net/http" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/joho/godotenv" + "github.com/webinstall/webi-installers/internal/classifypkg" + "github.com/webinstall/webi-installers/internal/installerconf" + "github.com/webinstall/webi-installers/internal/rawcache" + "github.com/webinstall/webi-installers/internal/releases/chromedist" + "github.com/webinstall/webi-installers/internal/releases/flutterdist" + "github.com/webinstall/webi-installers/internal/releases/gitea" + "github.com/webinstall/webi-installers/internal/releases/github" + "github.com/webinstall/webi-installers/internal/releases/githubish" + "github.com/webinstall/webi-installers/internal/releases/gittag" + "github.com/webinstall/webi-installers/internal/releases/golang" + "github.com/webinstall/webi-installers/internal/releases/gpgdist" + "github.com/webinstall/webi-installers/internal/releases/hashicorp" + "github.com/webinstall/webi-installers/internal/releases/iterm2dist" + "github.com/webinstall/webi-installers/internal/releases/juliadist" + "github.com/webinstall/webi-installers/internal/releases/mariadbdist" + "github.com/webinstall/webi-installers/internal/releases/nodedist" + "github.com/webinstall/webi-installers/internal/releases/servicemandist" + "github.com/webinstall/webi-installers/internal/releases/zigdist" + "github.com/webinstall/webi-installers/internal/storage" + 
"github.com/webinstall/webi-installers/internal/storage/fsstore" +) + +var ( + name = "webicached" + version = "0.0.0-dev" + commit = "0000000" + date = "0001-01-01" + licenseYear = "2024" + licenseOwner = "AJ ONeal" + licenseType = "MPL-2.0" +) + +func printVersion(w io.Writer) { + b_ver := strings.TrimPrefix(version, "v") + _, _ = fmt.Fprintf(w, "%s v%s %s (%s)\n", name, b_ver, commit[:7], date) + _, _ = fmt.Fprintf(w, "Copyright (C) %s %s\n", licenseYear, licenseOwner) + _, _ = fmt.Fprintf(w, "Licensed under %s\n", licenseType) +} + +type MainConfig struct { + envFile string + confDir string + cacheDir string + rawDir string + token string + once bool + noFetch bool + shallow bool + eager bool + interval time.Duration + pageDelay time.Duration +} + +// WebiCache holds the configuration for the cache daemon. +type WebiCache struct { + ConfDir string // root directory with {pkg}/releases.conf files + Store storage.Store // classified asset storage (fsstore) + RawDir string // raw upstream response cache + Client *http.Client // HTTP client for upstream calls + Auth *githubish.Auth // GitHub API auth (optional) + Shallow bool // fetch only the first page of releases + NoFetch bool // skip fetching, classify from existing raw data only + PageDelay time.Duration // delay between paginated API requests +} + +// delayTransport wraps an http.RoundTripper to add a delay between requests. 
// delayTransport wraps an http.RoundTripper and spaces requests out so that
// consecutive requests start at least delay apart.
//
// The pacing state (last) is read and written without synchronization, so a
// single delayTransport must not be used by goroutines that issue requests
// concurrently.
type delayTransport struct {
	base  http.RoundTripper // transport that performs the actual request
	delay time.Duration     // minimum gap between request starts; <= 0 disables pacing
	last  time.Time         // when the previous request was handed to base; zero before the first
}

// RoundTrip waits until delay has elapsed since the previous request was
// started, then forwards req to the base transport.
//
// The wait is abandoned as soon as req's context is done. In that case the
// request is never sent, its body (if any) is closed, and the context's
// error is returned.
func (t *delayTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	if !t.last.IsZero() && t.delay > 0 {
		if wait := t.delay - time.Since(t.last); wait > 0 {
			timer := time.NewTimer(wait)
			select {
			case <-timer.C:
			case <-req.Context().Done():
				timer.Stop()
				// A RoundTripper must close the request body even when
				// it returns an error.
				if req.Body != nil {
					_ = req.Body.Close()
				}
				return nil, req.Context().Err()
			}
		}
	}
	t.last = time.Now()
	return t.base.RoundTrip(req)
}
+ } else { + saved := wc.NoFetch + wc.NoFetch = true + wc.Run(filterPkgs) + wc.NoFetch = saved + } + + packages, err := discover(wc.ConfDir) + if err != nil { + log.Fatalf("discover: %v", err) + } + if len(filterPkgs) > 0 { + nameSet := make(map[string]bool, len(filterPkgs)) + for _, a := range filterPkgs { + nameSet[a] = true + } + var filtered []pkgConf + for _, p := range packages { + if nameSet[p.name] { + filtered = append(filtered, p) + } + } + packages = filtered + } + + var real []pkgConf + for _, pkg := range packages { + if pkg.conf.AliasOf == "" { + real = append(real, pkg) + } + } + + log.Printf("refreshing %d packages, interval %s, batch size 20 (ctrl-c to stop)", len(real), cfg.interval) + for { + stale := wc.stalest(real) + if len(stale) == 0 { + log.Printf("all packages fresh, sleeping %s", cfg.interval) + time.Sleep(cfg.interval) + continue + } + + batch := stale + if len(batch) > 20 { + batch = batch[:20] + } + rand.Shuffle(len(batch), func(i, j int) { + batch[i], batch[j] = batch[j], batch[i] + }) + + log.Printf("batch: %d stale, refreshing %d (most stale first)", len(stale), len(batch)) + for _, pkg := range batch { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + if err := wc.refreshPackage(ctx, pkg); err != nil { + log.Printf(" ERROR %s: %v", pkg.name, err) + } + cancel() + time.Sleep(cfg.interval) + } + } +} + +func registerFlags(fs *flag.FlagSet, cfg *MainConfig) { + fs.StringVar(&cfg.envFile, "envfile", "", "path to .env file to load before running") + fs.StringVar(&cfg.confDir, "conf", ".", "root directory containing {pkg}/releases.conf files") + fs.StringVar(&cfg.cacheDir, "legacy", "~/.cache/webi/legacy", "legacy cache directory (fsstore root)") + fs.StringVar(&cfg.rawDir, "raw", "~/.cache/webi/raw", "raw cache directory for upstream responses") + fs.StringVar(&cfg.token, "token", "", "GitHub API token (or set $GITHUB_TOKEN)") + fs.BoolVar(&cfg.once, "once", false, "run once then exit (no periodic refresh)") + 
// expandHome replaces a leading "~" path element with the current user's
// home directory.
//
// Both a bare "~" and a "~" followed by a path separator are expanded.
// Anything else (including "~user/..." forms) is returned unchanged, as is
// any path when the home directory cannot be determined.
func expandHome(path string) string {
	rest, ok := strings.CutPrefix(path, "~")
	if !ok {
		return path
	}
	// "~name/..." refers to another user's home; leave it alone.
	if rest != "" && !os.IsPathSeparator(rest[0]) {
		return path
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return path
	}
	// Join cleans the result, so the separator left on rest is harmless.
	return filepath.Join(home, rest)
}
+func (wc *WebiCache) Run(filterPkgs []string) { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute) + defer cancel() + + packages, err := discover(wc.ConfDir) + if err != nil { + log.Printf("discover: %v", err) + return + } + + if len(filterPkgs) > 0 { + nameSet := make(map[string]bool, len(filterPkgs)) + for _, a := range filterPkgs { + nameSet[a] = true + } + var filtered []pkgConf + for _, p := range packages { + if nameSet[p.name] { + filtered = append(filtered, p) + } + } + packages = filtered + } + + var real []pkgConf + for _, pkg := range packages { + if pkg.conf.AliasOf != "" { + continue + } + real = append(real, pkg) + } + + log.Printf("refreshing %d packages", len(real)) + runStart := time.Now() + + for _, pkg := range real { + if err := wc.refreshPackage(ctx, pkg); err != nil { + log.Printf(" ERROR %s: %v", pkg.name, err) + } + } + + log.Printf("refreshed %d packages in %s", len(real), time.Since(runStart)) +} + +type pkgConf struct { + name string + conf *installerconf.Conf +} + +func discover(dir string) ([]pkgConf, error) { + pattern := filepath.Join(dir, "*", "releases.conf") + matches, err := filepath.Glob(pattern) + if err != nil { + return nil, err + } + + var packages []pkgConf + for _, path := range matches { + pkgDir := filepath.Dir(path) + name := filepath.Base(pkgDir) + if strings.HasPrefix(name, "_") { + continue + } + + // If the package directory is a symlink, treat it as an alias + // of the symlink target (e.g. rust.vim → vim-rust). 
+ fi, err := os.Lstat(filepath.Join(dir, name)) + if err != nil { + log.Printf("warning: %s: %v", name, err) + continue + } + if fi.Mode()&os.ModeSymlink != 0 { + target, err := os.Readlink(filepath.Join(dir, name)) + if err != nil { + log.Printf("warning: readlink %s: %v", name, err) + continue + } + packages = append(packages, pkgConf{ + name: name, + conf: &installerconf.Conf{AliasOf: target}, + }) + continue + } + + conf, err := installerconf.Read(path) + if err != nil { + log.Printf("warning: %s: %v", path, err) + continue + } + packages = append(packages, pkgConf{name: name, conf: conf}) + } + + sort.Slice(packages, func(i, j int) bool { + return packages[i].name < packages[j].name + }) + return packages, nil +} + +// refreshPackage does the full pipeline for one package: +// fetch raw → classify → write to fsstore. +func (wc *WebiCache) refreshPackage(ctx context.Context, pkg pkgConf) error { + pkgStart := time.Now() + name := pkg.name + conf := pkg.conf + + // Step 1: Fetch raw upstream data to rawcache (unless -no-fetch). + if !wc.NoFetch { + shallow := wc.Shallow + if !shallow { + d, err := rawcache.Open(filepath.Join(wc.RawDir, name)) + if err == nil && d.Populated() { + shallow = true + } + } + fetchStart := time.Now() + if err := wc.fetchRaw(ctx, pkg, shallow); err != nil { + return fmt.Errorf("fetch: %w", err) + } + log.Printf(" %s: fetch %s", name, time.Since(fetchStart)) + } + + // Step 2: Classify raw data into assets, tag variants, apply config. + classifyStart := time.Now() + d, err := rawcache.Open(filepath.Join(wc.RawDir, name)) + if err != nil { + return fmt.Errorf("rawcache open: %w", err) + } + + // Open supplementary gittag raw cache if available (for packages with + // git_url that use a non-gittag source type like servicemandist). 
+ var gitTagDir *rawcache.Dir + if conf.GitURL != "" && conf.Source != "gittag" { + gd, gdErr := rawcache.Open(filepath.Join(wc.RawDir, "_gittag", name)) + if gdErr == nil && gd.Populated() { + gitTagDir = gd + } + } + + assets, err := classifypkg.Package(name, conf, d, gitTagDir) + if err != nil { + return fmt.Errorf("classify: %w", err) + } + classifyDur := time.Since(classifyStart) + + // Step 3: Write to fsstore. + writeStart := time.Now() + tx, err := wc.Store.BeginRefresh(ctx, name) + if err != nil { + return fmt.Errorf("begin refresh: %w", err) + } + if err := tx.Put(assets); err != nil { + tx.Rollback() + return fmt.Errorf("put: %w", err) + } + if err := tx.Commit(ctx); err != nil { + return fmt.Errorf("commit: %w", err) + } + writeDur := time.Since(writeStart) + + log.Printf(" %s: %d assets (classify %s, write %s, total %s)", + name, len(assets), classifyDur, writeDur, time.Since(pkgStart)) + return nil +} + +// --- Fetch raw --- + +func (wc *WebiCache) fetchRaw(ctx context.Context, pkg pkgConf, shallow bool) error { + switch pkg.conf.Source { + case "github", "githubsource": + if err := wc.fetchGitHub(ctx, pkg.name, pkg.conf, shallow); err != nil { + return err + } + case "nodedist": + return wc.fetchNodeDist(ctx, pkg.name, pkg.conf) + case "gittag": + return wc.fetchGitTag(ctx, pkg.name, pkg.conf, shallow) + case "gitea": + return wc.fetchGitea(ctx, pkg.name, pkg.conf, shallow) + case "chromedist": + return fetchChromeDist(ctx, wc.Client, wc.RawDir, pkg.name) + case "flutterdist": + return fetchFlutterDist(ctx, wc.Client, wc.RawDir, pkg.name) + case "golang": + return fetchGolang(ctx, wc.Client, wc.RawDir, pkg.name) + case "gpgdist": + return fetchGPGDist(ctx, wc.Client, wc.RawDir, pkg.name) + case "hashicorp": + return fetchHashiCorp(ctx, wc.Client, wc.RawDir, pkg.name, pkg.conf) + case "iterm2dist": + return fetchITerm2Dist(ctx, wc.Client, wc.RawDir, pkg.name) + case "juliadist": + return fetchJuliaDist(ctx, wc.Client, wc.RawDir, pkg.name) + case 
"mariadbdist": + return fetchMariaDBDist(ctx, wc.Client, wc.RawDir, pkg.name) + case "servicemandist": + if err := servicemandist.Fetch(ctx, wc.Client, wc.RawDir, pkg.name, wc.Auth, shallow); err != nil { + return err + } + case "zigdist": + return fetchZigDist(ctx, wc.Client, wc.RawDir, pkg.name) + default: + log.Printf(" %s: source %q not yet supported, skipping", pkg.name, pkg.conf.Source) + return nil + } + + // For non-gittag sources with a git_url, also clone the repo to get + // commit hashes. Git entries are classified from this data in + // refreshPackage, not from the main raw cache. + if pkg.conf.GitURL != "" && pkg.conf.Source != "gittag" { + if err := wc.fetchGitTagSupplementary(ctx, pkg.name, pkg.conf.GitURL, shallow); err != nil { + log.Printf(" %s: supplementary gittag fetch: %v", pkg.name, err) + } + } + return nil +} + +// fetchGitTagSupplementary clones a git repo to get commit hashes for +// packages that use a non-gittag source type (servicemandist, githubsource) +// but also have a git_url for source installs. 
+func (wc *WebiCache) fetchGitTagSupplementary(ctx context.Context, pkgName, gitURL string, shallow bool) error { + d, err := rawcache.Open(filepath.Join(wc.RawDir, "_gittag", pkgName)) + if err != nil { + return err + } + + repoDir := filepath.Join(wc.RawDir, "_repos") + os.MkdirAll(repoDir, 0o755) + + for batch, err := range gittag.Fetch(ctx, gitURL, repoDir) { + if err != nil { + return err + } + for _, entry := range batch { + tag := entry.Version + if tag == "" { + tag = "HEAD-" + entry.CommitHash + } + data, _ := json.Marshal(entry) + d.Merge(tag, data) + } + if shallow { + break + } + } + return nil +} + +func (wc *WebiCache) fetchGitHub(ctx context.Context, pkgName string, conf *installerconf.Conf, shallow bool) error { + owner, repo := conf.Owner, conf.Repo + if owner == "" || repo == "" { + return fmt.Errorf("missing owner or repo") + } + + d, err := rawcache.Open(filepath.Join(wc.RawDir, pkgName)) + if err != nil { + return err + } + + tagPrefix := conf.TagPrefix + for batch, err := range github.Fetch(ctx, wc.Client, owner, repo, wc.Auth) { + if err != nil { + return fmt.Errorf("github %s/%s: %w", owner, repo, err) + } + for _, rel := range batch { + if rel.Draft { + continue + } + tag := rel.TagName + if tagPrefix != "" && !strings.HasPrefix(tag, tagPrefix) { + continue + } + data, _ := json.Marshal(rel) + d.Merge(tag, data) + } + if shallow { + break + } + } + return nil +} + +func (wc *WebiCache) fetchNodeDist(ctx context.Context, pkgName string, conf *installerconf.Conf) error { + baseURL := conf.BaseURL + if baseURL == "" { + return fmt.Errorf("missing url") + } + + d, err := rawcache.Open(filepath.Join(wc.RawDir, pkgName)) + if err != nil { + return err + } + + // Fetch from primary URL. Tag with "official/" prefix so unofficial + // entries for the same version don't overwrite. 
+ for batch, err := range nodedist.Fetch(ctx, wc.Client, baseURL) { + if err != nil { + return err + } + for _, entry := range batch { + data, _ := json.Marshal(entry) + d.Merge("official/"+entry.Version, data) + } + } + + // Fetch from unofficial URL if configured (e.g. Node.js unofficial builds + // which add musl, riscv64, loong64 targets). + if unofficialURL := conf.Extra["unofficial_url"]; unofficialURL != "" { + for batch, err := range nodedist.Fetch(ctx, wc.Client, unofficialURL) { + if err != nil { + log.Printf("warning: %s unofficial fetch: %v", pkgName, err) + break + } + for _, entry := range batch { + data, _ := json.Marshal(entry) + d.Merge("unofficial/"+entry.Version, data) + } + } + } + + return nil +} + +func (wc *WebiCache) fetchGitTag(ctx context.Context, pkgName string, conf *installerconf.Conf, shallow bool) error { + gitURL := conf.BaseURL + if gitURL == "" { + return fmt.Errorf("missing url") + } + + d, err := rawcache.Open(filepath.Join(wc.RawDir, pkgName)) + if err != nil { + return err + } + + repoDir := filepath.Join(wc.RawDir, "_repos") + os.MkdirAll(repoDir, 0o755) + + for batch, err := range gittag.Fetch(ctx, gitURL, repoDir) { + if err != nil { + return err + } + for _, entry := range batch { + tag := entry.Version + if tag == "" { + tag = "HEAD-" + entry.CommitHash + } + data, _ := json.Marshal(entry) + d.Merge(tag, data) + } + if shallow { + break + } + } + return nil +} + +func (wc *WebiCache) fetchGitea(ctx context.Context, pkgName string, conf *installerconf.Conf, shallow bool) error { + baseURL, owner, repo := conf.BaseURL, conf.Owner, conf.Repo + if baseURL == "" || owner == "" || repo == "" { + return fmt.Errorf("missing base_url, owner, or repo") + } + + d, err := rawcache.Open(filepath.Join(wc.RawDir, pkgName)) + if err != nil { + return err + } + + for batch, err := range gitea.Fetch(ctx, wc.Client, baseURL, owner, repo, nil) { + if err != nil { + return err + } + for _, rel := range batch { + if rel.Draft { + continue + } + 
data, _ := json.Marshal(rel) + d.Merge(rel.TagName, data) + } + if shallow { + break + } + } + return nil +} + +func fetchChromeDist(ctx context.Context, client *http.Client, rawDir, pkgName string) error { + d, err := rawcache.Open(filepath.Join(rawDir, pkgName)) + if err != nil { + return err + } + + for batch, err := range chromedist.Fetch(ctx, client) { + if err != nil { + return fmt.Errorf("chromedist: %w", err) + } + for _, ver := range batch { + data, _ := json.Marshal(ver) + d.Merge(ver.Version, data) + } + } + return nil +} + +func fetchFlutterDist(ctx context.Context, client *http.Client, rawDir, pkgName string) error { + d, err := rawcache.Open(filepath.Join(rawDir, pkgName)) + if err != nil { + return err + } + + for batch, err := range flutterdist.Fetch(ctx, client) { + if err != nil { + return fmt.Errorf("flutterdist: %w", err) + } + for _, rel := range batch { + // Key by version+channel+os for uniqueness. + key := rel.Version + "-" + rel.Channel + "-" + rel.OS + data, _ := json.Marshal(rel) + d.Merge(key, data) + } + } + return nil +} + +func fetchGolang(ctx context.Context, client *http.Client, rawDir, pkgName string) error { + d, err := rawcache.Open(filepath.Join(rawDir, pkgName)) + if err != nil { + return err + } + + for batch, err := range golang.Fetch(ctx, client) { + if err != nil { + return fmt.Errorf("golang: %w", err) + } + for _, rel := range batch { + data, _ := json.Marshal(rel) + d.Merge(rel.Version, data) + } + } + return nil +} + +func fetchGPGDist(ctx context.Context, client *http.Client, rawDir, pkgName string) error { + d, err := rawcache.Open(filepath.Join(rawDir, pkgName)) + if err != nil { + return err + } + + for batch, err := range gpgdist.Fetch(ctx, client) { + if err != nil { + return fmt.Errorf("gpgdist: %w", err) + } + for _, entry := range batch { + data, _ := json.Marshal(entry) + d.Merge(entry.Version, data) + } + } + return nil +} + +func fetchHashiCorp(ctx context.Context, client *http.Client, rawDir, pkgName 
string, conf *installerconf.Conf) error { + product := conf.Repo + if product == "" { + product = pkgName + } + + d, err := rawcache.Open(filepath.Join(rawDir, pkgName)) + if err != nil { + return err + } + + for idx, err := range hashicorp.Fetch(ctx, client, product) { + if err != nil { + return fmt.Errorf("hashicorp %s: %w", product, err) + } + for ver, vdata := range idx.Versions { + data, _ := json.Marshal(vdata) + d.Merge(ver, data) + } + } + return nil +} + +func fetchITerm2Dist(ctx context.Context, client *http.Client, rawDir, pkgName string) error { + d, err := rawcache.Open(filepath.Join(rawDir, pkgName)) + if err != nil { + return err + } + + for batch, err := range iterm2dist.Fetch(ctx, client) { + if err != nil { + return fmt.Errorf("iterm2dist: %w", err) + } + for _, entry := range batch { + key := entry.Version + if entry.Channel == "beta" { + key += "-beta" + } + data, _ := json.Marshal(entry) + d.Merge(key, data) + } + } + return nil +} + +func fetchJuliaDist(ctx context.Context, client *http.Client, rawDir, pkgName string) error { + d, err := rawcache.Open(filepath.Join(rawDir, pkgName)) + if err != nil { + return err + } + + for batch, err := range juliadist.Fetch(ctx, client) { + if err != nil { + return fmt.Errorf("juliadist: %w", err) + } + for _, rel := range batch { + data, _ := json.Marshal(rel) + d.Merge(rel.Version, data) + } + } + return nil +} + +func fetchMariaDBDist(ctx context.Context, client *http.Client, rawDir, pkgName string) error { + d, err := rawcache.Open(filepath.Join(rawDir, pkgName)) + if err != nil { + return err + } + + for batch, err := range mariadbdist.Fetch(ctx, client) { + if err != nil { + return fmt.Errorf("mariadbdist: %w", err) + } + for _, rel := range batch { + data, _ := json.Marshal(rel) + d.Merge(rel.ReleaseID, data) + } + } + return nil +} + +func fetchZigDist(ctx context.Context, client *http.Client, rawDir, pkgName string) error { + d, err := rawcache.Open(filepath.Join(rawDir, pkgName)) + if err != nil { 
+ return err + } + + for batch, err := range zigdist.Fetch(ctx, client) { + if err != nil { + return fmt.Errorf("zigdist: %w", err) + } + for _, rel := range batch { + data, _ := json.Marshal(rel) + d.Merge(rel.Version, data) + } + } + return nil +} diff --git a/ffmpeg/releases.conf b/ffmpeg/releases.conf index 0d133ea..711a2ea 100644 --- a/ffmpeg/releases.conf +++ b/ffmpeg/releases.conf @@ -1,4 +1,3 @@ -source = ffmpegdist github_releases = eugeneware/ffmpeg-static asset_filter = ffmpeg version_prefix = b diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..90a6990 --- /dev/null +++ b/go.mod @@ -0,0 +1,15 @@ +module github.com/webinstall/webi-installers + +go 1.26.1 + +require ( + github.com/jackc/pgpassfile v1.0.0 // indirect + github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect + github.com/jackc/pgx/v5 v5.8.0 // indirect + github.com/jackc/puddle/v2 v2.2.2 // indirect + github.com/joho/godotenv v1.5.1 // indirect + github.com/jszwec/csvutil v1.10.0 // indirect + github.com/therootcompany/golib/http/middleware/v2 v2.0.1 // indirect + golang.org/x/sync v0.17.0 // indirect + golang.org/x/text v0.29.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..9a07481 --- /dev/null +++ b/go.sum @@ -0,0 +1,25 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgx/v5 v5.8.0 h1:TYPDoleBBme0xGSAX3/+NujXXtpZn9HBONkQC7IEZSo= +github.com/jackc/pgx/v5 v5.8.0/go.mod h1:QVeDInX2m9VyzvNeiCJVjCkNFqzsNb43204HshNSZKw= +github.com/jackc/puddle/v2 v2.2.2 
h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= +github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/jszwec/csvutil v1.10.0 h1:upMDUxhQKqZ5ZDCs/wy+8Kib8rZR8I8lOR34yJkdqhI= +github.com/jszwec/csvutil v1.10.0/go.mod h1:/E4ONrmGkwmWsk9ae9jpXnv9QT8pLHEPcCirMFhxG9I= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/therootcompany/golib/http/middleware/v2 v2.0.1 h1:VNKpHcwyEW7cMct7/eO4fyrxwIQk2ycb6juVXSPs2Sk= +github.com/therootcompany/golib/http/middleware/v2 v2.0.1/go.mod h1:g5gb9qBidw74nW6/mwIauTKMpOKchiN2l0gt5qzJ2aQ= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/buildmeta/buildmeta.go b/internal/buildmeta/buildmeta.go new file mode 100644 index 0000000..f607295 --- /dev/null +++ b/internal/buildmeta/buildmeta.go @@ -0,0 +1,168 @@ +// Package buildmeta is the shared vocabulary for Webi's build targets. 
+// +// Every package that deals with OS, architecture, libc, archive format, or +// release channel imports these types instead of passing raw strings. This +// prevents typos like "darwn" from compiling and gives a single place to +// enumerate what Webi supports. +package buildmeta + +// OS represents a target operating system. +type OS string + +const ( + OSAny OS = "ANYOS" + OSDarwin OS = "darwin" + OSLinux OS = "linux" + OSWindows OS = "windows" + OSFreeBSD OS = "freebsd" + OSOpenBSD OS = "openbsd" + OSNetBSD OS = "netbsd" + OSDragonFly OS = "dragonfly" + OSSunOS OS = "sunos" + OSIllumos OS = "illumos" + OSSolaris OS = "solaris" + OSAIX OS = "aix" + OSAndroid OS = "android" + OSPlan9 OS = "plan9" + + // POSIX compatibility levels — used when a package is a shell script + // or otherwise OS-independent for POSIX systems. + OSPosix2017 OS = "posix_2017" + OSPosix2024 OS = "posix_2024" +) + +// Arch represents a target CPU architecture. +type Arch string + +const ( + ArchAny Arch = "ANYARCH" + ArchAMD64 Arch = "x86_64" // baseline (v1) + ArchAMD64v2 Arch = "x86_64_v2" // +SSE4, +POPCNT, etc. + ArchAMD64v3 Arch = "x86_64_v3" // +AVX2, +BMI, etc. + ArchAMD64v4 Arch = "x86_64_v4" // +AVX-512 + ArchARM64 Arch = "aarch64" + ArchARMv7 Arch = "armv7" + ArchARMv6 Arch = "armv6" + ArchARMv5 Arch = "armv5" + ArchX86 Arch = "x86" + ArchPPC64LE Arch = "ppc64le" + ArchPPC64 Arch = "ppc64" + ArchPPC Arch = "powerpc" // 32-bit PowerPC (unsupported by webi, used to prevent gnueabihf over-matching) + ArchRISCV64 Arch = "riscv64" + ArchS390X Arch = "s390x" + ArchLoong64 Arch = "loong64" + ArchMIPS64LE Arch = "mips64le" + ArchMIPS64 Arch = "mips64" + ArchMIPS64R6EL Arch = "mips64r6el" + ArchMIPS64R6 Arch = "mips64r6" + ArchMIPSLE Arch = "mipsle" + ArchMIPS Arch = "mips" + + // Universal (fat) binary architectures for macOS. 
+ ArchUniversal1 Arch = "universal1" // PPC + x86 (Rosetta 1 era) + ArchUniversal2 Arch = "universal2" // x86_64 + ARM64 (Rosetta 2 era) +) + +// Libc represents the C library a binary is linked against. +type Libc string + +const ( + LibcNone Libc = "none" // statically linked or no libc dependency (Go, Zig, etc.) + LibcGNU Libc = "gnu" // requires glibc (most Linux distros) + LibcMusl Libc = "musl" // requires musl (Alpine, some Docker images) + LibcMSVC Libc = "msvc" // Microsoft Visual C++ runtime +) + +// Format represents an archive or package format. +type Format string + +const ( + FormatTarGz Format = ".tar.gz" + FormatTarXz Format = ".tar.xz" + FormatTarZst Format = ".tar.zst" + FormatTarBz2 Format = ".tar.bz2" + FormatZip Format = ".zip" + FormatGz Format = ".gz" + FormatXz Format = ".xz" + FormatZst Format = ".zst" + FormatExe Format = ".exe" + FormatExeXz Format = ".exe.xz" + FormatMSI Format = ".msi" + FormatDMG Format = ".dmg" + FormatPkg Format = ".pkg" + FormatAppZip Format = ".app.zip" + Format7z Format = ".7z" + FormatDeb Format = ".deb" + FormatRPM Format = ".rpm" + FormatSnap Format = ".snap" + FormatAppx Format = ".appx" + FormatAPK Format = ".apk" + FormatAppImage Format = ".AppImage" + FormatSh Format = ".sh" + FormatGit Format = ".git" +) + +// Channel represents a release stability channel. +type Channel string + +const ( + ChannelStable Channel = "stable" + ChannelLatest Channel = "latest" + ChannelRC Channel = "rc" + ChannelPreview Channel = "preview" + ChannelBeta Channel = "beta" + ChannelAlpha Channel = "alpha" + ChannelDev Channel = "dev" +) + +// Target represents a fully resolved build target. +type Target struct { + OS OS + Arch Arch + Libc Libc +} + +// Triplet returns the canonical "os-arch-libc" string. 
+func (t Target) Triplet() string { + return string(t.OS) + "-" + string(t.Arch) + "-" + string(t.Libc) +} + +// CompatArches returns the architectures that the given OS+arch +// combination can execute, ordered from most specific to least. +// The input arch is always first. +// +// These are OS-level facts (hardware + translation layer), not +// package-specific. Per-package overrides belong in installer config. +func CompatArches(os OS, arch Arch) []Arch { + switch os { + case OSDarwin: + switch arch { + case ArchARM64: + // Rosetta 2: Apple Silicon runs x86_64 binaries. + return []Arch{ArchARM64, ArchUniversal2, ArchAMD64} + case ArchAMD64: + return []Arch{ArchAMD64, ArchUniversal2, ArchX86} + } + case OSWindows: + switch arch { + case ArchARM64: + // Windows on ARM emulates x86_64 and x86. + return []Arch{ArchARM64, ArchAMD64, ArchX86} + } + } + + // Micro-architecture fallbacks (universal across all OSes). + switch arch { + case ArchAMD64v4: + return []Arch{ArchAMD64v4, ArchAMD64v3, ArchAMD64v2, ArchAMD64} + case ArchAMD64v3: + return []Arch{ArchAMD64v3, ArchAMD64v2, ArchAMD64} + case ArchAMD64v2: + return []Arch{ArchAMD64v2, ArchAMD64} + case ArchARMv7: + return []Arch{ArchARMv7, ArchARMv6} + } + + return []Arch{arch} +} + diff --git a/internal/classify/classify.go b/internal/classify/classify.go new file mode 100644 index 0000000..094fbb5 --- /dev/null +++ b/internal/classify/classify.go @@ -0,0 +1,283 @@ +// Package classify extracts build targets from release asset filenames. +// +// Standard toolchains (goreleaser, cargo-dist, zig build) produce predictable +// filenames like "tool_0.1.0_linux_amd64.tar.gz" or +// "tool-0.1.0-x86_64-unknown-linux-musl.tar.gz". This package matches those +// patterns directly using regex, avoiding heuristic guessing. +// +// Detection order matters: architectures are checked longest-first to prevent +// "x86" from matching inside "x86_64", and OS checks use word boundaries. 
+package classify + +import ( + "path" + "regexp" + "strings" + + "github.com/webinstall/webi-installers/internal/buildmeta" +) + +// Result holds the classification of an asset filename. +type Result struct { + OS buildmeta.OS + Arch buildmeta.Arch + Libc buildmeta.Libc + Format buildmeta.Format +} + +// Target returns the build target (OS + Arch + Libc). +func (r Result) Target() buildmeta.Target { + return buildmeta.Target{OS: r.OS, Arch: r.Arch, Libc: r.Libc} +} + +// Filename classifies a release asset filename, returning the detected +// OS, architecture, libc, and archive format. Undetected fields are empty. +// +// OS and arch are detected independently; the OS does not change how the +// arch is interpreted. For example, bare "arm" is classified as ARMv6 on +// every OS (including Windows), while "arm64"/"aarch64" is always ARM64. +func Filename(name string) Result { + lower := strings.ToLower(name) + os := detectOS(lower) + arch := detectArch(lower) + format := detectFormat(lower) + + // .deb, .rpm, .snap are Linux-only package formats. + if os == "" && (format == buildmeta.FormatDeb || format == buildmeta.FormatRPM || format == buildmeta.FormatSnap) { + os = buildmeta.OSLinux + } + // .app.zip and .dmg are macOS-only formats. + if os == "" && (format == buildmeta.FormatAppZip || format == buildmeta.FormatDMG) { + os = buildmeta.OSDarwin + } + + return Result{ + OS: os, + Arch: arch, + Libc: detectLibc(lower), + Format: format, + } +} + +// b is a boundary: start/end of string or a non-alphanumeric separator. +// Go's \b treats "_" as a word character (it would miss "_linux_"), so we use this instead. +const b = `(?:^|[^a-zA-Z0-9])` +const bEnd = `(?:[^a-zA-Z0-9]|$)` + +// --- OS detection --- + +var osPatterns = []struct { + os buildmeta.OS + pattern *regexp.Regexp +}{ + // macos[\d.]* matches versioned names like "macos10.10", "macos11", "macos12.0" (cmake naming).
+ {buildmeta.OSDarwin, regexp.MustCompile(`(?i)(?:` + b + `(?:darwin|macos[\d.]*|macosx[\d.]*|osx[\d.]*|os-x|apple)` + bEnd + `|` + b + `mac` + bEnd + `)`)}, + // linux[\d.]* matches versioned names like "linux64", "linux32" (chromedriver/dashcore naming). + {buildmeta.OSLinux, regexp.MustCompile(`(?i)` + b + `linux[\d.]*` + bEnd)}, + {buildmeta.OSWindows, regexp.MustCompile(`(?i)` + b + `(?:windows|win(?:32|64|x64|dows)?)` + bEnd + `|\.exe(?:\.xz)?$|\.msi$`)}, + // freebsd\d* matches versioned names like "freebsd13", "freebsd14" (Gitea naming). + {buildmeta.OSFreeBSD, regexp.MustCompile(`(?i)` + b + `freebsd\d*` + bEnd)}, + {buildmeta.OSOpenBSD, regexp.MustCompile(`(?i)` + b + `openbsd` + bEnd)}, + {buildmeta.OSNetBSD, regexp.MustCompile(`(?i)` + b + `netbsd` + bEnd)}, + {buildmeta.OSDragonFly, regexp.MustCompile(`(?i)` + b + `dragonfly(?:bsd)?` + bEnd)}, + // solaris, illumos, and sunos are distinct OS values in the Node build-classifier. + // Keep them separate so the legacy cache matches what the classifier extracts. + {buildmeta.OSSolaris, regexp.MustCompile(`(?i)` + b + `solaris` + bEnd)}, + {buildmeta.OSIllumos, regexp.MustCompile(`(?i)` + b + `illumos` + bEnd)}, + {buildmeta.OSSunOS, regexp.MustCompile(`(?i)` + b + `sunos` + bEnd)}, + {buildmeta.OSAIX, regexp.MustCompile(`(?i)` + b + `aix` + bEnd)}, + {buildmeta.OSAndroid, regexp.MustCompile(`(?i)` + b + `android` + bEnd)}, + {buildmeta.OSPlan9, regexp.MustCompile(`(?i)` + b + `plan9` + bEnd)}, +} + +func detectOS(lower string) buildmeta.OS { + for _, p := range osPatterns { + if p.pattern.MatchString(lower) { + return p.os + } + } + return "" +} + +// --- Arch detection --- +// Order matters: check longer/more-specific patterns first. + +var archPatterns = []struct { + arch buildmeta.Arch + pattern *regexp.Regexp +}{ + // Universal/fat binaries before specific arches. 
+ {buildmeta.ArchUniversal2, regexp.MustCompile(`(?i)` + b + `(?:universal2?|fat)` + bEnd)}, + // amd64 micro-levels before baseline — "amd64v3" must not fall through to amd64. + // amd64_?vN: underscore optional but no dash — dash is ambiguous with version numbers + // (e.g. syncthing "amd64-v2.0.5" where v2 is the release version, not an arch level). + {buildmeta.ArchAMD64v4, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v4|amd64_?v4|v4-amd64)`)}, + {buildmeta.ArchAMD64v3, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v3|amd64_?v3|v3-amd64)`)}, + {buildmeta.ArchAMD64v2, regexp.MustCompile(`(?i)(?:x86[_-]64[_-]v2|amd64_?v2|v2-amd64)`)}, + // amd64 baseline before x86 — "x86_64" must not match as x86. + {buildmeta.ArchAMD64, regexp.MustCompile(`(?i)(?:x86[_-]64|amd64|x64|win64)`)}, + // powerpc64le/ppc64le before powerpc64/ppc64 before powerpc32. + // The longer powerpc* forms must come first to prevent shorter matches from + // winning. All powerpc entries must appear BEFORE ARM patterns — otherwise + // "powerpc-linux-gnueabihf" would match gnueabihf → ARMv6. + // ppc64el is an alternative spelling used in Debian/Ubuntu. + {buildmeta.ArchPPC64LE, regexp.MustCompile(`(?i)(?:powerpc64le|ppc64le|ppc64el)`)}, + {buildmeta.ArchPPC64, regexp.MustCompile(`(?i)(?:powerpc64|ppc64)`)}, + // powerpc (32-bit): webi does not serve powerpc32, but we must classify it + // here to prevent the gnueabihf suffix from matching the ARMv6 pattern. + {buildmeta.ArchPPC, regexp.MustCompile(`(?i)` + b + `powerpc` + bEnd)},
+ // arm64 before armv7/armv6 — "aarch64" must not match as arm. + {buildmeta.ArchARM64, regexp.MustCompile(`(?i)(?:aarch64|arm64|armv8)`)}, + {buildmeta.ArchARMv7, regexp.MustCompile(`(?i)(?:armv7l?|arm-?v7|arm7|arm32|armhf)`)}, + // armel and gnueabihf are ARMv6 soft/hard-float ABI names used in Debian and Rust triplets. + {buildmeta.ArchARMv6, regexp.MustCompile(`(?i)(?:armv6l?|arm-?v6|aarch32|armel|gnueabihf|` + b + `arm` + bEnd + `)`)}, + {buildmeta.ArchARMv5, regexp.MustCompile(`(?i)(?:armv5)`)}, + {buildmeta.ArchRISCV64, regexp.MustCompile(`(?i)riscv64`)}, + {buildmeta.ArchS390X, regexp.MustCompile(`(?i)s390x`)}, + {buildmeta.ArchLoong64, regexp.MustCompile(`(?i)loong(?:arch)?64`)}, + // mips64r6 before mips64 — "mips64r6" contains "mips64" as a prefix. + {buildmeta.ArchMIPS64R6EL, regexp.MustCompile(`(?i)mips64r6e(?:l|le)`)}, + {buildmeta.ArchMIPS64R6, regexp.MustCompile(`(?i)mips64r6`)}, + {buildmeta.ArchMIPS64LE, regexp.MustCompile(`(?i)mips64(?:el|le)`)}, + {buildmeta.ArchMIPS64, regexp.MustCompile(`(?i)mips64`)}, + {buildmeta.ArchMIPSLE, regexp.MustCompile(`(?i)mips(?:el|le)`)}, + {buildmeta.ArchMIPS, regexp.MustCompile(`(?i)` + b + `mips` + bEnd)}, + // x86 last — must not steal x86_64. + {buildmeta.ArchX86, regexp.MustCompile(`(?i)(?:` + b + `x86` + bEnd + `|i[3-6]86|ia32|win32|` + b + `386` + bEnd + `)`)}, +} + +func detectArch(lower string) buildmeta.Arch { + for _, p := range archPatterns { + if p.pattern.MatchString(lower) { + return p.arch + } + } + return "" +} + +// --- Libc detection --- + +var ( + reMusl = regexp.MustCompile(`(?i)` + b + `musl` + bEnd) + reGNU = regexp.MustCompile(`(?i)` + b + `(?:gnu|glibc)` + bEnd) + reMSVC = regexp.MustCompile(`(?i)` + b + `msvc` + bEnd) + reStatic = regexp.MustCompile(`(?i)` + b + `static` + bEnd) +) + +func detectLibc(lower string) buildmeta.Libc { + switch { + case reMusl.MatchString(lower): + return buildmeta.LibcMusl + case reGNU.MatchString(lower): + return buildmeta.LibcGNU + case reMSVC.MatchString(lower): + return buildmeta.LibcMSVC + case reStatic.MatchString(lower): + return buildmeta.LibcNone + } + return "" +} + +// --- Format detection --- + +// formatSuffixes maps file extensions to formats, longest first.
+var formatSuffixes = []struct { + suffix string + format buildmeta.Format +}{ + {".tar.gz", buildmeta.FormatTarGz}, + {".tar.xz", buildmeta.FormatTarXz}, + {".tar.zst", buildmeta.FormatTarZst}, + {".tar.bz2", buildmeta.FormatTarBz2}, + {".exe.xz", buildmeta.FormatExeXz}, + {".app.zip", buildmeta.FormatAppZip}, + {".tgz", buildmeta.FormatTarGz}, + {".zip", buildmeta.FormatZip}, + {".gz", buildmeta.FormatGz}, + {".xz", buildmeta.FormatXz}, + {".zst", buildmeta.FormatZst}, + {".7z", buildmeta.Format7z}, + {".exe", buildmeta.FormatExe}, + {".msi", buildmeta.FormatMSI}, + {".dmg", buildmeta.FormatDMG}, + {".deb", buildmeta.FormatDeb}, + {".rpm", buildmeta.FormatRPM}, + {".snap", buildmeta.FormatSnap}, + {".appx", buildmeta.FormatAppx}, + {".apk", buildmeta.FormatAPK}, + {".appimage", buildmeta.FormatAppImage}, // suffixes are matched against the lowercased name + {".pkg", buildmeta.FormatPkg}, +} + +func detectFormat(lower string) buildmeta.Format { + // Use the base name to avoid directory separators confusing suffix matching. + base := path.Base(lower) + for _, s := range formatSuffixes { + if strings.HasSuffix(base, s.suffix) { + return s.format + } + } + return "" +} + +// IsMetaAsset returns true if the filename is a non-installable meta file +// (checksums, signatures, source tarballs, documentation, etc.).
+func IsMetaAsset(name string) bool { + lower := strings.ToLower(name) + for _, suffix := range []string{ + ".txt", + ".sha256", + ".sha256sum", + ".sha512", + ".sha512sum", + ".md5", + ".md5sum", + ".sig", + ".asc", + ".pem", + ".sbom", + ".spdx", + ".json.sig", + ".sigstore", + ".minisig", + "_src.tar.gz", + "_src.tar.xz", + "_src.zip", + "-src.tar.gz", + ".src.tar.gz", + "-src.tar.xz", + "-src.zip", + ".d.ts", + ".pub", + ".bsdiff", + ".flatpak", + } { + if strings.HasSuffix(lower, suffix) { + return true + } + } + for _, substr := range []string{ + "checksums", + "sha256sum", + "sha512sum", + "buildable-artifact", + ".license", // substrings are matched against the lowercased name + ".readme", + } { + if strings.Contains(lower, substr) { + return true + } + } + for _, exact := range []string{ + "install.sh", + "install.ps1", + "compat.json", + "b3sums", + "dist-manifest.json", + } { + if lower == exact { + return true + } + } + return false +} diff --git a/internal/classify/classify_test.go b/internal/classify/classify_test.go new file mode 100644 index 0000000..8ff2dc4 --- /dev/null +++ b/internal/classify/classify_test.go @@ -0,0 +1,352 @@ +package classify_test + +import ( + "testing" + + "github.com/webinstall/webi-installers/internal/buildmeta" + "github.com/webinstall/webi-installers/internal/classify" +) + +func TestFilename(t *testing.T) { + tests := []struct { + name string + input string + wantOS buildmeta.OS + arch buildmeta.Arch + libc buildmeta.Libc + format buildmeta.Format + }{ + // Goreleaser-style + { + name: "goreleaser linux amd64 tar.gz", + input: "hugo_0.145.0_linux-amd64.tar.gz", + wantOS: buildmeta.OSLinux, + arch: buildmeta.ArchAMD64, + format: buildmeta.FormatTarGz, + }, + { + name: "goreleaser darwin arm64 tar.gz", + input: "hugo_0.145.0_darwin-arm64.tar.gz", + wantOS: buildmeta.OSDarwin, + arch: buildmeta.ArchARM64, + format: buildmeta.FormatTarGz, + }, + { + name: "goreleaser windows amd64 zip", + input: "hugo_0.145.0_windows-amd64.zip", + wantOS: buildmeta.OSWindows, + arch:
buildmeta.ArchAMD64, + format: buildmeta.FormatZip, + }, + { + name: "goreleaser freebsd", + input: "hugo_0.145.0_freebsd-amd64.tar.gz", + wantOS: buildmeta.OSFreeBSD, + arch: buildmeta.ArchAMD64, + format: buildmeta.FormatTarGz, + }, + + // Rust/cargo-dist style + { + name: "rust linux musl", + input: "ripgrep-14.1.1-x86_64-unknown-linux-musl.tar.gz", + wantOS: buildmeta.OSLinux, + arch: buildmeta.ArchAMD64, + libc: buildmeta.LibcMusl, + format: buildmeta.FormatTarGz, + }, + { + name: "rust linux gnu", + input: "bat-v0.24.0-x86_64-unknown-linux-gnu.tar.gz", + wantOS: buildmeta.OSLinux, + arch: buildmeta.ArchAMD64, + libc: buildmeta.LibcGNU, + format: buildmeta.FormatTarGz, + }, + { + name: "rust apple darwin", + input: "ripgrep-14.1.1-x86_64-apple-darwin.tar.gz", + wantOS: buildmeta.OSDarwin, + arch: buildmeta.ArchAMD64, + format: buildmeta.FormatTarGz, + }, + { + name: "rust windows msvc", + input: "bat-v0.24.0-x86_64-pc-windows-msvc.zip", + wantOS: buildmeta.OSWindows, + arch: buildmeta.ArchAMD64, + libc: buildmeta.LibcMSVC, + format: buildmeta.FormatZip, + }, + { + name: "rust aarch64 linux", + input: "ripgrep-14.1.1-aarch64-unknown-linux-gnu.tar.gz", + wantOS: buildmeta.OSLinux, + arch: buildmeta.ArchARM64, + libc: buildmeta.LibcGNU, + format: buildmeta.FormatTarGz, + }, + + // Zig-style + { + name: "zig linux x86_64", + input: "zig-linux-x86_64-0.14.0.tar.xz", + wantOS: buildmeta.OSLinux, + arch: buildmeta.ArchAMD64, + format: buildmeta.FormatTarXz, + }, + { + name: "zig macos aarch64", + input: "zig-macos-aarch64-0.14.0.tar.xz", + wantOS: buildmeta.OSDarwin, + arch: buildmeta.ArchARM64, + format: buildmeta.FormatTarXz, + }, + + // Windows executables + { + name: "bare exe", + input: "jq-windows-amd64.exe", + wantOS: buildmeta.OSWindows, + arch: buildmeta.ArchAMD64, + format: buildmeta.FormatExe, + }, + { + name: "msi installer", + input: "caddy_2.9.0_windows_amd64.msi", + wantOS: buildmeta.OSWindows, + arch: buildmeta.ArchAMD64, + format: 
buildmeta.FormatMSI, + }, + + // macOS formats + { + name: "dmg installer", + input: "MyApp-1.0.0-darwin-arm64.dmg", + wantOS: buildmeta.OSDarwin, + arch: buildmeta.ArchARM64, + format: buildmeta.FormatDMG, + }, + + // Arch priority: x86_64 must not match x86 + { + name: "x86_64 not x86", + input: "tool-x86_64-linux.tar.gz", + wantOS: buildmeta.OSLinux, + arch: buildmeta.ArchAMD64, + format: buildmeta.FormatTarGz, + }, + { + name: "actual x86", + input: "tool-x86-linux.tar.gz", + wantOS: buildmeta.OSLinux, + arch: buildmeta.ArchX86, + format: buildmeta.FormatTarGz, + }, + { + name: "i386", + input: "tool-linux-i386.tar.gz", + wantOS: buildmeta.OSLinux, + arch: buildmeta.ArchX86, + format: buildmeta.FormatTarGz, + }, + + // Windows ARM: bare "arm" is armv6 (some tools ship genuine arm32 Windows builds). + // Explicit "arm64" is always aarch64 regardless of OS. + { + name: "windows bare arm stays armv6", + input: "tool-1.0.0-windows-arm.zip", + wantOS: buildmeta.OSWindows, + arch: buildmeta.ArchARMv6, + format: buildmeta.FormatZip, + }, + { + name: "windows armv6 stays armv6", + input: "tool-2.0.0-windows-armv6.zip", + wantOS: buildmeta.OSWindows, + arch: buildmeta.ArchARMv6, + format: buildmeta.FormatZip, + }, + { + name: "windows arm64 stays arm64", + input: "tool-1.0.0-windows-arm64.zip", + wantOS: buildmeta.OSWindows, + arch: buildmeta.ArchARM64, + format: buildmeta.FormatZip, + }, + + // armel and gnueabihf are ARMv6 ABI names + { + name: "armel is armv6", + input: "jq-linux-armel", + wantOS: buildmeta.OSLinux, + arch: buildmeta.ArchARMv6, + }, + { + name: "gnueabihf is armv6", + input: "tool-arm-unknown-linux-gnueabihf.tar.gz", + wantOS: buildmeta.OSLinux, + arch: buildmeta.ArchARMv6, + format: buildmeta.FormatTarGz, + }, + + // winx64 is a Windows x86_64 naming used by MariaDB + { + name: "winx64 is windows x86_64", + input: "mariadb-11.4.5-winx64.zip", + wantOS: buildmeta.OSWindows, + arch: buildmeta.ArchAMD64, + format: buildmeta.FormatZip, + }, + + // 
win32/win64 naming used by chromedriver, dashcore, etc. + { + name: "win32 is windows x86", + input: "chromedriver-win32.zip", + wantOS: buildmeta.OSWindows, + arch: buildmeta.ArchX86, + format: buildmeta.FormatZip, + }, + { + name: "win64 is windows amd64", + input: "dashcore-23.1.2-win64-setup.exe", + wantOS: buildmeta.OSWindows, + arch: buildmeta.ArchAMD64, + format: buildmeta.FormatExe, + }, + + // ppc64el is a Debian/Ubuntu alias for ppc64le + { + name: "ppc64el is ppc64le", + input: "jq-linux-ppc64el", + arch: buildmeta.ArchPPC64LE, + }, + + // amd64 micro-architecture levels + { + name: "amd64v2", + input: "tool-linux-amd64v2.tar.gz", + arch: buildmeta.ArchAMD64v2, + }, + { + name: "amd64v3", + input: "tool-linux-x86_64_v3.tar.gz", + arch: buildmeta.ArchAMD64v3, + }, + { + name: "amd64v4", + input: "tool-linux-amd64v4.tar.gz", + arch: buildmeta.ArchAMD64v4, + }, + { + name: "amd64v3 not baseline", + input: "tool-1.0.0-linux-amd64v3.tar.gz", + wantOS: buildmeta.OSLinux, + arch: buildmeta.ArchAMD64v3, + format: buildmeta.FormatTarGz, + }, + + // ARM variants: arm64 must not match armv7/armv6 + { + name: "aarch64 not armv7", + input: "tool-aarch64-linux.tar.gz", + arch: buildmeta.ArchARM64, + }, + { + name: "armv7", + input: "tool-armv7l-linux.tar.gz", + arch: buildmeta.ArchARMv7, + }, + { + name: "armv6", + input: "tool-armv6l-linux.tar.gz", + arch: buildmeta.ArchARMv6, + }, + + // ppc64le before ppc64 + { + name: "ppc64le", + input: "tool-linux-ppc64le.tar.gz", + arch: buildmeta.ArchPPC64LE, + }, + { + name: "ppc64", + input: "tool-linux-ppc64.tar.gz", + arch: buildmeta.ArchPPC64, + }, + + // Static linking + { + name: "static binary", + input: "tool-linux-amd64-static.tar.gz", + libc: buildmeta.LibcNone, + }, + + // .exe implies Windows + { + name: "exe implies windows", + input: "tool-amd64.exe", + wantOS: buildmeta.OSWindows, + arch: buildmeta.ArchAMD64, + format: buildmeta.FormatExe, + }, + + // Compound extensions + { + name: "tar.zst", + input: 
"tool-linux-amd64.tar.zst", + format: buildmeta.FormatTarZst, + }, + { + name: "exe.xz", + input: "tool-windows-amd64.exe.xz", + format: buildmeta.FormatExeXz, + }, + { + name: "app.zip", + input: "MyApp-1.0.0.app.zip", + format: buildmeta.FormatAppZip, + }, + { + name: "tgz alias", + input: "tool-linux-amd64.tgz", + format: buildmeta.FormatTarGz, + }, + + // s390x, mips + { + name: "s390x", + input: "tool-linux-s390x.tar.gz", + arch: buildmeta.ArchS390X, + }, + { + name: "mips64", + input: "tool-linux-mips64.tar.gz", + arch: buildmeta.ArchMIPS64, + }, + + // Unknown / no match + { + name: "checksum file", + input: "checksums.txt", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := classify.Filename(tt.input) + if tt.wantOS != "" && got.OS != tt.wantOS { + t.Errorf("OS = %q, want %q", got.OS, tt.wantOS) + } + if tt.arch != "" && got.Arch != tt.arch { + t.Errorf("Arch = %q, want %q", got.Arch, tt.arch) + } + if tt.libc != "" && got.Libc != tt.libc { + t.Errorf("Libc = %q, want %q", got.Libc, tt.libc) + } + if tt.format != "" && got.Format != tt.format { + t.Errorf("Format = %q, want %q", got.Format, tt.format) + } + }) + } +} diff --git a/internal/classifypkg/classifypkg.go b/internal/classifypkg/classifypkg.go new file mode 100644 index 0000000..646494e --- /dev/null +++ b/internal/classifypkg/classifypkg.go @@ -0,0 +1,1357 @@ +// Package classifypkg converts raw upstream release data into classified +// [storage.Asset] slices. Each source type (github, nodedist, gittag, etc.) +// has its own classifier that reads JSON from [rawcache.Dir] and produces +// assets with OS, arch, format, and channel fields populated. +// +// This is the second stage of the pipeline: fetch → classify → tag → filter → store. 
+package classifypkg + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "regexp" + + "github.com/webinstall/webi-installers/internal/buildmeta" + "github.com/webinstall/webi-installers/internal/classify" + "github.com/webinstall/webi-installers/internal/installerconf" + "github.com/webinstall/webi-installers/internal/rawcache" + "github.com/webinstall/webi-installers/internal/releases/atomicparsley" + "github.com/webinstall/webi-installers/internal/releases/bun" + "github.com/webinstall/webi-installers/internal/releases/chromedist" + "github.com/webinstall/webi-installers/internal/releases/cmake" + "github.com/webinstall/webi-installers/internal/releases/fish" + "github.com/webinstall/webi-installers/internal/releases/gitea" + "github.com/webinstall/webi-installers/internal/releases/flutterdist" + "github.com/webinstall/webi-installers/internal/releases/git" + "github.com/webinstall/webi-installers/internal/releases/golang" + "github.com/webinstall/webi-installers/internal/releases/gpgdist" + "github.com/webinstall/webi-installers/internal/releases/hashicorp" + "github.com/webinstall/webi-installers/internal/releases/iterm2dist" + "github.com/webinstall/webi-installers/internal/releases/juliadist" + "github.com/webinstall/webi-installers/internal/releases/lsd" + "github.com/webinstall/webi-installers/internal/releases/mariadbdist" + "github.com/webinstall/webi-installers/internal/releases/node" + "github.com/webinstall/webi-installers/internal/releases/ollama" + "github.com/webinstall/webi-installers/internal/releases/pwsh" + "github.com/webinstall/webi-installers/internal/releases/postgres" + "github.com/webinstall/webi-installers/internal/releases/sass" + "github.com/webinstall/webi-installers/internal/releases/servicemandist" + "github.com/webinstall/webi-installers/internal/releases/sttr" + "github.com/webinstall/webi-installers/internal/releases/uuidv7" + "github.com/webinstall/webi-installers/internal/releases/watchexec" + 
"github.com/webinstall/webi-installers/internal/releases/xcaddy" + "github.com/webinstall/webi-installers/internal/releases/xz" + "github.com/webinstall/webi-installers/internal/releases/zigdist" + "github.com/webinstall/webi-installers/internal/storage" +) + +// channelFromVersion infers a release channel from the version string. +// Many GitHub releases have pre-release versions (rc, beta, alpha, dev, +// preview) but don't set the prerelease boolean in the API. +func channelFromVersion(version string) string { + v := strings.ToLower(version) + switch { + case strings.Contains(v, "-rc") || strings.Contains(v, ".rc"): + return "rc" + case strings.Contains(v, "-beta") || strings.Contains(v, ".beta"): + return "beta" + case strings.Contains(v, "-alpha") || strings.Contains(v, ".alpha"): + return "alpha" + case strings.Contains(v, "-dev") || strings.Contains(v, ".dev"): + return "dev" + case strings.Contains(v, "-preview") || strings.Contains(v, ".preview"): + return "preview" + case strings.Contains(v, "-pre") || strings.Contains(v, ".pre"): + return "beta" + case strings.Contains(v, "-nightly"): + return "nightly" + case strings.Contains(v, "-canary"): + return "canary" + } + return "stable" +} + +// Package classifies raw upstream data into assets, tags variants, +// and applies config-driven filters. This is the full classify pipeline +// for a single package. +// +// gitTagDir is an optional supplementary raw cache containing gittag data +// for packages that use a non-gittag source type (servicemandist, +// githubsource) alongside a git_url. Pass nil when not applicable. +func Package(pkg string, conf *installerconf.Conf, d *rawcache.Dir, gitTagDir *rawcache.Dir) ([]storage.Asset, error) { + assets, err := classifySource(pkg, conf, d) + if err != nil { + return nil, err + } + + // Append git entries from supplementary gittag data (real commit hashes). 
+ if gitTagDir != nil && conf.GitURL != "" { + gitAssets, gitErr := ClassifyGitEntries(pkg, conf.GitURL, conf.OS, gitTagDir) + if gitErr != nil { + return nil, fmt.Errorf("classify gittag: %w", gitErr) + } + assets = append(assets, gitAssets...) + } + + TagVariants(pkg, assets) + assets = expandUniversal(assets) + NormalizeVersions(pkg, assets) + processGitTagHEAD(assets) + assets = ApplyConfig(assets, conf) + assets = appendLegacy(pkg, assets) + return assets, nil +} + +// expandUniversal duplicates universal fat binary entries into one entry per +// supported arch. This lets the resolver do exact arch matching without +// needing fallback/waterfall logic for universal binaries. +func expandUniversal(assets []storage.Asset) []storage.Asset { + out := make([]storage.Asset, 0, len(assets)) + for _, a := range assets { + switch buildmeta.Arch(a.Arch) { + case buildmeta.ArchUniversal2: + arm := a + arm.Arch = string(buildmeta.ArchARM64) + intel := a + intel.Arch = string(buildmeta.ArchAMD64) + out = append(out, arm, intel) + case buildmeta.ArchUniversal1: + ppc := a + ppc.Arch = string(buildmeta.ArchPPC) + intel := a + intel.Arch = string(buildmeta.ArchX86) + out = append(out, ppc, intel) + default: + out = append(out, a) + } + } + return out +} + +// classifySource dispatches to the source-specific classifier. 
+func classifySource(pkg string, conf *installerconf.Conf, d *rawcache.Dir) ([]storage.Asset, error) { + switch conf.Source { + case "github": + return classifyGitHub(pkg, conf, d) + case "servicemandist": + return classifyServiceman(pkg, conf, d) + case "githubsource": + return classifyGitHubSource(pkg, conf, d) + case "nodedist": + return classifyNodeDist(pkg, conf, d) + case "gittag": + return classifyGitTag(pkg, conf, d) + case "gitea": + return classifyGitea(pkg, conf, d) + case "chromedist": + return classifyChromeDist(d) + case "flutterdist": + return classifyFlutterDist(d) + case "golang": + return classifyGolang(d) + case "gpgdist": + return classifyGPGDist(d) + case "hashicorp": + return classifyHashiCorp(d) + case "iterm2dist": + return classifyITerm2Dist(d) + case "juliadist": + return classifyJuliaDist(d) + case "mariadbdist": + return classifyMariaDBDist(d) + case "zigdist": + return classifyZigDist(d) + default: + return nil, nil + } +} + +// NormalizeVersions applies package-specific version normalization. +// For example, Git for Windows strips ".windows.N" from version strings. +func NormalizeVersions(pkg string, assets []storage.Asset) { + switch pkg { + case "git": + gitdist.NormalizeVersions(assets) + case "lf": + // lf tags are "r1", "r2", etc. Node.js converts to "0.N.0". + for i := range assets { + v := assets[i].Version + if strings.HasPrefix(v, "r") { + assets[i].Version = "0." + v[1:] + ".0" + } + } + case "pg", "postgres", "psql": + postgres.NormalizeVersions(assets) + case "watchexec": + watchexecdist.NormalizeVersions(assets) + } +} + +// TagVariants applies package-specific variant tags to classified assets. +// Each case delegates to a per-installer package under internal/releases/. 
+func TagVariants(pkg string, assets []storage.Asset) { + switch pkg { + case "atomicparsley": + atomicparsleydist.TagVariants(assets) + case "cmake": + cmakedist.TagVariants(assets) + case "bun": + bundist.TagVariants(assets) + case "fish": + fishdist.TagVariants(assets) + case "flutter": + flutterdist.TagVariants(assets) + case "git": + gitdist.TagVariants(assets) + case "gitea": + gitea.TagVariants(assets) + case "lsd": + lsddist.TagVariants(assets) + case "node": + nodedist.TagVariants(assets) + case "ollama": + ollamadist.TagVariants(assets) + case "pwsh": + pwshdist.TagVariants(assets) + case "sass": + sassdist.TagVariants(assets) + case "serviceman": + servicemandist.TagVariants(assets) + case "sttr": + sttrdist.TagVariants(assets) + case "uuidv7": + uuidv7dist.TagVariants(assets) + case "watchexec": + watchexecdist.TagVariants(assets) + case "xcaddy": + xcaddydist.TagVariants(assets) + case "xz": + xzdist.TagVariants(assets) + } +} + +// appendLegacy adds hardcoded legacy releases for packages that had +// releases from sources that no longer exist (e.g. EnterpriseDB binaries). +// processGitTagHEAD handles HEAD entries from gittag sources. +// For repos with real version tags, HEAD entries are tagged with a "head" +// variant so they're filtered from the legacy cache. For tagless repos +// (only HEAD entries), the version and filename are rewritten to match +// the Node.js legacy format: version "2023.10.10-18.42.21", filename +// "{repo}-v2023.10.10-18.42.21". +func processGitTagHEAD(assets []storage.Asset) { + hasReal := false + hasHEAD := false + for _, a := range assets { + if a.Format != "git" { + continue + } + if strings.HasPrefix(a.Version, "HEAD-") { + hasHEAD = true + } else { + hasReal = true + } + } + if !hasHEAD { + return + } + + for i := range assets { + if !strings.HasPrefix(assets[i].Version, "HEAD-") { + continue + } + if hasReal { + // Repo has real tags: exclude HEAD from legacy cache. 
+ assets[i].Variants = append(assets[i].Variants, "head") + } else { + // Tagless repo: rewrite to Node.js legacy format. + // HEAD-2023.10.10-18.42.21 → 2023.10.10-18.42.21 + datetime := strings.TrimPrefix(assets[i].Version, "HEAD-") + assets[i].Version = datetime + // {repo}-HEAD-2023.10.10-18.42.21 → {repo}-v2023.10.10-18.42.21 + assets[i].Filename = strings.Replace( + assets[i].Filename, + "HEAD-"+datetime, + "v"+datetime, + 1, + ) + } + } +} + +func appendLegacy(pkg string, assets []storage.Asset) []storage.Asset { + switch pkg { + case "postgres": + assets = append(assets, postgres.LegacyReleases()...) + } + return assets +} + +// ApplyConfig applies asset_filter, exclude, and version prefix stripping +// from a package's releases.conf. +func ApplyConfig(assets []storage.Asset, conf *installerconf.Conf) []storage.Asset { + filter := strings.ToLower(conf.AssetFilter) + excludes := conf.Exclude + prefixes := conf.VersionPrefixes + + var out []storage.Asset + for _, a := range assets { + lower := strings.ToLower(a.Filename) + + // Include filter: asset must contain this substring. + if filter != "" && !strings.Contains(lower, filter) { + continue + } + + // Exclude filter. + skip := false + for _, ex := range excludes { + if strings.Contains(a.Filename, ex) { + skip = true + break + } + } + if skip { + continue + } + + // Version prefix stripping. + for _, p := range prefixes { + if strings.HasPrefix(a.Version, p) { + a.Version = strings.TrimPrefix(a.Version, p) + break + } + } + + out = append(out, a) + } + return out +} + +// ReadAllRaw reads all non-directory, non-underscore-prefixed files from +// the active generation of a rawcache directory. 
+func ReadAllRaw(d *rawcache.Dir) (map[string][]byte, error) { + active, err := d.ActivePath() + if err != nil { + return nil, err + } + entries, err := os.ReadDir(active) + if err != nil { + return nil, err + } + result := make(map[string][]byte, len(entries)) + for _, e := range entries { + if e.IsDir() || strings.HasPrefix(e.Name(), "_") { + continue + } + data, err := os.ReadFile(filepath.Join(active, e.Name())) + if err != nil { + return nil, err + } + result[e.Name()] = data + } + return result, nil +} + +// --- GitHub --- + +type ghRelease struct { + TagName string `json:"tag_name"` + Prerelease bool `json:"prerelease"` + Draft bool `json:"draft"` + PublishedAt string `json:"published_at"` + Assets []ghAsset `json:"assets"` + TarballURL string `json:"tarball_url"` + ZipballURL string `json:"zipball_url"` +} + +type ghAsset struct { + Name string `json:"name"` + BrowserDownloadURL string `json:"browser_download_url"` + Size int64 `json:"size"` +} + +// reRustMuslStatic matches Rust target triples that indicate a statically-linked +// musl build. Rust's *-unknown-linux-musl targets are always static — they have +// zero runtime libc dependency. This is distinct from packages like pwsh +// (-linux-musl-x64), bun (-linux-x64-musl), and node (-linux-x64-musl) which +// dynamically link against musl and require it at runtime. 
+var reRustMuslStatic = regexp.MustCompile(`(?i)-unknown-linux-musl`) + +func classifyGitHub(pkg string, conf *installerconf.Conf, d *rawcache.Dir) ([]storage.Asset, error) { + tagPrefix := conf.TagPrefix + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + var assets []storage.Asset + for _, data := range releases { + var rel ghRelease + if err := json.Unmarshal(data, &rel); err != nil { + continue + } + if rel.Draft { + continue + } + + version := rel.TagName + if tagPrefix != "" { + if !strings.HasPrefix(version, tagPrefix) { + continue // skip tags from other packages in monorepos + } + version = strings.TrimPrefix(version, tagPrefix) + } + + channel := "stable" + if rel.Prerelease { + channel = "beta" + } else { + channel = channelFromVersion(version) + } + + date := "" + if len(rel.PublishedAt) >= 10 { + date = rel.PublishedAt[:10] + } + + for _, a := range rel.Assets { + if classify.IsMetaAsset(a.Name) { + continue + } + + r := classify.Filename(a.Name) + + // Normalize .tgz → .tar.gz in the display filename. + // The download URL still points to the real file. + name := a.Name + if strings.HasSuffix(strings.ToLower(name), ".tgz") { + name = name[:len(name)-4] + ".tar.gz" + } + + libc := r.Libc + // Rust static musl builds have zero runtime libc dependency. + if libc == buildmeta.LibcMusl && reRustMuslStatic.MatchString(a.Name) { + libc = buildmeta.LibcNone + } + // Windows gnu (MinGW) is self-contained — no runtime deps. + if r.OS == buildmeta.OSWindows && libc == buildmeta.LibcGNU { + libc = buildmeta.LibcNone + } + + assets = append(assets, storage.Asset{ + Filename: name, + Version: version, + Channel: channel, + OS: string(r.OS), + Arch: string(r.Arch), + Libc: string(libc), + Format: string(r.Format), + Download: a.BrowserDownloadURL, + Date: date, + }) + } + + // Releases with no uploaded binary assets are skipped for GitHub + // packages. These are typically old releases from before the + // project started uploading binaries. 
Source-installable packages + // should use githubsource or gittag source type instead. + } + return assets, nil +} + +// classifyServiceman handles serviceman's dual-repo layout: binary releases +// from therootcompany/serviceman (≤v0.8.x) and source-only releases from +// bnnanet/serviceman (v0.9.x+). Emits binary assets where available, plus +// git entries for every release that has a git_url configured. +func classifyServiceman(pkg string, conf *installerconf.Conf, d *rawcache.Dir) ([]storage.Asset, error) { + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + repo := conf.Repo + if repo == "" { + repo = pkg + } + + var assets []storage.Asset + for _, data := range releases { + var rel ghRelease + if err := json.Unmarshal(data, &rel); err != nil { + continue + } + if rel.Draft { + continue + } + + version := rel.TagName + channel := "stable" + if rel.Prerelease { + channel = "beta" + } else { + channel = channelFromVersion(version) + } + + date := "" + if len(rel.PublishedAt) >= 10 { + date = rel.PublishedAt[:10] + } + + for _, a := range rel.Assets { + if classify.IsMetaAsset(a.Name) { + continue + } + r := classify.Filename(a.Name) + assets = append(assets, storage.Asset{ + Filename: a.Name, + Version: version, + Channel: channel, + OS: string(r.OS), + Arch: string(r.Arch), + Libc: string(r.Libc), + Format: string(r.Format), + Download: a.BrowserDownloadURL, + Date: date, + }) + } + + } + return assets, nil +} + +// classifyGitHubSource handles packages installed from source via GitHub +// releases. Emits source tarball/zipball/git entries for every release. +// Used for shell scripts, vim plugins, and other source-installable packages. 
+func classifyGitHubSource(pkg string, conf *installerconf.Conf, d *rawcache.Dir) ([]storage.Asset, error) { + tagPrefix := conf.TagPrefix + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + repo := conf.Repo + + var assets []storage.Asset + for _, data := range releases { + var rel ghRelease + if err := json.Unmarshal(data, &rel); err != nil { + continue + } + if rel.Draft { + continue + } + + version := rel.TagName + if tagPrefix != "" { + if !strings.HasPrefix(version, tagPrefix) { + continue + } + version = strings.TrimPrefix(version, tagPrefix) + } + + channel := "stable" + if rel.Prerelease { + channel = "beta" + } else { + channel = channelFromVersion(version) + } + + date := "" + if len(rel.PublishedAt) >= 10 { + date = rel.PublishedAt[:10] + } + + tag := rel.TagName + if rel.TarballURL != "" { + assets = append(assets, storage.Asset{ + Filename: repo + "-" + tag + ".tar.gz", + Version: version, + Channel: channel, + OS: "posix_2017", + Arch: "*", + Format: ".tar.gz", + Download: rel.TarballURL, + Date: date, + }) + } + if rel.ZipballURL != "" { + assets = append(assets, storage.Asset{ + Filename: repo + "-" + tag + ".zip", + Version: version, + Channel: channel, + OS: "posix_2017", + Arch: "*", + Format: ".zip", + Download: rel.ZipballURL, + Date: date, + }) + } + } + return assets, nil +} + +// --- Node.js dist --- + +type nodeEntry struct { + Version string `json:"version"` + Date string `json:"date"` + Files []string `json:"files"` + LTS json.RawMessage `json:"lts"` +} + +func classifyNodeDist(pkg string, conf *installerconf.Conf, d *rawcache.Dir) ([]storage.Asset, error) { + officialURL := conf.BaseURL + unofficialURL := conf.Extra["unofficial_url"] + + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + var assets []storage.Asset + for tag, data := range releases { + var entry nodeEntry + if err := json.Unmarshal(data, &entry); err != nil { + continue + } + + // Pick the right base URL from the tag 
prefix. + baseURL := officialURL + if strings.HasPrefix(tag, "unofficial_") { + baseURL = unofficialURL + } + + lts := string(entry.LTS) != "false" && string(entry.LTS) != "" + channel := "stable" + ver := strings.TrimPrefix(entry.Version, "v") + parts := strings.SplitN(ver, ".", 2) + if len(parts) > 0 { + major := 0 + fmt.Sscanf(parts[0], "%d", &major) + if major%2 != 0 { + channel = "beta" + } + } + + for _, file := range entry.Files { + if file == "src" || file == "headers" { + continue + } + expanded := expandNodeFile(pkg, entry.Version, channel, entry.Date, lts, baseURL, file) + assets = append(assets, expanded...) + } + } + return assets, nil +} + +func expandNodeFile(pkg, version, channel, date string, lts bool, baseURL, file string) []storage.Asset { + parts := strings.Split(file, "-") + if len(parts) < 2 { + return nil + } + + osMap := map[string]string{ + "osx": "darwin", "linux": "linux", "win": "windows", + "sunos": "sunos", "aix": "aix", + } + archMap := map[string]string{ + "x64": "x86_64", "x86": "x86", "arm64": "aarch64", + "armv7l": "armv7", "armv6l": "armv6", + "ppc64": "ppc64", "ppc64le": "ppc64le", "s390x": "s390x", + "riscv64": "riscv64", "loong64": "loong64", + } + + os_ := osMap[parts[0]] + arch := archMap[parts[1]] + if os_ == "" || arch == "" { + return nil + } + + libc := "" + pkgType := "" + if len(parts) > 2 { + pkgType = parts[2] + } + + var formats []string + switch pkgType { + case "musl": + libc = "musl" + formats = []string{".tar.gz", ".tar.xz"} + case "tar": + formats = []string{".tar.gz", ".tar.xz"} + case "zip": + formats = []string{".zip"} + case "pkg": + formats = []string{".pkg"} + case "msi": + formats = []string{".msi"} + case "exe": + formats = []string{".exe"} + case "7z": + formats = []string{".7z"} + case "": + formats = []string{".tar.gz", ".tar.xz"} + default: + return nil + } + + if libc == "" && os_ == "linux" { + libc = "gnu" + } + + osPart := parts[0] + if osPart == "osx" { + osPart = "darwin" + } + archPart := 
parts[1] + muslExtra := "" + if libc == "musl" { + muslExtra = "-musl" + } + + var assets []storage.Asset + for _, format := range formats { + var filename string + if format == ".msi" { + filename = fmt.Sprintf("node-%s-%s%s%s", version, archPart, muslExtra, format) + } else { + filename = fmt.Sprintf("node-%s-%s-%s%s%s", version, osPart, archPart, muslExtra, format) + } + + assets = append(assets, storage.Asset{ + Filename: filename, + Version: version, + Channel: channel, + OS: os_, + Arch: arch, + Libc: libc, + Format: format, + Download: fmt.Sprintf("%s/%s/%s", baseURL, version, filename), + LTS: lts, + Date: date, + }) + } + return assets +} + +// --- Git tag --- + +type gitTagEntry struct { + Version string `json:"Version"` + GitTag string `json:"GitTag"` + CommitHash string `json:"CommitHash"` + Date string `json:"Date"` +} + +func classifyGitTag(pkg string, conf *installerconf.Conf, d *rawcache.Dir) ([]storage.Asset, error) { + gitURL := conf.GitURL + if gitURL == "" { + gitURL = conf.BaseURL + } + return ClassifyGitEntries(pkg, gitURL, conf.OS, d) +} + +// ClassifyGitEntries reads gittag raw cache data and produces git-clone +// assets. Each entry has a real commit hash from the cloned repo. +// +// Used directly by classifyGitTag, and also by webicached to append git +// entries to packages that use other source types (servicemandist, +// githubsource) alongside a git_url. +func ClassifyGitEntries(pkg, gitURL, os string, d *rawcache.Dir) ([]storage.Asset, error) { + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + // Derive repo name from the git URL for filenames. 
+ // "https://github.com/tpope/vim-commentary.git" → "vim-commentary" + repoName := pkg + if gitURL != "" { + base := filepath.Base(gitURL) + repoName = strings.TrimSuffix(base, ".git") + } + + var assets []storage.Asset + for _, data := range releases { + var entry gitTagEntry + if err := json.Unmarshal(data, &entry); err != nil { + continue + } + + version := strings.TrimPrefix(entry.Version, "v") + date := "" + if len(entry.Date) >= 10 { + date = entry.Date[:10] + } + + var filename string + if version != "" { + // Tagged release: "{repo}-{tag}" (e.g. "vim-commentary-v1.2") + filename = repoName + "-" + entry.GitTag + } else if len(entry.Date) >= 19 { + // Tagless repo (HEAD of master/main): synthesize a date-based + // version prefixed with HEAD so it doesn't sort ahead of + // real semver tags (e.g. HEAD-2023.10.10-18.42.21 vs v1.2). + // The full datetime (including time) is needed to match the + // Node.js legacy format (v2023.10.10-18.42.21). + t, parseErr := time.Parse(time.RFC3339, entry.Date) + if parseErr != nil { + continue + } + t = t.UTC() + version = "HEAD-" + t.Format("2006.01.02-15.04.05") + filename = repoName + "-" + version + } else { + continue + } + + assets = append(assets, storage.Asset{ + Filename: filename, + Version: version, + Channel: channelFromVersion(version), + OS: os, + Format: "git", + Download: gitURL, + Date: date, + Extra: "commit:" + entry.CommitHash, + GitTag: entry.GitTag, + GitCommitHash: entry.CommitHash, + }) + } + return assets, nil +} + +// --- Gitea --- + +type giteaRelease struct { + TagName string `json:"tag_name"` + Prerelease bool `json:"prerelease"` + Draft bool `json:"draft"` + PublishedAt string `json:"published_at"` + Assets []giteaAsset `json:"assets"` +} + +type giteaAsset struct { + Name string `json:"name"` + BrowserDownloadURL string `json:"browser_download_url"` + Size int64 `json:"size"` +} + +func classifyGitea(pkg string, conf *installerconf.Conf, d *rawcache.Dir) ([]storage.Asset, error) { + 
releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + var assets []storage.Asset + for _, data := range releases { + var rel giteaRelease + if err := json.Unmarshal(data, &rel); err != nil { + continue + } + if rel.Draft { + continue + } + + channel := "stable" + if rel.Prerelease { + channel = "beta" + } else { + channel = channelFromVersion(rel.TagName) + } + date := "" + if len(rel.PublishedAt) >= 10 { + date = rel.PublishedAt[:10] + } + + for _, a := range rel.Assets { + if classify.IsMetaAsset(a.Name) { + continue + } + r := classify.Filename(a.Name) + + assets = append(assets, storage.Asset{ + Filename: a.Name, + Version: rel.TagName, + Channel: channel, + OS: string(r.OS), + Arch: string(r.Arch), + Libc: string(r.Libc), + Format: string(r.Format), + Download: a.BrowserDownloadURL, + Date: date, + }) + } + } + return assets, nil +} + +// --- Chrome for Testing --- + +func classifyChromeDist(d *rawcache.Dir) ([]storage.Asset, error) { + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + var assets []storage.Asset + for _, data := range releases { + var ver chromedist.Version + if err := json.Unmarshal(data, &ver); err != nil { + continue + } + + downloads := ver.Downloads["chromedriver"] + if len(downloads) == 0 { + continue + } + + for _, dl := range downloads { + r := classify.Filename(dl.URL) + assets = append(assets, storage.Asset{ + Filename: "chromedriver-" + dl.Platform + ".zip", + Version: ver.Version, + Channel: "stable", + OS: string(r.OS), + Arch: string(r.Arch), + Format: ".zip", + Download: dl.URL, + }) + } + } + return assets, nil +} + +// --- Flutter --- + +func classifyFlutterDist(d *rawcache.Dir) ([]storage.Asset, error) { + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + var assets []storage.Asset + for _, data := range releases { + var rel flutterdist.Release + if err := json.Unmarshal(data, &rel); err != nil { + continue + } + + date := "" + if len(rel.ReleaseDate) >= 
10 { + date = rel.ReleaseDate[:10] + } + + filename := filepath.Base(rel.Archive) + r := classify.Filename(filename) + + assets = append(assets, storage.Asset{ + Filename: filename, + Version: rel.Version, + Channel: rel.Channel, + OS: string(r.OS), + Arch: string(r.Arch), + Format: string(r.Format), + Download: rel.DownloadURL, + Date: date, + }) + } + return assets, nil +} + +// --- Go (golang.org) --- + +// normalizeGoOS maps GOOS values to our canonical OS names. +// Most match directly. illumos and solaris remain distinct (they have +// separate builds with different syscall interfaces). +func normalizeGoOS(goos string) string { + switch goos { + case "darwin": + return "darwin" + case "linux": + return "linux" + case "windows": + return "windows" + case "freebsd": + return "freebsd" + case "openbsd": + return "openbsd" + case "netbsd": + return "netbsd" + case "dragonfly": + return "dragonfly" + case "plan9": + return "plan9" + case "aix": + return "aix" + case "illumos": + return "illumos" + case "solaris": + return "solaris" + default: + return goos + } +} + +// normalizeGoArch maps GOARCH values to our canonical arch names. 
+func normalizeGoArch(goarch string) string { + switch goarch { + case "amd64": + return "x86_64" + case "arm64": + return "aarch64" + case "386": + return "x86" + case "arm", "armv6l": + return "armv6" + case "ppc64le": + return "ppc64le" + case "ppc64": + return "ppc64" + case "s390x": + return "s390x" + case "riscv64": + return "riscv64" + case "loong64": + return "loong64" + case "mips64le": + return "mips64le" + case "mips64": + return "mips64" + case "mipsle": + return "mipsle" + case "mips": + return "mips" + default: + return goarch + } +} + +func classifyGolang(d *rawcache.Dir) ([]storage.Asset, error) { + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + var assets []storage.Asset + for _, data := range releases { + var rel golang.Release + if err := json.Unmarshal(data, &rel); err != nil { + continue + } + + // Strip "go" prefix and pad to 3-part version: "go1.10" → "1.10.0" + version := strings.TrimPrefix(rel.Version, "go") + parts := strings.SplitN(version, ".", 3) + for len(parts) < 3 { + parts = append(parts, "0") + } + version = strings.Join(parts, ".") + + channel := "stable" + if !rel.Stable { + channel = "beta" + } + + for _, f := range rel.Files { + if f.Kind == "source" || f.OS == "" { + continue + } + // Skip bootstrap and odd builds. + if strings.Contains(f.Filename, "bootstrap") || strings.Contains(f.Filename, "-arm6.") { + continue + } + + // Use Go API's structured os/arch instead of filename parsing. 
+ osname := normalizeGoOS(f.OS) + arch := normalizeGoArch(f.Arch) + format := classify.Filename(f.Filename).Format + + assets = append(assets, storage.Asset{ + Filename: f.Filename, + Version: version, + Channel: channel, + OS: osname, + Arch: arch, + Format: string(format), + Download: "https://dl.google.com/go/" + f.Filename, + }) + } + } + return assets, nil +} + +// --- GPG (SourceForge) --- + +func classifyGPGDist(d *rawcache.Dir) ([]storage.Asset, error) { + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + var assets []storage.Asset + for _, data := range releases { + var entry gpgdist.Entry + if err := json.Unmarshal(data, &entry); err != nil { + continue + } + + assets = append(assets, storage.Asset{ + Filename: fmt.Sprintf("GnuPG-%s.dmg", entry.Version), + Version: entry.Version, + Channel: "stable", + OS: string(buildmeta.OSDarwin), + Arch: string(buildmeta.ArchAMD64), + Format: string(buildmeta.FormatDMG), + Download: entry.URL, + }) + } + return assets, nil +} + +// --- HashiCorp --- + +func classifyHashiCorp(d *rawcache.Dir) ([]storage.Asset, error) { + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + var assets []storage.Asset + for _, data := range releases { + var ver hashicorp.Version + if err := json.Unmarshal(data, &ver); err != nil { + continue + } + + channel := channelFromVersion(ver.Version) + + for _, b := range ver.Builds { + r := classify.Filename(b.Filename) + + assets = append(assets, storage.Asset{ + Filename: b.Filename, + Version: ver.Version, + Channel: channel, + OS: string(r.OS), + Arch: string(r.Arch), + Format: string(r.Format), + Download: b.URL, + }) + } + } + return assets, nil +} + +// --- iTerm2 --- + +func classifyITerm2Dist(d *rawcache.Dir) ([]storage.Asset, error) { + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + var assets []storage.Asset + for _, data := range releases { + var entry iterm2dist.Entry + if err := json.Unmarshal(data, &entry); 
err != nil { + continue + } + + if entry.Version == "" { + continue + } + + filename := filepath.Base(entry.URL) + + assets = append(assets, storage.Asset{ + Filename: filename, + Version: entry.Version, + Channel: entry.Channel, + OS: "darwin", + Arch: string(buildmeta.ArchAMD64), // iTerm2 is x86_64-only (arm64 runs via Rosetta 2) + Format: ".zip", + Download: entry.URL, + }) + } + return assets, nil +} + +// --- Julia --- + +func classifyJuliaDist(d *rawcache.Dir) ([]storage.Asset, error) { + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + osMap := map[string]string{ + "mac": "darwin", "linux": "linux", "winnt": "windows", + "freebsd": "freebsd", + } + archMap := map[string]string{ + "x86_64": "x86_64", "i686": "x86", "aarch64": "aarch64", + "armv7l": "armv7", "powerpc64le": "ppc64le", + } + + var assets []storage.Asset + for _, data := range releases { + var rel juliadist.Release + if err := json.Unmarshal(data, &rel); err != nil { + continue + } + + channel := "stable" + if !rel.Stable { + channel = "beta" + } + + for _, f := range rel.Files { + if f.Kind == "installer" { + continue + } + + os_ := osMap[f.OS] + arch := archMap[f.Arch] + libc := "" + if os_ == "linux" { + if strings.Contains(f.URL, "musl") { + libc = "musl" + } else { + libc = "gnu" + } + } + + filename := filepath.Base(f.URL) + + assets = append(assets, storage.Asset{ + Filename: filename, + Version: rel.Version, + Channel: channel, + OS: os_, + Arch: arch, + Libc: libc, + Format: "." 
+ f.Extension, + Download: f.URL, + }) + } + } + return assets, nil +} + +// --- MariaDB --- + +func classifyMariaDBDist(d *rawcache.Dir) ([]storage.Asset, error) { + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + channelMap := map[string]string{ + "Stable": "stable", "RC": "rc", "Alpha": "preview", + } + + var assets []storage.Asset + for _, data := range releases { + var rel mariadbdist.Release + if err := json.Unmarshal(data, &rel); err != nil { + continue + } + + channel := channelMap[rel.MajorStatus] + if channel == "" { + channel = "preview" + } + + lts := rel.MajorStatus == "Stable" + + for _, f := range rel.Files { + // Skip source packages. The API uses OS="Source" and + // sometimes " " (not empty) for CPU on source tarballs. + if strings.EqualFold(f.OS, "source") || strings.TrimSpace(f.OS) == "" || strings.TrimSpace(f.CPU) == "" { + continue + } + // Skip debug builds. + if strings.Contains(strings.ToLower(f.FileName), "debug") { + continue + } + + r := classify.Filename(f.FileName) + + assets = append(assets, storage.Asset{ + Filename: f.FileName, + Version: rel.ReleaseID, + Channel: channel, + LTS: lts, + OS: string(r.OS), + Arch: string(r.Arch), + Format: string(r.Format), + Download: f.FileDownloadURL, + Date: rel.DateOfRelease, + }) + } + } + return assets, nil +} + +// --- Zig --- + +func classifyZigDist(d *rawcache.Dir) ([]storage.Asset, error) { + releases, err := ReadAllRaw(d) + if err != nil { + return nil, err + } + + var assets []storage.Asset + for _, data := range releases { + var rel zigdist.Release + if err := json.Unmarshal(data, &rel); err != nil { + continue + } + + channel := "stable" + if !strings.Contains(rel.Version, ".") { + // Branch names like "master" have no dots. + channel = "beta" + } else if strings.ContainsAny(rel.Version, "+-") { + channel = "beta" + } + + for platform, p := range rel.Platforms { + // Skip source and odd entries. 
+ if strings.Contains(platform, "bootstrap") || platform == "src" { + continue + } + if strings.Contains(platform, "armv6kz") { + continue + } + + // Platform is "arch-os", e.g. "x86_64-linux", "aarch64-macos". + parts := strings.SplitN(platform, "-", 2) + if len(parts) != 2 { + continue + } + + filename := filepath.Base(p.Tarball) + r := classify.Filename(filename) + + assets = append(assets, storage.Asset{ + Filename: filename, + Version: rel.Version, + Channel: channel, + OS: string(r.OS), + Arch: string(r.Arch), + Format: string(r.Format), + Download: p.Tarball, + Date: rel.Date, + }) + } + } + return assets, nil +} diff --git a/internal/installerconf/installerconf.go b/internal/installerconf/installerconf.go new file mode 100644 index 0000000..3795c0c --- /dev/null +++ b/internal/installerconf/installerconf.go @@ -0,0 +1,277 @@ +// Package installerconf reads per-package releases.conf files. +// +// The format is simple key=value, one per line. Blank lines and lines +// starting with # are ignored. Keys and values are trimmed of whitespace. +// Multi-value keys are whitespace-delimited. 
+// +// The source type is inferred from the primary key: +// +// GitHub binary releases: +// +// github_releases = sharkdp/bat +// github_releases = https://github.com/sharkdp/bat +// +// GitHub source archives (for source-installable packages): +// +// github_sources = BeyondCodeBootcamp/aliasman +// git_url = https://github.com/BeyondCodeBootcamp/aliasman.git +// +// Gitea binary releases (self-hosted, requires full URL or base_url): +// +// gitea_releases = https://git.rootprojects.org/root/pathman +// +// GitLab binary releases (defaults to gitlab.com): +// +// gitlab_releases = owner/repo +// gitlab_releases = https://gitlab.example.com/owner/repo +// +// Git tag enumeration (vim plugins, etc.): +// +// git_url = https://github.com/tpope/vim-commentary.git +// +// HashiCorp releases: +// +// hashicorp_product = terraform +// +// Other sources (one-off scrapers): +// +// source = nodedist +// url = https://nodejs.org/download/release +// +// Complex packages that need custom logic beyond what the classifier +// auto-detects (e.g. ollama's universal binaries, ffmpeg's non-standard +// naming) should put that logic in Go code, not in the config. +// The variants key documents known build variants for human readers; +// actual variant detection logic lives in Go. +package installerconf + +import ( + "bufio" + "fmt" + "net/url" + "os" + "strings" +) + +// Conf holds the parsed per-package release configuration. +type Conf struct { + // Source is the fetch source type: "github", "githubsource", + // "gitea", "giteasource", "gitlab", "gitlabsource", + // "gittag", "nodedist", etc. + Source string + + // Owner is the repository owner (org or user). + Owner string + + // Repo is the repository name. + Repo string + + // BaseURL is a custom base URL for non-GitHub sources + // (e.g. a Gitea instance or nodedist index URL). + BaseURL string + + // GitURL is the git clone URL for source-installable packages. 
+ // Present alongside github_sources/gitea_sources to provide a + // git clone fallback in addition to release tarballs. + GitURL string + + // TagPrefix filters releases in monorepos. Only tags starting with + // this prefix are included, and the prefix is stripped from the + // version string. Example: "tools/monorel/" + TagPrefix string + + // VersionPrefixes are stripped from version/tag strings. + // Whitespace-delimited. Each release tag is checked against these + // in order; the first match is stripped. Projects may change tag + // conventions across versions (e.g. "jq-1.7.1" older, "1.8.0" later). + VersionPrefixes []string + + // Exclude lists filename substrings to filter out. + // Whitespace-delimited. Assets whose name contains any of these + // are skipped entirely (not stored). + Exclude []string + + // AssetFilter is a substring that asset filenames must contain. + // Used when multiple packages share a GitHub release (e.g. + // kubectx/kubens) to select only the relevant assets. + AssetFilter string + + // Variants documents known build variant names for this package. + // Whitespace-delimited. This is a human-readable cue — actual + // variant detection logic lives in Go code per-package. + Variants []string + + // OS restricts all assets to this OS value when set. + // Use "posix_2017" for POSIX-only shell packages that don't + // support Windows. + OS string + + // AliasOf names another package that this one mirrors. + // When set, the package has no releases of its own — it shares + // the cache output of the named target (e.g. dashd → dashcore). + AliasOf string + + // Extra holds any unrecognized keys for forward compatibility. + Extra map[string]string +} + +// parseRepoRef parses a value that is either "owner/repo" or a full URL +// like "https://github.com/owner/repo". Returns baseURL, owner, repo. +// For short form, baseURL is defaultBase (the forge default passed by the caller). +// For full URL form, baseURL is the scheme+host (e.g.
"https://github.com"). +func parseRepoRef(val, defaultBase string) (baseURL, owner, repo string) { + if strings.Contains(val, "://") { + u, err := url.Parse(val) + if err == nil { + baseURL = u.Scheme + "://" + u.Host + path := strings.Trim(u.Path, "/") + owner, repo, _ = strings.Cut(path, "/") + return baseURL, owner, repo + } + } + // Short form: "owner/repo" + owner, repo, _ = strings.Cut(val, "/") + return defaultBase, owner, repo +} + +// Read parses a releases.conf file. +func Read(path string) (*Conf, error) { + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("installerconf: %w", err) + } + defer f.Close() + + raw := make(map[string]string) + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" || line[0] == '#' { + continue + } + key, val, ok := strings.Cut(line, "=") + if !ok { + continue + } + raw[strings.TrimSpace(key)] = strings.TrimSpace(val) + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("installerconf: read %s: %w", path, err) + } + + c := &Conf{} + + // Infer source from primary key, falling back to explicit "source". + switch { + // GitHub binary releases. + case raw["github_releases"] != "": + c.Source = "github" + c.BaseURL, c.Owner, c.Repo = parseRepoRef(raw["github_releases"], "https://github.com") + + // GitHub source tarballs. + case raw["github_sources"] != "": + c.Source = "githubsource" + c.BaseURL, c.Owner, c.Repo = parseRepoRef(raw["github_sources"], "https://github.com") + + // Gitea binary releases (self-hosted only — requires full URL or base_url). + case raw["gitea_releases"] != "": + c.Source = "gitea" + c.BaseURL, c.Owner, c.Repo = parseRepoRef(raw["gitea_releases"], raw["base_url"]) + + // Gitea source tarballs (self-hosted only). + case raw["gitea_sources"] != "": + c.Source = "giteasource" + c.BaseURL, c.Owner, c.Repo = parseRepoRef(raw["gitea_sources"], raw["base_url"]) + + // GitLab binary releases (defaults to gitlab.com). 
+ case raw["gitlab_releases"] != "": + c.Source = "gitlab" + c.BaseURL, c.Owner, c.Repo = parseRepoRef(raw["gitlab_releases"], "https://gitlab.com") + + // GitLab source tarballs (defaults to gitlab.com). + case raw["gitlab_sources"] != "": + c.Source = "gitlabsource" + c.BaseURL, c.Owner, c.Repo = parseRepoRef(raw["gitlab_sources"], "https://gitlab.com") + + // Explicit source type (servicemandist, nodedist, zigdist, etc.). + // Must come before git_url so that "source = X" + "git_url = ..." + // uses X as the primary source, not gittag. + case raw["source"] != "": + c.Source = raw["source"] + c.BaseURL = raw["url"] + + // Git tag enumeration (only when no explicit source is set). + case raw["git_url"] != "": + c.Source = "gittag" + c.BaseURL = raw["git_url"] + + // HashiCorp. + case raw["hashicorp_product"] != "": + c.Source = "hashicorp" + c.Repo = raw["hashicorp_product"] + + default: + } + + // git_url can appear alongside any source type (e.g. github_sources) + // to provide a git clone fallback. When it's the only key, it's the + // primary source (gittag). + c.GitURL = raw["git_url"] + + c.TagPrefix = raw["tag_prefix"] + + if v := raw["version_prefixes"]; v != "" { + c.VersionPrefixes = strings.Fields(v) + } else if v := raw["version_prefix"]; v != "" { + c.VersionPrefixes = strings.Fields(v) + } + + // Accept both "exclude" and "asset_exclude" (back-compat). + if v := raw["exclude"]; v != "" { + c.Exclude = strings.Fields(v) + } else if v := raw["asset_exclude"]; v != "" { + c.Exclude = strings.Fields(v) + } + + c.AssetFilter = raw["asset_filter"] + c.OS = raw["os"] + c.AliasOf = raw["alias_of"] + + if v := raw["variants"]; v != "" { + c.Variants = strings.Fields(v) + } + + // Collect unrecognized keys. 
+ known := map[string]bool{ + "source": true, + "github_releases": true, + "github_sources": true, + "gitea_releases": true, + "gitea_sources": true, + "gitlab_releases": true, + "gitlab_sources": true, + "git_url": true, + "hashicorp_product": true, + "base_url": true, + "url": true, + "tag_prefix": true, + "version_prefix": true, + "version_prefixes": true, + "exclude": true, + "asset_exclude": true, + "asset_filter": true, + "os": true, + "variants": true, + "alias_of": true, + } + for k, v := range raw { + if !known[k] { + if c.Extra == nil { + c.Extra = make(map[string]string) + } + c.Extra[k] = v + } + } + + return c, nil +} diff --git a/internal/installerconf/installerconf_test.go b/internal/installerconf/installerconf_test.go new file mode 100644 index 0000000..3aed5c2 --- /dev/null +++ b/internal/installerconf/installerconf_test.go @@ -0,0 +1,217 @@ +package installerconf_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/webinstall/webi-installers/internal/installerconf" +) + +func TestGitHubReleases(t *testing.T) { + c := confFromString(t, ` +github_releases = sharkdp/bat +`) + assertEqual(t, "Source", c.Source, "github") + assertEqual(t, "Owner", c.Owner, "sharkdp") + assertEqual(t, "Repo", c.Repo, "bat") + assertEqual(t, "BaseURL", c.BaseURL, "https://github.com") + assertEqual(t, "TagPrefix", c.TagPrefix, "") + if len(c.VersionPrefixes) != 0 { + t.Errorf("VersionPrefixes = %v, want empty", c.VersionPrefixes) + } + if len(c.Exclude) != 0 { + t.Errorf("Exclude = %v, want empty", c.Exclude) + } +} + +func TestGitHubReleasesFullURL(t *testing.T) { + c := confFromString(t, ` +github_releases = https://github.com/sharkdp/bat +`) + assertEqual(t, "Source", c.Source, "github") + assertEqual(t, "BaseURL", c.BaseURL, "https://github.com") + assertEqual(t, "Owner", c.Owner, "sharkdp") + assertEqual(t, "Repo", c.Repo, "bat") +} + +func TestGitHubSources(t *testing.T) { + c := confFromString(t, ` +github_sources = BeyondCodeBootcamp/aliasman 
// TestGitHubSourcesFullURL checks that a full-URL github_sources value is
// split into base URL, owner, and repo.
func TestGitHubSourcesFullURL(t *testing.T) {
	c := confFromString(t, `
github_sources = https://github.com/BeyondCodeBootcamp/aliasman
git_url = https://github.com/BeyondCodeBootcamp/aliasman.git
`)
	assertEqual(t, "Source", c.Source, "githubsource")
	assertEqual(t, "BaseURL", c.BaseURL, "https://github.com")
	assertEqual(t, "Owner", c.Owner, "BeyondCodeBootcamp")
	assertEqual(t, "Repo", c.Repo, "aliasman")
}

// TestVersionPrefixes checks that version_prefixes is split on whitespace
// and keeps its order.
func TestVersionPrefixes(t *testing.T) {
	c := confFromString(t, `
github_releases = jqlang/jq
version_prefixes = jq- cli-
`)
	if len(c.VersionPrefixes) != 2 {
		t.Fatalf("VersionPrefixes has %d items, want 2: %v", len(c.VersionPrefixes), c.VersionPrefixes)
	}
	assertEqual(t, "VersionPrefixes[0]", c.VersionPrefixes[0], "jq-")
	assertEqual(t, "VersionPrefixes[1]", c.VersionPrefixes[1], "cli-")
}

// TestExclude checks that exclude is split on whitespace and keeps its order.
func TestExclude(t *testing.T) {
	c := confFromString(t, `
github_releases = gohugoio/hugo
exclude = _extended_ Linux-64bit
`)
	if len(c.Exclude) != 2 {
		t.Fatalf("Exclude has %d items, want 2: %v", len(c.Exclude), c.Exclude)
	}
	assertEqual(t, "Exclude[0]", c.Exclude[0], "_extended_")
	assertEqual(t, "Exclude[1]", c.Exclude[1], "Linux-64bit")
}

// TestMonorepoTagPrefix checks that tag_prefix is passed through verbatim.
func TestMonorepoTagPrefix(t *testing.T) {
	c := confFromString(t, `
github_releases = therootcompany/golib
tag_prefix = tools/monorel/
`)
	assertEqual(t, "TagPrefix", c.TagPrefix, "tools/monorel/")
}

// TestNodeDist checks an explicit "source" with its "url" as the base URL.
func TestNodeDist(t *testing.T) {
	c := confFromString(t, `
source = nodedist
url = https://nodejs.org/download/release
`)
	assertEqual(t, "Source", c.Source, "nodedist")
	assertEqual(t, "BaseURL", c.BaseURL, "https://nodejs.org/download/release")
}

// TestGiteaReleases checks a full-URL gitea_releases value.
func TestGiteaReleases(t *testing.T) {
	c := confFromString(t, `
gitea_releases = https://git.rootprojects.org/root/pathman
`)
	assertEqual(t, "Source", c.Source, "gitea")
	assertEqual(t, "BaseURL", c.BaseURL, "https://git.rootprojects.org")
	assertEqual(t, "Owner", c.Owner, "root")
	assertEqual(t, "Repo", c.Repo, "pathman")
}

// TestGiteaReleasesWithBaseURL checks the short "owner/repo" form combined
// with a separate base_url key.
func TestGiteaReleasesWithBaseURL(t *testing.T) {
	c := confFromString(t, `
gitea_releases = root/pathman
base_url = https://git.rootprojects.org
`)
	assertEqual(t, "Source", c.Source, "gitea")
	assertEqual(t, "BaseURL", c.BaseURL, "https://git.rootprojects.org")
	assertEqual(t, "Owner", c.Owner, "root")
	assertEqual(t, "Repo", c.Repo, "pathman")
}

// TestGitLabReleases checks that the short form defaults to gitlab.com.
func TestGitLabReleases(t *testing.T) {
	c := confFromString(t, `
gitlab_releases = owner/repo
`)
	assertEqual(t, "Source", c.Source, "gitlab")
	assertEqual(t, "BaseURL", c.BaseURL, "https://gitlab.com")
	assertEqual(t, "Owner", c.Owner, "owner")
	assertEqual(t, "Repo", c.Repo, "repo")
}

// TestGitLabReleasesFullURL checks that a full URL overrides the default
// GitLab host.
func TestGitLabReleasesFullURL(t *testing.T) {
	c := confFromString(t, `
gitlab_releases = https://gitlab.example.com/myorg/myrepo
`)
	assertEqual(t, "Source", c.Source, "gitlab")
	assertEqual(t, "BaseURL", c.BaseURL, "https://gitlab.example.com")
	assertEqual(t, "Owner", c.Owner, "myorg")
	assertEqual(t, "Repo", c.Repo, "myrepo")
}

// TestBlanksAndComments checks that blank lines and '#' comment lines do not
// disturb parsing of the surrounding keys.
func TestBlanksAndComments(t *testing.T) {
	c := confFromString(t, `
# Hugo config
github_releases = foo/bar

# exclude line
exclude = extended
`)
	assertEqual(t, "Source", c.Source, "github")
	assertEqual(t, "Owner", c.Owner, "foo")
	assertEqual(t, "Repo", c.Repo, "bar")
}

// TestExtraKeys checks that an unrecognized key ends up in Conf.Extra.
func TestExtraKeys(t *testing.T) {
	c := confFromString(t, `
github_releases = foo/bar
custom_thing = hello
`)
	if c.Extra == nil || c.Extra["custom_thing"] != "hello" {
		t.Errorf("Extra[custom_thing] = %q, want hello", c.Extra["custom_thing"])
	}
}

// TestAssetExcludeAlias checks that asset_exclude populates Exclude when
// "exclude" itself is absent.
func TestAssetExcludeAlias(t *testing.T) {
	c := confFromString(t, `
github_releases = gohugoio/hugo
asset_exclude = extended
`)
	if len(c.Exclude) != 1 {
		t.Fatalf("Exclude has %d items, want 1: %v", len(c.Exclude), c.Exclude)
	}
	assertEqual(t, "Exclude[0]", c.Exclude[0], "extended")
}

// TestVariants checks that variants is split on whitespace and keeps its order.
func TestVariants(t *testing.T) {
	c := confFromString(t, `
github_releases = jmorganca/ollama
variants = rocm jetpack5 jetpack6
`)
	if len(c.Variants) != 3 {
		t.Fatalf("Variants has %d items, want 3: %v", len(c.Variants), c.Variants)
	}
	assertEqual(t, "Variants[0]", c.Variants[0], "rocm")
	assertEqual(t, "Variants[1]", c.Variants[1], "jetpack5")
	assertEqual(t, "Variants[2]", c.Variants[2], "jetpack6")
}

// TestEmptyExclude checks that Exclude stays nil (not an empty slice) when
// no exclude key is present.
func TestEmptyExclude(t *testing.T) {
	c := confFromString(t, "github_releases = foo/bar\n")
	if c.Exclude != nil {
		t.Errorf("Exclude = %v, want nil", c.Exclude)
	}
}

// helpers

// confFromString writes content to a releases.conf file in a per-test temp
// directory and parses it with installerconf.Read, failing the test on any
// error.
func confFromString(t *testing.T, content string) *installerconf.Conf {
	t.Helper()
	dir := t.TempDir()
	path := filepath.Join(dir, "releases.conf")
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatal(err)
	}
	c, err := installerconf.Read(path)
	if err != nil {
		t.Fatal(err)
	}
	return c
}

// assertEqual reports a non-fatal test error when got differs from want;
// name labels the compared value in the message.
func assertEqual(t *testing.T, name, got, want string) {
	t.Helper()
	if got != want {
		t.Errorf("%s = %q, want %q", name, got, want)
	}
}
// Version is a parsed version with comparable fields.
type Version struct {
	// Nums holds the dotted numeric segments in order.
	// "1.20.3" → [1, 20, 3], "121.0.6120.0" → [121, 0, 6120, 0].
	Nums       []int
	Channel    string    // "" for stable, or "alpha", "beta", "dev", "pre", "preview", "rc"
	ChannelNum int       // e.g. 2 in "rc2"
	Date       time.Time // release date/time, if known; breaks ties between same-numbered versions
	Original   string    // version string exactly as the releaser published it (e.g. "REL_17_0", "r21")
	Raw        string    // version string after Webi's normalization (e.g. "17.0", "0.21.0")

	// ExtraSort is an optional opaque string for package-specific ordering.
	// Set by release-fetcher code for packages where Nums alone can't capture
	// the sort order (e.g. flutter's "2.3.0-16.0.pre"). Compared as a plain
	// string, and only when both versions being compared have it set.
	ExtraSort string
}

// channelSeparators removes the punctuation that may appear inside a
// pre-release label ("rc.1", "beta-2", "pre_3") before it is split into a
// channel name and number. Built once; a Replacer is safe for concurrent use.
var channelSeparators = strings.NewReplacer("-", "", ".", "", "_", "")

// Parse breaks a version string into its components.
//
// Both Original and Raw are set to s; callers that normalize versions
// (e.g. "REL_17_0" → "17.0") should pass the normalized string to Parse and
// then set Original to the upstream tag. Leading 'v'/'V' characters are
// ignored when extracting the numeric segments.
func Parse(s string) Version {
	v := Version{Original: s, Raw: s}

	numStr, prerelease := splitAtPrerelease(strings.TrimLeft(s, "vV"))
	v.Nums = splitNums(numStr)
	if prerelease != "" {
		v.Channel, v.ChannelNum = splitChannel(prerelease)
	}

	return v
}

// Major returns the first numeric segment, or 0 if none.
func (v Version) Major() int { return v.num(0) }

// Minor returns the second numeric segment, or 0 if none.
func (v Version) Minor() int { return v.num(1) }

// Patch returns the third numeric segment, or 0 if none.
func (v Version) Patch() int { return v.num(2) }

// num returns the i-th numeric segment, treating a missing segment as 0.
func (v Version) num(i int) int {
	if i < len(v.Nums) {
		return v.Nums[i]
	}
	return 0
}

// IsStable reports whether this is a stable (non-pre-release) version.
func (v Version) IsStable() bool {
	return v.Channel == ""
}

// Compare returns -1, 0, or 1 for ordering two versions.
//
// Precedence, first difference wins:
//  1. numeric segments, pairwise, with missing segments treated as 0
//  2. Date, only when both versions have one
//  3. ExtraSort, only when both versions have one
//  4. channel: stable sorts after any pre-release; two pre-releases compare
//     by channel name alphabetically, then by channel number
//
// Because steps 2 and 3 are skipped unless both sides carry the field, the
// ordering is not guaranteed to be transitive across a set in which only
// some versions have Date or ExtraSort set. Populate them for all versions
// of a package or for none before sorting.
func Compare(a, b Version) int {
	depth := max(len(a.Nums), len(b.Nums))
	for i := 0; i < depth; i++ {
		if c := cmp.Compare(a.num(i), b.num(i)); c != 0 {
			return c
		}
	}

	// Break ties with release date when both are known.
	if !a.Date.IsZero() && !b.Date.IsZero() {
		if c := a.Date.Compare(b.Date); c != 0 {
			return c
		}
	}

	// ExtraSort: package-specific tiebreaker set by release-fetcher code.
	if a.ExtraSort != "" && b.ExtraSort != "" {
		if c := cmp.Compare(a.ExtraSort, b.ExtraSort); c != 0 {
			return c
		}
	}

	switch {
	case a.Channel == "" && b.Channel == "":
		return 0 // both stable
	case a.Channel == "":
		return 1 // stable beats any pre-release
	case b.Channel == "":
		return -1
	}

	// Both pre-release: alphabetical channel, then number.
	if c := cmp.Compare(a.Channel, b.Channel); c != 0 {
		return c
	}
	return cmp.Compare(a.ChannelNum, b.ChannelNum)
}

// HasPrefix reports whether v matches a partial version prefix.
// A prefix with Nums [1, 20] matches any version starting with 1.20
// (e.g. 1.20.0, 1.20.3, 1.20.3.1). Only Nums are considered; a prefix with
// no numeric segments matches everything.
func (v Version) HasPrefix(prefix Version) bool {
	for i, pn := range prefix.Nums {
		if i >= len(v.Nums) || v.Nums[i] != pn {
			return false
		}
	}
	return true
}

// splitAtPrerelease splits "1.20.3-beta1" into ("1.20.3", "beta1").
//
// The split happens at the first '-' or '+', whichever comes first, so
// "1.0.0+build-1" yields ("1.0.0", "build-1") rather than cutting at the
// later '-' and leaving "+build" glued to the numeric part. The separator
// itself is dropped.
//
// Without a separator, the split falls where a letter directly follows a
// digit ("1.2beta3" → "1.2", "beta3"). If neither applies the whole string
// is returned as the numeric part.
func splitAtPrerelease(s string) (string, string) {
	if idx := strings.IndexAny(s, "-+"); idx >= 0 {
		return s[:idx], s[idx+1:]
	}

	// "1.2beta3": letter following a digit
	for i := 1; i < len(s); i++ {
		if unicode.IsLetter(rune(s[i])) && unicode.IsDigit(rune(s[i-1])) {
			return s[:i], s[i:]
		}
	}

	return s, ""
}

// splitNums parses "1.20.3" into [1, 20, 3].
// Handles any number of dot-separated segments and stops at the first
// segment that is not an integer, keeping what was parsed so far.
func splitNums(s string) []int {
	var nums []int
	for _, seg := range strings.Split(s, ".") {
		n, err := strconv.Atoi(seg)
		if err != nil {
			break
		}
		nums = append(nums, n)
	}
	return nums
}

// splitChannel separates "beta1" into ("beta", 1) or "rc" into ("rc", 0).
// The input is lowercased and '-', '.', '_' are removed first, so "RC.2"
// also yields ("rc", 2). The trailing run of digits is the number.
func splitChannel(s string) (string, int) {
	s = channelSeparators.Replace(strings.ToLower(s))

	i := len(s)
	for i > 0 && unicode.IsDigit(rune(s[i-1])) {
		i--
	}

	num := 0
	if i < len(s) {
		// An out-of-range number leaves Atoi's clamped value; the error is
		// intentionally not surfaced because the channel name is still valid.
		num, _ = strconv.Atoi(s[i:])
	}

	return s[:i], num
}
// TestAccessors checks Major/Minor/Patch on a 4-part version and that
// missing segments read as 0.
func TestAccessors(t *testing.T) {
	v := lexver.Parse("121.0.6120.0")
	if v.Major() != 121 || v.Minor() != 0 || v.Patch() != 6120 {
		t.Errorf("got %d.%d.%d, want 121.0.6120", v.Major(), v.Minor(), v.Patch())
	}

	short := lexver.Parse("1")
	if short.Minor() != 0 || short.Patch() != 0 {
		t.Error("missing segments should return 0")
	}
}

// TestSortOrder checks that each adjacent pair of an ascending list of
// stable and pre-release versions compares as strictly less.
func TestSortOrder(t *testing.T) {
	// Must be in ascending order.
	ordered := []string{
		"0.1.0",
		"1.0.0-alpha1",
		"1.0.0-alpha2",
		"1.0.0-beta1",
		"1.0.0-rc1",
		"1.0.0-rc2",
		"1.0.0",
		"1.0.1",
		"1.1.0",
		"1.2.0",
		"1.20.0",
		"2.0.0-beta1",
		"2.0.0",
	}

	for i := 1; i < len(ordered); i++ {
		a := lexver.Parse(ordered[i-1])
		b := lexver.Parse(ordered[i])
		if lexver.Compare(a, b) >= 0 {
			t.Errorf("expected %q < %q", ordered[i-1], ordered[i])
		}
	}
}

// TestSortOrder4Part is the same adjacency check for 4-segment versions.
func TestSortOrder4Part(t *testing.T) {
	ordered := []string{
		"121.0.6120.0",
		"121.0.6120.1",
		"121.0.6121.0",
		"122.0.6100.0",
	}

	for i := 1; i < len(ordered); i++ {
		a := lexver.Parse(ordered[i-1])
		b := lexver.Parse(ordered[i])
		if lexver.Compare(a, b) >= 0 {
			t.Errorf("expected %q < %q", ordered[i-1], ordered[i])
		}
	}
}

// TestMismatchedDepth checks comparison between versions with different
// numbers of segments.
func TestMismatchedDepth(t *testing.T) {
	// "1.0" and "1.0.0" should be equal (trailing zeros).
	a := lexver.Parse("1.0")
	b := lexver.Parse("1.0.0")
	if lexver.Compare(a, b) != 0 {
		t.Error("1.0 and 1.0.0 should be equal")
	}

	// "1.0.0.1" should be greater than "1.0.0".
	c := lexver.Parse("1.0.0.1")
	d := lexver.Parse("1.0.0")
	if lexver.Compare(c, d) <= 0 {
		t.Error("1.0.0.1 should be greater than 1.0.0")
	}
}

// TestSortFunc checks that Compare works as a slices.SortFunc comparator,
// here with arguments swapped for a newest-first sort.
func TestSortFunc(t *testing.T) {
	versions := []string{"1.0.0", "2.0.0-rc1", "1.20.3", "1.20.2", "1.19.5", "2.0.0"}
	parsed := make([]lexver.Version, len(versions))
	for i, s := range versions {
		parsed[i] = lexver.Parse(s)
	}

	// Sort descending (newest first).
	slices.SortFunc(parsed, func(a, b lexver.Version) int {
		return lexver.Compare(b, a)
	})

	want := []string{"2.0.0", "2.0.0-rc1", "1.20.3", "1.20.2", "1.19.5", "1.0.0"}
	for i, v := range parsed {
		if v.Raw != want[i] {
			t.Errorf("index %d: got %q, want %q", i, v.Raw, want[i])
		}
	}
}

// TestIsStable checks that only versions without a channel are stable.
func TestIsStable(t *testing.T) {
	tests := []struct {
		input string
		want  bool
	}{
		{"1.0.0", true},
		{"121.0.6120.0", true},
		{"1.0.0-beta1", false},
		{"v2.0.0-dev", false},
	}

	for _, tt := range tests {
		t.Run(tt.input, func(t *testing.T) {
			v := lexver.Parse(tt.input)
			if v.IsStable() != tt.want {
				t.Errorf("Parse(%q).IsStable() = %v, want %v", tt.input, v.IsStable(), tt.want)
			}
		})
	}
}

// TestDateTiebreaker checks that Date orders otherwise-equal versions, and
// that it is consulted only when both sides have a date.
func TestDateTiebreaker(t *testing.T) {
	a := lexver.Parse("1.0.0")
	a.Date = time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC)

	b := lexver.Parse("1.0.0")
	b.Date = time.Date(2024, 6, 1, 14, 30, 0, 0, time.UTC)

	if lexver.Compare(a, b) >= 0 {
		t.Error("earlier date should sort before later date at same version")
	}

	// Without dates, same version is equal.
	c := lexver.Parse("1.0.0")
	d := lexver.Parse("1.0.0")
	if lexver.Compare(c, d) != 0 {
		t.Error("same version without dates should be equal")
	}

	// Date only matters when both have it.
	e := lexver.Parse("1.0.0")
	e.Date = time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC)
	f := lexver.Parse("1.0.0")
	if lexver.Compare(e, f) != 0 {
		t.Error("date should be ignored when only one side has it")
	}
}

// TestDateMinutePrecision checks that the time of day, not just the calendar
// date, takes part in the Date tiebreak.
func TestDateMinutePrecision(t *testing.T) {
	a := lexver.Parse("1.0.0")
	a.Date = time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC)

	b := lexver.Parse("1.0.0")
	b.Date = time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)

	if lexver.Compare(a, b) >= 0 {
		t.Error("same date, later time should sort after")
	}
}

// TestOriginal checks that Parse seeds Original with its input and that
// overriding Original afterwards leaves Raw untouched.
func TestOriginal(t *testing.T) {
	// Parse sets both Original and Raw to the input.
	v := lexver.Parse("17.0")
	if v.Original != "17.0" {
		t.Errorf("Original = %q, want %q", v.Original, "17.0")
	}

	// Release fetcher would do:
	//   v := lexver.Parse("17.0")
	//   v.Original = "REL_17_0"
	v.Original = "REL_17_0"
	if v.Raw != "17.0" {
		t.Errorf("Raw should remain %q after setting Original, got %q", "17.0", v.Raw)
	}
}

// TestExtraSort checks that ExtraSort orders versions whose Nums and Channel
// match, and that it is consulted only when both sides set it.
func TestExtraSort(t *testing.T) {
	// Flutter example: 2.3.0-16.0.pre and 2.3.0-16.1.pre
	// Nums and Channel are the same; ExtraSort distinguishes them.
	a := lexver.Parse("2.3.0-pre")
	a.ExtraSort = "0016.0000"

	b := lexver.Parse("2.3.0-pre")
	b.ExtraSort = "0016.0001"

	if lexver.Compare(a, b) >= 0 {
		t.Error("ExtraSort 0016.0000 should sort before 0016.0001")
	}

	// ExtraSort ignored when only one side has it.
	c := lexver.Parse("2.3.0-pre")
	c.ExtraSort = "0016.0000"
	d := lexver.Parse("2.3.0-pre")
	if lexver.Compare(c, d) != 0 {
		t.Error("ExtraSort should be ignored when only one side has it")
	}
}
// LogEntry records one event in the append-only audit log.
type LogEntry struct {
	Time   time.Time `json:"time"`             // when the event was logged; Append fills it in when zero
	Tag    string    `json:"tag"`              // release tag the event is about
	Action string    `json:"action"`           // "added", "changed", "removed"
	SHA256 string    `json:"sha256,omitempty"` // hex digest of the stored content; omitted when empty
}

// AuditLog is an append-only JSONL file that tracks when releases appear,
// change, or disappear from upstream. One file per package, lives alongside
// the double-buffer slots.
type AuditLog struct {
	path string
}

// openLog returns the audit log for a Dir ({root}/audit.jsonl). The file is
// not touched until the first Append.
func (d *Dir) openLog() *AuditLog {
	return &AuditLog{path: filepath.Join(d.root, "audit.jsonl")}
}

// Append writes one log entry as a single JSON line, creating the file if
// needed. A zero entry.Time is replaced with the current UTC time.
//
// The file is opened and closed on every call; marshal, open, write and
// close failures are each reported.
func (l *AuditLog) Append(entry LogEntry) error {
	if entry.Time.IsZero() {
		entry.Time = time.Now().UTC()
	}
	data, err := json.Marshal(entry)
	if err != nil {
		return fmt.Errorf("rawcache: marshal log entry: %w", err)
	}
	// Line and newline go out in one Write so an entry is never split.
	data = append(data, '\n')

	f, err := os.OpenFile(l.path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		return fmt.Errorf("rawcache: open audit log: %w", err)
	}
	_, writeErr := f.Write(data)
	closeErr := f.Close()
	if writeErr != nil {
		return fmt.Errorf("rawcache: write audit log: %w", writeErr)
	}
	if closeErr != nil {
		return fmt.Errorf("rawcache: close audit log: %w", closeErr)
	}
	return nil
}

// ContentHash returns the SHA-256 hex digest of data.
func ContentHash(data []byte) string {
	h := sha256.Sum256(data)
	return hex.EncodeToString(h[:])
}

// Dir manages a raw release cache for one package.
type Dir struct {
	root string // e.g. "_cache/raw/github/gohugoio/hugo"
}

// Open returns a Dir for the given root path. Creates the directory
// structure (slots "a" and "b" plus the "active" symlink, initially pointing
// at "a") if it doesn't exist. An existing "active" entry is left as it is.
func Open(root string) (*Dir, error) {
	d := &Dir{root: root}

	for _, name := range []string{"a", "b"} {
		if err := os.MkdirAll(filepath.Join(root, name), 0o755); err != nil {
			return nil, fmt.Errorf("rawcache: create slot: %w", err)
		}
	}

	active := filepath.Join(root, "active")
	if _, err := os.Lstat(active); errors.Is(err, os.ErrNotExist) {
		if err := os.Symlink("a", active); err != nil {
			return nil, fmt.Errorf("rawcache: create active symlink: %w", err)
		}
	}

	return d, nil
}

// ActivePath returns the path of the currently active slot: the target of
// the "active" symlink joined onto the cache root.
func (d *Dir) ActivePath() (string, error) {
	target, err := os.Readlink(filepath.Join(d.root, "active"))
	if err != nil {
		return "", fmt.Errorf("rawcache: read active symlink: %w", err)
	}
	return filepath.Join(d.root, target), nil
}

// standbySlot returns the name of the inactive slot ("a" or "b"). Any
// symlink target other than "a" is treated as meaning "b is active".
func (d *Dir) standbySlot() (string, error) {
	target, err := os.Readlink(filepath.Join(d.root, "active"))
	if err != nil {
		return "", fmt.Errorf("rawcache: read active symlink: %w", err)
	}
	if target == "a" {
		return "b", nil
	}
	return "a", nil
}

// Populated returns true if the active slot contains at least one release
// file, i.e. a non-directory entry whose name does not start with "_"
// (which excludes the _latest marker).
func (d *Dir) Populated() bool {
	active, err := d.ActivePath()
	if err != nil {
		return false
	}
	entries, err := os.ReadDir(active)
	if err != nil {
		return false
	}
	for _, e := range entries {
		if !e.IsDir() && !strings.HasPrefix(e.Name(), "_") {
			return true
		}
	}
	return false
}

// Has reports whether a release file exists in the active slot. Any stat
// failure is reported as "not present".
func (d *Dir) Has(tag string) bool {
	active, err := d.ActivePath()
	if err != nil {
		return false
	}
	_, err = os.Stat(filepath.Join(active, tagToFilename(tag)))
	return err == nil
}

// Latest returns the newest tag from the active slot.
// Returns "" if no latest marker exists or it cannot be read.
func (d *Dir) Latest() string {
	active, err := d.ActivePath()
	if err != nil {
		return ""
	}
	data, err := os.ReadFile(filepath.Join(active, "_latest"))
	if err != nil {
		return ""
	}
	return strings.TrimSpace(string(data))
}

// Read returns the raw cached data for a tag from the active slot.
func (d *Dir) Read(tag string) ([]byte, error) {
	active, err := d.ActivePath()
	if err != nil {
		return nil, err
	}
	return os.ReadFile(filepath.Join(active, tagToFilename(tag)))
}

// Put writes a release file to the active slot. The write is atomic
// (temp file + rename).
func (d *Dir) Put(tag string, data []byte) error {
	active, err := d.ActivePath()
	if err != nil {
		return err
	}
	return atomicWrite(filepath.Join(active, tagToFilename(tag)), data)
}

// Merge writes a release to the active slot if it's new or changed.
// Returns the action taken: "added", "changed", or "" (unchanged).
//
// Every "added" or "changed" event is appended to the audit log. If the
// cache file was written but the audit entry could not be, Merge returns the
// action together with the audit error: the cache is up to date, but the
// caller is told that the log is incomplete instead of the entry being
// dropped silently.
func (d *Dir) Merge(tag string, data []byte) (string, error) {
	hash := ContentHash(data)

	action := "added"
	if d.Has(tag) {
		existing, err := d.Read(tag)
		if err != nil {
			return "", err
		}
		if ContentHash(existing) == hash {
			return "", nil // unchanged
		}
		action = "changed"
	}

	if err := d.Put(tag, data); err != nil {
		return "", err
	}
	if err := d.openLog().Append(LogEntry{Tag: tag, Action: action, SHA256: hash}); err != nil {
		return action, err
	}
	return action, nil
}

// SetLatest updates the _latest marker in the active slot.
func (d *Dir) SetLatest(tag string) error {
	active, err := d.ActivePath()
	if err != nil {
		return err
	}
	return atomicWrite(filepath.Join(active, "_latest"), []byte(tag+"\n"))
}

// BeginRefresh starts a full refresh. Clears the standby slot and returns
// a Refresh handle for writing to it. Call Commit to atomically swap, or
// Abort to discard.
//
// The standby slot is removed and recreated, so it is guaranteed to be empty
// and to exist. A failure to clear it is returned as an error: leftovers
// would otherwise be published as current data by Commit.
func (d *Dir) BeginRefresh() (*Refresh, error) {
	standby, err := d.standbySlot()
	if err != nil {
		return nil, err
	}
	standbyPath := filepath.Join(d.root, standby)

	if err := os.RemoveAll(standbyPath); err != nil {
		return nil, fmt.Errorf("rawcache: clear standby slot: %w", err)
	}
	if err := os.MkdirAll(standbyPath, 0o755); err != nil {
		return nil, fmt.Errorf("rawcache: create standby slot: %w", err)
	}

	return &Refresh{
		dir:     d,
		slot:    standby,
		slotDir: standbyPath,
	}, nil
}

// Refresh writes releases to the standby slot during a full refresh.
type Refresh struct {
	dir     *Dir
	slot    string // "a" or "b"
	slotDir string // path of the standby slot directory
}

// Put writes a release file to the standby slot.
func (r *Refresh) Put(tag string, data []byte) error {
	return atomicWrite(filepath.Join(r.slotDir, tagToFilename(tag)), data)
}

// SetLatest updates the _latest marker in the standby slot.
func (r *Refresh) SetLatest(tag string) error {
	return atomicWrite(filepath.Join(r.slotDir, "_latest"), []byte(tag+"\n"))
}

// Commit atomically swaps the active symlink to point to the standby slot.
// A new symlink is created under a temporary name and renamed over "active".
func (r *Refresh) Commit() error {
	active := filepath.Join(r.dir.root, "active")
	tmp := active + ".tmp"

	// Remove stale temp symlink if it exists; a real problem surfaces from
	// Symlink below.
	os.Remove(tmp)

	if err := os.Symlink(r.slot, tmp); err != nil {
		return fmt.Errorf("rawcache: create temp symlink: %w", err)
	}
	if err := os.Rename(tmp, active); err != nil {
		os.Remove(tmp)
		return fmt.Errorf("rawcache: swap active symlink: %w", err)
	}
	return nil
}

// Abort discards the standby slot contents. It is best-effort: removal
// errors are ignored because the next BeginRefresh clears the slot again.
func (r *Refresh) Abort() {
	entries, _ := os.ReadDir(r.slotDir)
	for _, e := range entries {
		os.Remove(filepath.Join(r.slotDir, e.Name()))
	}
}

// tagToFilename converts a tag to a filename inside a slot.
// Tags like "v0.145.0" are used unchanged. No extension is added because
// upstream responses may be JSON, CSV, XML, or bespoke formats.
//
// Only "/" is rewritten (to "_"), so two tags that differ only in "/" versus
// "_" map to the same file.
func tagToFilename(tag string) string {
	// Replace path separators in case a tag contains slashes.
	return strings.ReplaceAll(tag, "/", "_")
}

// atomicWrite writes data to path via a temp file + rename, so readers see
// either the old content or the new content. The temp file is path+".tmp"
// and is removed again if either step fails.
func atomicWrite(path string, data []byte) error {
	tmp := path + ".tmp"
	if err := os.WriteFile(tmp, data, 0o644); err != nil {
		os.Remove(tmp) // best effort: don't leave a partial temp file behind
		return fmt.Errorf("rawcache: write %s: %w", tmp, err)
	}
	if err := os.Rename(tmp, path); err != nil {
		os.Remove(tmp)
		return fmt.Errorf("rawcache: rename %s: %w", path, err)
	}
	return nil
}
+ for _, name := range []string{"a", "b"} { + info, err := os.Stat(filepath.Join(root, name)) + if err != nil { + t.Fatalf("slot %s: %v", name, err) + } + if !info.IsDir() { + t.Fatalf("slot %s is not a directory", name) + } + } + + target, err := os.Readlink(filepath.Join(root, "active")) + if err != nil { + t.Fatal(err) + } + if target != "a" { + t.Errorf("active symlink = %q, want %q", target, "a") + } +} + +func TestPutAndRead(t *testing.T) { + d, err := rawcache.Open(filepath.Join(t.TempDir(), "pkg")) + if err != nil { + t.Fatal(err) + } + + data := []byte(`{"tag_name":"v1.0.0"}`) + if err := d.Put("v1.0.0", data); err != nil { + t.Fatal(err) + } + + if !d.Has("v1.0.0") { + t.Error("Has(v1.0.0) = false after Put") + } + if d.Has("v2.0.0") { + t.Error("Has(v2.0.0) = true, should be false") + } + + got, err := d.Read("v1.0.0") + if err != nil { + t.Fatal(err) + } + if string(got) != string(data) { + t.Errorf("Read = %q, want %q", got, data) + } +} + +func TestLatest(t *testing.T) { + d, err := rawcache.Open(filepath.Join(t.TempDir(), "pkg")) + if err != nil { + t.Fatal(err) + } + + if latest := d.Latest(); latest != "" { + t.Errorf("Latest() = %q before any writes, want empty", latest) + } + + if err := d.SetLatest("v1.0.0"); err != nil { + t.Fatal(err) + } + if latest := d.Latest(); latest != "v1.0.0" { + t.Errorf("Latest() = %q, want %q", latest, "v1.0.0") + } +} + +func TestRefreshDoubleBuffer(t *testing.T) { + root := filepath.Join(t.TempDir(), "pkg") + d, err := rawcache.Open(root) + if err != nil { + t.Fatal(err) + } + + // Write to active slot (A). + d.Put("v1.0.0", []byte(`{"old":true}`)) + d.SetLatest("v1.0.0") + + // Start a full refresh — writes to standby (B). + r, err := d.BeginRefresh() + if err != nil { + t.Fatal(err) + } + r.Put("v1.0.0", []byte(`{"new":true}`)) + r.Put("v2.0.0", []byte(`{"tag_name":"v2.0.0"}`)) + r.SetLatest("v2.0.0") + + // Before commit, active still points to A. 
+ if d.Latest() != "v1.0.0" { + t.Error("latest should still be v1.0.0 before commit") + } + old, _ := d.Read("v1.0.0") + if string(old) != `{"old":true}` { + t.Errorf("active slot should still have old data, got %q", old) + } + + // Commit swaps to B. + if err := r.Commit(); err != nil { + t.Fatal(err) + } + + if d.Latest() != "v2.0.0" { + t.Errorf("Latest() = %q after commit, want %q", d.Latest(), "v2.0.0") + } + if !d.Has("v2.0.0") { + t.Error("v2.0.0 should exist after commit") + } + updated, _ := d.Read("v1.0.0") + if string(updated) != `{"new":true}` { + t.Errorf("v1.0.0 should be updated after commit, got %q", updated) + } +} + +func TestRefreshAbort(t *testing.T) { + root := filepath.Join(t.TempDir(), "pkg") + d, err := rawcache.Open(root) + if err != nil { + t.Fatal(err) + } + + d.Put("v1.0.0", []byte(`original`)) + d.SetLatest("v1.0.0") + + r, err := d.BeginRefresh() + if err != nil { + t.Fatal(err) + } + r.Put("v99.0.0", []byte(`aborted`)) + r.Abort() + + // Active slot should be unchanged. + if d.Latest() != "v1.0.0" { + t.Error("latest should still be v1.0.0 after abort") + } + if d.Has("v99.0.0") { + t.Error("v99.0.0 should not exist after abort") + } +} + +func TestOpenIdempotent(t *testing.T) { + root := filepath.Join(t.TempDir(), "pkg") + + d1, err := rawcache.Open(root) + if err != nil { + t.Fatal(err) + } + d1.Put("v1.0.0", []byte(`data`)) + + // Opening again should not lose data. + d2, err := rawcache.Open(root) + if err != nil { + t.Fatal(err) + } + if !d2.Has("v1.0.0") { + t.Error("data lost after re-open") + } +} diff --git a/internal/releases/atomicparsley/variants.go b/internal/releases/atomicparsley/variants.go new file mode 100644 index 0000000..c63ad0a --- /dev/null +++ b/internal/releases/atomicparsley/variants.go @@ -0,0 +1,50 @@ +// Package atomicparsley provides OS/arch classification for AtomicParsley releases. +// +// AtomicParsley uses non-standard filenames with no platform terms +// (e.g. 
"AtomicParsleyLinux.zip", "AtomicParsleyMacOS.zip"). The generic +// filename classifier can't extract OS or arch from these — this package +// applies the same hardcoded mapping that the production releases.js uses. +package atomicparsleydist + +import ( + "strings" + + "github.com/webinstall/webi-installers/internal/storage" +) + +// TagVariants sets OS, arch, and libc for AtomicParsley assets based on +// filename keyword matching. Replicates atomicparsley/releases.js mappings: +// - Alpine → linux/x86_64/musl +// - Linux → linux/x86_64/gnu +// - MacOS → darwin/x86_64 +// - WindowsX86 → windows/x86/msvc +// - Windows → windows/x86_64/msvc +func TagVariants(assets []storage.Asset) { + for i := range assets { + if assets[i].OS != "" { + continue // already classified + } + lower := strings.ToLower(assets[i].Filename) + switch { + case strings.Contains(lower, "alpine"): + assets[i].OS = "linux" + assets[i].Arch = "x86_64" + assets[i].Libc = "musl" + case strings.Contains(lower, "linux"): + assets[i].OS = "linux" + assets[i].Arch = "x86_64" + assets[i].Libc = "gnu" + case strings.Contains(lower, "macos"): + assets[i].OS = "darwin" + assets[i].Arch = "x86_64" + case strings.Contains(lower, "windowsx86"): + assets[i].OS = "windows" + assets[i].Arch = "x86" + assets[i].Libc = "msvc" + case strings.Contains(lower, "windows"): + assets[i].OS = "windows" + assets[i].Arch = "x86_64" + assets[i].Libc = "msvc" + } + } +} diff --git a/internal/releases/bun/variants.go b/internal/releases/bun/variants.go new file mode 100644 index 0000000..b3b7a9a --- /dev/null +++ b/internal/releases/bun/variants.go @@ -0,0 +1,39 @@ +// Package bun provides variant tagging for Bun releases. +// +// Bun publishes -profile (debug) builds and uses a non-standard arch +// convention: the default x86_64 build targets x86_64_v3 (AVX2+), +// while -baseline targets plain x86_64. 
+package bundist + +import ( + "strings" + + "github.com/webinstall/webi-installers/internal/storage" +) + +// TagVariants tags bun-specific build variants and remaps arch fields. +// +// Bun's default x86_64 build requires AVX2 (x86_64_v3). The -baseline +// build targets plain x86_64. For legacy export, baseline is the one +// we serve (matching Node.js behavior), so non-baseline gets a variant +// tag. The -baseline suffix is stripped from Filename (but not Download) +// so the legacy server sees a clean name. +func TagVariants(assets []storage.Asset) { + for i := range assets { + lower := strings.ToLower(assets[i].Filename) + if strings.Contains(lower, "-profile") { + assets[i].Variants = append(assets[i].Variants, "profile") + } + if assets[i].Arch == "x86_64" { + if strings.Contains(lower, "-baseline") { + // Baseline is plain x86_64 — strip the suffix from + // Filename so the legacy server sees a clean name. + assets[i].Filename = strings.Replace(assets[i].Filename, "-baseline", "", 1) + } else { + // Non-baseline is v3 — tag as variant (excluded from legacy). + assets[i].Arch = "x86_64_v3" + assets[i].Variants = append(assets[i].Variants, "v3") + } + } + } +} diff --git a/internal/releases/chromedist/chromedist.go b/internal/releases/chromedist/chromedist.go new file mode 100644 index 0000000..b5e2acf --- /dev/null +++ b/internal/releases/chromedist/chromedist.go @@ -0,0 +1,72 @@ +// Package chromedist fetches Chrome for Testing release data. +// +// Google publishes a JSON index of known-good Chrome/ChromeDriver versions at: +// +// https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json +// +// Each version entry has per-platform download URLs for chrome, chromedriver, +// and chrome-headless-shell. +package chromedist + +import ( + "context" + "encoding/json" + "fmt" + "iter" + "net/http" +) + +// Index is the top-level response. 
+type Index struct { + Timestamp string `json:"timestamp"` + Versions []Version `json:"versions"` +} + +// Version is one Chrome for Testing version with its downloads. +type Version struct { + Version string `json:"version"` // "121.0.6120.0" + Revision string `json:"revision"` // "1222902" + Downloads map[string][]Download `json:"downloads"` // "chromedriver" → []Download +} + +// Download is one platform-specific download URL. +type Download struct { + Platform string `json:"platform"` // "linux64", "mac-arm64", "mac-x64", "win32", "win64" + URL string `json:"url"` +} + +// Fetch retrieves the Chrome for Testing release index. +// +// Yields one batch containing all versions. +func Fetch(ctx context.Context, client *http.Client) iter.Seq2[[]Version, error] { + return func(yield func([]Version, error) bool) { + url := "https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json" + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + yield(nil, fmt.Errorf("chromedist: %w", err)) + return + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + yield(nil, fmt.Errorf("chromedist: fetch: %w", err)) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + yield(nil, fmt.Errorf("chromedist: fetch: %s", resp.Status)) + return + } + + var idx Index + if err := json.NewDecoder(resp.Body).Decode(&idx); err != nil { + yield(nil, fmt.Errorf("chromedist: decode: %w", err)) + return + } + + yield(idx.Versions, nil) + } +} diff --git a/internal/releases/cmake/variants.go b/internal/releases/cmake/variants.go new file mode 100644 index 0000000..3a46693 --- /dev/null +++ b/internal/releases/cmake/variants.go @@ -0,0 +1,60 @@ +package cmakedist + +import ( + "strings" + + "github.com/webinstall/webi-installers/internal/storage" +) + +// TagVariants tags cmake-specific build variants for exclusion from legacy export. 
+// +// cmake ships many formats and platforms that webi can't serve: +// +// - .sh self-extracting installer scripts: webi uses the .tar.gz archives. +// +// - .tar.Z files (old UNIX compress format): format not recognized by webi. +// +// - Darwin64 builds (pre-3.6 macOS naming): ancient format, superseded by +// the macos-universal builds. +// +// - sunos-sparc64 builds: unsupported platform (sparc64 arch not recognized). +// +// - AIX/powerpc builds: unsupported platform. +// +// - IRIX builds: unsupported platform. +// +// Note: macos10.N versioned builds (cmake-*-macos10.10-universal.tar.gz) are +// NOT dropped. Go correctly classifies them as os="darwin". The Node production +// classifier has a gap and can't parse "macos10.10" → that is a known prod bug, +// not a Go correctness issue. NODER should treat these as expected differences. +func TagVariants(assets []storage.Asset) { + for i := range assets { + lower := strings.ToLower(assets[i].Filename) + + // Self-extracting installer scripts — webi uses .tar.gz archives. + if strings.HasSuffix(lower, ".sh") { + assets[i].Variants = append(assets[i].Variants, "installer") + continue + } + + // Old UNIX compress format (.tar.Z) — not supported by webi. + if strings.HasSuffix(lower, ".tar.z") { + assets[i].Variants = append(assets[i].Variants, "legacy-archive") + continue + } + + // Darwin64 builds: pre-cmake-3.6 macOS naming, superseded by macos-universal. + if strings.Contains(lower, "darwin64") { + assets[i].Variants = append(assets[i].Variants, "legacy-mac") + continue + } + + // Unsupported platforms. 
+ if strings.Contains(lower, "sunos") || + strings.Contains(lower, "-aix-") || + strings.Contains(lower, "irix") { + assets[i].Variants = append(assets[i].Variants, "unsupported-platform") + continue + } + } +} diff --git a/internal/releases/fish/variants.go b/internal/releases/fish/variants.go new file mode 100644 index 0000000..929a40c --- /dev/null +++ b/internal/releases/fish/variants.go @@ -0,0 +1,28 @@ +// Package fish provides variant tagging for fish shell releases. +// +// Fish publishes .pkg macOS installers alongside the standard archives. +// It also includes a source tarball (fish-{version}.tar.xz) as an +// uploaded release asset — no OS or arch in the name, indistinguishable +// from binaries by content_type. We tag it explicitly as "source". +package fishdist + +import "github.com/webinstall/webi-installers/internal/storage" + +// TagVariants tags fish-specific build variants. +func TagVariants(assets []storage.Asset) { + for i := range assets { + if assets[i].Format == ".pkg" { + assets[i].Variants = append(assets[i].Variants, "installer") + } + // Source tarball: no OS or arch detected by the classifier. + if assets[i].OS == "" && assets[i].Arch == "" { + assets[i].Variants = append(assets[i].Variants, "source") + } + // fish-*.app.zip is a macOS universal binary. Fish's naming puts + // arch in Linux filenames (e.g. fish-*-aarch64.tar.xz) but not in + // macOS .app.zip. Tag as x86_64; darwin waterfall serves arm64. + if assets[i].OS == "darwin" && assets[i].Arch == "" && assets[i].Format == ".app.zip" { + assets[i].Arch = "x86_64" + } + } +} diff --git a/internal/releases/flutterdist/flutterdist.go b/internal/releases/flutterdist/flutterdist.go new file mode 100644 index 0000000..cc2b719 --- /dev/null +++ b/internal/releases/flutterdist/flutterdist.go @@ -0,0 +1,94 @@ +// Package flutterdist fetches Flutter release data from Google Storage. 
+// +// Flutter publishes per-OS release indexes: +// +// https://storage.googleapis.com/flutter_infra_release/releases/releases_macos.json +// https://storage.googleapis.com/flutter_infra_release/releases/releases_linux.json +// https://storage.googleapis.com/flutter_infra_release/releases/releases_windows.json +// +// Each response has a base_url and a releases array with version, channel, +// release_date, archive path, and sha256. +package flutterdist + +import ( + "context" + "encoding/json" + "fmt" + "iter" + "net/http" +) + +// index is the top-level JSON structure for one OS endpoint. +type index struct { + BaseURL string `json:"base_url"` + Releases []Release `json:"releases"` +} + +// Release is one Flutter release entry. +type Release struct { + Hash string `json:"hash"` // git commit hash + Channel string `json:"channel"` // "stable", "beta", "dev" + Version string `json:"version"` // "3.29.2" + ReleaseDate string `json:"release_date"` // "2025-03-13T00:14:34.044690Z" + Archive string `json:"archive"` // "stable/macos/flutter_macos_arm64_3.29.2-stable.zip" + SHA256 string `json:"sha256"` + + // DownloadURL is the fully-qualified URL, assembled from base_url + archive. + // Not in the upstream JSON — set by Fetch. + DownloadURL string `json:"download_url"` + // OS is the platform this entry came from ("macos", "linux", "windows"). + // Not in the upstream JSON — set by Fetch. + OS string `json:"os"` +} + +var defaultOSes = []string{"macos", "linux", "windows"} + +// Fetch retrieves Flutter releases for all platforms. +// +// Yields one batch per OS. The iterator interface exists so callers use +// the same pattern as paginated sources. 
+func Fetch(ctx context.Context, client *http.Client) iter.Seq2[[]Release, error] {
+	return func(yield func([]Release, error) bool) {
+		for _, osName := range defaultOSes {
+			url := fmt.Sprintf("https://storage.googleapis.com/flutter_infra_release/releases/releases_%s.json", osName)
+
+			req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+			if err != nil {
+				yield(nil, fmt.Errorf("flutterdist: %w", err))
+				return
+			}
+			req.Header.Set("Accept", "application/json")
+
+			resp, err := client.Do(req)
+			if err != nil {
+				yield(nil, fmt.Errorf("flutterdist: fetch %s: %w", osName, err))
+				return
+			}
+
+			// Check the status before decoding: an error response is usually
+			// not JSON and would otherwise surface as a decode failure.
+			if resp.StatusCode != http.StatusOK {
+				resp.Body.Close()
+				yield(nil, fmt.Errorf("flutterdist: fetch %s: %s", osName, resp.Status))
+				return
+			}
+
+			var idx index
+			err = json.NewDecoder(resp.Body).Decode(&idx)
+			resp.Body.Close()
+			if err != nil {
+				yield(nil, fmt.Errorf("flutterdist: decode %s: %w", osName, err))
+				return
+			}
+
+			for i := range idx.Releases {
+				idx.Releases[i].DownloadURL = idx.BaseURL + "/" + idx.Releases[i].Archive
+				idx.Releases[i].OS = osName
+			}
+
+			if !yield(idx.Releases, nil) {
+				return
+			}
+		}
+	}
+}
diff --git a/internal/releases/flutterdist/variants.go b/internal/releases/flutterdist/variants.go
new file mode 100644
index 0000000..a14a483
--- /dev/null
+++ b/internal/releases/flutterdist/variants.go
@@ -0,0 +1,16 @@
+package flutterdist
+
+import "github.com/webinstall/webi-installers/internal/storage"
+
+// TagVariants handles flutter-specific arch defaults.
+//
+// Flutter's naming convention: flutter_{os}_{version} for x86_64 builds,
+// flutter_{os}_arm64_{version} for arm64. The absence of an arch token
+// means x86_64 — arm64 is always explicit.
+func TagVariants(assets []storage.Asset) { + for i := range assets { + if assets[i].Arch == "" && assets[i].OS != "" { + assets[i].Arch = "x86_64" + } + } +} diff --git a/internal/releases/git/variants.go b/internal/releases/git/variants.go new file mode 100644 index 0000000..a6a4a3e --- /dev/null +++ b/internal/releases/git/variants.go @@ -0,0 +1,52 @@ +// Package git provides variant tagging for Git for Windows releases. +// +// Git for Windows publishes GUI installer .exe files (Git-*-bit.exe), +// self-extracting PortableGit archives, and .pdb debug symbol packages +// alongside the MinGit .zip that webi installs. +package gitdist + +import ( + "strings" + + "github.com/webinstall/webi-installers/internal/storage" +) + +// TagVariants tags git-specific build variants and fixes OS/arch classification. +// All git-for-windows releases are Windows-only, but MinGit filenames like +// "MinGit-2.33.0-64-bit.zip" have no "windows" indicator — force OS=windows. +// MinGit uses "64-bit"/"32-bit" for arch — a convention specific to this project +// that the general classifier intentionally does not handle. +func TagVariants(assets []storage.Asset) { + for i := range assets { + // All git-for-windows assets are Windows. Filenames like + // "MinGit-2.33.0-64-bit.zip" have no OS term; set it explicitly. + if assets[i].OS == "" { + assets[i].OS = "windows" + } + + // MinGit uses "64-bit"→x86_64, "32-bit"→x86 naming. + // "arm64" is already handled by the general classifier. 
+ if assets[i].Arch == "" { + lower := strings.ToLower(assets[i].Filename) + if strings.Contains(lower, "64-bit") { + assets[i].Arch = "x86_64" + } else if strings.Contains(lower, "32-bit") { + assets[i].Arch = "x86" + } + } + + lower := strings.ToLower(assets[i].Filename) + if assets[i].Format == ".exe" { + assets[i].Variants = append(assets[i].Variants, "installer") + } + if strings.Contains(lower, "portablegit") { + assets[i].Variants = append(assets[i].Variants, "installer") + } + if strings.Contains(lower, "-pdb") || strings.Contains(lower, "pdbs-for-") { + assets[i].Variants = append(assets[i].Variants, "pdb") + } + if strings.Contains(lower, "-busybox") { + assets[i].Variants = append(assets[i].Variants, "busybox") + } + } +} diff --git a/internal/releases/git/versions.go b/internal/releases/git/versions.go new file mode 100644 index 0000000..95b2750 --- /dev/null +++ b/internal/releases/git/versions.go @@ -0,0 +1,33 @@ +package gitdist + +import ( + "strings" + + "github.com/webinstall/webi-installers/internal/storage" +) + +// NormalizeVersions strips the ".windows.N" suffix from Git for Windows +// version strings to match the upstream Git version scheme. +// +// Git for Windows tags are like "v2.53.0.windows.1" or "v2.53.0.windows.2". +// Node.js strips ".windows.1" entirely and replaces ".windows.N" (N>1) +// with ".N": +// +// v2.53.0.windows.1 → v2.53.0 +// v2.53.0.windows.2 → v2.53.0.2 +func NormalizeVersions(assets []storage.Asset) { + for i := range assets { + v := assets[i].Version + idx := strings.Index(v, ".windows.") + if idx < 0 { + continue + } + suffix := v[idx+len(".windows."):] + base := v[:idx] + if suffix == "1" { + assets[i].Version = base + } else { + assets[i].Version = base + "." 
+ suffix + } + } +} diff --git a/internal/releases/gitea/gitea.go b/internal/releases/gitea/gitea.go new file mode 100644 index 0000000..036de4d --- /dev/null +++ b/internal/releases/gitea/gitea.go @@ -0,0 +1,120 @@ +// Package gitea fetches releases from a Gitea or Forgejo instance. +// +// Gitea's release API lives under: +// +// GET {baseurl}/api/v1/repos/{owner}/{repo}/releases +// +// The response shape is similar to GitHub's but not identical. This package +// handles pagination, authentication, and deserialization independently. +package gitea + +import ( + "context" + "encoding/json" + "fmt" + "iter" + "net/http" + "regexp" + "strings" +) + +// Release is one release from the Gitea releases API. +type Release struct { + TagName string `json:"tag_name"` + Name string `json:"name"` + Prerelease bool `json:"prerelease"` + Draft bool `json:"draft"` + PublishedAt string `json:"published_at"` // "2023-11-05T06:38:05Z" + Assets []Asset `json:"assets"` + TarballURL string `json:"tarball_url"` + ZipballURL string `json:"zipball_url"` +} + +// Asset is one downloadable file attached to a release. +type Asset struct { + Name string `json:"name"` // "pathman-v0.6.0-darwin-amd64.tar.gz" + BrowserDownloadURL string `json:"browser_download_url"` // full URL + Size int64 `json:"size"` +} + +// Auth holds optional credentials for authenticated API access. +type Auth struct { + Token string // personal access token or API key +} + +// Fetch retrieves releases from a Gitea instance, paginating automatically. +// Each yield is one page of releases. +// +// The baseURL should be the Gitea root (e.g. "https://git.rootprojects.org"). +// The /api/v1 prefix is appended automatically. 
+func Fetch(ctx context.Context, client *http.Client, baseURL, owner, repo string, auth *Auth) iter.Seq2[[]Release, error] {
+	return func(yield func([]Release, error) bool) {
+		// perPage is the page size requested through the limit parameter.
+		const perPage = 50
+
+		base := strings.TrimRight(baseURL, "/")
+		page := 1
+
+		for {
+			url := fmt.Sprintf("%s/api/v1/repos/%s/%s/releases?limit=%d&page=%d",
+				base, owner, repo, perPage, page)
+
+			req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+			if err != nil {
+				yield(nil, fmt.Errorf("gitea: %w", err))
+				return
+			}
+			req.Header.Set("Accept", "application/json")
+			if auth != nil && auth.Token != "" {
+				req.Header.Set("Authorization", "token "+auth.Token)
+			}
+
+			resp, err := client.Do(req)
+			if err != nil {
+				yield(nil, fmt.Errorf("gitea: fetch %s: %w", url, err))
+				return
+			}
+
+			if resp.StatusCode != http.StatusOK {
+				resp.Body.Close()
+				yield(nil, fmt.Errorf("gitea: fetch %s: %s", url, resp.Status))
+				return
+			}
+
+			var releases []Release
+			err = json.NewDecoder(resp.Body).Decode(&releases)
+			resp.Body.Close()
+			if err != nil {
+				yield(nil, fmt.Errorf("gitea: decode %s: %w", url, err))
+				return
+			}
+
+			if !yield(releases, nil) {
+				return
+			}
+
+			// Pagination is driven by the page counter: url is rebuilt from
+			// it at the top of the loop, so a rel="next" Link header only
+			// signals that another page exists. Without one, a short page
+			// means this was the last page.
+			hasNext := nextPageURL(resp.Header.Get("Link")) != ""
+			if !hasNext && len(releases) < perPage {
+				return
+			}
+			page++
+		}
+	}
+}
+
+var reNextLink = regexp.MustCompile(`<([^>]+)>;\s*rel="next"`)
+
+func nextPageURL(link string) string {
+	if link == "" {
+		return ""
+	}
+	m := reNextLink.FindStringSubmatch(link)
+	if m == nil {
+		return ""
+	}
+	return m[1]
+}
diff --git a/internal/releases/gitea/gitea_test.go b/internal/releases/gitea/gitea_test.go
new file mode 100644
index 0000000..de6d894
--- /dev/null
+++ b/internal/releases/gitea/gitea_test.go
@@ -0,0 +1,107 @@
+package gitea_test
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/webinstall/webi-installers/internal/releases/gitea"
+)
+
+const testReleases = `[
+	{
+		"tag_name": "v0.6.0",
+		"name": "v0.6.0",
+		"prerelease": false,
+		"draft": false,
+		"published_at": "2023-11-05T06:38:05Z",
+		"tarball_url": "https://example.com/archive/v0.6.0.tar.gz",
+		"zipball_url": "https://example.com/archive/v0.6.0.zip",
+		"assets": [
+			{
+				"name": "tool-v0.6.0-linux-amd64.tar.gz",
+				"browser_download_url": "https://example.com/releases/download/v0.6.0/tool-v0.6.0-linux-amd64.tar.gz",
+				"size": 89215
+			}
+		]
+	}
+]`
+
+func TestFetch(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/api/v1/repos/root/tool/releases" {
+			t.Errorf("unexpected path: %s", r.URL.Path)
+			http.NotFound(w, r)
+			return
+		}
+		w.Write([]byte(testReleases))
+	}))
+	defer srv.Close()
+
+	ctx := context.Background()
+	var all []gitea.Release
+
+	for releases, err := range gitea.Fetch(ctx, srv.Client(), srv.URL, "root", "tool", nil) {
+		if err != nil {
+			t.Fatal(err)
+		}
+		all = append(all, releases...)
+	}
+
+	if len(all) != 1 {
+		t.Fatalf("got %d releases, want 1", len(all))
+	}
+	if all[0].TagName != "v0.6.0" {
+		t.Errorf("TagName = %q, want %q", all[0].TagName, "v0.6.0")
+	}
+	if len(all[0].Assets) != 1 {
+		t.Errorf("got %d assets, want 1", len(all[0].Assets))
+	}
+	if all[0].TarballURL == "" {
+		t.Error("TarballURL is empty")
+	}
+}
+
+func TestFetchAuth(t *testing.T) {
+	var gotAuth string
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotAuth = r.Header.Get("Authorization")
+		w.Write([]byte("[]"))
+	}))
+	defer srv.Close()
+
+	ctx := context.Background()
+	auth := &gitea.Auth{Token: "abc123"}
+	for _, err := range gitea.Fetch(ctx, srv.Client(), srv.URL, "root", "tool", auth) {
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	if gotAuth != "token abc123" {
+		t.Errorf("Authorization = %q, want %q", gotAuth, "token abc123")
+	}
+}
+
+func TestFetchLive(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping network test in short mode")
+	}
+
+	ctx := context.Background()
+	client := &http.Client{}
+
+	var total int
+	for releases, err := range gitea.Fetch(ctx, client, "https://git.rootprojects.org", "root", "pathman", nil) {
+		if err != nil {
+			t.Fatal(err)
+		}
+		total += len(releases)
+	}
+
+	if total < 1 {
+		t.Errorf("got %d releases, expected at least 1", total)
+	}
+	t.Logf("fetched %d releases", total)
+}
diff --git a/internal/releases/gitea/variants.go b/internal/releases/gitea/variants.go
new file mode 100644
index 0000000..fe65122
--- /dev/null
+++ b/internal/releases/gitea/variants.go
@@ -0,0 +1,25 @@
+// Package gitea provides variant tagging for Gitea releases.
+//
+// Gitea publishes "gogit" builds that use an alternative pure-Go Git
+// backend instead of the default C Git library.
+package gitea
+
+import (
+	"strings"
+
+	"github.com/webinstall/webi-installers/internal/storage"
+)
+
+// TagVariants tags gitea-specific build variants.
+//
+// Files whose name contains "gogit" (case-insensitive) are tagged with the
+// "gogit" variant. These use a pure-Go Git backend rather than the default
+// C Git library.
+func TagVariants(assets []storage.Asset) {
+	for i := range assets {
+		lower := strings.ToLower(assets[i].Filename)
+		if strings.Contains(lower, "gogit") {
+			assets[i].Variants = append(assets[i].Variants, "gogit")
+		}
+	}
+}
diff --git a/internal/releases/giteasrc/giteasrc.go b/internal/releases/giteasrc/giteasrc.go
new file mode 100644
index 0000000..da00b00
--- /dev/null
+++ b/internal/releases/giteasrc/giteasrc.go
@@ -0,0 +1,25 @@
+// Package giteasrc fetches source archives from Gitea/Forgejo releases.
+//
+// Some packages are installed from the auto-generated source tarballs
+// rather than uploaded binary assets. This package fetches releases and
+// exposes the tarball/zipball URLs.
+//
+// Use [gitea] for packages that use uploaded binary assets.
+package giteasrc
+
+import (
+	"context"
+	"iter"
+	"net/http"
+
+	"github.com/webinstall/webi-installers/internal/releases/gitea"
+)
+
+// Fetch retrieves releases from a Gitea instance for the given owner/repo.
+// Paginates automatically, yielding one batch per API page.
+//
+// Callers should use [gitea.Release.TarballURL] and
+// [gitea.Release.ZipballURL] rather than the Assets list.
+func Fetch(ctx context.Context, client *http.Client, baseURL, owner, repo string, auth *gitea.Auth) iter.Seq2[[]gitea.Release, error] {
+	return gitea.Fetch(ctx, client, baseURL, owner, repo, auth)
+}
diff --git a/internal/releases/github/github.go b/internal/releases/github/github.go
new file mode 100644
index 0000000..01fe579
--- /dev/null
+++ b/internal/releases/github/github.go
@@ -0,0 +1,22 @@
+// Package github fetches releases from the GitHub API.
+//
+// This is a thin wrapper around [githubish] that sets the base URL to
+// https://api.github.com. Use [githubish] directly for Gitea, Forgejo,
+// or other GitHub-compatible forges.
+package github + +import ( + "context" + "iter" + "net/http" + + "github.com/webinstall/webi-installers/internal/releases/githubish" +) + +const baseURL = "https://api.github.com" + +// Fetch retrieves releases from GitHub for the given owner/repo. +// Paginates automatically, yielding one batch per API page. +func Fetch(ctx context.Context, client *http.Client, owner, repo string, auth *githubish.Auth) iter.Seq2[[]githubish.Release, error] { + return githubish.Fetch(ctx, client, baseURL, owner, repo, auth) +} diff --git a/internal/releases/githubish/githubish.go b/internal/releases/githubish/githubish.go new file mode 100644 index 0000000..7f227af --- /dev/null +++ b/internal/releases/githubish/githubish.go @@ -0,0 +1,112 @@ +// Package githubish fetches releases from GitHub-compatible APIs. +// +// GitHub, Gitea, Forgejo, and other forges expose the same releases +// endpoint shape: +// +// GET /repos/{owner}/{repo}/releases +// +// This package handles pagination (Link headers), authentication, and +// deserialization. It does not transform or normalize the data. +package githubish + +import ( + "context" + "encoding/json" + "fmt" + "iter" + "net/http" + "regexp" +) + +// Release is one release from a GitHub-compatible API. +// Fields mirror the upstream JSON — only the fields Webi cares about are +// included; the rest are silently dropped by the decoder. +type Release struct { + TagName string `json:"tag_name"` + Name string `json:"name"` + Prerelease bool `json:"prerelease"` + Draft bool `json:"draft"` + PublishedAt string `json:"published_at"` // "2025-10-22T13:00:26Z" + Assets []Asset `json:"assets"` + TarballURL string `json:"tarball_url"` // auto-generated source tarball + ZipballURL string `json:"zipball_url"` // auto-generated source zipball +} + +// Asset is one downloadable file attached to a release. 
+type Asset struct {
+	Name               string `json:"name"`                 // "ripgrep-15.1.0-x86_64-apple-darwin.tar.gz"
+	BrowserDownloadURL string `json:"browser_download_url"` // full URL
+	Size               int64  `json:"size"`
+	ContentType        string `json:"content_type"`
+}
+
+// Auth holds optional credentials for authenticated API access.
+// Without auth, GitHub's public rate limit is 60 requests/hour.
+type Auth struct {
+	Token string // personal access token or fine-grained token
+}
+
+// Fetch retrieves releases from a GitHub-compatible API, paginating
+// automatically. Each yield is one page of releases.
+//
+// The baseURL should be the API root (e.g. "https://api.github.com").
+// For Gitea: "https://gitea.example.com/api/v1".
+func Fetch(ctx context.Context, client *http.Client, baseURL, owner, repo string, auth *Auth) iter.Seq2[[]Release, error] {
+	return func(yield func([]Release, error) bool) {
+		url := fmt.Sprintf("%s/repos/%s/%s/releases?per_page=100", baseURL, owner, repo)
+
+		for url != "" {
+			req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+			if err != nil {
+				yield(nil, fmt.Errorf("githubish: %w", err))
+				return
+			}
+			req.Header.Set("Accept", "application/json")
+			if auth != nil && auth.Token != "" {
+				req.Header.Set("Authorization", "Bearer "+auth.Token)
+			}
+
+			resp, err := client.Do(req)
+			if err != nil {
+				yield(nil, fmt.Errorf("githubish: fetch %s: %w", url, err))
+				return
+			}
+
+			if resp.StatusCode != http.StatusOK {
+				resp.Body.Close()
+				yield(nil, fmt.Errorf("githubish: fetch %s: %s", url, resp.Status))
+				return
+			}
+
+			var releases []Release
+			err = json.NewDecoder(resp.Body).Decode(&releases)
+			resp.Body.Close()
+			if err != nil {
+				yield(nil, fmt.Errorf("githubish: decode %s: %w", url, err))
+				return
+			}
+
+			if !yield(releases, nil) {
+				return
+			}
+
+			url = nextPageURL(resp.Header.Get("Link")) // "" when there is no rel="next" link, which ends the loop
+		}
+	}
+}
+
+// reNextLink matches `<url>; rel="next"` in a Link header and captures the URL.
+var reNextLink = regexp.MustCompile(`<([^>]+)>;\s*rel="next"`) + +// nextPageURL extracts the "next" URL from a GitHub Link header. +// Returns "" if there is no next page. +func nextPageURL(link string) string { + if link == "" { + return "" + } + m := reNextLink.FindStringSubmatch(link) + if m == nil { + return "" + } + return m[1] +} diff --git a/internal/releases/githubish/githubish_test.go b/internal/releases/githubish/githubish_test.go new file mode 100644 index 0000000..616b492 --- /dev/null +++ b/internal/releases/githubish/githubish_test.go @@ -0,0 +1,201 @@ +package githubish_test + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "github.com/webinstall/webi-installers/internal/releases/githubish" +) + +const page1 = `[ + { + "tag_name": "v2.0.0", + "name": "v2.0.0", + "prerelease": false, + "draft": false, + "published_at": "2025-06-01T12:00:00Z", + "assets": [ + { + "name": "tool-v2.0.0-linux-amd64.tar.gz", + "browser_download_url": "https://example.com/tool-v2.0.0-linux-amd64.tar.gz", + "size": 5000000, + "content_type": "application/gzip" + } + ] + } +]` + +const page2 = `[ + { + "tag_name": "v1.0.0", + "name": "v1.0.0", + "prerelease": false, + "draft": false, + "published_at": "2024-01-15T08:00:00Z", + "assets": [ + { + "name": "tool-v1.0.0-linux-amd64.tar.gz", + "browser_download_url": "https://example.com/tool-v1.0.0-linux-amd64.tar.gz", + "size": 4000000, + "content_type": "application/gzip" + }, + { + "name": "tool-v1.0.0-darwin-arm64.tar.gz", + "browser_download_url": "https://example.com/tool-v1.0.0-darwin-arm64.tar.gz", + "size": 4500000, + "content_type": "application/gzip" + } + ] + } +]` + +func TestFetchPagination(t *testing.T) { + var srvURL string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/repos/acme/tool/releases" { + t.Errorf("unexpected path: %s", r.URL.Path) + http.NotFound(w, r) + return + } + + page := r.URL.Query().Get("page") + 
switch page { + case "", "1": + // Link header pointing to page 2 + w.Header().Set("Link", + fmt.Sprintf(`<%s/repos/acme/tool/releases?per_page=100&page=2>; rel="next"`, srvURL)) + w.Write([]byte(page1)) + case "2": + // No Link header — last page + w.Write([]byte(page2)) + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + srvURL = srv.URL + + ctx := context.Background() + var batches int + var allReleases []githubish.Release + + for releases, err := range githubish.Fetch(ctx, srv.Client(), srv.URL, "acme", "tool", nil) { + if err != nil { + t.Fatalf("batch %d: %v", batches, err) + } + batches++ + allReleases = append(allReleases, releases...) + } + + if batches != 2 { + t.Errorf("got %d batches, want 2", batches) + } + if len(allReleases) != 2 { + t.Fatalf("got %d releases, want 2", len(allReleases)) + } + + // Page 1: v2.0.0 + if allReleases[0].TagName != "v2.0.0" { + t.Errorf("release[0].TagName = %q, want %q", allReleases[0].TagName, "v2.0.0") + } + if len(allReleases[0].Assets) != 1 { + t.Errorf("release[0] has %d assets, want 1", len(allReleases[0].Assets)) + } + + // Page 2: v1.0.0 + if allReleases[1].TagName != "v1.0.0" { + t.Errorf("release[1].TagName = %q, want %q", allReleases[1].TagName, "v1.0.0") + } + if len(allReleases[1].Assets) != 2 { + t.Errorf("release[1] has %d assets, want 2", len(allReleases[1].Assets)) + } +} + +func TestFetchPrerelease(t *testing.T) { + body := `[{"tag_name":"v1.0.0-rc1","name":"","prerelease":true,"draft":false,"published_at":"2025-01-01T00:00:00Z","assets":[]}]` + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(body)) + })) + defer srv.Close() + + ctx := context.Background() + for releases, err := range githubish.Fetch(ctx, srv.Client(), srv.URL, "acme", "tool", nil) { + if err != nil { + t.Fatal(err) + } + if len(releases) != 1 { + t.Fatalf("got %d releases, want 1", len(releases)) + } + if !releases[0].Prerelease { + t.Error("expected Prerelease = 
true") + } + if releases[0].TagName != "v1.0.0-rc1" { + t.Errorf("TagName = %q, want %q", releases[0].TagName, "v1.0.0-rc1") + } + } +} + +func TestFetchAuth(t *testing.T) { + var gotAuth string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotAuth = r.Header.Get("Authorization") + w.Write([]byte("[]")) + })) + defer srv.Close() + + ctx := context.Background() + auth := &githubish.Auth{Token: "ghp_test123"} + for _, err := range githubish.Fetch(ctx, srv.Client(), srv.URL, "acme", "tool", auth) { + if err != nil { + t.Fatal(err) + } + } + + if gotAuth != "Bearer ghp_test123" { + t.Errorf("Authorization = %q, want %q", gotAuth, "Bearer ghp_test123") + } +} + +func TestFetchHTTPError(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "not found", http.StatusNotFound) + })) + defer srv.Close() + + ctx := context.Background() + for _, err := range githubish.Fetch(ctx, srv.Client(), srv.URL, "acme", "tool", nil) { + if err == nil { + t.Fatal("expected error for 404 response") + } + return + } +} + +func TestFetchEarlyBreak(t *testing.T) { + var requests int + var srvURL string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requests++ + // Always advertise a next page + w.Header().Set("Link", + fmt.Sprintf(`<%s/repos/acme/tool/releases?per_page=100&page=%d>; rel="next"`, srvURL, requests+1)) + w.Write([]byte(`[{"tag_name":"v1.0.0","name":"","prerelease":false,"draft":false,"published_at":"2025-01-01T00:00:00Z","assets":[]}]`)) + })) + defer srv.Close() + srvURL = srv.URL + + ctx := context.Background() + for _, err := range githubish.Fetch(ctx, srv.Client(), srv.URL, "acme", "tool", nil) { + if err != nil { + t.Fatal(err) + } + break // stop after first page + } + + if requests != 1 { + t.Errorf("server received %d requests, want 1 (early break should stop pagination)", requests) + } +} diff --git 
a/internal/releases/githubsrc/githubsrc.go b/internal/releases/githubsrc/githubsrc.go new file mode 100644 index 0000000..35b402e --- /dev/null +++ b/internal/releases/githubsrc/githubsrc.go @@ -0,0 +1,27 @@ +// Package githubsrc fetches source archives from GitHub releases. +// +// Some packages (shell scripts, vim plugins) are installed from the +// auto-generated source tarballs rather than uploaded binary assets. +// This package fetches releases and exposes the tarball/zipball URLs. +// +// Use [github] for packages that use uploaded binary assets. +package githubsrc + +import ( + "context" + "iter" + "net/http" + + "github.com/webinstall/webi-installers/internal/releases/githubish" +) + +const baseURL = "https://api.github.com" + +// Fetch retrieves releases from GitHub for the given owner/repo. +// Paginates automatically, yielding one batch per API page. +// +// Callers should use [githubish.Release.TarballURL] and +// [githubish.Release.ZipballURL] rather than the Assets list. +func Fetch(ctx context.Context, client *http.Client, owner, repo string, auth *githubish.Auth) iter.Seq2[[]githubish.Release, error] { + return githubish.Fetch(ctx, client, baseURL, owner, repo, auth) +} diff --git a/internal/releases/gitlab/gitlab.go b/internal/releases/gitlab/gitlab.go new file mode 100644 index 0000000..c50f2eb --- /dev/null +++ b/internal/releases/gitlab/gitlab.go @@ -0,0 +1,122 @@ +// Package gitlab fetches releases from a GitLab instance. +// +// GitLab's releases API differs from GitHub's in structure: +// +// GET /api/v4/projects/:id/releases +// +// Where :id is the URL-encoded project path (e.g. "group%2Frepo") or a +// numeric project ID. Assets are split into auto-generated source archives +// and manually attached links. Pagination uses page/per_page query params +// and X-Total-Pages response headers (not Link headers). +// +// This package handles pagination, authentication, and deserialization. +// It does not transform or normalize the data. 
+package gitlab + +import ( + "context" + "encoding/json" + "fmt" + "iter" + "net/http" + "net/url" + "strconv" +) + +// Release is one release from the GitLab releases API. +type Release struct { + TagName string `json:"tag_name"` + Name string `json:"name"` + ReleasedAt string `json:"released_at"` // "2025-10-22T13:00:26Z" + Assets Assets `json:"assets"` +} + +// Assets holds both auto-generated source archives and attached links. +type Assets struct { + Sources []Source `json:"sources"` + Links []Link `json:"links"` +} + +// Source is an auto-generated source archive (tar.gz, zip, etc.). +type Source struct { + Format string `json:"format"` // "zip", "tar.gz", "tar.bz2", "tar" + URL string `json:"url"` +} + +// Link is a file attached to a release (binary, package, etc.). +type Link struct { + ID int `json:"id"` + Name string `json:"name"` + URL string `json:"url"` + DirectAssetPath string `json:"direct_asset_path"` + LinkType string `json:"link_type"` // "other", "runbook", "image", "package" +} + +// Auth holds optional credentials for authenticated API access. +type Auth struct { + Token string // personal access token or deploy token +} + +// Fetch retrieves releases from a GitLab instance, paginating automatically. +// Each yield is one page of releases. +// +// The baseURL should be the GitLab root (e.g. "https://gitlab.com"). +// The project is identified by its path (e.g. "group/repo") — it will be +// URL-encoded automatically. 
+func Fetch(ctx context.Context, client *http.Client, baseURL, project string, auth *Auth) iter.Seq2[[]Release, error] { + return func(yield func([]Release, error) bool) { + encodedProject := url.PathEscape(project) + page := 1 + + for { + reqURL := fmt.Sprintf("%s/api/v4/projects/%s/releases?per_page=100&page=%d", + baseURL, encodedProject, page) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) + if err != nil { + yield(nil, fmt.Errorf("gitlab: %w", err)) + return + } + req.Header.Set("Accept", "application/json") + if auth != nil && auth.Token != "" { + req.Header.Set("PRIVATE-TOKEN", auth.Token) + } + + resp, err := client.Do(req) + if err != nil { + yield(nil, fmt.Errorf("gitlab: fetch %s: %w", reqURL, err)) + return + } + + if resp.StatusCode != http.StatusOK { + resp.Body.Close() + yield(nil, fmt.Errorf("gitlab: fetch %s: %s", reqURL, resp.Status)) + return + } + + var releases []Release + err = json.NewDecoder(resp.Body).Decode(&releases) + resp.Body.Close() + if err != nil { + yield(nil, fmt.Errorf("gitlab: decode %s: %w", reqURL, err)) + return + } + + if !yield(releases, nil) { + return + } + + // Check if there are more pages. 
+ totalPages := 1 + if tp := resp.Header.Get("X-Total-Pages"); tp != "" { + if n, err := strconv.Atoi(tp); err == nil { + totalPages = n + } + } + if page >= totalPages { + return + } + page++ + } + } +} diff --git a/internal/releases/gitlab/gitlab_test.go b/internal/releases/gitlab/gitlab_test.go new file mode 100644 index 0000000..f7be0dc --- /dev/null +++ b/internal/releases/gitlab/gitlab_test.go @@ -0,0 +1,182 @@ +package gitlab_test + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "github.com/webinstall/webi-installers/internal/releases/gitlab" +) + +const page1 = `[ + { + "tag_name": "v2.0.0", + "name": "v2.0.0", + "released_at": "2025-06-01T12:00:00Z", + "assets": { + "sources": [ + {"format": "tar.gz", "url": "https://example.com/archive/v2.0.0.tar.gz"}, + {"format": "zip", "url": "https://example.com/archive/v2.0.0.zip"} + ], + "links": [ + { + "id": 1, + "name": "tool-v2.0.0-linux-amd64.tar.gz", + "url": "https://example.com/tool-v2.0.0-linux-amd64.tar.gz", + "direct_asset_path": "/binaries/linux-amd64", + "link_type": "package" + } + ] + } + } +]` + +const page2 = `[ + { + "tag_name": "v1.0.0", + "name": "v1.0.0", + "released_at": "2024-01-15T08:00:00Z", + "assets": { + "sources": [ + {"format": "tar.gz", "url": "https://example.com/archive/v1.0.0.tar.gz"} + ], + "links": [] + } + } +]` + +func TestFetchPagination(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Go's http server decodes %2F back to /, so check RawPath + // for the encoded form or Path for the decoded form. 
+ wantRaw := "/api/v4/projects/group%2Ftool/releases" + wantDecoded := "/api/v4/projects/group/tool/releases" + if r.URL.RawPath != wantRaw && r.URL.Path != wantDecoded { + t.Errorf("unexpected path: raw=%q decoded=%q", r.URL.RawPath, r.URL.Path) + http.NotFound(w, r) + return + } + + page := r.URL.Query().Get("page") + w.Header().Set("X-Total-Pages", "2") + + switch page { + case "", "1": + w.Write([]byte(page1)) + case "2": + w.Write([]byte(page2)) + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + + ctx := context.Background() + var batches int + var allReleases []gitlab.Release + + for releases, err := range gitlab.Fetch(ctx, srv.Client(), srv.URL, "group/tool", nil) { + if err != nil { + t.Fatalf("batch %d: %v", batches, err) + } + batches++ + allReleases = append(allReleases, releases...) + } + + if batches != 2 { + t.Errorf("got %d batches, want 2", batches) + } + if len(allReleases) != 2 { + t.Fatalf("got %d releases, want 2", len(allReleases)) + } + + // Page 1: v2.0.0 + r1 := allReleases[0] + if r1.TagName != "v2.0.0" { + t.Errorf("release[0].TagName = %q, want %q", r1.TagName, "v2.0.0") + } + if len(r1.Assets.Sources) != 2 { + t.Errorf("release[0] has %d sources, want 2", len(r1.Assets.Sources)) + } + if len(r1.Assets.Links) != 1 { + t.Errorf("release[0] has %d links, want 1", len(r1.Assets.Links)) + } + if r1.Assets.Links[0].LinkType != "package" { + t.Errorf("release[0] link type = %q, want %q", r1.Assets.Links[0].LinkType, "package") + } + + // Page 2: v1.0.0 + r2 := allReleases[1] + if r2.TagName != "v1.0.0" { + t.Errorf("release[1].TagName = %q, want %q", r2.TagName, "v1.0.0") + } + if len(r2.Assets.Links) != 0 { + t.Errorf("release[1] has %d links, want 0", len(r2.Assets.Links)) + } +} + +func TestFetchAuth(t *testing.T) { + var gotAuth string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotAuth = r.Header.Get("PRIVATE-TOKEN") + w.Write([]byte("[]")) + })) + defer srv.Close() + + ctx := 
context.Background() + auth := &gitlab.Auth{Token: "glpat-test123"} + for _, err := range gitlab.Fetch(ctx, srv.Client(), srv.URL, "group/tool", auth) { + if err != nil { + t.Fatal(err) + } + } + + if gotAuth != "glpat-test123" { + t.Errorf("PRIVATE-TOKEN = %q, want %q", gotAuth, "glpat-test123") + } +} + +func TestFetchSinglePage(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // No X-Total-Pages header — defaults to 1 page. + w.Write([]byte(page1)) + })) + defer srv.Close() + + ctx := context.Background() + var batches int + for _, err := range gitlab.Fetch(ctx, srv.Client(), srv.URL, "group/tool", nil) { + if err != nil { + t.Fatal(err) + } + batches++ + } + + if batches != 1 { + t.Errorf("got %d batches, want 1 (no X-Total-Pages means single page)", batches) + } +} + +func TestFetchEarlyBreak(t *testing.T) { + var requests int + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requests++ + w.Header().Set("X-Total-Pages", "10") + w.Write([]byte(fmt.Sprintf(`[{"tag_name":"v%d.0.0","name":"","released_at":"2025-01-01T00:00:00Z","assets":{"sources":[],"links":[]}}]`, requests))) + })) + defer srv.Close() + + ctx := context.Background() + for _, err := range gitlab.Fetch(ctx, srv.Client(), srv.URL, "group/tool", nil) { + if err != nil { + t.Fatal(err) + } + break // stop after first page + } + + if requests != 1 { + t.Errorf("server received %d requests, want 1", requests) + } +} diff --git a/internal/releases/gitlabsrc/gitlabsrc.go b/internal/releases/gitlabsrc/gitlabsrc.go new file mode 100644 index 0000000..c4a3b43 --- /dev/null +++ b/internal/releases/gitlabsrc/gitlabsrc.go @@ -0,0 +1,25 @@ +// Package gitlabsrc fetches source archives from GitLab releases. +// +// Some packages are installed from the auto-generated source archives +// rather than attached binary links. This package fetches releases and +// exposes the source archive URLs. 
+// +// Use [gitlab] for packages that use attached release links (binaries). +package gitlabsrc + +import ( + "context" + "iter" + "net/http" + + "github.com/webinstall/webi-installers/internal/releases/gitlab" +) + +// Fetch retrieves releases from a GitLab instance. +// Paginates automatically, yielding one batch per API page. +// +// Callers should use [gitlab.Release.Assets.Sources] rather than +// [gitlab.Release.Assets.Links]. +func Fetch(ctx context.Context, client *http.Client, baseURL, project string, auth *gitlab.Auth) iter.Seq2[[]gitlab.Release, error] { + return gitlab.Fetch(ctx, client, baseURL, project, auth) +} diff --git a/internal/releases/gittag/gittag.go b/internal/releases/gittag/gittag.go new file mode 100644 index 0000000..7a9bf41 --- /dev/null +++ b/internal/releases/gittag/gittag.go @@ -0,0 +1,178 @@ +// Package gittag fetches release information from git tags in a bare repo. +// +// Some packages (vim plugins, shell scripts) are installed by cloning a git +// repo rather than downloading a binary. For these, each tag is a "release" +// and the download URL is the repo's git URL. +// +// This package clones (or fetches) a bare repo to a local cache directory, +// lists version-like tags, and returns them with their commit metadata. +// HEAD is also included as a potential release. +package gittag + +import ( + "context" + "fmt" + "iter" + "os" + "os/exec" + "path/filepath" + "regexp" + "strings" + + "crypto/rand" + "encoding/hex" +) + +// Entry is one tag (or HEAD) from a git repo. +type Entry struct { + Version string // tag name or date-based version for HEAD + GitTag string // the ref that can be passed to `git clone --branch` + CommitHash string // abbreviated commit hash + Date string // ISO 8601 commit date (author date) +} + +// reVersionTag matches tags that look like versions: v1, v1.2, 1.0.0-rc, etc. 
+var reVersionTag = regexp.MustCompile(`^v?\d+(\.\d+)`) + +// Fetch clones or updates a bare repo, then yields its version-like tags +// and HEAD as entries. The repoDir is the parent directory where bare repos +// are cached. +// +// Yields one batch containing all tags plus HEAD. +func Fetch(ctx context.Context, gitURL, repoDir string) iter.Seq2[[]Entry, error] { + return func(yield func([]Entry, error) bool) { + repoName := filepath.Base(gitURL) + repoName = strings.TrimSuffix(repoName, ".git") + repoPath := filepath.Join(repoDir, repoName+".git") + + if err := ensureRepo(ctx, repoPath, gitURL); err != nil { + yield(nil, fmt.Errorf("gittag: %w", err)) + return + } + + tags, err := listVersionTags(ctx, repoPath) + if err != nil { + yield(nil, fmt.Errorf("gittag: %w", err)) + return + } + + var entries []Entry + for _, tag := range tags { + info, err := commitInfo(ctx, repoPath, tag) + if err != nil { + yield(nil, fmt.Errorf("gittag: commit info for %q: %w", tag, err)) + return + } + info.Version = tag + info.GitTag = tag + entries = append(entries, info) + } + + // HEAD as an additional entry + head, err := commitInfo(ctx, repoPath, "HEAD") + if err != nil { + yield(nil, fmt.Errorf("gittag: commit info for HEAD: %w", err)) + return + } + branch, err := headBranch(ctx, repoPath) + if err != nil { + yield(nil, fmt.Errorf("gittag: HEAD branch: %w", err)) + return + } + head.GitTag = branch + // Version for HEAD is set by the caller (date-based, etc.) + entries = append(entries, head) + + yield(entries, nil) + } +} + +// ensureRepo clones the repo if it doesn't exist, or fetches if it does. +func ensureRepo(ctx context.Context, repoPath, gitURL string) error { + if _, err := os.Stat(repoPath); err == nil { + // Exists — fetch updates. + cmd := exec.CommandContext(ctx, "git", "--git-dir="+repoPath, "fetch") + cmd.Stderr = os.Stderr + return cmd.Run() + } + + // Clone bare with tree filter (metadata only). 
+ var b [8]byte + rand.Read(b[:]) + id := hex.EncodeToString(b[:]) + tmpPath := repoPath + "." + id + ".tmp" + + cmd := exec.CommandContext(ctx, "git", "clone", "--bare", "--filter=tree:0", gitURL, tmpPath) + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + os.RemoveAll(tmpPath) + return fmt.Errorf("clone %s: %w", gitURL, err) + } + + // Atomic swap — if repoPath appeared in a race, keep it and discard ours. + if err := os.Rename(tmpPath, repoPath); err != nil { + os.RemoveAll(tmpPath) + // If rename failed because repoPath now exists, that's fine. + if _, statErr := os.Stat(repoPath); statErr == nil { + return nil + } + return err + } + return nil +} + +// listVersionTags returns tags that look like version numbers, newest first. +func listVersionTags(ctx context.Context, repoPath string) ([]string, error) { + cmd := exec.CommandContext(ctx, "git", "--git-dir="+repoPath, "tag") + out, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("git tag: %w", err) + } + + var tags []string + for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { + if line == "" { + continue + } + if reVersionTag.MatchString(line) { + tags = append(tags, line) + } + } + + // Reverse so newest tags come first (git tag outputs alphabetically). + for i, j := 0, len(tags)-1; i < j; i, j = i+1, j-1 { + tags[i], tags[j] = tags[j], tags[i] + } + return tags, nil +} + +// commitInfo returns the abbreviated hash and author date for a commitish. 
+func commitInfo(ctx context.Context, repoPath, commitish string) (Entry, error) { + cmd := exec.CommandContext(ctx, "git", "--git-dir="+repoPath, + "log", "-1", "--format=%h %ad", "--date=iso-strict", commitish) + out, err := cmd.Output() + if err != nil { + return Entry{}, fmt.Errorf("git log %s: %w", commitish, err) + } + + parts := strings.Fields(strings.TrimSpace(string(out))) + if len(parts) < 2 { + return Entry{}, fmt.Errorf("unexpected git log output: %q", out) + } + + return Entry{ + CommitHash: parts[0], + Date: parts[1], + }, nil +} + +// headBranch returns the symbolic ref for HEAD (e.g. "main", "master"). +func headBranch(ctx context.Context, repoPath string) (string, error) { + cmd := exec.CommandContext(ctx, "git", "--git-dir="+repoPath, + "rev-parse", "--abbrev-ref", "HEAD") + out, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("git rev-parse HEAD: %w", err) + } + return strings.TrimSpace(string(out)), nil +} diff --git a/internal/releases/gittag/gittag_test.go b/internal/releases/gittag/gittag_test.go new file mode 100644 index 0000000..4a37881 --- /dev/null +++ b/internal/releases/gittag/gittag_test.go @@ -0,0 +1,56 @@ +package gittag_test + +import ( + "context" + "testing" + + "github.com/webinstall/webi-installers/internal/releases/gittag" +) + +func TestFetch(t *testing.T) { + if testing.Short() { + t.Skip("skipping network/git test in short mode") + } + + ctx := context.Background() + repoDir := t.TempDir() + + // vim-commentary has a small number of tags. + var entries []gittag.Entry + for batch, err := range gittag.Fetch(ctx, "https://github.com/tpope/vim-commentary.git", repoDir) { + if err != nil { + t.Fatalf("Fetch: %v", err) + } + entries = append(entries, batch...) + } + + if len(entries) < 2 { + t.Fatalf("got %d entries, expected at least 2 (tags + HEAD)", len(entries)) + } + + // Last entry should be HEAD (no Version set by the fetcher). 
+ head := entries[len(entries)-1] + if head.CommitHash == "" { + t.Error("HEAD entry has empty CommitHash") + } + if head.Date == "" { + t.Error("HEAD entry has empty Date") + } + if head.GitTag == "" { + t.Error("HEAD entry has empty GitTag (branch name)") + } + + // At least one tag entry should have a version. + found := false + for _, e := range entries[:len(entries)-1] { + if e.Version != "" { + found = true + break + } + } + if !found { + t.Error("no tag entries have a Version set") + } + + t.Logf("fetched %d entries (last is HEAD on %q)", len(entries), head.GitTag) +} diff --git a/internal/releases/golang/golang.go b/internal/releases/golang/golang.go new file mode 100644 index 0000000..359a71d --- /dev/null +++ b/internal/releases/golang/golang.go @@ -0,0 +1,72 @@ +// Package golang fetches Go release data from golang.org. +// +// The API returns all releases (including unstable) as a JSON array: +// +// https://golang.org/dl/?mode=json&include=all +// +// Each release has a version string like "go1.24.1" and a list of file +// objects with filename, os, arch, sha256, size, and kind. +package golang + +import ( + "context" + "encoding/json" + "fmt" + "iter" + "net/http" +) + +// Release is one Go version from the download API. +type Release struct { + Version string `json:"version"` // "go1.24.1" + Stable bool `json:"stable"` + Files []File `json:"files"` +} + +// File is one downloadable artifact within a release. +type File struct { + Filename string `json:"filename"` // "go1.24.1.linux-amd64.tar.gz" + OS string `json:"os"` // "linux", "darwin", "windows", "" + Arch string `json:"arch"` // "amd64", "arm64", "386", "" + Version string `json:"version"` // "go1.24.1" + SHA256 string `json:"sha256"` + Size int64 `json:"size"` + Kind string `json:"kind"` // "archive", "installer", "source" +} + +// Fetch retrieves the Go release index. +// +// Yields one batch containing all releases. 
The iterator interface exists +// so callers use the same pattern as paginated sources. +func Fetch(ctx context.Context, client *http.Client) iter.Seq2[[]Release, error] { + return func(yield func([]Release, error) bool) { + url := "https://golang.org/dl/?mode=json&include=all" + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + yield(nil, fmt.Errorf("golang: %w", err)) + return + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + yield(nil, fmt.Errorf("golang: fetch: %w", err)) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + yield(nil, fmt.Errorf("golang: fetch: %s", resp.Status)) + return + } + + var releases []Release + if err := json.NewDecoder(resp.Body).Decode(&releases); err != nil { + yield(nil, fmt.Errorf("golang: decode: %w", err)) + return + } + + yield(releases, nil) + } +} diff --git a/internal/releases/gpgdist/gpgdist.go b/internal/releases/gpgdist/gpgdist.go new file mode 100644 index 0000000..ee2ff98 --- /dev/null +++ b/internal/releases/gpgdist/gpgdist.go @@ -0,0 +1,70 @@ +// Package gpgdist fetches GPG for macOS release data from SourceForge RSS. +// +// The gpgosx project publishes DMG installers on SourceForge. The RSS feed +// at https://sourceforge.net/projects/gpgosx/rss?path=/ lists download links +// for each version. +package gpgdist + +import ( + "context" + "fmt" + "io" + "iter" + "net/http" + "regexp" +) + +// Entry is one GPG macOS release. +type Entry struct { + Version string `json:"version"` // "2.4.7" + URL string `json:"url"` // full SourceForge download URL +} + +var linkRe = regexp.MustCompile( + `(https://sourceforge\.net/projects/gpgosx/files/GnuPG-([\d.]+)\.dmg/download)`, +) + +// Fetch retrieves GPG macOS releases from the SourceForge RSS feed. +// +// Yields one batch containing all releases. 
+func Fetch(ctx context.Context, client *http.Client) iter.Seq2[[]Entry, error] { + return func(yield func([]Entry, error) bool) { + url := "https://sourceforge.net/projects/gpgosx/rss?path=/" + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + yield(nil, fmt.Errorf("gpgdist: %w", err)) + return + } + req.Header.Set("Accept", "application/rss+xml") + + resp, err := client.Do(req) + if err != nil { + yield(nil, fmt.Errorf("gpgdist: fetch: %w", err)) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + yield(nil, fmt.Errorf("gpgdist: fetch: %s", resp.Status)) + return + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + yield(nil, fmt.Errorf("gpgdist: read: %w", err)) + return + } + + matches := linkRe.FindAllStringSubmatch(string(body), -1) + var entries []Entry + for _, m := range matches { + entries = append(entries, Entry{ + URL: m[1], + Version: m[2], + }) + } + + yield(entries, nil) + } +} diff --git a/internal/releases/hashicorp/hashicorp.go b/internal/releases/hashicorp/hashicorp.go new file mode 100644 index 0000000..32ad243 --- /dev/null +++ b/internal/releases/hashicorp/hashicorp.go @@ -0,0 +1,79 @@ +// Package hashicorp fetches release data from the HashiCorp releases API. +// +// HashiCorp publishes release indexes at: +// +// https://releases.hashicorp.com/{product}/index.json +// +// The response is a JSON object with a "versions" key mapping version strings +// to objects containing build arrays with url, os, arch, and filename. +package hashicorp + +import ( + "context" + "encoding/json" + "fmt" + "iter" + "net/http" +) + +// Index is the top-level response from the HashiCorp releases API. +type Index struct { + Versions map[string]Version `json:"versions"` +} + +// Version is one release version with its builds. 
+type Version struct { + Name string `json:"name"` // "terraform" + Version string `json:"version"` // "1.12.0" + SHASUMS string `json:"shasums,omitempty"` // URL to SHA256SUMS file + SHASUMSSig string `json:"shasums_signature"` // URL to signature + Builds []Build `json:"builds"` + TimestampCreated string `json:"timestamp_created,omitempty"` + TimestampUpdated string `json:"timestamp_updated,omitempty"` +} + +// Build is one downloadable artifact. +type Build struct { + Name string `json:"name"` // "terraform" + Version string `json:"version"` // "1.12.0" + OS string `json:"os"` // "linux", "darwin", "windows" + Arch string `json:"arch"` // "amd64", "arm64", "386" + Filename string `json:"filename"` // "terraform_1.12.0_linux_amd64.zip" + URL string `json:"url"` // full download URL +} + +// Fetch retrieves the HashiCorp release index for a product. +// +// Yields one batch containing all versions. +func Fetch(ctx context.Context, client *http.Client, product string) iter.Seq2[*Index, error] { + return func(yield func(*Index, error) bool) { + url := fmt.Sprintf("https://releases.hashicorp.com/%s/index.json", product) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + yield(nil, fmt.Errorf("hashicorp: %w", err)) + return + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + yield(nil, fmt.Errorf("hashicorp: fetch %s: %w", product, err)) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + yield(nil, fmt.Errorf("hashicorp: fetch %s: %s", product, resp.Status)) + return + } + + var idx Index + if err := json.NewDecoder(resp.Body).Decode(&idx); err != nil { + yield(nil, fmt.Errorf("hashicorp: decode %s: %w", product, err)) + return + } + + yield(&idx, nil) + } +} diff --git a/internal/releases/iterm2dist/iterm2dist.go b/internal/releases/iterm2dist/iterm2dist.go new file mode 100644 index 0000000..bcb98d4 --- /dev/null +++ 
b/internal/releases/iterm2dist/iterm2dist.go @@ -0,0 +1,105 @@ +// Package iterm2dist fetches iTerm2 release URLs from the downloads page. +// +// iTerm2 doesn't have a structured API — releases are listed as links on: +// +// https://iterm2.com/downloads.html +// +// This package scrapes download links matching iTerm2-[34]*.zip from the +// HTML and returns them as structured entries. +package iterm2dist + +import ( + "context" + "fmt" + "io" + "iter" + "net/http" + "regexp" + "strings" +) + +// Entry is one iTerm2 download link with extracted metadata. +type Entry struct { + Version string `json:"version"` // "3.5.13" + Channel string `json:"channel"` // "stable" or "beta" + URL string `json:"url"` // full download URL +} + +var linkRe = regexp.MustCompile(`href="(https://iterm2\.com/downloads/[^"]*\.zip)"`) +var versionRe = regexp.MustCompile(`iTerm2[-_]v?(\d+(?:_\d+)*)(?:[-_]?(beta|preview)[-_]?(\d*))?\.zip`) + +// Fetch retrieves iTerm2 releases by scraping the downloads page. +// +// Yields one batch containing all releases. 
+func Fetch(ctx context.Context, client *http.Client) iter.Seq2[[]Entry, error] { + return func(yield func([]Entry, error) bool) { + url := "https://iterm2.com/downloads.html" + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + yield(nil, fmt.Errorf("iterm2dist: %w", err)) + return + } + req.Header.Set("Accept", "text/html") + + resp, err := client.Do(req) + if err != nil { + yield(nil, fmt.Errorf("iterm2dist: fetch: %w", err)) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + yield(nil, fmt.Errorf("iterm2dist: fetch: %s", resp.Status)) + return + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + yield(nil, fmt.Errorf("iterm2dist: read: %w", err)) + return + } + + matches := linkRe.FindAllStringSubmatch(string(body), -1) + var entries []Entry + seen := make(map[string]bool) + for _, m := range matches { + link := m[1] + // Only include iTerm2 v3+ downloads. + if !strings.Contains(link, "iTerm2-3") && !strings.Contains(link, "iTerm2-4") { + continue + } + + entry := Entry{URL: link} + + // Determine channel from URL path. + if strings.Contains(link, "/stable/") { + entry.Channel = "stable" + } else { + entry.Channel = "beta" + } + + // Extract version: iTerm2-3_5_13.zip → 3.5.13 + vm := versionRe.FindStringSubmatch(link) + if vm != nil { + entry.Version = strings.ReplaceAll(vm[1], "_", ".") + // vm[2] = "beta" or "preview", vm[3] = optional number + if vm[2] != "" { + entry.Version += "-" + vm[2] + vm[3] + } + } + + // The downloads page has duplicate links for some betas + // (e.g. iTerm2-3_5_1beta1.zip and iTerm2-3_5_1_beta1.zip). + // Keep the first URL encountered per version. 
+ if seen[entry.Version] { + continue + } + seen[entry.Version] = true + + entries = append(entries, entry) + } + + yield(entries, nil) + } +} diff --git a/internal/releases/juliadist/juliadist.go b/internal/releases/juliadist/juliadist.go new file mode 100644 index 0000000..1676886 --- /dev/null +++ b/internal/releases/juliadist/juliadist.go @@ -0,0 +1,89 @@ +// Package juliadist fetches Julia release data from the Julia S3 API. +// +// Julia publishes a version index at: +// +// https://julialang-s3.julialang.org/bin/versions.json +// +// The response is a JSON object keyed by version string, where each value +// has a "files" array of downloadable artifacts with url, triplet, kind, +// arch, os, sha256, size, and extension fields. +package juliadist + +import ( + "context" + "encoding/json" + "fmt" + "iter" + "net/http" +) + +// Release is one Julia version with its file artifacts. +type Release struct { + Version string `json:"version"` // set by us from the key + Stable bool `json:"stable"` + Files []File `json:"files"` +} + +// File is one downloadable artifact. +type File struct { + URL string `json:"url"` // full download URL + Triplet string `json:"triplet"` // "aarch64-apple-darwin14" + Kind string `json:"kind"` // "archive" or "installer" + Arch string `json:"arch"` // "aarch64", "x86_64", "i686" + OS string `json:"os"` // "mac", "linux", "winnt" + SHA256 string `json:"sha256"` + Size int64 `json:"size"` + Version string `json:"version"` // same as release version + Extension string `json:"extension"` // "tar.gz", "dmg", "exe" +} + +// rawRelease is the upstream JSON shape (stable as bool, files array). +type rawRelease struct { + Stable bool `json:"stable"` + Files []File `json:"files"` +} + +// Fetch retrieves the Julia release index. +// +// Yields one batch containing all releases. 
+func Fetch(ctx context.Context, client *http.Client) iter.Seq2[[]Release, error] { + return func(yield func([]Release, error) bool) { + url := "https://julialang-s3.julialang.org/bin/versions.json" + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + yield(nil, fmt.Errorf("juliadist: %w", err)) + return + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + yield(nil, fmt.Errorf("juliadist: fetch: %w", err)) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + yield(nil, fmt.Errorf("juliadist: fetch: %s", resp.Status)) + return + } + + var raw map[string]rawRelease + if err := json.NewDecoder(resp.Body).Decode(&raw); err != nil { + yield(nil, fmt.Errorf("juliadist: decode: %w", err)) + return + } + + var releases []Release + for version, r := range raw { + releases = append(releases, Release{ + Version: version, + Stable: r.Stable, + Files: r.Files, + }) + } + + yield(releases, nil) + } +} diff --git a/internal/releases/lsd/variants.go b/internal/releases/lsd/variants.go new file mode 100644 index 0000000..f825d2a --- /dev/null +++ b/internal/releases/lsd/variants.go @@ -0,0 +1,23 @@ +// Package lsd provides variant tagging for lsd (LSDeluxe) releases. +// +// lsd publishes .deb packages and windows-msvc builds alongside +// the standard archives. +package lsddist + +import ( + "strings" + + "github.com/webinstall/webi-installers/internal/storage" +) + +// TagVariants tags lsd-specific build variants. 
+func TagVariants(assets []storage.Asset) { + for i := range assets { + if assets[i].Format == ".deb" { + assets[i].Variants = append(assets[i].Variants, "deb") + } + if strings.Contains(strings.ToLower(assets[i].Filename), "-msvc") { + assets[i].Variants = append(assets[i].Variants, "msvc") + } + } +} diff --git a/internal/releases/mariadbdist/mariadbdist.go b/internal/releases/mariadbdist/mariadbdist.go new file mode 100644 index 0000000..2f2fb96 --- /dev/null +++ b/internal/releases/mariadbdist/mariadbdist.go @@ -0,0 +1,159 @@ +// Package mariadbdist fetches MariaDB release data from the downloads API. +// +// MariaDB publishes release information via a REST API: +// +// https://downloads.mariadb.org/rest-api/mariadb/ +// https://downloads.mariadb.org/rest-api/mariadb/{major.minor}/ +// +// The first endpoint lists major release series; the second lists all point +// releases within a series, including download URLs per platform. +package mariadbdist + +import ( + "context" + "encoding/json" + "fmt" + "iter" + "net/http" + "regexp" +) + +// MajorRelease describes one release series (e.g. "11.4"). +type MajorRelease struct { + ReleaseID string `json:"release_id"` // "11.4" + ReleaseName string `json:"release_name"` // "MariaDB Server 11.4" + ReleaseStatus string `json:"release_status"` // "Stable", "RC", "Alpha" + ReleaseSupportType string `json:"release_support_type"` // "Long Term Support", etc. +} + +// Release is one point release with its downloadable files. +type Release struct { + ReleaseID string `json:"release_id"` // "11.4.5" + ReleaseName string `json:"release_name"` // "MariaDB Server 11.4.5" + DateOfRelease string `json:"date_of_release"` // "2025-02-12" + ReleaseNotesURL string `json:"release_notes_url"` // URL + Files []File `json:"files"` + + // MajorStatus is copied from the parent MajorRelease. Not in upstream JSON. + MajorStatus string `json:"major_status,omitempty"` +} + +// File is one downloadable artifact within a release. 
+type File struct { + FileID int `json:"file_id"` + FileName string `json:"file_name"` + PackageType string `json:"package_type"` // "gzipped tar file", "ZIP file" + OS string `json:"os"` // "Linux", "Windows", or "" + CPU string `json:"cpu"` // "x86_64" or "" + Checksum Checksum `json:"checksum"` + FileDownloadURL string `json:"file_download_url"` +} + +// Checksum holds hash digests for a file. +type Checksum struct { + SHA256 string `json:"sha256sum"` +} + +type majorResp struct { + MajorReleases []MajorRelease `json:"major_releases"` +} + +type releaseResp struct { + Releases map[string]Release `json:"releases"` +} + +var reVersion = regexp.MustCompile(`^\d+\.\d+$`) + +// Fetch retrieves all MariaDB releases across all major series. +// +// Yields one batch per major release series. +func Fetch(ctx context.Context, client *http.Client) iter.Seq2[[]Release, error] { + return func(yield func([]Release, error) bool) { + // Step 1: list major release series. + majors, err := fetchMajors(ctx, client) + if err != nil { + yield(nil, err) + return + } + + // Step 2: fetch point releases for each series. + for _, major := range majors { + if !reVersion.MatchString(major.ReleaseID) { + continue + } + + releases, err := fetchReleases(ctx, client, major.ReleaseID) + if err != nil { + yield(nil, fmt.Errorf("mariadbdist: %s: %w", major.ReleaseID, err)) + return + } + + // Tag each release with the major status. 
+ for i := range releases { + releases[i].MajorStatus = major.ReleaseStatus + } + + if !yield(releases, nil) { + return + } + } + } +} + +func fetchMajors(ctx context.Context, client *http.Client) ([]MajorRelease, error) { + url := "https://downloads.mariadb.org/rest-api/mariadb/" + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("mariadbdist: %w", err) + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("mariadbdist: fetch majors: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("mariadbdist: fetch majors: %s", resp.Status) + } + + var result majorResp + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return nil, fmt.Errorf("mariadbdist: decode majors: %w", err) + } + + return result.MajorReleases, nil +} + +func fetchReleases(ctx context.Context, client *http.Client, majorID string) ([]Release, error) { + url := fmt.Sprintf("https://downloads.mariadb.org/rest-api/mariadb/%s", majorID) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("mariadbdist: %w", err) + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("mariadbdist: fetch %s: %w", majorID, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("mariadbdist: fetch %s: %s", majorID, resp.Status) + } + + var result releaseResp + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return nil, fmt.Errorf("mariadbdist: decode %s: %w", majorID, err) + } + + var releases []Release + for _, r := range result.Releases { + releases = append(releases, r) + } + return releases, nil +} diff --git a/internal/releases/node/node.go b/internal/releases/node/node.go new file mode 100644 index 0000000..a56b1a0 
--- /dev/null +++ b/internal/releases/node/node.go @@ -0,0 +1,39 @@ +// Package node fetches Node.js releases from both official and unofficial +// build sources. +// +// Official builds cover the standard platforms (linux-x64, osx-arm64, win-x64, +// etc.). Unofficial builds add musl, loong64, and other targets that the +// official CI doesn't produce. +// +// Both sources use the same index format, served by [nodedist]. +package nodedist + +import ( + "context" + "iter" + "net/http" + + "github.com/webinstall/webi-installers/internal/releases/nodedist" +) + +const ( + officialURL = "https://nodejs.org/download/release" + unofficialURL = "https://unofficial-builds.nodejs.org/download/release" +) + +// Fetch retrieves Node.js releases from both official and unofficial sources. +// Yields one batch per source (official first, then unofficial). +func Fetch(ctx context.Context, client *http.Client) iter.Seq2[[]nodedist.Entry, error] { + return func(yield func([]nodedist.Entry, error) bool) { + for entries, err := range nodedist.Fetch(ctx, client, officialURL) { + if !yield(entries, err) { + return + } + } + for entries, err := range nodedist.Fetch(ctx, client, unofficialURL) { + if !yield(entries, err) { + return + } + } + } +} diff --git a/internal/releases/node/node_test.go b/internal/releases/node/node_test.go new file mode 100644 index 0000000..6149abe --- /dev/null +++ b/internal/releases/node/node_test.go @@ -0,0 +1,36 @@ +package nodedist_test + +import ( + "context" + "net/http" + "testing" + + "github.com/webinstall/webi-installers/internal/releases/node" +) + +func TestFetchCombinesSources(t *testing.T) { + if testing.Short() { + t.Skip("skipping network test in short mode") + } + + ctx := context.Background() + client := &http.Client{} + + var batches int + var total int + for entries, err := range nodedist.Fetch(ctx, client) { + if err != nil { + t.Fatalf("batch %d: %v", batches, err) + } + batches++ + total += len(entries) + } + + if batches != 2 { + 
t.Errorf("got %d batches, want 2 (official + unofficial)", batches) + } + if total < 100 { + t.Errorf("got %d total entries, expected at least 100", total) + } + t.Logf("fetched %d entries in %d batches", total, batches) +} diff --git a/internal/releases/node/variants.go b/internal/releases/node/variants.go new file mode 100644 index 0000000..ab8cad6 --- /dev/null +++ b/internal/releases/node/variants.go @@ -0,0 +1,20 @@ +package nodedist + +import "github.com/webinstall/webi-installers/internal/storage" + +// TagVariants tags node-specific build variants. +// +// The bare .exe is just node.exe without npm — too minimal to be useful. +// The .msi is a Windows GUI installer — webi uses the .zip instead. +// The .pkg is a macOS installer package — webi uses the .tar.gz instead. +// Both are tagged as "installer" so ExportLegacy drops them. +func TagVariants(assets []storage.Asset) { + for i := range assets { + switch assets[i].Format { + case ".exe": + assets[i].Variants = append(assets[i].Variants, "bare-exe") + case ".msi", ".pkg": + assets[i].Variants = append(assets[i].Variants, "installer") + } + } +} diff --git a/internal/releases/nodedist/nodedist.go b/internal/releases/nodedist/nodedist.go new file mode 100644 index 0000000..7790e05 --- /dev/null +++ b/internal/releases/nodedist/nodedist.go @@ -0,0 +1,108 @@ +// Package nodedist fetches a Node.js-style distribution index. +// +// Node.js publishes a JSON index of all releases at: +// +// https://nodejs.org/download/release/index.json +// +// Unofficial builds (musl, etc.) use the same format at: +// +// https://unofficial-builds.nodejs.org/download/release/index.json +// +// This package fetches and deserializes that index. It does not classify, +// normalize, or transform the data — the caller gets what the API returns. +package nodedist + +import ( + "context" + "encoding/json" + "fmt" + "iter" + "net/http" +) + +// Entry is one release from a Node.js distribution index. 
// Fields mirror the upstream JSON schema.
type Entry struct {
	Version  string   `json:"version"`  // "v25.8.0"
	Date     string   `json:"date"`     // "2026-03-03"
	Files    []string `json:"files"`    // ["linux-arm64", "osx-arm64-tar", ...]
	NPM      string   `json:"npm"`      // "11.11.0"
	V8       string   `json:"v8"`       // "14.1.146.11"
	UV       string   `json:"uv"`       // "1.51.0"
	Zlib     string   `json:"zlib"`     // "1.3.1"
	OpenSSL  string   `json:"openssl"`  // "3.5.5"
	Modules  string   `json:"modules"`  // "141"
	LTS      LTS      `json:"lts"`      // false or "Jod"
	Security bool     `json:"security"` // true if security release
}

// LTS holds the long-term support status. The upstream API encodes this as
// either the boolean false or a codename string like "Jod" or "Iron".
// An empty string means the release is not LTS.
type LTS string

// UnmarshalJSON decodes the upstream "lts" value: the literal false becomes
// the empty string and a JSON string becomes the codename. Any other JSON
// value is rejected; the returned error wraps the underlying encoding/json
// error so callers can inspect it with errors.As.
func (l *LTS) UnmarshalJSON(data []byte) error {
	// false → ""
	if string(data) == "false" {
		*l = ""
		return nil
	}

	// "Codename" → Codename
	var s string
	if err := json.Unmarshal(data, &s); err != nil {
		return fmt.Errorf("nodedist: unexpected lts value: %s: %w", data, err)
	}
	*l = LTS(s)
	return nil
}

// MarshalJSON is the inverse of UnmarshalJSON: the empty string is written
// as the literal false and any other value as a JSON string.
func (l LTS) MarshalJSON() ([]byte, error) {
	if l == "" {
		return []byte("false"), nil
	}
	return json.Marshal(string(l))
}

// Fetch retrieves the Node.js distribution index from baseURL.
//
// The iterator yields one batch per HTTP response. The Node.js index API
// returns all releases in a single response, so there will be exactly one
// yield. The iterator interface exists so that callers use the same pattern
// for paginated sources (like GitHub).
+// +// Standard base URLs: +// - https://nodejs.org/download/release +// - https://unofficial-builds.nodejs.org/download/release +func Fetch(ctx context.Context, client *http.Client, baseURL string) iter.Seq2[[]Entry, error] { + return func(yield func([]Entry, error) bool) { + url := baseURL + "/index.json" + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + yield(nil, fmt.Errorf("nodedist: %w", err)) + return + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + yield(nil, fmt.Errorf("nodedist: fetch %s: %w", url, err)) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + yield(nil, fmt.Errorf("nodedist: fetch %s: %s", url, resp.Status)) + return + } + + var entries []Entry + if err := json.NewDecoder(resp.Body).Decode(&entries); err != nil { + yield(nil, fmt.Errorf("nodedist: decode %s: %w", url, err)) + return + } + + yield(entries, nil) + } +} diff --git a/internal/releases/nodedist/nodedist_test.go b/internal/releases/nodedist/nodedist_test.go new file mode 100644 index 0000000..d451417 --- /dev/null +++ b/internal/releases/nodedist/nodedist_test.go @@ -0,0 +1,143 @@ +package nodedist_test + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/webinstall/webi-installers/internal/releases/nodedist" +) + +// Minimal fixture from the real Node.js dist API. 
+const testIndex = `[ + { + "version": "v22.14.0", + "date": "2025-02-11", + "files": ["linux-arm64", "linux-x64", "osx-arm64-tar", "win-x64-zip", "src", "headers"], + "npm": "10.9.2", + "v8": "12.4.254.21", + "uv": "1.49.2", + "zlib": "1.3.0.1-motley-82a6be0", + "openssl": "3.0.15+quic", + "modules": "127", + "lts": "Jod", + "security": false + }, + { + "version": "v23.7.0", + "date": "2025-02-04", + "files": ["linux-arm64", "linux-x64", "osx-arm64-tar", "win-x64-zip"], + "npm": "10.9.2", + "v8": "13.2.152.16", + "uv": "1.49.2", + "zlib": "1.3.0.1-motley-82a6be0", + "openssl": "3.0.15+quic", + "modules": "131", + "lts": false, + "security": true + } +]` + +func TestFetch(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/index.json" { + t.Errorf("unexpected path: %s", r.URL.Path) + http.NotFound(w, r) + return + } + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(testIndex)) + })) + defer srv.Close() + + ctx := context.Background() + var got []nodedist.Entry + + for entries, err := range nodedist.Fetch(ctx, srv.Client(), srv.URL) { + if err != nil { + t.Fatalf("Fetch: %v", err) + } + got = append(got, entries...) 
+ } + + if len(got) != 2 { + t.Fatalf("got %d entries, want 2", len(got)) + } + + // First entry: LTS release + if got[0].Version != "v22.14.0" { + t.Errorf("entry[0].Version = %q, want %q", got[0].Version, "v22.14.0") + } + if got[0].Date != "2025-02-11" { + t.Errorf("entry[0].Date = %q, want %q", got[0].Date, "2025-02-11") + } + if got[0].LTS != "Jod" { + t.Errorf("entry[0].LTS = %q, want %q", got[0].LTS, "Jod") + } + if got[0].Security { + t.Error("entry[0].Security = true, want false") + } + if len(got[0].Files) != 6 { + t.Errorf("entry[0].Files len = %d, want 6", len(got[0].Files)) + } + + // Second entry: non-LTS, security release + if got[1].Version != "v23.7.0" { + t.Errorf("entry[1].Version = %q, want %q", got[1].Version, "v23.7.0") + } + if got[1].LTS != "" { + t.Errorf("entry[1].LTS = %q, want empty (non-LTS)", got[1].LTS) + } + if !got[1].Security { + t.Error("entry[1].Security = false, want true") + } +} + +func TestFetchHTTPError(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "rate limited", http.StatusTooManyRequests) + })) + defer srv.Close() + + ctx := context.Background() + for _, err := range nodedist.Fetch(ctx, srv.Client(), srv.URL) { + if err == nil { + t.Fatal("expected error for 429 response") + } + return + } +} + +func TestLTSMarshalRoundTrip(t *testing.T) { + // LTS codename + entry := nodedist.Entry{LTS: "Jod"} + data, err := json.Marshal(entry) + if err != nil { + t.Fatal(err) + } + + var got nodedist.Entry + if err := json.Unmarshal(data, &got); err != nil { + t.Fatal(err) + } + if got.LTS != "Jod" { + t.Errorf("LTS roundtrip: got %q, want %q", got.LTS, "Jod") + } + + // Non-LTS + entry2 := nodedist.Entry{LTS: ""} + data2, err := json.Marshal(entry2) + if err != nil { + t.Fatal(err) + } + + var got2 nodedist.Entry + if err := json.Unmarshal(data2, &got2); err != nil { + t.Fatal(err) + } + if got2.LTS != "" { + t.Errorf("non-LTS roundtrip: got %q, want empty", 
got2.LTS) + } +} diff --git a/internal/releases/ollama/variants.go b/internal/releases/ollama/variants.go new file mode 100644 index 0000000..224edc9 --- /dev/null +++ b/internal/releases/ollama/variants.go @@ -0,0 +1,32 @@ +// Package ollama provides variant tagging for Ollama releases. +// +// Ollama publishes GPU accelerator builds: -rocm (AMD), -jetpack5 +// and -jetpack6 (NVIDIA Jetson). +package ollamadist + +import ( + "strings" + + "github.com/webinstall/webi-installers/internal/storage" +) + +// TagVariants tags ollama-specific build variants. +func TagVariants(assets []storage.Asset) { + for i := range assets { + lower := strings.ToLower(assets[i].Filename) + for _, v := range []string{"rocm", "jetpack5", "jetpack6"} { + if strings.Contains(lower, "-"+v) { + assets[i].Variants = append(assets[i].Variants, v) + } + } + // Ollama-darwin.zip (capital O) is the macOS .app bundle. + // Installable by Go (extract .app), but not in legacy cache. + if strings.HasPrefix(assets[i].Filename, "Ollama-") { + assets[i].Variants = append(assets[i].Variants, "app") + } + // ollama-darwin is a universal2 fat binary (arm64 + amd64). + if assets[i].OS == "darwin" && assets[i].Arch == "" { + assets[i].Arch = "universal2" + } + } +} diff --git a/internal/releases/postgres/versions.go b/internal/releases/postgres/versions.go new file mode 100644 index 0000000..8ffb95e --- /dev/null +++ b/internal/releases/postgres/versions.go @@ -0,0 +1,80 @@ +package postgres + +import ( + "strings" + + "github.com/webinstall/webi-installers/internal/storage" +) + +// NormalizeVersions strips the REL_ prefix and converts underscores to dots. +// GitHub tags are "REL_17_0" → version becomes "17.0". 
+func NormalizeVersions(assets []storage.Asset) { + for i := range assets { + v := strings.TrimPrefix(assets[i].Version, "REL_") + assets[i].Version = strings.ReplaceAll(v, "_", ".") + } +} + +// LegacyReleases returns the old EnterpriseDB binary releases that predate +// the bnnanet/postgresql-releases GitHub repo. +func LegacyReleases() []storage.Asset { + edbURL := "https://get.enterprisedb.com/postgresql/" + return []storage.Asset{ + { + Filename: "postgresql-10.12-1-linux-x64-binaries.tar.gz", + Version: "10.12", + Channel: "stable", + OS: "linux", + Arch: "x86_64", + Libc: "gnu", + Format: ".tar.gz", + Download: edbURL + "postgresql-10.12-1-linux-x64-binaries.tar.gz?ls=Crossover&type=Crossover", + }, + { + Filename: "postgresql-10.12-1-linux-binaries.tar.gz", + Version: "10.12", + Channel: "stable", + OS: "linux", + Arch: "x86", + Libc: "gnu", + Format: ".tar.gz", + Download: edbURL + "postgresql-10.12-1-linux-binaries.tar.gz?ls=Crossover&type=Crossover", + }, + { + Filename: "postgresql-10.12-1-osx-binaries.zip", + Version: "10.12", + Channel: "stable", + OS: "darwin", + Arch: "x86_64", + Format: ".zip", + Download: edbURL + "postgresql-10.12-1-osx-binaries.zip?ls=Crossover&type=Crossover", + }, + { + Filename: "postgresql-10.13-1-osx-binaries.zip", + Version: "10.13", + Channel: "stable", + OS: "darwin", + Arch: "x86_64", + Format: ".zip", + Download: edbURL + "postgresql-10.13-1-osx-binaries.zip?ls=Crossover&type=Crossover", + }, + { + Filename: "postgresql-11.8-1-osx-binaries.zip", + Version: "11.8", + Channel: "stable", + OS: "darwin", + Arch: "x86_64", + Format: ".zip", + Download: edbURL + "postgresql-11.8-1-osx-binaries.zip?ls=Crossover&type=Crossover", + }, + { + Filename: "postgresql-12.3-1-osx-binaries.zip", + Version: "12.3", + Channel: "stable", + OS: "darwin", + Arch: "x86_64", + Format: ".zip", + Download: edbURL + "postgresql-12.3-1-osx-binaries.zip?ls=Crossover&type=Crossover", + }, + } +} diff --git a/internal/releases/pwsh/variants.go 
b/internal/releases/pwsh/variants.go new file mode 100644 index 0000000..b5ca0d5 --- /dev/null +++ b/internal/releases/pwsh/variants.go @@ -0,0 +1,37 @@ +// Package pwsh provides variant tagging for PowerShell releases. +// +// PowerShell publishes .NET framework-dependent builds (-fxdependent) +// that are smaller but require a .NET runtime to be installed. +package pwshdist + +import ( + "regexp" + "strings" + + "github.com/webinstall/webi-installers/internal/storage" +) + +// winVersionRe matches Windows-version-specific filenames like +// "win10-win2016-x64" or "win81-x64" from early PowerShell releases. +var winVersionRe = regexp.MustCompile(`(?i)-win(?:7|8|81|10|2008|2012|2016)`) + +// TagVariants tags pwsh-specific build variants. +// +// Early releases (pre-6.1) used Windows-version-specific filenames +// like "win10-win2016-x64" and "win81-win2012r2-x64". These can't +// be resolved by the legacy cache and are tagged as variants. +func TagVariants(assets []storage.Asset) { + for i := range assets { + lower := strings.ToLower(assets[i].Filename) + switch { + case strings.Contains(lower, "-fxdependentwindesktop"): + assets[i].Variants = append(assets[i].Variants, "fxdependentWinDesktop") + case strings.Contains(lower, "-fxdependent"): + assets[i].Variants = append(assets[i].Variants, "fxdependent") + case winVersionRe.MatchString(lower): + assets[i].Variants = append(assets[i].Variants, "win-version-specific") + case strings.HasSuffix(lower, ".appimage"): + assets[i].Variants = append(assets[i].Variants, "appimage") + } + } +} diff --git a/internal/releases/sass/variants.go b/internal/releases/sass/variants.go new file mode 100644 index 0000000..e134009 --- /dev/null +++ b/internal/releases/sass/variants.go @@ -0,0 +1,19 @@ +// Package sass provides variant tagging for Dart Sass releases. +// +// Dart Sass uses bare "arm" in filenames to mean ARMv7 (the Dart VM's +// minimum ARM target). 
The generic classifier maps bare "arm" to armv6, +// so we correct it here. +package sassdist + +import ( + "github.com/webinstall/webi-installers/internal/storage" +) + +// TagVariants remaps bare arm → armv7 for Dart Sass assets. +func TagVariants(assets []storage.Asset) { + for i := range assets { + if assets[i].Arch == "armv6" { + assets[i].Arch = "armv7" + } + } +} diff --git a/internal/releases/servicemandist/servicemandist.go b/internal/releases/servicemandist/servicemandist.go new file mode 100644 index 0000000..ca7aa39 --- /dev/null +++ b/internal/releases/servicemandist/servicemandist.go @@ -0,0 +1,75 @@ +// Package servicemandist fetches serviceman releases from two GitHub repos. +// +// serviceman moved from therootcompany/serviceman (binary cross-platform +// releases, ≤v0.8.x) to bnnanet/serviceman (source-only POSIX, v0.9.x+). +// Both repos must be fetched to provide the complete version history, +// including the only Windows binary at v0.8.0. +package servicemandist + +import ( + "context" + "encoding/json" + "fmt" + "log" + "net/http" + "path/filepath" + + "github.com/webinstall/webi-installers/internal/rawcache" + "github.com/webinstall/webi-installers/internal/releases/github" + "github.com/webinstall/webi-installers/internal/releases/githubish" +) + +const ( + primaryOwner = "bnnanet" + primaryRepo = "serviceman" + + legacyOwner = "therootcompany" + legacyRepo = "serviceman" +) + +// Fetch retrieves serviceman releases from both GitHub repos and merges +// them into the raw cache. The primary repo (bnnanet) contains v0.9.x+; +// the legacy repo (therootcompany) contains ≤v0.8.x with Windows binaries. +func Fetch(ctx context.Context, client *http.Client, rawDir, pkgName string, auth *githubish.Auth, shallow bool) error { + d, err := rawcache.Open(filepath.Join(rawDir, pkgName)) + if err != nil { + return err + } + + // Primary: bnnanet/serviceman (v0.9.x+ source tarballs). 
+ for batch, err := range github.Fetch(ctx, client, primaryOwner, primaryRepo, auth) { + if err != nil { + return fmt.Errorf("servicemandist: %s/%s: %w", primaryOwner, primaryRepo, err) + } + for _, rel := range batch { + if rel.Draft { + continue + } + data, _ := json.Marshal(rel) + d.Merge(primaryOwner+"/"+rel.TagName, data) + } + if shallow { + break + } + } + + // Legacy: therootcompany/serviceman (≤v0.8.x binaries). + for batch, err := range github.Fetch(ctx, client, legacyOwner, legacyRepo, auth) { + if err != nil { + log.Printf("warning: servicemandist: %s/%s: %v", legacyOwner, legacyRepo, err) + break + } + for _, rel := range batch { + if rel.Draft { + continue + } + data, _ := json.Marshal(rel) + d.Merge(legacyOwner+"/"+rel.TagName, data) + } + if shallow { + break + } + } + + return nil +} diff --git a/internal/releases/servicemandist/variants.go b/internal/releases/servicemandist/variants.go new file mode 100644 index 0000000..6f5e833 --- /dev/null +++ b/internal/releases/servicemandist/variants.go @@ -0,0 +1,16 @@ +package servicemandist + +import ( + "github.com/webinstall/webi-installers/internal/storage" +) + +// TagVariants marks all git-format entries as POSIX-only. +// serviceman's git clone installs a POSIX shell script — no Windows support. +// Binary releases (v0.8.x tar.gz/zip) already have per-platform OS set. +func TagVariants(assets []storage.Asset) { + for i := range assets { + if assets[i].Format == "git" && assets[i].OS == "" { + assets[i].OS = "posix_2017" + } + } +} diff --git a/internal/releases/sttr/variants.go b/internal/releases/sttr/variants.go new file mode 100644 index 0000000..3a13370 --- /dev/null +++ b/internal/releases/sttr/variants.go @@ -0,0 +1,36 @@ +// Package sttr provides variant tagging for sttr releases. +// +// sttr ships a darwin_all (universal macOS) archive alongside per-arch builds. 
+// These universal archives have no arch in the filename — Go classifies them as +// os="darwin", arch="" which the Node builds-cacher rejects with FORMAT CHANGE +// (Node's classifier extracts a different arch from "all"). Production Node +// also stores these as os="", arch="" (unroutable). +// +// .sbom.json files are software bill-of-materials metadata — not installable +// archives. They pass through the format filter (ext="") but should not be +// served. +package sttrdist + +import ( + "strings" + + "github.com/webinstall/webi-installers/internal/storage" +) + +// TagVariants tags sttr-specific build variants for exclusion from legacy export. +func TagVariants(assets []storage.Asset) { + for i := range assets { + lower := strings.ToLower(assets[i].Filename) + // darwin_all / Darwin_all: universal macOS archive with no arch info. + // Node's classifier extracts a different result → FORMAT CHANGE. + // Production LIVE_cache has these as os="", arch="" (unroutable). + if strings.Contains(lower, "darwin_all") { + assets[i].Variants = append(assets[i].Variants, "universal-all") + continue + } + // .sbom.json: software bill-of-materials, not an installable archive. + if strings.HasSuffix(lower, ".sbom.json") { + assets[i].Variants = append(assets[i].Variants, "metadata") + } + } +} diff --git a/internal/releases/uuidv7/variants.go b/internal/releases/uuidv7/variants.go new file mode 100644 index 0000000..9a5b7c0 --- /dev/null +++ b/internal/releases/uuidv7/variants.go @@ -0,0 +1,18 @@ +// Package uuidv7 provides variant tagging for uuidv7 releases. +package uuidv7dist + +import "github.com/webinstall/webi-installers/internal/storage" + +// TagVariants tags uuidv7-specific build variants for exclusion from legacy export. +// +// uuidv7 ships powerpc (32-bit) and powerpc64 binaries alongside the common +// platforms. Webi does not serve powerpc targets, and production Node also +// classifies these as os="", arch="" (not routable). Tag them unsupported. 
+func TagVariants(assets []storage.Asset) { + for i := range assets { + switch assets[i].Arch { + case "powerpc", "ppc64", "ppc64le": + assets[i].Variants = append(assets[i].Variants, "unsupported-platform") + } + } +} diff --git a/internal/releases/watchexec/variants.go b/internal/releases/watchexec/variants.go new file mode 100644 index 0000000..23662ae --- /dev/null +++ b/internal/releases/watchexec/variants.go @@ -0,0 +1,18 @@ +// Package watchexec provides variant tagging and version normalization for watchexec. +package watchexecdist + +import "github.com/webinstall/webi-installers/internal/storage" + +// TagVariants tags watchexec-specific build variants for exclusion from legacy export. +// +// Watchexec ships powerpc64le binaries alongside the common platforms. +// Webi does not serve powerpc targets, and production Node also classifies +// these as os="", arch="" (not routable). Tag them unsupported. +func TagVariants(assets []storage.Asset) { + for i := range assets { + switch assets[i].Arch { + case "powerpc", "ppc64", "ppc64le": + assets[i].Variants = append(assets[i].Variants, "unsupported-platform") + } + } +} diff --git a/internal/releases/watchexec/versions.go b/internal/releases/watchexec/versions.go new file mode 100644 index 0000000..f9949e0 --- /dev/null +++ b/internal/releases/watchexec/versions.go @@ -0,0 +1,18 @@ +package watchexecdist + +import ( + "strings" + + "github.com/webinstall/webi-installers/internal/storage" +) + +// NormalizeVersions strips the "cli-" prefix from watchexec version strings. +// +// Watchexec transitioned to a monorepo with cli-prefixed tags (cli-v1.20.0) +// while older releases used plain tags (v1.20.6). Both are valid releases; +// the prefix is just a tag namespace, not part of the version. 
+func NormalizeVersions(assets []storage.Asset) { + for i := range assets { + assets[i].Version = strings.TrimPrefix(assets[i].Version, "cli-") + } +} diff --git a/internal/releases/xcaddy/variants.go b/internal/releases/xcaddy/variants.go new file mode 100644 index 0000000..ee80fac --- /dev/null +++ b/internal/releases/xcaddy/variants.go @@ -0,0 +1,15 @@ +// Package xcaddy provides variant tagging for xcaddy releases. +// +// xcaddy publishes .deb packages alongside the standard archives. +package xcaddydist + +import "github.com/webinstall/webi-installers/internal/storage" + +// TagVariants tags xcaddy-specific build variants. +func TagVariants(assets []storage.Asset) { + for i := range assets { + if assets[i].Format == ".deb" { + assets[i].Variants = append(assets[i].Variants, "deb") + } + } +} diff --git a/internal/releases/xz/variants.go b/internal/releases/xz/variants.go new file mode 100644 index 0000000..7fd1b49 --- /dev/null +++ b/internal/releases/xz/variants.go @@ -0,0 +1,16 @@ +package xzdist + +import "github.com/webinstall/webi-installers/internal/storage" + +// TagVariants handles xz-specific arch defaults. +// +// therootcompany/xz-static names builds xz-{version}-{os}-{arch} for +// Linux/macOS but xz-{version}-windows.zip for Windows (only amd64 +// shipped). The arch token is absent only for the Windows build. +func TagVariants(assets []storage.Asset) { + for i := range assets { + if assets[i].Arch == "" && assets[i].OS == "windows" { + assets[i].Arch = "x86_64" + } + } +} diff --git a/internal/releases/zigdist/zigdist.go b/internal/releases/zigdist/zigdist.go new file mode 100644 index 0000000..e385ba2 --- /dev/null +++ b/internal/releases/zigdist/zigdist.go @@ -0,0 +1,131 @@ +// Package zigdist fetches Zig release data from ziglang.org. 
+// +// The API is a single JSON object keyed by version or branch name: +// +// https://ziglang.org/download/index.json +// +// Each version key maps to an object containing "date", "notes", and +// platform keys like "x86_64-linux", "aarch64-macos", etc. Platform +// values have "tarball", "shasum", and "size" fields. +package zigdist + +import ( + "context" + "encoding/json" + "fmt" + "iter" + "net/http" +) + +// Release is one Zig version with its per-platform builds. +type Release struct { + Version string `json:"version"` // set by us from the key or inner "version" field + Date string `json:"date"` + Notes string `json:"notes,omitempty"` + Platforms map[string]Platform `json:"platforms,omitempty"` // "x86_64-linux" → Platform +} + +// Platform is one downloadable artifact for a specific arch-os combo. +type Platform struct { + Tarball string `json:"tarball"` + Shasum string `json:"shasum"` + Size json.Number `json:"size"` // upstream sends as string +} + +// Fetch retrieves the Zig release index. +// +// Yields one batch containing all releases. The iterator interface exists +// so callers use the same pattern as paginated sources. +func Fetch(ctx context.Context, client *http.Client) iter.Seq2[[]Release, error] { + return func(yield func([]Release, error) bool) { + url := "https://ziglang.org/download/index.json" + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + yield(nil, fmt.Errorf("zigdist: %w", err)) + return + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + yield(nil, fmt.Errorf("zigdist: fetch: %w", err)) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + yield(nil, fmt.Errorf("zigdist: fetch: %s", resp.Status)) + return + } + + // The JSON is an object keyed by version/branch name. 
+ var raw map[string]json.RawMessage + if err := json.NewDecoder(resp.Body).Decode(&raw); err != nil { + yield(nil, fmt.Errorf("zigdist: decode: %w", err)) + return + } + + var releases []Release + for ref, data := range raw { + rel, err := parseRelease(ref, data) + if err != nil { + yield(nil, fmt.Errorf("zigdist: parse %s: %w", ref, err)) + return + } + releases = append(releases, rel) + } + + yield(releases, nil) + } +} + +// parseRelease extracts a Release from one version entry. The JSON mixes +// metadata fields ("date", "notes", "version", "src") with platform keys +// ("x86_64-linux", "aarch64-macos", etc.). +func parseRelease(ref string, data json.RawMessage) (Release, error) { + // First pass: grab known metadata fields. + var meta struct { + Version string `json:"version"` + Date string `json:"date"` + Notes string `json:"notes"` + } + if err := json.Unmarshal(data, &meta); err != nil { + return Release{}, err + } + + version := meta.Version + if version == "" { + version = ref + } + + // Second pass: grab all platform entries. + var all map[string]json.RawMessage + if err := json.Unmarshal(data, &all); err != nil { + return Release{}, err + } + + platforms := make(map[string]Platform) + for key, val := range all { + // Skip metadata keys. + switch key { + case "version", "date", "notes", "src": + continue + } + var p Platform + if err := json.Unmarshal(val, &p); err != nil { + continue // not a platform object + } + if p.Tarball == "" { + continue // not a platform object + } + platforms[key] = p + } + + return Release{ + Version: version, + Date: meta.Date, + Notes: meta.Notes, + Platforms: platforms, + }, nil +} diff --git a/internal/storage/fsstore/fsstore.go b/internal/storage/fsstore/fsstore.go new file mode 100644 index 0000000..a57b170 --- /dev/null +++ b/internal/storage/fsstore/fsstore.go @@ -0,0 +1,207 @@ +// Package fsstore implements [storage.Store] on the local filesystem. 
// Store is a filesystem-backed asset store.
type Store struct {
	root string
}

// Root returns the store's root directory path.
func (s *Store) Root() string {
	return s.root
}

// New creates a Store rooted at the given directory.
// The directory (and any missing parents) is created if it doesn't exist.
func New(root string) (*Store, error) {
	if err := os.MkdirAll(root, 0o755); err != nil {
		return nil, fmt.Errorf("fsstore: create root: %w", err)
	}
	return &Store{root: root}, nil
}

// ListPackages returns the names of all cached packages.
//
// A package is any non-directory entry in the root whose name is
// "{package}.json" with a non-empty package part. The result is nil, with no
// error, when the root directory does not exist.
func (s *Store) ListPackages(_ context.Context) ([]string, error) {
	entries, err := os.ReadDir(s.root)
	if os.IsNotExist(err) {
		return nil, nil
	}
	if err != nil {
		return nil, fmt.Errorf("fsstore: list packages: %w", err)
	}
	var pkgs []string
	for _, e := range entries {
		// A directory that happens to be named "*.json" is not a cache file
		// and could not be loaded as one.
		if e.IsDir() || !strings.HasSuffix(e.Name(), ".json") {
			continue
		}
		name := strings.TrimSuffix(e.Name(), ".json")
		if name == "" {
			continue // a file named exactly ".json" has no package name
		}
		pkgs = append(pkgs, name)
	}
	return pkgs, nil
}
+func (s *Store) Load(_ context.Context, pkg string) (*storage.PackageData, error) { + jsonPath := filepath.Join(s.root, pkg+".json") + + data, err := os.ReadFile(jsonPath) + if os.IsNotExist(err) { + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("fsstore: read %s: %w", pkg, err) + } + + // Decode via legacy format (Node.js compat: "releases", "name", "ext"). + var lc storage.LegacyCache + if err := json.Unmarshal(data, &lc); err != nil { + return nil, fmt.Errorf("fsstore: decode %s: %w", pkg, err) + } + pd := storage.ImportLegacy(lc) + + // Read the timestamp file. + tsPath := filepath.Join(s.root, pkg+".updated.txt") + if tsData, err := os.ReadFile(tsPath); err == nil { + pd.UpdatedAt = parseTimestamp(strings.TrimSpace(string(tsData))) + } + + return &pd, nil +} + +// BeginRefresh starts a write transaction for a package. +func (s *Store) BeginRefresh(_ context.Context, pkg string) (storage.RefreshTx, error) { + return &refreshTx{ + store: s, + pkg: pkg, + }, nil +} + +type refreshTx struct { + store *Store + pkg string + assets []storage.Asset +} + +func (tx *refreshTx) Put(assets []storage.Asset) error { + tx.assets = append(tx.assets, assets...) + return nil +} + +func (tx *refreshTx) Commit(_ context.Context) error { + now := time.Now() + dir := tx.store.root + + // Sort assets: stable/lts first, then beta, then rc, then alpha; + // within each channel, newest version first. + // The Node.js resolver picks the first matching entry, so stable[0] = latest stable + // must come before beta of a higher version number. + sort.SliceStable(tx.assets, func(i, j int) bool { + ri, rj := channelRank(tx.assets[i].Channel), channelRank(tx.assets[j].Channel) + if ri != rj { + return ri < rj + } + return lexver.Compare(lexver.Parse(tx.assets[i].Version), lexver.Parse(tx.assets[j].Version)) > 0 + }) + + // Encode via legacy format (Node.js compat: "releases", "name", "ext"). 
// atomicWrite writes data to path via a temp file + rename, so readers see
// either the old content or the complete new content, never a partial file.
//
// The temp file is created in the same directory as path (so the rename
// does not cross filesystems) under a unique name, which keeps concurrent
// writers of the same path from clobbering each other's temp file. The data
// is synced to disk before the rename. On any failure the temp file is
// removed and path is left untouched.
//
// The resulting file has mode 0644.
func atomicWrite(path string, data []byte) (err error) {
	f, err := os.CreateTemp(filepath.Dir(path), filepath.Base(path)+".*.tmp")
	if err != nil {
		return fmt.Errorf("fsstore: write tmp: %w", err)
	}
	tmp := f.Name()
	defer func() {
		if err != nil {
			// Best-effort cleanup; the original error is what matters.
			_ = os.Remove(tmp)
		}
	}()

	if _, err = f.Write(data); err != nil {
		_ = f.Close()
		return fmt.Errorf("fsstore: write tmp: %w", err)
	}
	// CreateTemp uses 0600; cache files are meant to be world-readable.
	if err = f.Chmod(0o644); err != nil {
		_ = f.Close()
		return fmt.Errorf("fsstore: write tmp: %w", err)
	}
	if err = f.Sync(); err != nil {
		_ = f.Close()
		return fmt.Errorf("fsstore: write tmp: %w", err)
	}
	if err = f.Close(); err != nil {
		return fmt.Errorf("fsstore: write tmp: %w", err)
	}

	if err = os.Rename(tmp, path); err != nil {
		return fmt.Errorf("fsstore: rename: %w", err)
	}
	return nil
}
// parseTimestamp parses the "seconds.millis" format from .updated.txt files.
//
// The value is rounded to the nearest millisecond, which is the precision
// the file is written with. Rounding (rather than truncating the fractional
// part) matters because a decimal such as "1700000000.123" has no exact
// float64 representation and would otherwise come back one millisecond
// short.
//
// The zero time is returned for input that is empty, unparsable, zero, NaN,
// infinite, or too large to express as int64 milliseconds.
func parseTimestamp(s string) time.Time {
	f, err := strconv.ParseFloat(s, 64)
	if err != nil || f == 0 || f != f { // f != f is true only for NaN
		return time.Time{}
	}

	// Round half away from zero to whole milliseconds.
	ms := f * 1000
	if ms >= 0 {
		ms += 0.5
	} else {
		ms -= 0.5
	}
	// Converting an out-of-range float to int64 is implementation-defined;
	// this also rejects ±Inf.
	if ms >= 1<<63 || ms < -(1<<63) {
		return time.Time{}
	}
	return time.UnixMilli(int64(ms))
}
+ pd, err = s.Load(ctx, "bat") + if err != nil { + t.Fatal(err) + } + if pd == nil { + t.Fatal("expected data after write") + } + if len(pd.Assets) != 2 { + t.Fatalf("got %d assets, want 2", len(pd.Assets)) + } + if pd.Assets[0].Filename != "bat-v0.26.1-aarch64-apple-darwin.tar.gz" { + t.Errorf("asset[0].Filename = %q", pd.Assets[0].Filename) + } + if pd.Assets[1].OS != "linux" { + t.Errorf("asset[1].OS = %q", pd.Assets[1].OS) + } + if pd.UpdatedAt.IsZero() { + t.Error("UpdatedAt should be set") + } +} + +func TestRollback(t *testing.T) { + dir := t.TempDir() + s, err := fsstore.New(dir) + if err != nil { + t.Fatal(err) + } + ctx := context.Background() + + tx, err := s.BeginRefresh(ctx, "bat") + if err != nil { + t.Fatal(err) + } + tx.Put([]storage.Asset{{Filename: "test", Version: "1.0"}}) + tx.Rollback() + + pd, err := s.Load(ctx, "bat") + if err != nil { + t.Fatal(err) + } + if pd != nil { + t.Fatal("expected nil after rollback") + } +} + +func TestReadLegacyFormat(t *testing.T) { + dir := t.TempDir() + s, err := fsstore.New(dir) + if err != nil { + t.Fatal(err) + } + ctx := context.Background() + + // Write assets and read back — the JSON uses "releases" key + // and "name"/"ext" field names for Node.js compat. 
+ tx, _ := s.BeginRefresh(ctx, "aliasman") + tx.Put([]storage.Asset{ + { + Filename: "BeyondCodeBootcamp-aliasman-v1.1.2-0-g0e5e1c1.tar.gz", + Version: "v1.1.2", + Channel: "stable", + Date: "2023-02-23", + OS: "posix_2017", + Arch: "*", + Format: "", + Download: "https://codeload.github.com/BeyondCodeBootcamp/aliasman/legacy.tar.gz/refs/tags/v1.1.2", + }, + }) + tx.Commit(ctx) + + pd, err := s.Load(ctx, "aliasman") + if err != nil { + t.Fatal(err) + } + if pd.Assets[0].OS != "posix_2017" { + t.Errorf("OS = %q, want posix_2017", pd.Assets[0].OS) + } +} diff --git a/internal/storage/legacy.go b/internal/storage/legacy.go new file mode 100644 index 0000000..d60b182 --- /dev/null +++ b/internal/storage/legacy.go @@ -0,0 +1,444 @@ +package storage + +import ( + "sort" + "strings" +) + +// Legacy types for reading/writing the Node.js _cache/ JSON format. +// +// The Node.js server calls assets "releases" and uses "name" for the +// filename and "ext" for the format. These types preserve that wire +// format for backward compatibility during migration. +// +// Internal Go code uses [Asset] and [PackageData] directly. + +// LegacyAsset matches the JSON shape the Node.js server writes and reads. +type LegacyAsset struct { + Name string `json:"name"` + Version string `json:"version"` + GitTag string `json:"git_tag,omitempty"` + GitCommitHash string `json:"git_commit_hash,omitempty"` + LTS bool `json:"lts"` + Channel string `json:"channel"` + Date string `json:"date"` + OS string `json:"os"` + Arch string `json:"arch"` + Libc string `json:"libc"` + Ext string `json:"ext"` + Download string `json:"download"` +} + +// LegacyCache matches the top-level JSON shape in _cache/{pkg}.json. 
+type LegacyCache struct { + OSes []string `json:"oses,omitempty"` + Arches []string `json:"arches,omitempty"` + Libcs []string `json:"libcs,omitempty"` + Formats []string `json:"formats,omitempty"` + Releases []LegacyAsset `json:"releases"` + Download string `json:"download"` +} + +// LegacyDropStats reports how many assets were excluded during ExportLegacy. +type LegacyDropStats struct { + Variants int // dropped: has build variant tags (e.g. rocm, installer, fxdependent) + Formats int // dropped: format not recognized by the Node.js server + Android int // dropped: android OS — classifier maps android filenames to linux + NoTarget int // dropped: no OS and no arch — unclassifiable source tarballs +} + +// ToAsset converts a LegacyAsset to the internal Asset type. +// It reverses the key vocabulary translations applied by toLegacy so that +// the internal (Go canonical) representation is preserved. +func (la LegacyAsset) ToAsset() Asset { + // Reverse-translate legacy Node.js vocabulary to Go canonical names. + // toLegacy writes macos/amd64/arm64; internal code uses darwin/x86_64/aarch64. + // "none" libc is buildmeta.LibcNone — preserve it (don't collapse to ""). + os := la.OS + switch os { + case "macos": + os = "darwin" + case "*": + os = "" + } + arch := la.Arch + switch arch { + case "amd64": + arch = "x86_64" + case "arm64": + arch = "aarch64" + case "*": + arch = "" + } + // Restore the dot-prefix convention used throughout internal Go code. + // The cache stores ext without a leading dot (e.g. "tar.gz", "zip", "exe"), + // but Asset.Format uses dotted strings (e.g. ".tar.gz", ".zip", ".exe"). + // "exe" is ambiguous: bare binary (no .exe suffix) vs Windows .exe file. + // Disambiguate by checking whether the filename ends with ".exe". + format := la.Ext + switch { + case format == "exe" && !strings.HasSuffix(strings.ToLower(la.Name), ".exe"): + format = "" // bare binary — internal convention is empty string + case format != "": + format = "." 
+ format // restore dot prefix for internal use + } + return Asset{ + Filename: la.Name, + Version: la.Version, + LTS: la.LTS, + Channel: la.Channel, + Date: la.Date, + OS: os, + Arch: arch, + Libc: la.Libc, + Format: format, + Download: la.Download, + GitTag: la.GitTag, + GitCommitHash: la.GitCommitHash, + } +} + +// toLegacy converts an Asset to the LegacyAsset wire format. +// Callers must have already applied legacyFieldBackport before calling this. +func (a Asset) toLegacy() LegacyAsset { + libc := a.Libc + if libc == "" { + libc = "none" // API expects "none" rather than empty string + } + // Strip leading dot: API expects "tar.gz" not ".tar.gz". + ext := strings.TrimPrefix(a.Format, ".") + // Bare binaries: API expects "exe". Internal convention is Format="" + // for bare binaries (no archive extension). By the time we reach + // toLegacy, source tarballs and git-clone entries have been filtered + // or tagged, so Format="" reliably means bare binary. + if ext == "" { + ext = "exe" + } + return LegacyAsset{ + Name: a.Filename, + Version: strings.TrimPrefix(a.Version, "v"), // API expects no v-prefix + GitTag: a.GitTag, + GitCommitHash: a.GitCommitHash, + LTS: a.LTS, + Channel: a.Channel, + Date: a.Date, + OS: a.OS, + Arch: a.Arch, + Libc: libc, + Ext: ext, + Download: a.Download, + } +} + +// legacyFieldBackport translates canonical classifier field values to the +// values the legacy Node.js resolver expects. This is called at export time +// only — the canonical values are preserved in Go-native storage (pgstore). +// +// The Node build-classifier re-parses each asset's download filename and drops +// any entry where the cache field doesn't match what it extracts from the name. +// These translations ensure the cache matches the classifier's extraction. +// +// Global OS translations: +// - sunos → solaris: Node's classifier maps "sunos" filenames to "solaris". +// LIVE_cache has "solaris" and "illumos" but never "sunos". 
+// +// Global arch translations (all packages): +// - universal2/universal1 → x86_64: classifier maps "universal" in filename +// to x86_64. The darwin WATERFALL falls back aarch64→x86_64, so arm64 +// users still receive these builds. +// - x86_64_v2/v3/v4 → x86_64: AMD64 microarch levels not in LIVE_cache; +// fold to baseline x86_64. +// - mips64r6 → mips64: exotic MIPS64R6, not in LIVE_cache. +// - mips64r6el → mips64le: exotic MIPS64R6 little-endian, not in LIVE_cache. +// - ARM (filename-based): explicit armvN takes priority over ABI tags. +// Go normalizes these; see legacyARMArchFromFilename for filename extraction. +// Final ARM vocab mapping to LIVE_cache values: +// armv6→armv6l, armv7a→armv7l, armhf→armv7l, armel→arm. +// - powerpc (32-bit): not in LIVE_cache; entry is dropped. +// +// Note: mipsle and mips64le are kept as-is — LIVE_cache uses these exact values. +// Note: solaris and illumos are kept as-is — both exist in LIVE_cache. +// +// Package-specific rules replicate per-package overrides in production's releases.js: +// - ffmpeg: Windows .gz → .exe (prod releases.js: rel.ext = 'exe') +// +// Git-clone entries: +// - format="git" with empty OS/arch → os="*", arch="*" +// The legacy cache uses "*" for ANYOS/ANYARCH (builds-cacher LEGACY_OS_MAP['*']='ANYOS'). +// vim plugins, aliasman, serviceman, and other POSIX packages use this format. +func legacyFieldBackport(pkg string, a Asset) Asset { + // Git-clone entries are ANYOS/ANYARCH — legacy cache uses "*" for these. + // This matches production LIVE_cache for vim-commentary, aliasman, etc. + if a.Format == "git" { + if a.OS == "" { + a.OS = "*" + } + if a.Arch == "" { + a.Arch = "*" + } + } + + // sunos → solaris: Node's classifier maps "sunos" filenames to "solaris". + // LIVE_cache has "solaris" and "illumos" but never "sunos". + if a.OS == "sunos" { + a.OS = "solaris" + } + + // darwin → macos: LIVE_cache pre-classified packages (go, node, zig, fish, etc.) + // use "macos". 
Julia is the sole exception — LIVE julia.json uses "darwin". + if a.OS == "darwin" && pkg != "julia" { + a.OS = "macos" + } + + // Universal fat binaries: expandUniversal splits these into per-arch + // entries earlier in the pipeline. This is a safety fallback in case + // any universal entries reach the legacy export unexpectedly. + if a.Arch == "universal2" || a.Arch == "universal1" { + a.Arch = "x86_64" + } + + // AMD64 microarch levels: not in LIVE_cache; fold to baseline x86_64. + switch a.Arch { + case "x86_64_v2", "x86_64_v3", "x86_64_v4": + a.Arch = "x86_64" + } + + // x86_64 → amd64, aarch64 → arm64: LIVE_cache pre-classified packages use + // "amd64" and "arm64". Go's classifier uses "x86_64" and "aarch64". + // These come after universal2→x86_64 and x86_64_v*/→x86_64 so the chains work. + if a.Arch == "x86_64" { + a.Arch = "amd64" + } + if a.Arch == "aarch64" { + a.Arch = "arm64" + } + + // MIPS variants not in LIVE_cache: fold to nearest supported value. + // mipsle and mips64le are kept as-is — LIVE_cache uses these exact spellings. + switch a.Arch { + case "mips64r6": + a.Arch = "mips64" + case "mips64r6el": + a.Arch = "mips64le" + } + + // powerpc (32-bit): not in LIVE_cache; mark for drop by clearing both fields. + // Per-package taggers (uuidv7, watchexec) handle this via variant tags, but + // for any package without a tagger, clear here so the NoTarget filter drops it. + if a.Arch == "powerpc" { + a.OS = "" + a.Arch = "" + } + + // ARM arch: the Node classifier re-parses filenames and expects the cache + // arch to match what it extracts. Go normalizes arch values; use filename + // heuristics to match what Node would extract. + switch a.Arch { + case "armv5", "armv6", "armv7": + if leg := legacyARMArchFromFilename(a.Filename); leg != "" { + a.Arch = leg + } + } + // Translate ARM arch values to LIVE_cache vocabulary. 
// legacyARMArchFromFilename returns the arch string the Node build-classifier
// would extract from a filename for ARM-family builds. Returns "" when none
// of the known markers occurs in the name.
//
// Matching is a case-insensitive substring search; the markers are tried in
// the order listed and the first hit wins:
//
//   - armv7a     → "armv7a" (listed before armv7, which is its prefix)
//   - armv7      → "armv7"  (explicit version beats an ABI suffix such as
//     gnueabihf, e.g. "armv7-unknown-linux-gnueabihf")
//   - armv6hf    → "armhf"  (shellcheck naming)
//   - arm-5      → "armel"  (Gitea naming, "linux-arm-5")
//   - arm-7      → "armv7"  (Gitea naming, "linux-arm-7")
//   - gnueabihf  → "armhf"  (Rust triplet without an explicit armvN)
//   - armhf      → "armhf"  (Debian hard-float ABI)
//   - armel      → "armel"  (Debian soft-float ABI)
//   - armv5      → "armel"
func legacyARMArchFromFilename(filename string) string {
	markers := [...]struct{ token, arch string }{
		{"armv7a", "armv7a"},
		{"armv7", "armv7"},
		{"armv6hf", "armhf"},
		{"arm-5", "armel"},
		{"arm-7", "armv7"},
		{"gnueabihf", "armhf"},
		{"armhf", "armhf"},
		{"armel", "armel"},
		{"armv5", "armel"},
	}

	name := strings.ToLower(filename)
	for _, m := range markers {
		if strings.Contains(name, m.token) {
			return m.arch
		}
	}
	return ""
}
+func ExportLegacy(pkg string, pd PackageData) (LegacyCache, LegacyDropStats) { + var releases []LegacyAsset + var stats LegacyDropStats + + for _, a := range pd.Assets { + // Skip variant builds — Node.js doesn't have variant logic. + if len(a.Variants) > 0 { + stats.Variants++ + continue + } + // Skip android — classifier maps android filenames to linux OS, + // which mismatches cache entries tagged android. + if a.OS == "android" { + stats.Android++ + continue + } + // Skip entries with no OS and no arch, unless they're git-clone packages. + // Source tarballs (cmake, dashcore, bun npm) have format != "git". + // Git-clone packages (vim plugins, aliasman) legitimately have no OS/arch — + // legacyFieldBackport will translate them to os="*", arch="*". + if a.OS == "" && a.Arch == "" && a.Format != "git" { + stats.NoTarget++ + continue + } + // Apply per-package and global legacy field translations. + a = legacyFieldBackport(pkg, a) + // Skip formats Node.js doesn't recognize. + if a.Format != "" && !legacyFormats[a.Format] { + stats.Formats++ + continue + } + releases = append(releases, a.toLegacy()) + } + if releases == nil { + releases = []LegacyAsset{} + } + + // Build sorted summary arrays from the included releases. + // These let the API skip normalize.js vocabulary filtering entirely. + oSet := map[string]bool{} + aSet := map[string]bool{} + lSet := map[string]bool{} + fSet := map[string]bool{} + for _, r := range releases { + if r.OS != "" && r.OS != "*" { + oSet[r.OS] = true + } + if r.Arch != "" && r.Arch != "*" { + aSet[r.Arch] = true + } + if r.Libc != "" { + lSet[r.Libc] = true + } + if r.Ext != "" { + fSet[strings.TrimPrefix(r.Ext, ".")] = true + } + } + lc := LegacyCache{ + OSes: sortedKeys(oSet), + Arches: sortedKeys(aSet), + Libcs: sortedKeys(lSet), + Formats: sortedKeys(fSet), + Releases: releases, + } + return lc, stats +} + +// sortedKeys returns the keys of a string set in sorted order. 
// sortedKeys returns the keys of a string set in ascending order.
//
// Every key is included regardless of the bool stored under it. A nil or
// empty map yields a nil slice.
func sortedKeys(m map[string]bool) []string {
	n := len(m)
	if n == 0 {
		return nil
	}
	keys := make([]string, n)
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	sort.Strings(keys)
	return keys
}
t.Errorf("OS = %q", a.OS) + } + if a.Arch != "" { + t.Errorf("Arch = %q, want %q (wildcard '*' reversed to empty)", a.Arch, "") + } + if a.Download != "https://codeload.github.com/BeyondCodeBootcamp/aliasman/legacy.tar.gz/refs/tags/v1.1.2" { + t.Errorf("Download = %q", a.Download) + } + + // Round-trip: export back to legacy and verify JSON shape. + lc2, _ := storage.ExportLegacy("aliasman", pd) + data, _ := json.MarshalIndent(lc2, "", " ") + var lc3 storage.LegacyCache + json.Unmarshal(data, &lc3) + + if lc3.Releases[0].Name != a.Filename { + t.Errorf("round-trip Name = %q, want %q", lc3.Releases[0].Name, a.Filename) + } + // Legacy data has ext:"" for this tarball — broken cache entry. + // toLegacy normalizes Format="" to ext:"exe" (bare binary convention). + // In the real Go pipeline, aliasman would have Format=".tar.gz". + if lc3.Releases[0].Ext != "exe" { + t.Errorf("round-trip Ext = %q, want %q", lc3.Releases[0].Ext, "exe") + } +} + +// TestExportLegacyDrops verifies that ExportLegacy correctly drops and counts +// assets that can't be represented in the Node.js legacy cache format. +func TestExportLegacyDrops(t *testing.T) { + t.Run("variant_builds_dropped", func(t *testing.T) { + // Assets with variant tags (rocm, installer, fxdependent, etc.) are + // dropped because Node.js has no variant-selection logic. 
+ pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "ollama-linux-amd64-rocm.tgz", OS: "linux", Arch: "x86_64", Format: ".tar.gz", Variants: []string{"rocm"}}, + {Filename: "ollama-linux-amd64.tgz", OS: "linux", Arch: "x86_64", Format: ".tar.gz"}, + }, + } + lc, stats := storage.ExportLegacy("ollama", pd) + if stats.Variants != 1 { + t.Errorf("Variants dropped = %d, want 1", stats.Variants) + } + if len(lc.Releases) != 1 { + t.Errorf("releases = %d, want 1 (baseline only)", len(lc.Releases)) + } + if lc.Releases[0].Name != "ollama-linux-amd64.tgz" { + t.Errorf("kept wrong release: %q", lc.Releases[0].Name) + } + }) + + t.Run("android_dropped", func(t *testing.T) { + // Android entries are dropped: the classifier maps android filenames to + // linux OS and then rejects the cache entry that says android. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "fzf-0.57.0-android-arm64.tar.gz", OS: "android", Arch: "aarch64", Format: ".tar.gz"}, + {Filename: "fzf-0.57.0-linux-arm64.tar.gz", OS: "linux", Arch: "aarch64", Format: ".tar.gz"}, + }, + } + lc, stats := storage.ExportLegacy("fzf", pd) + if stats.Android != 1 { + t.Errorf("Android dropped = %d, want 1", stats.Android) + } + if len(lc.Releases) != 1 { + t.Errorf("releases = %d, want 1 (linux only)", len(lc.Releases)) + } + }) + + t.Run("unknown_formats_dropped", func(t *testing.T) { + // .AppImage, .deb, .rpm are not in the Node.js format set. + // Assets have Arch set (matching real classifier output for these formats). 
+ pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "tool.AppImage", OS: "linux", Arch: "x86_64", Format: ".AppImage"}, + {Filename: "tool.deb", OS: "linux", Arch: "x86_64", Format: ".deb"}, + {Filename: "tool.rpm", OS: "linux", Arch: "x86_64", Format: ".rpm"}, + {Filename: "tool-linux-amd64.tar.gz", OS: "linux", Arch: "x86_64", Format: ".tar.gz"}, + }, + } + lc, stats := storage.ExportLegacy("tool", pd) + if stats.Formats != 3 { + t.Errorf("Formats dropped = %d, want 3", stats.Formats) + } + if len(lc.Releases) != 1 { + t.Errorf("releases = %d, want 1 (tar.gz only)", len(lc.Releases)) + } + }) + + t.Run("empty_format_passes_through", func(t *testing.T) { + // Assets with empty format (e.g. bare binaries, git sources) pass through. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "jq-linux-amd64", OS: "linux", Arch: "x86_64", Format: ""}, + }, + } + lc, stats := storage.ExportLegacy("jq", pd) + if stats.Formats != 0 { + t.Errorf("Formats dropped = %d, want 0", stats.Formats) + } + if len(lc.Releases) != 1 { + t.Errorf("releases = %d, want 1", len(lc.Releases)) + } + }) +} + +// TestExportLegacyTranslations verifies that legacyFieldBackport applies the +// correct field translations for Node.js compatibility. +func TestExportLegacyTranslations(t *testing.T) { + t.Run("universal2_translated_to_amd64", func(t *testing.T) { + // universal2 fat binaries: the Node classifier sees "universal" in the + // filename and maps it to x86_64. Cache must say amd64 (via universal2→x86_64→amd64 + // chain) to match. The darwin WATERFALL (arm64 → [arm64, amd64]) means arm64 + // users also receive these builds as a fallback. 
+ pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "hugo_0.145.0_darwin-universal.tar.gz", OS: "darwin", Arch: "universal2", Format: ".tar.gz"}, + {Filename: "hugo_0.145.0_darwin-arm64.tar.gz", OS: "darwin", Arch: "aarch64", Format: ".tar.gz"}, + }, + } + lc, stats := storage.ExportLegacy("hugo", pd) + if stats.Variants != 0 || stats.Formats != 0 || stats.Android != 0 { + t.Errorf("unexpected drops: %+v", stats) + } + if len(lc.Releases) != 2 { + t.Fatalf("releases = %d, want 2", len(lc.Releases)) + } + var universal2Arch string + for _, r := range lc.Releases { + if r.Name == "hugo_0.145.0_darwin-universal.tar.gz" { + universal2Arch = r.Arch + } + } + if universal2Arch != "amd64" { + t.Errorf("universal2 arch in legacy = %q, want amd64 (universal2→x86_64→amd64)", universal2Arch) + } + }) + + t.Run("solaris_kept_as_is", func(t *testing.T) { + // Solaris/illumos/sunos are kept as-is. The build-classifier (triplet.js) + // recognizes all three as distinct values and matches them correctly. 
+ pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "go1.20.1.solaris-amd64.tar.gz", OS: "solaris", Arch: "x86_64", Format: ".tar.gz"}, + }, + } + lc, stats := storage.ExportLegacy("go", pd) + if stats.Android != 0 || stats.Variants != 0 || stats.Formats != 0 { + t.Errorf("unexpected drops: %+v", stats) + } + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].OS != "solaris" { + t.Errorf("OS = %q, want solaris", lc.Releases[0].OS) + } + }) + + t.Run("illumos_kept_as_is", func(t *testing.T) { + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "go1.20.1.illumos-amd64.tar.gz", OS: "illumos", Arch: "x86_64", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("go", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].OS != "illumos" { + t.Errorf("OS = %q, want illumos", lc.Releases[0].OS) + } + }) + + t.Run("darwin_to_macos", func(t *testing.T) { + // All packages except julia translate darwin → macos. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "go1.20.1.darwin-amd64.tar.gz", OS: "darwin", Arch: "aarch64", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("go", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].OS != "macos" { + t.Errorf("OS = %q, want macos (darwin → macos)", lc.Releases[0].OS) + } + }) + + t.Run("julia_darwin_kept_as_is", func(t *testing.T) { + // julia is the sole exception: LIVE julia.json uses "darwin", not "macos". 
+ pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "julia-1.9.3-mac64.tar.gz", OS: "darwin", Arch: "aarch64", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("julia", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].OS != "darwin" { + t.Errorf("OS = %q, want darwin (julia exception — LIVE uses darwin)", lc.Releases[0].OS) + } + }) + + t.Run("x86_64_v2_to_amd64", func(t *testing.T) { + // Micro-arch levels (v2/v3/v4): fold to baseline x86_64, then x86_64→amd64. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "tool-linux-x86_64_v2.tar.gz", OS: "linux", Arch: "x86_64_v2", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("tool", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "amd64" { + t.Errorf("arch = %q, want amd64 (x86_64_v2 → x86_64 → amd64)", lc.Releases[0].Arch) + } + }) + + t.Run("mips64r6_folded", func(t *testing.T) { + // mips64r6/mips64r6el: exotic variants not in LIVE_cache; fold to mips64/mips64le. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "tool-linux-mips64r6.tar.gz", OS: "linux", Arch: "mips64r6", Format: ".tar.gz"}, + {Filename: "tool-linux-mips64r6el.tar.gz", OS: "linux", Arch: "mips64r6el", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("tool", pd) + if len(lc.Releases) != 2 { + t.Fatalf("releases = %d, want 2", len(lc.Releases)) + } + if lc.Releases[0].Arch != "mips64" { + t.Errorf("arch = %q, want mips64 (mips64r6 → mips64)", lc.Releases[0].Arch) + } + if lc.Releases[1].Arch != "mips64le" { + t.Errorf("arch = %q, want mips64le (mips64r6el → mips64le)", lc.Releases[1].Arch) + } + }) + + t.Run("mipsle_unchanged", func(t *testing.T) { + // mipsle: LIVE_cache uses "mipsle" — keep as-is. 
+ pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "caddy_linux_mipsle.tar.gz", OS: "linux", Arch: "mipsle", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("caddy", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "mipsle" { + t.Errorf("arch = %q, want mipsle (LIVE_cache uses mipsle)", lc.Releases[0].Arch) + } + }) + + t.Run("mips64le_unchanged", func(t *testing.T) { + // mips64le: LIVE_cache uses "mips64le" — keep as-is. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "gitea-linux-mips64le.tar.gz", OS: "linux", Arch: "mips64le", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("gitea", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "mips64le" { + t.Errorf("arch = %q, want mips64le (LIVE_cache uses mips64le)", lc.Releases[0].Arch) + } + }) + + t.Run("ffmpeg_windows_gz_to_exe", func(t *testing.T) { + // ffmpeg Windows releases are .gz archives containing a bare .exe. + // Production releases.js overrides ext to 'exe' for install compatibility. 
+ pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "ffmpeg-7.0-windows-amd64.gz", OS: "windows", Arch: "x86_64", Format: ".gz"}, + {Filename: "ffmpeg-7.0-linux-amd64.tar.gz", OS: "linux", Arch: "x86_64", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("ffmpeg", pd) + if len(lc.Releases) != 2 { + t.Fatalf("releases = %d, want 2", len(lc.Releases)) + } + var windowsExt string + for _, r := range lc.Releases { + if r.OS == "windows" { + windowsExt = r.Ext + } + } + if windowsExt != "exe" { + t.Errorf("ffmpeg windows ext = %q, want exe", windowsExt) + } + }) + + t.Run("ffmpeg_translation_not_applied_to_other_packages", func(t *testing.T) { + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "othertool-windows-amd64.gz", OS: "windows", Arch: "x86_64", Format: ".gz"}, + }, + } + lc, _ := storage.ExportLegacy("othertool", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Ext != "gz" { + t.Errorf("ext = %q, want gz (no translation outside ffmpeg)", lc.Releases[0].Ext) + } + }) + + // ARM arch translations: translate Go-canonical values to LIVE_cache vocabulary. + // LIVE_cache uses: armv6l, armv7l, armv7, arm (not armv6, armhf, armel, armv7a). + t.Run("arm_gnueabihf_to_armv7l", func(t *testing.T) { + // gnueabihf ABI suffix (no explicit armvN): filename → armhf → armv7l + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "bat-v0.9.0-arm-unknown-linux-gnueabihf.tar.gz", OS: "linux", Arch: "armv6", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("bat", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armv7l" { + t.Errorf("arch = %q, want armv7l (gnueabihf → armhf → armv7l)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_armhf_to_armv7l", func(t *testing.T) { + // Debian armhf = ARMv7 hard-float; LIVE_cache uses armv7l for this. 
+ pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "caddy_linux_armhf.tar.gz", OS: "linux", Arch: "armv7", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("caddy", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armv7l" { + t.Errorf("arch = %q, want armv7l (armhf → armv7l)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_armel_to_arm", func(t *testing.T) { + // Debian armel = ARM soft-float; LIVE_cache uses "arm" for this. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "caddy_linux_armel.tar.gz", OS: "linux", Arch: "armv6", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("caddy", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "arm" { + t.Errorf("arch = %q, want arm (armel → arm)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_armv5_to_arm", func(t *testing.T) { + // armv5 → legacyARMArchFromFilename → "armel" → "arm" + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "caddy_linux_armv5.tar.gz", OS: "linux", Arch: "armv5", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("caddy", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "arm" { + t.Errorf("arch = %q, want arm (armv5 → armel → arm)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_armv7a_to_armv7l", func(t *testing.T) { + // armv7a (ARM application profile): LIVE_cache uses armv7l. 
+ pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "tool-armv7a-linux.tar.gz", OS: "linux", Arch: "armv7", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("tool", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armv7l" { + t.Errorf("arch = %q, want armv7l (armv7a → armv7l)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_armv7l_filename_to_armv7l", func(t *testing.T) { + // armv7l in filename: legacyARMArchFromFilename extracts "armv7" (armv7l contains armv7), + // then the canonical armv7→armv7l translation maps it to armv7l (the correct API vocab). + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "tool-armv7l-linux.tar.gz", OS: "linux", Arch: "armv7", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("tool", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armv7l" { + t.Errorf("arch = %q, want armv7l (armv7l filename → armv7 → armv7l)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_armv6l_to_armv6l", func(t *testing.T) { + // armv6l in filename: legacyARMArchFromFilename returns "" (no armv7/armhf/etc match). + // armv6 (Go canonical) → armv6l (LIVE_cache vocabulary). + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "tool-armv6l-linux.tar.gz", OS: "linux", Arch: "armv6", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("tool", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armv6l" { + t.Errorf("arch = %q, want armv6l (armv6 → armv6l)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_armv7_gnueabihf_to_armv7l", func(t *testing.T) { + // Files like "ripgrep-14.1.0-armv7-unknown-linux-gnueabihf.tar.gz": + // Go classifies as armv7; the "armv7" term in filename takes priority + // over the gnueabihf ABI suffix. 
legacyARMArchFromFilename returns "armv7", + // then the canonical armv7→armv7l translation produces armv7l. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "ripgrep-14.1.0-armv7-unknown-linux-gnueabihf.tar.gz", OS: "linux", Arch: "armv7", Format: ".tar.gz"}, + }, + } + lc, _ := storage.ExportLegacy("ripgrep", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armv7l" { + t.Errorf("arch = %q, want armv7l (armv7 in filename → armv7 → armv7l)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_armv6hf_to_armhf", func(t *testing.T) { + // shellcheck uses "armv6hf" naming; classifier tpm['armv6hf'] = ARMHF → "armhf". + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "shellcheck-v0.9.0.linux.armv6hf.tar.xz", OS: "linux", Arch: "armv6", Format: ".tar.xz"}, + }, + } + lc, _ := storage.ExportLegacy("shellcheck", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armv7l" { + t.Errorf("arch = %q, want armv7l (armv6hf → armhf → armv7l)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_gitea_arm5_to_armel", func(t *testing.T) { + // Gitea uses "arm-5" naming; patternToTerms converts to "armv5" → tpm → "armel". + // Go sees \barm\b → classifies as armv6. Legacy export must correct to armel. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "gitea-1.20.0-linux-arm-5", OS: "linux", Arch: "armv6", Format: ""}, + }, + } + lc, _ := storage.ExportLegacy("gitea", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "arm" { + t.Errorf("arch = %q, want arm (arm-5 → armel → arm)", lc.Releases[0].Arch) + } + }) + + t.Run("arm_gitea_arm7_to_armv7l", func(t *testing.T) { + // Gitea uses "arm-7" naming; patternToTerms converts to "armv7" → tpm → "armv7". + // Go sees \barm\b → classifies as armv6. 
legacyARMArchFromFilename returns "armv7", + // then the canonical armv7→armv7l translation produces armv7l. + pd := storage.PackageData{ + Assets: []storage.Asset{ + {Filename: "gitea-1.20.0-linux-arm-7", OS: "linux", Arch: "armv6", Format: ""}, + }, + } + lc, _ := storage.ExportLegacy("gitea", pd) + if len(lc.Releases) != 1 { + t.Fatalf("releases = %d, want 1", len(lc.Releases)) + } + if lc.Releases[0].Arch != "armv7l" { + t.Errorf("arch = %q, want armv7l (arm-7 → armv7 → armv7l)", lc.Releases[0].Arch) + } + }) +} + +// TestExportLegacyMixed verifies correct counting when multiple drop categories +// appear together in a single export call. +func TestExportLegacyMixed(t *testing.T) { + pd := storage.PackageData{ + Assets: []storage.Asset{ + // kept: baseline linux build + {Filename: "tool-linux-amd64.tar.gz", OS: "linux", Arch: "x86_64", Format: ".tar.gz"}, + // dropped: variant build + {Filename: "tool-linux-amd64-rocm.tar.gz", OS: "linux", Arch: "x86_64", Format: ".tar.gz", Variants: []string{"rocm"}}, + // dropped: android + {Filename: "tool-android-arm64.tar.gz", OS: "android", Arch: "aarch64", Format: ".tar.gz"}, + // dropped: .AppImage format + {Filename: "tool.AppImage", OS: "linux", Arch: "x86_64", Format: ".AppImage"}, + // kept (translated): universal2 → x86_64 + {Filename: "tool-darwin-universal.tar.gz", OS: "darwin", Arch: "universal2", Format: ".tar.gz"}, + // kept: solaris as-is + {Filename: "tool-solaris-amd64.tar.gz", OS: "solaris", Arch: "x86_64", Format: ".tar.gz"}, + }, + } + lc, stats := storage.ExportLegacy("tool", pd) + + if stats.Variants != 1 { + t.Errorf("Variants = %d, want 1", stats.Variants) + } + if stats.Android != 1 { + t.Errorf("Android = %d, want 1", stats.Android) + } + if stats.Formats != 1 { + t.Errorf("Formats = %d, want 1", stats.Formats) + } + if len(lc.Releases) != 3 { + t.Errorf("releases = %d, want 3 (linux + macos/amd64 + solaris)", len(lc.Releases)) + } + + // Verify universal2 was translated to amd64 (via 
// Asset describes a single downloadable file — one entry in a release.
// A release such as "bat v0.26.1" consists of many assets, one per
// platform/format combination.
//
// The struct deliberately carries no JSON tags: serialization goes through
// [LegacyAsset] for Node.js compatibility, or through a future v2 format.
type Asset struct {
	Filename string
	Version  string
	LTS      bool
	Channel  string
	Date     string
	OS       string
	Arch     string
	Libc     string
	Format   string
	Download string

	// Extra is extra version info for sorting (e.g. build metadata).
	Extra string

	// GitTag is the original git tag (e.g. "v1.2", "master").
	// Only set for format="git".
	GitTag string

	// GitCommitHash is the short commit hash (e.g. "54c216e").
	// Only set for format="git".
	GitCommitHash string

	// Variants lists build qualifiers such as "installer", "rocm",
	// "jetpack5", "fxdependent", etc.
	Variants []string
}

// PackageData bundles every asset known for one package together with
// package-level metadata.
type PackageData struct {
	Assets    []Asset
	UpdatedAt time.Time
}

// Store is the read/write interface for release asset storage.
type Store interface {
	// ListPackages reports the names of all packages held in the store.
	ListPackages(ctx context.Context) ([]string, error)

	// Load fetches all assets for pkg. It returns nil when the package is
	// not cached. The data handed back may be stale; callers should check
	// UpdatedAt.
	Load(ctx context.Context, pkg string) (*PackageData, error)

	// BeginRefresh opens a write transaction for pkg. Stage assets with
	// [RefreshTx.Put], then call Commit to atomically replace the stored
	// data, or Rollback to discard what was staged.
	BeginRefresh(ctx context.Context, pkg string) (RefreshTx, error)
}

// RefreshTx is a write transaction that replaces a package's assets.
type RefreshTx interface {
	// Put stages assets for writing. It may be called repeatedly to append
	// assets incrementally.
	Put(assets []Asset) error

	// Commit atomically swaps the package's stored assets for everything
	// staged through Put.
	Commit(ctx context.Context) error

	// Rollback throws away all staged data.
	Rollback() error
}