diff --git a/COMPARISON.md b/COMPARISON.md new file mode 100644 index 0000000..7ea4e02 --- /dev/null +++ b/COMPARISON.md @@ -0,0 +1,181 @@ +# Go vs Node.js Cache Comparison + +Systematic comparison of Go pipeline output (`_cache/`) vs Node.js production +cache (`LIVE_cache/`). Generated by `cmd/comparecache`. + +## Key Observations + +### 1. Classification Timing +The Node.js cache stores assets with **empty** os/arch/ext fields — `normalize.js` +fills those at serve time. The Go pipeline classifies at write time. This means +the Go cache has richer data per-asset, but the comparison must be done at the +**filename level**, not the classified fields. + +### 2. Meta-Asset Filtering +Go's `isMetaAsset()` filters out checksums, signatures, SBOMs, etc. Node.js +includes them. This accounts for 43 packages showing `live-has-meta` differences. +**This is correct behavior** — Go is more aggressive about filtering non-installable +files. + +### 3. Source Tarballs +Go includes GitHub source tarballs (tarball_url/zipball_url) for releases with no +binary assets. Node.js does not. Affects 18 packages. **Decision needed**: should +these be included? + +### 4. Unsupported Sources +10 packages are affected: 9 use custom source types not yet implemented in Go, and 1 (macos) has no releases.conf: +- `chromedist` — chromedriver +- `flutterdist` — flutter +- `golang` — go +- `gpgdist` — gpg +- `hashicorp` — terraform +- `iterm2dist` — iterm2 +- `juliadist` — julia +- `mariadbdist` — mariadb +- `zigdist` — zig +- (macos has no releases.conf) + +### 5. Version Depth +Go has deeper version history for most GitHub-sourced packages because it fetches +all pages. Node.js limits to 30 releases per API call. This is a **feature** — Go +provides complete histories. 
+ +## Categories + +| Category | Count | Meaning | +|----------|-------|---------| +| match | 8 | Identical asset filenames | +| go-missing | 4 | Go produces no output (unsupported source or alias) | +| live-missing | 16 | Package exists in Go but not in live cache | +| go-extra-versions | 50 | Go has more version history (deeper fetch) | +| live-extra-versions | 20 | Live has newer data or uses a different source | +| go-extra-assets | 56 | Go includes assets that Node.js filters out | +| live-extra-assets | 22 | Node.js includes assets that Go filters out | +| live-has-meta | 43 | Node.js includes meta-assets (checksums, sigs) | +| go-has-source-tarballs | 18 | Go includes source tarballs for no-binary releases | + +## Per-Package Checklist + +Status: `[x]` reviewed, `[-]` known difference (acceptable), `[ ]` needs review + +### Exact Matches (8) +- [x] atomicparsley — match +- [x] awless — match +- [x] dotenv-linter — match +- [x] hexyl — match +- [x] koji — match +- [x] lf — match +- [x] sd — match +- [x] zoxide — match + +### Go Missing — Unsupported Source (4) +- [-] dashd — alias_of=dashcore, skipped (correct) +- [-] macos — no releases.conf +- [-] pg-essentials — meta-package +- [-] zig.vim — gittag source, 0 raw data? 
+ +### Live Missing — Go-Only Packages (16) +- [-] node-official — Go split, not in live cache +- [-] node-unofficial — Go split, not in live cache +- [-] pg — Go name, live uses postgres +- [-] ripgrep — Go name, live uses rg +- [-] rust.vim — symlink to vim-rust +- [-] vim-airline — gittag packages not in live cache +- [-] vim-airline-themes — gittag packages not in live cache +- [-] vim-ale — gittag packages not in live cache +- [-] vim-devicons — gittag packages not in live cache +- [-] vim-go — gittag packages not in live cache +- [-] vim-nerdtree — gittag packages not in live cache +- [-] vim-prettier — gittag packages not in live cache +- [-] vim-rust — gittag packages not in live cache +- [-] vim-sensible — gittag packages not in live cache +- [-] vim-shfmt — gittag packages not in live cache +- [-] vim-syntastic — gittag packages not in live cache + +### Meta-Asset Only Differences (Go filters, Node.js keeps) +These packages differ only because Go strips checksums/sigs/SBOMs: +- [-] curlie — live-has-meta(21) +- [-] dashmsg — live-has-meta(1) +- [-] dotenv — live-has-meta(1) +- [-] ffuf — live-has-meta(50) +- [-] gitdeploy — live-has-meta(1) +- [-] gprox — live-has-meta(7) +- [-] keypairs — live-has-meta(1) +- [-] monorel — live-has-meta(3) +- [-] ots — live-has-meta(28) +- [-] runzip — live-has-meta(1) +- [-] sclient — live-has-meta(1) +- [-] sqlpkg — live-has-meta(7) +- [-] xz — live-has-meta(1) + +### Unsupported Source Types (Go has 0 assets) +- [ ] chromedriver — source=chromedist, needs custom fetcher +- [ ] flutter — source=flutterdist, needs custom fetcher +- [ ] go — source=golang, needs custom fetcher +- [ ] gpg — source=gpgdist, needs custom fetcher +- [ ] iterm2 — source=iterm2dist, needs custom fetcher +- [ ] julia — source=juliadist, needs custom fetcher +- [ ] mariadb — source=mariadbdist, needs custom fetcher +- [ ] terraform — source=hashicorp, needs custom fetcher +- [ ] zig — source=zigdist, needs custom fetcher + +### Version Depth + 
Extra Assets (needs detailed review) +These have real asset selection differences beyond version depth and meta filtering. + +- [ ] aliasman — go-has-source-tarballs(8), live-extra-assets(8), go-extra-assets(8) +- [ ] arc — go-extra-versions(9), go-has-source-tarballs(18), go-extra-assets(18) +- [ ] bat — go-extra-versions(13), go-extra-assets(85) +- [ ] bun — go-extra-versions(210), live-extra-versions(30), go-extra-assets(85) +- [ ] caddy — go-extra-versions(104), live-has-meta(3415), live-extra-assets(30), go-extra-assets(1180) +- [ ] cilium — go-extra-versions(97), go-extra-assets(5) +- [ ] cmake — go-extra-versions(280), go-extra-assets(4352) +- [ ] comrak — go-extra-versions(60), live-extra-versions(1), live-extra-assets(6), go-extra-assets(194) +- [ ] crabz — go-extra-assets(4) +- [ ] dashcore — go-extra-versions(101), live-has-meta(327), go-extra-assets(896) +- [ ] delta — go-extra-versions(29), go-extra-assets(304) +- [ ] deno — go-extra-versions(338), live-extra-assets(1), go-extra-assets(4) +- [ ] duckdns.sh — go-has-source-tarballs(6), live-extra-assets(6), go-extra-assets(6) +- [ ] fd — go-extra-versions(15), live-extra-versions(1), live-extra-assets(21), go-extra-assets(66) +- [ ] ffmpeg — go-extra-versions(11), live-extra-versions(2), go-extra-assets(68) +- [ ] fish — go-extra-versions(35), go-extra-assets(182) +- [ ] fzf — go-extra-versions(46), go-extra-assets(669) +- [ ] gh — go-extra-versions(159), go-extra-assets(2644) +- [ ] git — go-extra-versions(339), go-extra-assets(3724) +- [ ] gitea — go-extra-versions(194), live-has-meta(1968), go-extra-assets(4600) +- [ ] goreleaser — go-extra-versions(556), go-extra-assets(1657) +- [ ] grype — go-extra-versions(161), live-has-meta(90), go-extra-assets(1840) +- [ ] hugo — go-extra-versions(335), live-has-meta(30), go-extra-assets(8176) +- [ ] hugo-extended — go-extra-versions(335), go-extra-assets(8206) +- [ ] jq — go-extra-versions(15), live-extra-versions(12), go-extra-assets(7) +- [ ] k9s — 
go-extra-versions(227), go-extra-assets(749) +- [ ] kind — go-extra-versions(7), live-has-meta(7) +- [ ] kubectx — go-extra-versions(15), go-has-source-tarballs(30), go-extra-assets(78) +- [ ] kubens — go-extra-versions(15), go-has-source-tarballs(30), go-extra-assets(78) +- [ ] lsd — go-extra-versions(2), go-extra-assets(188) +- [ ] mutagen — go-extra-versions(82), go-has-source-tarballs(4), go-extra-assets(2478) +- [ ] node — go-extra-versions(713), live-extra-assets(431), go-extra-assets(15208) +- [ ] ollama — go-extra-versions(160), live-extra-assets(8), go-extra-assets(15) +- [ ] pandoc — go-extra-versions(123), go-extra-assets(698) +- [ ] pathman — go-extra-assets(1) +- [ ] postgres — go-extra-versions(4), live-extra-versions(8), live-extra-assets(6), go-extra-assets(24) +- [ ] psql — go-extra-versions(4), live-extra-versions(4), go-extra-assets(28) +- [ ] pwsh — go-extra-versions(163), go-extra-assets(3407) +- [ ] rclone — go-extra-versions(92), go-extra-assets(2548) +- [ ] rg — go-extra-versions(44), live-has-meta(100), go-extra-assets(282) +- [ ] sass — go-extra-versions(283), go-extra-assets(2194) +- [ ] serviceman — live-extra-versions(1), live-extra-assets(18), go-extra-assets(12) +- [ ] shellcheck — go-extra-versions(17), go-has-source-tarballs(34), go-extra-assets(34) +- [ ] shfmt — go-extra-versions(22), go-extra-assets(360) +- [ ] sqlc — go-extra-versions(7), go-has-source-tarballs(2), go-extra-assets(19) +- [ ] sttr — go-extra-versions(4), live-has-meta(42), go-extra-assets(20) +- [ ] syncthing — go-extra-versions(431), live-extra-assets(1), go-extra-assets(11983) +- [ ] terramate — go-extra-versions(193), live-has-meta(421), go-extra-assets(1744) +- [ ] tinygo — go-extra-versions(19), go-extra-assets(84) +- [ ] trip — go-extra-versions(6), go-extra-assets(12) +- [ ] uuidv7 — go-extra-assets(16) +- [ ] vim-commentary — go-extra-versions(5), live-extra-versions(5), live-extra-assets(5), go-extra-assets(5) +- [ ] vim-zig — go-extra-versions(1), 
live-extra-versions(1), live-extra-assets(1), go-extra-assets(1) +- [ ] watchexec — go-extra-versions(83), live-extra-versions(2), live-has-meta(1407), go-extra-assets(671) +- [ ] xcaddy — go-extra-assets(123) +- [ ] xsv — go-extra-versions(35), go-extra-assets(36) +- [ ] yq — go-extra-versions(134), go-extra-assets(17) diff --git a/GO_WEBI.md b/GO_WEBI.md index 7d41486..ac69465 100644 --- a/GO_WEBI.md +++ b/GO_WEBI.md @@ -281,12 +281,15 @@ Node.js server. - [x] `internal/rawcache` — double-buffered raw upstream response storage - [x] `internal/classify` — build artifact classifier (80/20, filename→target) - [x] `internal/platlatest` — per-platform latest version index -- [ ] End-to-end: fetch complete histories for a few real packages -- [ ] Per-installer config format (fallback rules, version-ranged overrides) +- [x] End-to-end: fetch complete histories for all 103 packages +- [x] `internal/installerconf` — flat key=value config parser with typed struct - [ ] Resolver (platlatest + installer config + CompatArches → pick binary) -- [ ] `internal/storage` — interface definition -- [ ] `internal/storage/fsstore` — filesystem implementation -- [ ] `cmd/webicached` — cache daemon that can replace the Node.js caching +- [x] `internal/storage` — interface definition (Asset, PackageData, Store, RefreshTx) +- [x] `internal/storage/legacy.go` — LegacyAsset/LegacyCache for Node.js compat +- [x] `internal/storage/fsstore` — filesystem implementation (atomic writes) +- [x] `cmd/webicached` — cache daemon (fetch → classify → write, all sources) +- [x] `cmd/comparecache` — Go vs Node.js cache comparison tool +- [ ] Comparison review: see `COMPARISON.md` for per-package checklist **Integration point:** `webicached` writes the same `_cache/` JSON format. The Node.js server can read from it. Zero-risk cutover for release fetching. 
diff --git a/cmd/comparecache/main.go b/cmd/comparecache/main.go new file mode 100644 index 0000000..d73ff27 --- /dev/null +++ b/cmd/comparecache/main.go @@ -0,0 +1,439 @@ +// Command comparecache compares Go-generated cache output against the +// Node.js LIVE_cache. It identifies categorical differences in asset +// selection — which filenames appear in one cache but not the other. +// +// The comparison is done at the filename level (not OS/arch/ext fields) +// because the Node.js cache leaves those empty (normalize.js fills them +// at serve time), while the Go pipeline classifies at write time. +// +// Usage: +// +// go run ./cmd/comparecache -live ./LIVE_cache -go ./_cache +// go run ./cmd/comparecache -live ./LIVE_cache -go ./_cache bat jq +// go run ./cmd/comparecache -live ./LIVE_cache -go ./_cache -summary +package main + +import ( + "encoding/json" + "flag" + "fmt" + "log" + "os" + "path/filepath" + "sort" + "strings" +) + +type cacheEntry struct { + Releases []struct { + Name string `json:"name"` + Version string `json:"version"` + Download string `json:"download"` + Channel string `json:"channel"` + OS string `json:"os"` + Arch string `json:"arch"` + Ext string `json:"ext"` + } `json:"releases"` +} + +type packageDiff struct { + Name string + LiveCount int + GoCount int + OnlyInLive []string // filenames only in Node.js cache + OnlyInGo []string // filenames only in Go cache + VersionsLive []string // unique versions in live + VersionsGo []string // unique versions in go + GoMissing bool // true if Go didn't produce output for this package + LiveMissing bool // true if no live cache for this package + Categories []string // categorical difference labels +} + +func main() { + liveDir := flag.String("live", "./LIVE_cache", "path to Node.js LIVE_cache directory") + goDir := flag.String("go", "./_cache", "path to Go cache directory") + summary := flag.Bool("summary", false, "only print summary, not per-package details") + latest := flag.Bool("latest", 
false, "only compare latest version in each cache") + flag.Parse() + filterPkgs := flag.Args() + + // Find the most recent month directory in each cache. + liveMonth := findLatestMonth(*liveDir) + goMonth := findLatestMonth(*goDir) + if liveMonth == "" { + log.Fatalf("no month directories found in %s", *liveDir) + } + + livePath := filepath.Join(*liveDir, liveMonth) + goPath := "" + if goMonth != "" { + goPath = filepath.Join(*goDir, goMonth) + } + + // Discover all packages across both caches. + allPkgs := discoverPackages(livePath, goPath) + if len(filterPkgs) > 0 { + nameSet := make(map[string]bool, len(filterPkgs)) + for _, n := range filterPkgs { + nameSet[n] = true + } + var filtered []string + for _, p := range allPkgs { + if nameSet[p] { + filtered = append(filtered, p) + } + } + allPkgs = filtered + } + + var diffs []packageDiff + for _, pkg := range allPkgs { + d := compare(livePath, goPath, pkg, *latest) + categorize(&d) + diffs = append(diffs, d) + } + + if *summary { + printSummary(diffs) + } else { + printDetails(diffs) + } +} + +func findLatestMonth(dir string) string { + entries, err := os.ReadDir(dir) + if err != nil { + return "" + } + var months []string + for _, e := range entries { + if e.IsDir() && len(e.Name()) == 7 && e.Name()[4] == '-' { + months = append(months, e.Name()) + } + } + if len(months) == 0 { + return "" + } + sort.Strings(months) + return months[len(months)-1] +} + +func discoverPackages(livePath, goPath string) []string { + seen := make(map[string]bool) + for _, dir := range []string{livePath, goPath} { + if dir == "" { + continue + } + entries, err := os.ReadDir(dir) + if err != nil { + continue + } + for _, e := range entries { + name := e.Name() + if strings.HasSuffix(name, ".json") && !strings.HasSuffix(name, ".updated.txt") { + pkg := strings.TrimSuffix(name, ".json") + seen[pkg] = true + } + } + } + var pkgs []string + for p := range seen { + pkgs = append(pkgs, p) + } + sort.Strings(pkgs) + return pkgs +} + +func 
loadCache(dir, pkg string) *cacheEntry { + if dir == "" { + return nil + } + data, err := os.ReadFile(filepath.Join(dir, pkg+".json")) + if err != nil { + return nil + } + var entry cacheEntry + if err := json.Unmarshal(data, &entry); err != nil { + return nil + } + return &entry +} + +func compare(livePath, goPath, pkg string, latestOnly bool) packageDiff { + live := loadCache(livePath, pkg) + goCache := loadCache(goPath, pkg) + + d := packageDiff{Name: pkg} + + if live == nil { + d.LiveMissing = true + } + if goCache == nil { + d.GoMissing = true + } + if d.LiveMissing && d.GoMissing { + return d + } + + // Collect filenames by version. + type versionFiles struct { + version string + files map[string]bool + } + + extractVersionFiles := func(ce *cacheEntry) (map[string]map[string]bool, []string) { + vf := make(map[string]map[string]bool) + for _, r := range ce.Releases { + if vf[r.Version] == nil { + vf[r.Version] = make(map[string]bool) + } + vf[r.Version][r.Name] = true + } + var versions []string + for v := range vf { + versions = append(versions, v) + } + sort.Strings(versions) + return vf, versions + } + + var liveFiles, goFiles map[string]bool + + if live != nil { + vf, versions := extractVersionFiles(live) + d.VersionsLive = versions + d.LiveCount = len(live.Releases) + + if latestOnly && len(versions) > 0 { + liveFiles = vf[versions[len(versions)-1]] + } else { + liveFiles = make(map[string]bool) + for _, r := range live.Releases { + liveFiles[r.Name] = true + } + } + } else { + liveFiles = make(map[string]bool) + } + + if goCache != nil { + vf, versions := extractVersionFiles(goCache) + d.VersionsGo = versions + d.GoCount = len(goCache.Releases) + + if latestOnly && len(versions) > 0 { + goFiles = vf[versions[len(versions)-1]] + } else { + goFiles = make(map[string]bool) + for _, r := range goCache.Releases { + goFiles[r.Name] = true + } + } + } else { + goFiles = make(map[string]bool) + } + + for f := range liveFiles { + if !goFiles[f] { + d.OnlyInLive = 
append(d.OnlyInLive, f) + } + } + for f := range goFiles { + if !liveFiles[f] { + d.OnlyInGo = append(d.OnlyInGo, f) + } + } + sort.Strings(d.OnlyInLive) + sort.Strings(d.OnlyInGo) + + return d +} + +func categorize(d *packageDiff) { + if d.GoMissing { + d.Categories = append(d.Categories, "go-missing") + return + } + if d.LiveMissing { + d.Categories = append(d.Categories, "live-missing") + return + } + + if len(d.OnlyInLive) == 0 && len(d.OnlyInGo) == 0 { + d.Categories = append(d.Categories, "match") + return + } + + // Check if differences are only version depth (Go has more history). + liveVersionSet := make(map[string]bool, len(d.VersionsLive)) + for _, v := range d.VersionsLive { + liveVersionSet[v] = true + } + goVersionSet := make(map[string]bool, len(d.VersionsGo)) + for _, v := range d.VersionsGo { + goVersionSet[v] = true + } + + goExtraVersions := 0 + for _, v := range d.VersionsGo { + if !liveVersionSet[v] { + goExtraVersions++ + } + } + liveExtraVersions := 0 + for _, v := range d.VersionsLive { + if !goVersionSet[v] { + liveExtraVersions++ + } + } + + if goExtraVersions > 0 { + d.Categories = append(d.Categories, fmt.Sprintf("go-extra-versions(%d)", goExtraVersions)) + } + if liveExtraVersions > 0 { + d.Categories = append(d.Categories, fmt.Sprintf("live-extra-versions(%d)", liveExtraVersions)) + } + + // Check for meta-asset filtering differences. + metaOnlyInLive := 0 + nonMetaOnlyInLive := 0 + for _, f := range d.OnlyInLive { + if isMetaFile(f) { + metaOnlyInLive++ + } else { + nonMetaOnlyInLive++ + } + } + metaOnlyInGo := 0 + nonMetaOnlyInGo := 0 + for _, f := range d.OnlyInGo { + if isMetaFile(f) { + metaOnlyInGo++ + } else { + nonMetaOnlyInGo++ + } + } + + if metaOnlyInLive > 0 { + d.Categories = append(d.Categories, fmt.Sprintf("live-has-meta(%d)", metaOnlyInLive)) + } + if metaOnlyInGo > 0 { + d.Categories = append(d.Categories, fmt.Sprintf("go-has-meta(%d)", metaOnlyInGo)) + } + + // Check for source tarball differences. 
+ srcOnlyInGo := 0 + for _, f := range d.OnlyInGo { + if strings.HasSuffix(f, ".tar.gz") || strings.HasSuffix(f, ".zip") { + if strings.HasPrefix(f, "v") || strings.HasPrefix(f, "refs/") { + srcOnlyInGo++ + } + } + } + if srcOnlyInGo > 0 { + d.Categories = append(d.Categories, fmt.Sprintf("go-has-source-tarballs(%d)", srcOnlyInGo)) + } + + if nonMetaOnlyInLive > 0 { + d.Categories = append(d.Categories, fmt.Sprintf("live-extra-assets(%d)", nonMetaOnlyInLive)) + } + if nonMetaOnlyInGo > 0 { + d.Categories = append(d.Categories, fmt.Sprintf("go-extra-assets(%d)", nonMetaOnlyInGo)) + } +} + +func isMetaFile(name string) bool { + lower := strings.ToLower(name) + for _, suffix := range []string{ + ".sha256", ".sha256sum", ".sha512", ".sha512sum", + ".md5", ".md5sum", ".sig", ".asc", ".pem", + "checksums.txt", "sha256sums", "sha512sums", + ".sbom", ".spdx", ".json.sig", ".sigstore", + ".d.ts", ".pub", + } { + if strings.HasSuffix(lower, suffix) { + return true + } + } + for _, contains := range []string{ + "checksums", "sha256sum", "sha512sum", + } { + if strings.Contains(lower, contains) { + return true + } + } + return false +} + +func printSummary(diffs []packageDiff) { + // Count by category. + categoryCounts := make(map[string]int) + for _, d := range diffs { + for _, c := range d.Categories { + // Strip the count suffix for grouping. 
+ base := c + if idx := strings.Index(c, "("); idx != -1 { + base = c[:idx] + } + categoryCounts[base]++ + } + } + + fmt.Println("=== COMPARISON SUMMARY ===") + fmt.Printf("Total packages: %d\n\n", len(diffs)) + + var cats []string + for c := range categoryCounts { + cats = append(cats, c) + } + sort.Strings(cats) + for _, c := range cats { + fmt.Printf(" %-30s %d\n", c, categoryCounts[c]) + } + + fmt.Println("\n=== PER-PACKAGE CATEGORIES ===") + for _, d := range diffs { + fmt.Printf("%-25s %s\n", d.Name, strings.Join(d.Categories, ", ")) + } +} + +func printDetails(diffs []packageDiff) { + for _, d := range diffs { + fmt.Printf("=== %s ===\n", d.Name) + fmt.Printf(" Categories: %s\n", strings.Join(d.Categories, ", ")) + fmt.Printf(" Live: %d assets, %d versions | Go: %d assets, %d versions\n", + d.LiveCount, len(d.VersionsLive), d.GoCount, len(d.VersionsGo)) + + if len(d.OnlyInLive) > 0 { + fmt.Printf(" Only in LIVE (%d):\n", len(d.OnlyInLive)) + for _, f := range d.OnlyInLive { + if len(d.OnlyInLive) > 20 { + fmt.Printf(" - %s\n", f) + if f == d.OnlyInLive[19] { + fmt.Printf(" ... and %d more\n", len(d.OnlyInLive)-20) + break + } + } else { + fmt.Printf(" - %s\n", f) + } + } + } + + if len(d.OnlyInGo) > 0 { + fmt.Printf(" Only in Go (%d):\n", len(d.OnlyInGo)) + for _, f := range d.OnlyInGo { + if len(d.OnlyInGo) > 20 { + fmt.Printf(" - %s\n", f) + if f == d.OnlyInGo[19] { + fmt.Printf(" ... and %d more\n", len(d.OnlyInGo)-20) + break + } + } else { + fmt.Printf(" - %s\n", f) + } + } + } + + fmt.Println() + } +} diff --git a/cmd/webicached/main.go b/cmd/webicached/main.go index cf74ab0..87fccc9 100644 --- a/cmd/webicached/main.go +++ b/cmd/webicached/main.go @@ -10,7 +10,8 @@ // // go run ./cmd/webicached // go run ./cmd/webicached -conf . 
-cache ./_cache -raw ./_cache/raw bat goreleaser -// go run ./cmd/webicached -once # single pass, no periodic refresh +// go run ./cmd/webicached -once # single pass, no periodic refresh +// go run ./cmd/webicached -once -no-fetch # classify from existing raw data only package main import ( @@ -44,6 +45,7 @@ func main() { rawDir := flag.String("raw", "_cache/raw", "raw cache directory for upstream responses") token := flag.String("token", os.Getenv("GITHUB_TOKEN"), "GitHub API token") once := flag.Bool("once", false, "run once then exit (no periodic refresh)") + noFetch := flag.Bool("no-fetch", false, "skip fetching, classify from existing raw data only") interval := flag.Duration("interval", 15*time.Minute, "refresh interval") flag.Parse() @@ -91,7 +93,7 @@ func main() { continue } - err := refreshPackage(ctx, client, store, *rawDir, pkg, auth) + err := refreshPackage(ctx, client, store, *rawDir, pkg, auth, *noFetch) if err != nil { log.Printf(" ERROR %s: %v", pkg.name, err) } @@ -145,13 +147,15 @@ func discover(dir string) ([]pkgConf, error) { // refreshPackage does the full pipeline for one package: // fetch raw → classify → write to fsstore. -func refreshPackage(ctx context.Context, client *http.Client, store *fsstore.Store, rawDir string, pkg pkgConf, auth *githubish.Auth) error { +func refreshPackage(ctx context.Context, client *http.Client, store *fsstore.Store, rawDir string, pkg pkgConf, auth *githubish.Auth, skipFetch bool) error { name := pkg.name conf := pkg.conf - // Step 1: Fetch raw upstream data to rawcache. - if err := fetchRaw(ctx, client, rawDir, pkg, auth); err != nil { - return fmt.Errorf("fetch: %w", err) + // Step 1: Fetch raw upstream data to rawcache (unless -no-fetch). + if !skipFetch { + if err := fetchRaw(ctx, client, rawDir, pkg, auth); err != nil { + return fmt.Errorf("fetch: %w", err) + } } // Step 2: Classify raw data into assets.