mirror of
https://github.com/webinstall/webi-installers.git
synced 2026-04-06 18:36:50 +00:00
feat: add cmd/inspect for package structure inspection
Downloads release archives, unpacks them, and reports internal structure.
Uses httpclient for downloads with content-disposition awareness.
Supports tar.gz, tar.xz, tar.zst, zip, and DMG formats.
Caches downloads in _cache/downloads/{pkg}/{version}/.
This commit is contained in:
625
cmd/inspect/main.go
Normal file
625
cmd/inspect/main.go
Normal file
@@ -0,0 +1,625 @@
|
||||
// Command inspect downloads release archives, unpacks them, and reports
|
||||
// their internal structure. This helps discover how packages are laid out
|
||||
// and whether the layout changes across versions.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// go run ./cmd/inspect -csv distributables.csv -cache ./_cache/downloads ollama sd
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/csv"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"mime"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/webinstall/webi-installers/internal/httpclient"
|
||||
)
|
||||
|
||||
// Row holds the fields of a single record read from distributables.csv.
// Every field carries the raw string found in the like-named CSV column.
type Row struct {
	Package  string // "package" column
	Version  string // "version" column
	Channel  string // "channel" column, e.g. "stable"
	Date     string // "date" column
	OS       string // "os" column; may be empty
	Arch     string // "arch" column
	Libc     string // "libc" column
	Format   string // "format" column, e.g. ".tar.gz"
	Download string // "download" column: the URL that is fetched
	Filename string // "filename" column: name used for the cached file
	Extra    string // "extra" column, e.g. "source"
}
|
||||
|
||||
// archiveFormats is the set of format suffixes that get downloaded and
// unpacked. Membership is tested with a plain map lookup; formats that are
// absent yield false.
var archiveFormats = map[string]bool{
	// Compressed tarballs.
	".tar.gz":  true,
	".tar.xz":  true,
	".tar.bz2": true,
	".tar.zst": true,

	// Other containers.
	".zip": true,
	".dmg": true,

	// Single compressed files.
	".gz": true,
	".xz": true,
}
|
||||
|
||||
// inspectOSes is the set of OS names whose rows are inspected. The empty
// name is included so that source-only packages are covered too.
var inspectOSes = map[string]bool{
	"":        true, // source-only packages
	"darwin":  true,
	"linux":   true,
	"windows": true,
}
|
||||
|
||||
// preferredArch returns the single architecture to download for the given
// OS: "aarch64" for "darwin" and "x86_64" for everything else.
func preferredArch(os_ string) string {
	if os_ == "darwin" {
		return "aarch64"
	}
	return "x86_64"
}
|
||||
|
||||
func main() {
|
||||
csvFile := flag.String("csv", "distributables.csv", "path to distributables CSV")
|
||||
cacheDir := flag.String("cache", "_cache/downloads", "download cache directory")
|
||||
flag.Parse()
|
||||
|
||||
packages := flag.Args()
|
||||
if len(packages) == 0 {
|
||||
log.Fatal("usage: inspect [-csv FILE] [-cache DIR] PACKAGE [PACKAGE...]")
|
||||
}
|
||||
|
||||
rows, err := readCSV(*csvFile)
|
||||
if err != nil {
|
||||
log.Fatalf("read csv: %v", err)
|
||||
}
|
||||
|
||||
client := httpclient.New()
|
||||
// Override timeout for large downloads.
|
||||
client.Timeout = 10 * time.Minute
|
||||
|
||||
for _, pkg := range packages {
|
||||
log.Printf("=== %s ===", pkg)
|
||||
if err := inspectPackage(client, rows, pkg, *cacheDir); err != nil {
|
||||
log.Printf("ERROR: %s: %v", pkg, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func readCSV(path string) ([]Row, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
r := csv.NewReader(f)
|
||||
header, err := r.Read()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Build column index.
|
||||
idx := make(map[string]int, len(header))
|
||||
for i, col := range header {
|
||||
idx[col] = i
|
||||
}
|
||||
|
||||
var rows []Row
|
||||
for {
|
||||
record, err := r.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
get := func(col string) string {
|
||||
if i, ok := idx[col]; ok && i < len(record) {
|
||||
return record[i]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
rows = append(rows, Row{
|
||||
Package: get("package"),
|
||||
Version: get("version"),
|
||||
Channel: get("channel"),
|
||||
Date: get("date"),
|
||||
OS: get("os"),
|
||||
Arch: get("arch"),
|
||||
Libc: get("libc"),
|
||||
Format: get("format"),
|
||||
Download: get("download"),
|
||||
Filename: get("filename"),
|
||||
Extra: get("extra"),
|
||||
})
|
||||
}
|
||||
return rows, nil
|
||||
}
|
||||
|
||||
func inspectPackage(client *http.Client, allRows []Row, pkg, cacheDir string) error {
|
||||
// Filter rows for this package.
|
||||
var pkgRows []Row
|
||||
for _, r := range allRows {
|
||||
if r.Package == pkg {
|
||||
pkgRows = append(pkgRows, r)
|
||||
}
|
||||
}
|
||||
if len(pkgRows) == 0 {
|
||||
return fmt.Errorf("no rows found")
|
||||
}
|
||||
|
||||
// Find latest stable version, fall back to any version.
|
||||
versions := findVersionsByDate(pkgRows)
|
||||
if len(versions) == 0 {
|
||||
return fmt.Errorf("no versions found")
|
||||
}
|
||||
|
||||
latestVer := versions[0]
|
||||
log.Printf(" latest version: %s", latestVer)
|
||||
|
||||
// Check if latest has assets uploaded (more than just source tarballs).
|
||||
latestRows := filterVersion(pkgRows, latestVer)
|
||||
hasRealAssets := false
|
||||
for _, r := range latestRows {
|
||||
if r.Extra != "source" && archiveFormats[r.Format] {
|
||||
hasRealAssets = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If latest looks empty, step back one version.
|
||||
if !hasRealAssets && len(versions) > 1 {
|
||||
latestVer = versions[1]
|
||||
latestRows = filterVersion(pkgRows, latestVer)
|
||||
log.Printf(" latest has no assets, using: %s", latestVer)
|
||||
}
|
||||
|
||||
// Inspect the latest version.
|
||||
if err := inspectVersion(client, pkg, latestVer, latestRows, cacheDir); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Find versions roughly a year apart going back.
|
||||
yearVersions := findYearlyVersions(pkgRows, latestVer)
|
||||
for _, v := range yearVersions {
|
||||
log.Printf(" --- checking %s ---", v)
|
||||
vRows := filterVersion(pkgRows, v)
|
||||
if err := inspectVersion(client, pkg, v, vRows, cacheDir); err != nil {
|
||||
log.Printf(" ERROR: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// findVersionsByDate returns versions sorted newest first, preferring stable.
|
||||
func findVersionsByDate(rows []Row) []string {
|
||||
type vInfo struct {
|
||||
version string
|
||||
date string
|
||||
stable bool
|
||||
}
|
||||
seen := map[string]*vInfo{}
|
||||
for _, r := range rows {
|
||||
if _, ok := seen[r.Version]; !ok {
|
||||
seen[r.Version] = &vInfo{
|
||||
version: r.Version,
|
||||
date: r.Date,
|
||||
stable: r.Channel == "stable",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var vs []*vInfo
|
||||
for _, v := range seen {
|
||||
vs = append(vs, v)
|
||||
}
|
||||
|
||||
// Sort: stable first, then by date descending, then version descending.
|
||||
sort.Slice(vs, func(i, j int) bool {
|
||||
if vs[i].stable != vs[j].stable {
|
||||
return vs[i].stable
|
||||
}
|
||||
if vs[i].date != vs[j].date {
|
||||
return vs[i].date > vs[j].date
|
||||
}
|
||||
return vs[i].version > vs[j].version
|
||||
})
|
||||
|
||||
result := make([]string, len(vs))
|
||||
for i, v := range vs {
|
||||
result[i] = v.version
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// findYearlyVersions picks versions roughly a year apart before the given version.
|
||||
func findYearlyVersions(rows []Row, latestVer string) []string {
|
||||
// Find the date of latest.
|
||||
var latestDate string
|
||||
for _, r := range rows {
|
||||
if r.Version == latestVer && r.Date != "" {
|
||||
latestDate = r.Date
|
||||
break
|
||||
}
|
||||
}
|
||||
if latestDate == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
latestTime, err := time.Parse("2006-01-02", latestDate)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Collect all stable versions with dates.
|
||||
type vd struct {
|
||||
version string
|
||||
date time.Time
|
||||
}
|
||||
seen := map[string]bool{}
|
||||
var all []vd
|
||||
for _, r := range rows {
|
||||
if seen[r.Version] || r.Date == "" || r.Channel != "stable" {
|
||||
continue
|
||||
}
|
||||
seen[r.Version] = true
|
||||
t, err := time.Parse("2006-01-02", r.Date)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if t.Before(latestTime) {
|
||||
all = append(all, vd{r.Version, t})
|
||||
}
|
||||
}
|
||||
|
||||
sort.Slice(all, func(i, j int) bool {
|
||||
return all[i].date.After(all[j].date)
|
||||
})
|
||||
|
||||
// Pick versions roughly a year apart.
|
||||
var result []string
|
||||
nextTarget := latestTime.AddDate(-1, 0, 0)
|
||||
for _, v := range all {
|
||||
if v.date.Before(nextTarget) || v.date.Equal(nextTarget) {
|
||||
result = append(result, v.version)
|
||||
nextTarget = v.date.AddDate(-1, 0, 0)
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func filterVersion(rows []Row, version string) []Row {
|
||||
var result []Row
|
||||
for _, r := range rows {
|
||||
if r.Version == version {
|
||||
result = append(result, r)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// inspectVersion downloads and inspects archives for one version.
|
||||
func inspectVersion(client *http.Client, pkg, version string, rows []Row, cacheDir string) error {
|
||||
// Group by OS, pick one arch per OS, pick distinct formats.
|
||||
type dlKey struct {
|
||||
os_ string
|
||||
format string
|
||||
}
|
||||
selected := map[dlKey]*Row{}
|
||||
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
if !inspectOSes[r.OS] {
|
||||
continue
|
||||
}
|
||||
if !archiveFormats[r.Format] {
|
||||
continue
|
||||
}
|
||||
|
||||
key := dlKey{r.OS, r.Format}
|
||||
existing := selected[key]
|
||||
if existing == nil {
|
||||
selected[key] = r
|
||||
continue
|
||||
}
|
||||
|
||||
// Prefer the preferred arch.
|
||||
pref := preferredArch(r.OS)
|
||||
if r.Arch == pref && existing.Arch != pref {
|
||||
selected[key] = r
|
||||
}
|
||||
// Skip rocm/jetpack variants.
|
||||
if strings.Contains(r.Filename, "rocm") || strings.Contains(r.Filename, "jetpack") {
|
||||
if !strings.Contains(existing.Filename, "rocm") && !strings.Contains(existing.Filename, "jetpack") {
|
||||
continue // keep existing non-special variant
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(selected) == 0 {
|
||||
log.Printf(" %s: no downloadable archives", version)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sort keys for deterministic output.
|
||||
var keys []dlKey
|
||||
for k := range selected {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Slice(keys, func(i, j int) bool {
|
||||
if keys[i].os_ != keys[j].os_ {
|
||||
return keys[i].os_ < keys[j].os_
|
||||
}
|
||||
return keys[i].format < keys[j].format
|
||||
})
|
||||
|
||||
for _, key := range keys {
|
||||
r := selected[key]
|
||||
os_ := r.OS
|
||||
if os_ == "" {
|
||||
os_ = "any"
|
||||
}
|
||||
log.Printf(" [%s] %s %s → %s", version, os_, r.Format, r.Filename)
|
||||
|
||||
dlPath, err := download(client, r.Download, r.Filename, filepath.Join(cacheDir, pkg, version))
|
||||
if err != nil {
|
||||
log.Printf(" download error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
contents, err := unpackAndList(dlPath, r.Format)
|
||||
if err != nil {
|
||||
log.Printf(" unpack error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
printContents(contents)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// download fetches a URL to the cache dir. Returns the path to the cached file.
|
||||
// Skips download if the file already exists.
|
||||
func download(client *http.Client, url, hintFilename, dir string) (string, error) {
|
||||
// Check if already cached by hint filename.
|
||||
cached := filepath.Join(dir, hintFilename)
|
||||
if _, err := os.Stat(cached); err == nil {
|
||||
return cached, nil
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
resp, err := httpclient.Get(ctx, client, url)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("GET %s: %w", url, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("GET %s: %s", url, resp.Status)
|
||||
}
|
||||
|
||||
// Determine filename from Content-Disposition or hint.
|
||||
filename := hintFilename
|
||||
if cd := resp.Header.Get("Content-Disposition"); cd != "" {
|
||||
_, params, err := mime.ParseMediaType(cd)
|
||||
if err == nil {
|
||||
if fn, ok := params["filename"]; ok && fn != "" {
|
||||
filename = fn
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
outPath := filepath.Join(dir, filename)
|
||||
|
||||
// Atomic write: temp file + rename.
|
||||
tmp := outPath + ".tmp"
|
||||
f, err := os.Create(tmp)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
n, err := io.Copy(f, resp.Body)
|
||||
if closeErr := f.Close(); closeErr != nil && err == nil {
|
||||
err = closeErr
|
||||
}
|
||||
if err != nil {
|
||||
os.Remove(tmp)
|
||||
return "", fmt.Errorf("download %s: %w", url, err)
|
||||
}
|
||||
|
||||
if err := os.Rename(tmp, outPath); err != nil {
|
||||
os.Remove(tmp)
|
||||
return "", err
|
||||
}
|
||||
|
||||
log.Printf(" downloaded %s (%d bytes)", filename, n)
|
||||
return outPath, nil
|
||||
}
|
||||
|
||||
// FileEntry is the description of a single item (file, directory or
// symlink) found inside an unpacked archive.
type FileEntry struct {
	// Path is the item's path relative to the extraction root.
	Path string
	// Size is the size reported by the item's file info.
	Size int64
	// Mode is the full file mode, including type bits.
	Mode os.FileMode
	// IsDir is true for directories.
	IsDir bool
	// IsExec is true for non-directories with any execute bit set.
	IsExec bool
	// IsSymlink is true for symbolic links.
	IsSymlink bool
	// LinkTarget is the link destination; only filled in for symlinks.
	LinkTarget string
}
|
||||
|
||||
// unpackAndList extracts an archive to a temp dir and lists contents.
|
||||
func unpackAndList(archivePath, format string) ([]FileEntry, error) {
|
||||
tmpDir, err := os.MkdirTemp("", "webi-inspect-*")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
switch format {
|
||||
case ".tar.gz":
|
||||
err = run("tar", "xzf", archivePath, "-C", tmpDir)
|
||||
case ".tar.xz":
|
||||
err = run("tar", "xJf", archivePath, "-C", tmpDir)
|
||||
case ".tar.bz2":
|
||||
err = run("tar", "xjf", archivePath, "-C", tmpDir)
|
||||
case ".tar.zst":
|
||||
err = run("tar", "--zstd", "-xf", archivePath, "-C", tmpDir)
|
||||
case ".zip":
|
||||
err = run("unzip", "-q", "-o", archivePath, "-d", tmpDir)
|
||||
case ".dmg":
|
||||
err = extractDMG(archivePath, tmpDir)
|
||||
case ".gz":
|
||||
// Single file gzip.
|
||||
base := filepath.Base(archivePath)
|
||||
base = strings.TrimSuffix(base, ".gz")
|
||||
outPath := filepath.Join(tmpDir, base)
|
||||
err = run("sh", "-c", fmt.Sprintf("gunzip -c %q > %q", archivePath, outPath))
|
||||
case ".xz":
|
||||
base := filepath.Base(archivePath)
|
||||
base = strings.TrimSuffix(base, ".xz")
|
||||
outPath := filepath.Join(tmpDir, base)
|
||||
err = run("sh", "-c", fmt.Sprintf("xz -dc %q > %q", archivePath, outPath))
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported format: %s", format)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("extract %s: %w", format, err)
|
||||
}
|
||||
|
||||
return listDir(tmpDir, "")
|
||||
}
|
||||
|
||||
func extractDMG(dmgPath, outDir string) error {
|
||||
// Try 7z first (doesn't require mounting).
|
||||
if _, err := exec.LookPath("7z"); err == nil {
|
||||
return run("7z", "x", "-o"+outDir, dmgPath)
|
||||
}
|
||||
|
||||
// Fall back to hdiutil mount + copy + unmount.
|
||||
mountPoint, err := os.MkdirTemp("", "webi-dmg-*")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer os.RemoveAll(mountPoint)
|
||||
|
||||
if err := run("hdiutil", "attach", dmgPath, "-mountpoint", mountPoint, "-nobrowse", "-quiet"); err != nil {
|
||||
return fmt.Errorf("mount dmg: %w", err)
|
||||
}
|
||||
defer run("hdiutil", "detach", mountPoint, "-quiet")
|
||||
|
||||
// Copy contents.
|
||||
return run("cp", "-R", mountPoint+"/.", outDir)
|
||||
}
|
||||
|
||||
// run executes the named program with args and waits for it to finish. The
// child's standard error is forwarded to this process's standard error; its
// standard output is not captured. The error from (*exec.Cmd).Run is
// returned as is.
func run(name string, args ...string) error {
	child := exec.Command(name, args...)
	child.Stderr = os.Stderr

	return child.Run()
}
|
||||
|
||||
func listDir(root, prefix string) ([]FileEntry, error) {
|
||||
entries, err := os.ReadDir(filepath.Join(root, prefix))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var result []FileEntry
|
||||
for _, e := range entries {
|
||||
relPath := filepath.Join(prefix, e.Name())
|
||||
fullPath := filepath.Join(root, relPath)
|
||||
|
||||
info, err := e.Info()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
entry := FileEntry{
|
||||
Path: relPath,
|
||||
Size: info.Size(),
|
||||
Mode: info.Mode(),
|
||||
IsDir: e.IsDir(),
|
||||
}
|
||||
|
||||
if info.Mode()&os.ModeSymlink != 0 {
|
||||
entry.IsSymlink = true
|
||||
target, _ := os.Readlink(fullPath)
|
||||
entry.LinkTarget = target
|
||||
}
|
||||
|
||||
if !e.IsDir() && info.Mode()&0o111 != 0 {
|
||||
entry.IsExec = true
|
||||
}
|
||||
|
||||
result = append(result, entry)
|
||||
|
||||
if e.IsDir() {
|
||||
sub, err := listDir(root, relPath)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
result = append(result, sub...)
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func printContents(entries []FileEntry) {
|
||||
for _, e := range entries {
|
||||
marker := " "
|
||||
if e.IsExec {
|
||||
marker = "* "
|
||||
}
|
||||
if e.IsDir {
|
||||
marker = "d "
|
||||
}
|
||||
if e.IsSymlink {
|
||||
marker = "→ "
|
||||
}
|
||||
|
||||
size := ""
|
||||
if !e.IsDir {
|
||||
size = formatSize(e.Size)
|
||||
}
|
||||
|
||||
line := fmt.Sprintf(" %s%-50s %8s", marker, e.Path, size)
|
||||
if e.IsSymlink {
|
||||
line += " → " + e.LinkTarget
|
||||
}
|
||||
log.Print(line)
|
||||
}
|
||||
}
|
||||
|
||||
// formatSize renders a byte count in short human-readable form using binary
// units: one decimal place with a G, M or K suffix from 1 GiB, 1 MiB and
// 1 KiB upwards respectively, and a plain integer with a B suffix below
// that.
func formatSize(n int64) string {
	const (
		kib = 1 << 10
		mib = 1 << 20
		gib = 1 << 30
	)

	if n >= gib {
		return fmt.Sprintf("%.1fG", float64(n)/gib)
	}
	if n >= mib {
		return fmt.Sprintf("%.1fM", float64(n)/mib)
	}
	if n >= kib {
		return fmt.Sprintf("%.1fK", float64(n)/kib)
	}
	return fmt.Sprintf("%dB", n)
}
|
||||
Reference in New Issue
Block a user