# Mirror of https://github.com/nubenetes/awesome-kubernetes.git
# Synced 2026-05-22 09:03:23 +00:00 (551 lines, 30 KiB, Python)
import asyncio
import json
import os
import re
from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple

import httpx
import yaml

from src.config import GEMINI_API_KEYS, GH_TOKEN, TARGET_REPO, MADRID_TZ
from src.gemini_utils import call_gemini_with_retry
from src.logger import log_event


def normalize_url(url: str) -> str:
    """Return a canonical, lower-cased form of *url*.

    The fragment (``#...``) and query string (``?...``) are dropped, trailing
    slashes are removed, a leading ``http://`` is upgraded to ``https://`` and
    the result is lower-cased.
    """
    without_fragment = url.partition("#")[0]
    base = without_fragment.partition("?")[0].rstrip("/")

    insecure_scheme = "http://"
    # The scheme check is case-sensitive and happens before lower-casing.
    if base.startswith(insecure_scheme):
        base = "https://" + base[len(insecure_scheme):]

    return base.lower()


# Directory scanned for the V1 Markdown pages.
V1_DIR = "docs"
# Directory the generated V2 pages are written to.
V2_DIR = "v2-docs"

# Folder holding the YAML state files below.
_DATA_DIR = "data"
# YAML file the engine loads into / saves from ``self.inventory``.
INVENTORY_PATH = f"{_DATA_DIR}/inventory.yaml"
# YAML file the engine loads into / saves from ``self.structure_map``.
STRUCTURE_MAP_PATH = f"{_DATA_DIR}/structure_map.yaml"


class V2VisionEngine:
    """Generate the V2 Markdown portal from the V1 Markdown archive.

    ``analyze_and_cluster`` is the entry point. It harvests list links from
    ``V1_DIR``, checks their health over HTTP, scores them with Gemini,
    groups them by the dimensions declared in ``__init__`` and writes the
    pages under ``V2_DIR`` together with the ``v2-mkdocs.yml`` navigation.
    """

    # Tags proposed by the AI that ``_calculate_tag`` accepts verbatim.
    _VALID_AI_TAGS = frozenset({
        "[DE FACTO STANDARD]",
        "[ENTERPRISE-STABLE]",
        "[EMERGING / INNOVATION]",
        "[LEGACY / MAINTENANCE]",
        "[ARCHITECTURE-GUIDE]",
        "[TOOLING]",
        "[CASE-STUDY]",
        "[CHEATSHEET]",
    })

    def __init__(self):
        """Declare the taxonomy and AI instructions, then load cached state."""
        # 100% Comprehensive 2026 Taxonomy
        # Maps a dimension title to the V1 file stems (without ".md") it owns.
        # A stem listed under two dimensions ends up in the later one.
        self.dimensions = {
            "Intelligent Control Plane": ["ai", "ai-agents-mcp", "chatgpt", "mlops"],
            "Architectural Foundations": ["introduction", "faq", "kubernetes", "linux", "git", "cloud-arch-diagrams", "matrix-table", "other-awesome-lists", "about"],
            "Platform & Site Reliability": ["sre", "devops", "developerportals", "scaffolding", "finops", "chaos-engineering", "performance-testing-with-jenkins-and-jmeter", "project-management-methodology", "project-management-tools", "qa", "test-automation-frameworks", "testops"],
            "Hardened Infrastructure": ["iac", "terraform", "pulumi", "crossplane", "ansible", "securityascode", "kubernetes-security", "aws-security", "oauth", "devsecops", "kustomize", "liquibase", "chef"],
            "Cloud Providers (Hyperscalers)": ["aws", "azure", "GoogleCloudPlatform", "ibm_cloud", "oraclecloud", "digitalocean", "cloudflare", "scaleway", "managed-kubernetes-in-public-cloud", "public-cloud-solutions", "private-cloud-solutions", "edge-computing", "aws-architecture", "aws-security", "aws-networking", "aws-databases", "aws-storage", "aws-monitoring", "aws-iac", "aws-tools-scripts", "aws-messaging", "aws-data", "aws-devops", "aws-serverless", "aws-containers", "aws-backup", "aws-training", "aws-newfeatures", "aws-miscellaneous", "aws-pricing", "aws-spain"],
            "Networking & Service Mesh": ["networking", "kubernetes-networking", "servicemesh", "istio", "caching", "web-servers", "cloudflare"],
            "The Container Stack": ["docker", "container-managers", "serverless", "kubernetes-autoscaling", "kubernetes-operators-controllers", "kubernetes-storage", "kubernetes-monitoring", "kubernetes-troubleshooting", "kubernetes-backup-migrations", "kubernetes-on-premise", "kubernetes-bigdata", "kubernetes-client-libraries", "kubernetes-releases", "kubernetes-based-devel", "kubernetes-alternatives", "kubectl-commands", "rancher", "openshift", "ocp3", "ocp4", "noops"],
            "Data & Advanced Analytics": ["databases", "nosql", "newsql", "message-queue", "crunchydata", "yaml", "bigdata"],
            "Engineering Pipeline": ["cicd", "gitops", "argo", "flux", "tekton", "jenkins", "jenkins-alternatives", "openshift-pipelines", "sonarqube", "registries", "keptn", "stackstorm", "cicd-kubernetes-plugins"],
            "Developer Ecosystem": ["visual-studio", "javascript", "golang", "python", "java_frameworks", "java_app_servers", "java-and-java-performance-optimization", "dotnet", "angular", "react", "web3", "api", "swagger-code-generator-for-rest-apis", "postman", "lowcode-nocode", "devel-sites", "dom", "linux-dev-env", "ChromeDevTools", "xamarin", "jvm-parameters-matrix-table", "maven-gradle", "embedded-servlet-containers"],
            "Career & Industry": ["recruitment", "hr", "freelancing", "remote-tech-jobs", "workfromhome", "interview-questions", "elearning", "digital-money", "appointment-scheduling", "newsfeeds"]
        }

        # Instructions prepended to every scoring prompt sent to Gemini.
        self.library_criteria = (
            "You are a Technical Librarian in 2026. Your mission is to build a high-density, professional reference library.\n"
            "PHASE 1: TECHNICAL PRESERVATION (HIGH INCLUSIVITY)\n"
            "- KEEP >90% of technical resources.\n"
            "PHASE 2: SOPHISTICATED SYNTHESIS & DATING\n"
            "- Extract precise PUBLICATION DATE (YYYY-MM-DD or YYYY): Look for dates in the URL, Twitter/X post dates, or text context. Return 'N/A' if truly unknown.\n"
            "- Assign QUALITY level (0-5 stars):\n"
            " * 0 stars: Good technical resource (Baseline).\n"
            " * 1 star (🌟): High-quality technical guide or tool.\n"
            " * 2 stars (🌟🌟): Exceptional, enterprise-grade resource.\n"
            " * 3 stars (🌟🌟🌟): Elite Gem. Recommended for all architects.\n"
            " * 4 stars (🌟🌟🌟🌟): Masterclass content or Essential Industry Tool.\n"
            " * 5 stars (🌟🌟🌟🌟🌟): Legendary Resource (e.g., K8s Official Docs, Foundations like Prometheus/Envoy).\n"
            "- Assign a MATURITY TAG based on content type/status.\n"
            "PHASE 3: MANDATORY DESCRIPTIONS (V1 PRIORITY)\n"
            "- If 'Current Desc' is already provided and descriptive, DO NOT CHANGE IT.\n"
            "- If 'Current Desc' is empty, too short, or non-descriptive, generate a professional 1-2 sentence summary.\n"
            "- Style: Technical, neutral, and informative. Language: English only.\n"
        )
        self.inventory = self._load_inventory()
        self.structure_map = self._load_structure_map()

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _load_yaml_dict(path: str) -> Dict:
        """Return the mapping stored in the YAML file *path*, or ``{}``.

        A missing file, an unreadable/invalid file, or a document whose top
        level is not a mapping all yield an empty dict.
        """
        if not os.path.exists(path):
            return {}
        try:
            with open(path, "r", encoding="utf-8") as f:
                loaded = yaml.safe_load(f)
        except (OSError, yaml.YAMLError) as exc:
            log_event(f"[!] Could not load {path}: {exc}")
            return {}
        return loaded if isinstance(loaded, dict) else {}

    @staticmethod
    def _save_yaml(path: str, data) -> None:
        """Write *data* to *path* as UTF-8 YAML, creating the parent folder."""
        parent = os.path.dirname(path)
        if parent:  # os.makedirs("") raises, so skip for bare file names
            os.makedirs(parent, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            yaml.dump(data, f, sort_keys=False, allow_unicode=True)

    @staticmethod
    def _slugify(dim: str) -> str:
        """Turn a dimension title into the file stem used for its page."""
        return dim.lower().replace(" ", "-").replace("&", "and").replace("(", "").replace(")", "")

    @staticmethod
    def _parse_year(value, default: int) -> int:
        """Return *value* as an integer year, or *default* if it is not one.

        Accepts all-digit values ("2024", 2024) and dates that start with a
        four-digit year followed by a dash ("2024-05-01").
        """
        text = str(value).strip() if value is not None else ""
        if text.isdigit():
            return int(text)
        if len(text) > 4 and text[:4].isdigit() and text[4] == "-":
            return int(text[:4])
        return default

    @staticmethod
    def _best_date(entry: Dict) -> str:
        """Return the entry's ``pub_date`` (else ``post_date``) as text, or ""."""
        for key in ("pub_date", "post_date"):
            value = entry.get(key)
            if value and value != "N/A":
                # str() because YAML may have parsed an unquoted date into a date object.
                return str(value)
        return ""

    def _cached_entry(self, url: str) -> Dict:
        """Return the inventory entry for *url* (raw key first, then normalized)."""
        for key in (url, normalize_url(url)):
            entry = self.inventory.get(key)
            if isinstance(entry, dict):
                return entry
        return {}

    def _inventory_entry(self, url: str) -> Dict:
        """Return the mutable inventory dict stored under the raw *url*, creating it."""
        entry = self.inventory.get(url)
        if not isinstance(entry, dict):
            entry = {}
            self.inventory[url] = entry
        return entry

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def _load_inventory(self) -> Dict:
        """Load the inventory from ``INVENTORY_PATH`` (empty dict on any failure)."""
        return self._load_yaml_dict(INVENTORY_PATH)

    def _save_inventory(self):
        """Persist ``self.inventory`` to ``INVENTORY_PATH``."""
        self._save_yaml(INVENTORY_PATH, self.inventory)

    def _load_structure_map(self) -> dict:
        """Load the structure map from ``STRUCTURE_MAP_PATH`` (empty dict on any failure)."""
        return self._load_yaml_dict(STRUCTURE_MAP_PATH)

    def _save_structure_map(self):
        """Persist ``self.structure_map`` to ``STRUCTURE_MAP_PATH``."""
        self._save_yaml(STRUCTURE_MAP_PATH, self.structure_map)

    # ------------------------------------------------------------------
    # Pipeline
    # ------------------------------------------------------------------

    async def analyze_and_cluster(self):
        """Run the full pipeline: gather, health-check, score, cluster, write, save."""
        log_event("STARTING V2 HIGH-DENSITY CHRONOLOGICAL LIBRARY GENERATION", section_break=True)
        all_v1_links, mosaic_html, videos_html = await self._gather_all_v1_content()
        log_event(f"[*] Discovery: Found {len(all_v1_links)} resources in V1 archive.")

        log_event("[*] Phase 1: Health Check & Metadata Enrichment...")
        # Rapid Async Health Check
        health_inventory = await self._verify_link_health(all_v1_links)
        log_event(f"[*] Health Check Complete. {len(health_inventory)}/{len(all_v1_links)} links are online.")

        log_event("[*] Phase 2: Library Evaluation, Year Extraction & Quality Scoring...")
        library_inventory = await self._evaluate_and_score_resources(health_inventory)
        log_event(f"[*] Inventory Refined: {len(library_inventory)} resources kept.")

        log_event("[*] Phase 3: Dimensional Clustering & Chronological Sorting...")
        v2_data = await self._rebuild_structure(library_inventory)

        log_event("[*] Phase 4: Generating Premium Portal Pages...")
        os.makedirs(V2_DIR, exist_ok=True)
        await self._write_premium_files(v2_data, mosaic_html, videos_html)
        await self._sync_enterprise_navigation(v2_data)

        self._save_inventory()
        self._save_structure_map()
        log_event("V2 LIBRARY GENERATION COMPLETED.", section_break=True)

    async def _gather_all_v1_content(self) -> Tuple[List[Dict], str, str]:
        """Collect list links plus the mosaic and video blocks from the V1 docs.

        Returns:
            ``(links, mosaic_html, videos_html)`` where each link is a dict
            with ``title``, ``url``, ``description`` and ``original_file``.
            The two HTML strings are empty when the V1 index or the matching
            block is absent.
        """
        all_links = []
        mosaic_html = ""
        videos_html = ""

        index_path = f"{V1_DIR}/index.md"
        if os.path.exists(index_path):
            with open(index_path, "r", encoding="utf-8") as f:
                idx_content = f.read()
            # Find the BIG mosaic (the one with many images)
            # Support both old <center> and new <div style="text-align: center;" markdown="1">
            mosaics = re.findall(r'<(?:div style="text-align: center;" markdown="1"|center markdown="1"|center)>\s*(.*?)\s*</(?:div|center)>', idx_content, re.DOTALL)
            # Keep the first centered block containing more than five image links.
            mosaic_html = next((m for m in mosaics if m.count("[![") > 5), "")

            videos_match = re.search(r'\?\?\? note "Top Videos & Clips.*?\n(.*?\n)\s*</center>', idx_content, re.DOTALL)
            if videos_match:
                videos_html = videos_match.group(1)

        # "- [title](url) description", with indented continuation lines
        # captured as part of the description.
        link_pattern = re.compile(r'^\s*-\s*\[([^\]]+)\]\(([^\)]+)\)(.*?(?:\n\s{2,}.*)*)', re.MULTILINE)

        for root, _, files in os.walk(V1_DIR):
            for file in files:
                if not file.endswith(".md") or file == "index.md":
                    continue
                path = os.path.join(root, file)
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
                for m in link_pattern.finditer(content):
                    title, url, full_desc = m.groups()

                    # Convert relative .md links to absolute V1 links for cross-edition stability.
                    # Only the trailing ".md" is rewritten; other relative
                    # links (e.g. "images/...") are kept unchanged.
                    is_relative = not url.startswith(("http://", "https://", "mailto:", "#"))
                    if is_relative and url.endswith(".md"):
                        url = f"https://nubenetes.com/{url[:-3]}/"

                    all_links.append({
                        "title": title,
                        "url": url,
                        "description": full_desc.strip(),
                        "original_file": file
                    })
        return all_links, mosaic_html, videos_html

    async def _verify_link_health(self, links: List[Dict]) -> List[Dict]:
        """Health-check *links* in batches and return those that were not dropped.

        Links are dropped only when ``_check_single_link_resilient`` returns
        ``None`` (a definitive 404).
        """
        online_links = []
        BATCH_SIZE = 50  # Smaller batches for stability

        # User-Agent rotation to mimic real browsers
        user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0"
        ]

        # NOTE(security): verify=False disables TLS certificate verification for
        # every request made through this client.
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True, verify=False) as client:
            for start in range(0, len(links), BATCH_SIZE):
                batch = links[start:start + BATCH_SIZE]
                # Rotate per link: indexing by the batch offset alone (a
                # multiple of 50) would only ever pick two of the agents.
                tasks = [
                    self._check_single_link_resilient(
                        client, link, user_agents[(start + offset) % len(user_agents)]
                    )
                    for offset, link in enumerate(batch)
                ]

                results = await asyncio.gather(*tasks)
                online_links.extend(r for r in results if r is not None)

                if start % 500 == 0:
                    log_event(f" [Resilient Health] Verified {start}/{len(links)} links...")

                # Brief pause to avoid triggering Rate Limits
                await asyncio.sleep(0.1)

        return online_links

    async def _check_single_link_resilient(self, client, link: Dict, ua: str, attempts: int = 3) -> Optional[Dict]:
        """Probe one link and annotate it with a ``health_status``.

        Args:
            client: The ``httpx.AsyncClient`` used for the request.
            link: Link dict; mutated in place.
            ua: User-Agent header value for this link.
            attempts: Maximum number of GET attempts.

        Returns:
            ``None`` for a definitive 404; otherwise *link*, with
            ``health_status`` set to ``"unchecked"`` (non-HTTP URL),
            ``"cached"``, ``"online"`` or ``"uncertain"`` (plus
            ``warning="offline"``).
        """
        url = link["url"]

        # mailto:, anchors and relative paths cannot be fetched; keep them
        # without burning retries and back-off sleeps.
        if not url.startswith(("http://", "https://")):
            link["health_status"] = "unchecked"
            return link

        # Cached Health
        if self._cached_entry(url).get("status") == "online":
            link["health_status"] = "cached"
            return link

        # Multi-Attempt Verification with Identity Rotation
        headers = {
            "User-Agent": ua,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Referer": "https://www.google.com/"
        }

        for attempt in range(attempts):
            try:
                # Use GET instead of HEAD as many sites block HEAD or return 405
                resp = await client.get(url, headers=headers, timeout=10.0)
            except Exception:
                # Best effort: any request failure just triggers a retry.
                pass
            else:
                if resp.status_code < 400:
                    self._inventory_entry(url)["status"] = "online"
                    link["health_status"] = "online"
                    return link

                # If 404, it's a definitive fail
                if resp.status_code == 404:
                    log_event(f" [Health] Definitive 404: {url}")
                    return None

            # Backoff before retry (pointless after the last attempt)
            if attempt < attempts - 1:
                await asyncio.sleep(0.5 * (attempt + 1))

        # Soft flagging instead of removal: anything that never succeeded and
        # was not a 404 is kept, but with a warning.
        link["health_status"] = "uncertain"
        link["warning"] = "offline"
        return link

    async def _apply_github_metadata(self, item: Dict) -> None:
        """Merge GitHub repository metadata into *item* and derive its year.

        The year is taken from ``gh_updated`` when present, otherwise from
        ``gh_pushed`` (the only date of that kind ``_fetch_github_metadata``
        returns).
        """
        gh_meta = await self._fetch_github_metadata(item["url"])
        item.update(gh_meta)
        last_activity = gh_meta.get("gh_updated") or gh_meta.get("gh_pushed")
        if last_activity:
            item["year"] = last_activity.split("-")[0]

    async def _evaluate_and_score_resources(self, links: List[Dict]) -> List[Dict]:
        """Attach year, stars, tag and description to each link.

        Links that have a description (their own or a cached ``ai_summary``)
        are tagged locally. The rest are sent to Gemini in batches of 50;
        a failed batch falls back to neutral defaults. Setting the
        ``FORCE_EVAL`` environment variable to ``true`` ignores cached scores.

        Returns:
            The refined list of link dicts. Batch entries the AI response
            does not mention are not included.
        """
        refined = []
        to_evaluate = []
        force_eval = os.getenv("FORCE_EVAL", "false").lower() == "true"

        # Tags and years are always recomputed; cached AI stars are reused to
        # save cost.
        for link in links:
            url = link["url"]
            item = link.copy()
            cached = self._cached_entry(url)
            if not force_eval and "stars" in cached:
                item.update(cached)
                # If cache has a generated description and item is missing one, use it
                if cached.get("ai_summary") and not item.get("description"):
                    item["description"] = cached["ai_summary"]

            # Re-evaluate if description is still missing even after cache check
            if not item.get("description"):
                to_evaluate.append(item)
                continue

            # Re-apply GitHub metadata and mature tagging for cached items
            if "github.com" in url:
                await self._apply_github_metadata(item)

            item["tag"] = self._calculate_tag(item)
            refined.append(item)

        if not to_evaluate:
            return refined

        BATCH_SIZE = 50
        for start in range(0, len(to_evaluate), BATCH_SIZE):
            batch = to_evaluate[start:start + BATCH_SIZE]
            batch_num = start // BATCH_SIZE + 1
            log_event(f" [>] Processing Batch {batch_num} with AI (Mandatory Descriptions)...")

            prompt = (
                f"{self.library_criteria}\n"
                "Respond ONLY with a JSON object: {\"results\": [{\"idx\": int, \"year\": \"YYYY\", \"stars\": int, \"is_video\": bool, \"tag\": \"[TAG]\", \"summary\": \"1-2 sentences description\"}, ...]}\n\n"
                "LINKS:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']}) - Current Desc: {l['description'][:50]}" for idx, l in enumerate(batch)])
            )

            try:
                data = await call_gemini_with_retry(prompt, prefer_flash=True)
                # list() also rejects a null "results" so it takes the fallback path.
                results = list(data.get("results", []))
            except Exception as exc:
                # ``Exception`` (not a bare except) so task cancellation propagates.
                log_event(f" [!] Batch {batch_num} AI evaluation failed ({exc}); using defaults.")
                for link in batch:
                    item = link.copy()
                    item["year"], item["stars"], item["is_video"] = "N/A", 0, "youtube" in link["url"]
                    item["tag"] = self._calculate_tag(item)
                    refined.append(item)
            else:
                for res in results:
                    try:
                        idx = int(res["idx"])
                        # Reject negatives too: they would silently index from the end.
                        if not 0 <= idx < len(batch):
                            continue
                        item = batch[idx].copy()
                        eval_data = {
                            "year": str(res.get("year", "N/A")),
                            "stars": min(max(int(res.get("stars") or 0), 0), 5),
                            "is_video": res.get("is_video", False),
                            "tag": res.get("tag", "[ENTERPRISE-STABLE]"),
                            "ai_summary": res.get("summary", "")
                        }
                        item.update(eval_data)
                        if not item["description"] and item["ai_summary"]:
                            item["description"] = item["ai_summary"]

                        # GitHub overrides
                        if "github.com" in item["url"]:
                            await self._apply_github_metadata(item)
                            eval_data["year"] = item["year"]

                        item["tag"] = self._calculate_tag(item)
                        eval_data["tag"] = item["tag"]

                        # Save to cache. Merge rather than replace so the
                        # health "status" recorded earlier is preserved.
                        self._inventory_entry(item["url"]).update(eval_data)
                        refined.append(item)
                    except Exception as exc:
                        log_event(f" [!] Skipping malformed AI result in batch {batch_num}: {exc}")
                        continue
            await asyncio.sleep(0.3)
        return refined

    def _calculate_tag(self, item: Dict) -> str:
        """Return the maturity tag for *item*.

        GitHub/GitLab URLs are tagged from ``gh_stars`` and ``year``. Other
        items are tagged from keywords in the title, then from an AI-provided
        ``tag`` if it is one of the accepted values, else ``[EXPERT-ARTICLE]``.
        """
        # Dynamic Evolutionary Tagging (Automatic Project Growth Detection)
        url = item.get("url", "").lower()
        stars = item.get("gh_stars") or 0
        year = self._parse_year(item.get("year", "2024"), 2024)

        if "github.com" in url or "gitlab.com" in url:
            if stars > 15000:
                return "[DE FACTO STANDARD]"
            if stars > 3000:
                return "[ENTERPRISE-STABLE]"
            if stars > 500 and year >= 2025:
                return "[HIGH-GROWTH / EMERGING]"
            if year <= 2021 and stars < 100:
                return "[LEGACY / MAINTENANCE]"
            return "[COMMUNITY-TOOL]"

        # Article/Guide Logic
        title = item.get("title", "").lower()
        if "awesome" in title:
            return "[FOUNDATIONAL]"
        if "guide" in title or "architecture" in title:
            return "[ARCHITECTURE-GUIDE]"
        if "deep dive" in title or "internals" in title:
            return "[TECHNICAL-DEEP-DIVE]"
        if "how to" in title or "tutorial" in title:
            return "[CASE-STUDY]"

        # Fallback to AI's tag or defaults (the tag may be missing or null)
        tag = str(item.get("tag") or "").upper()
        if tag in self._VALID_AI_TAGS:
            return tag

        return "[EXPERT-ARTICLE]"

    async def _fetch_github_metadata(self, url: str) -> Dict:
        """Fetch star count and push/creation dates for a GitHub repository URL.

        Returns:
            A dict with ``gh_stars``, ``gh_pushed`` and ``gh_created``
            (dates as ``YYYY-MM-DD`` or ""), or ``{}`` when the URL has no
            ``owner/repo`` part, the request fails, or the API does not
            answer 200.
        """
        match = re.search(r'github\.com/([^/]+)/([^/]+)', url)
        if not match:
            return {}
        owner, repo = match.groups()
        repo = repo.split("#")[0].split("?")[0]  # Clean up
        if repo.endswith(".git"):
            repo = repo[:-4]

        headers = {"Authorization": f"token {GH_TOKEN}"} if GH_TOKEN else {}
        api_url = f"https://api.github.com/repos/{owner}/{repo}"

        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(api_url, headers=headers)
            if resp.status_code != 200:
                return {}
            data = resp.json()
        except (httpx.HTTPError, httpx.InvalidURL, ValueError):
            # Best effort: metadata is optional enrichment.
            return {}

        # ``or`` guards against explicit nulls in the API payload.
        return {
            "gh_stars": data.get("stargazers_count") or 0,
            "gh_pushed": (data.get("pushed_at") or "").split("T")[0],
            "gh_created": (data.get("created_at") or "").split("T")[0]
        }

    async def _rebuild_structure(self, inventory: List[Dict]) -> Dict[str, Dict]:
        """Group *inventory* by dimension and category, sort, and summarize.

        Items whose ``original_file`` is not listed in any dimension go to
        "Architectural Foundations". Each non-empty dimension gets a
        one-sentence Gemini summary, or a fixed fallback sentence on failure.
        """
        v2_structure = {dim: {"summary": "", "categories": {}} for dim in self.dimensions}
        file_to_dim = {}
        for dim, files in self.dimensions.items():
            for stem in files:
                file_to_dim[stem + ".md"] = dim

        for item in inventory:
            dim = file_to_dim.get(item["original_file"], "Architectural Foundations")
            cat_name = item["original_file"].replace(".md", "").capitalize()
            v2_structure[dim]["categories"].setdefault(cat_name, []).append(item)

        for dim, content in v2_structure.items():
            if not content["categories"]:
                continue
            for cat_links in content["categories"].values():
                # Sort by: 1. Stars (DESC), 2. Year (DESC, N/A at the end)
                cat_links.sort(
                    key=lambda x: (
                        -(x.get("stars") or 0),
                        -self._parse_year(x.get("year"), 0)
                    )
                )

            prompt = f"Write a professional 2026 executive summary for '{dim}'. Focus on high-density value. 1 sentence only."
            try:
                content["summary"] = await call_gemini_with_retry(prompt, response_format="text", prefer_flash=True)
            except Exception:
                content["summary"] = f"Impact-driven reference library for {dim}."

        return v2_structure

    def _trending_pool(self) -> List[Dict]:
        """Return inventory entries with 3+ stars, newest first, then most stars.

        Each returned dict is a copy of the inventory entry with its key
        added as ``url``.
        """
        pool = []
        for url, meta in self.inventory.items():
            if isinstance(meta, dict) and (meta.get("stars") or 0) >= 3:
                entry = meta.copy()
                entry["url"] = url
                pool.append(entry)

        # Sort by: 1. Pub/Post Date (DESC), 2. Stars (DESC). Both components
        # are plain values because reverse=True already flips the order.
        pool.sort(
            key=lambda e: (self._best_date(e) or "0000", e.get("stars") or 0),
            reverse=True
        )
        return pool

    async def _write_premium_files(self, data: Dict[str, Dict], mosaic_html: str, videos_html: str):
        """Write ``index.md`` and one page per non-empty dimension into ``V2_DIR``.

        Args:
            data: Structure returned by ``_rebuild_structure``.
            mosaic_html: Mosaic block embedded in the index page.
            videos_html: Video block embedded in the index page.
        """
        master_selection = []
        for dim in data.values():
            for cat_links in dim["categories"].values():
                master_selection.extend([l for l in cat_links if l.get("stars", 0) >= 3])

        # Sort master selection by Stars (DESC), then Year (DESC), then Title (ASC)
        master_selection.sort(
            key=lambda x: (
                -x.get("stars", 0),
                -self._parse_year(x.get("year"), 0),
                x["title"]
            )
        )

        # --- THE AGENTIC PULSE (Trending) ---
        pulse_md = "## ⚡ The Agentic Pulse: Trending Excellence\n"
        pulse_md += "Directly from the latest 2026 curation surges. High-impact technical depth recently added.\n\n"
        for l in self._trending_pool()[:5]:
            stars = "🌟" * l.get("stars", 3)
            date = self._best_date(l)
            date_prefix = f"**({date[:10]})** " if date else ""
            # Entries written by this class carry no title; fall back to the URL.
            title = l.get("title") or l["url"]
            pulse_md += f"- {date_prefix}[**=={title}==**]({l['url']}) {stars}\n"

        # Admonition bodies are indented four spaces, as Python-Markdown requires.
        index_md = (
            "# Nubenetes V2 | The High-Density Library (2026)\n\n"
            "\n\n"
            "!!! quote \"The Library of 2026\"\n"
            "    A meticulously curated reference of over 15,000 resources. This V2 portal preserves technical depth while providing "
            " impact-driven synthesis and expert quality classification.\n\n"
            f"<center markdown=\"1\">\n{mosaic_html}\n</center>\n\n"
            f"{pulse_md}\n"
            "## 🛡️ V2 Taxonomy & Elite Quality Tiers\n"
            "To maximize technical clarity, V2 resources are classified by maturity rather than subjective quality:\n\n"
            "- <span class='md-tag md-tag--success'>[DE FACTO STANDARD]</span>: Foundational industry tools with massive adoption (>10k GitHub stars).\n"
            "- <span class='md-tag md-tag--info'>[ENTERPRISE-STABLE]</span>: Production-ready tools actively maintained.\n"
            "- <span class='md-tag md-tag--warning'>[EMERGING / INNOVATION]</span>: High-growth technologies released or heavily updated recently (≥2025).\n"
            "- <span class='md-tag md-tag--critical'>[LEGACY / MAINTENANCE]</span>: Proven solutions with no major updates since 2022. Use with caution.\n"
            "- <span class='md-tag md-tag--primary'>[ARCHITECTURE-GUIDE]</span> / <span class='md-tag md-tag--primary'>[CASE-STUDY]</span>: High-value reading material and use cases.\n\n"

            "## 🌟 Master Selection (Top-Tier Gems)\n"
            "A global selection of the most impactful resources across all dimensions.\n\n"
        )
        for l in master_selection[:100]:
            gh_info = f" `[⭐ {l['gh_stars']}]`" if "gh_stars" in l else ""
            year_prefix = f"**({l['year']})** " if l.get("year") and l["year"] != "N/A" else ""
            title_clean = l['title'].replace("==", "")
            # Master selection links are 3-5 stars, so we highlight
            title_display = f"**=={title_clean}==**"
            stars_str = "🌟" * l.get("stars", 3)
            index_md += f"- {year_prefix}[{title_display}]({l['url']}){gh_info} {stars_str}\n"

        index_md += "\n??? note \"Elite Video Selection - Click to expand!\"\n"
        index_md += f"    <center markdown=\"1\">\n{videos_html}\n    </center>\n\n"

        index_md += "## Strategic Dimensions\n"
        for dim, content in data.items():
            if not content["categories"]:
                continue
            index_md += f"- **[{dim}](./{self._slugify(dim)}.md)**: {content['summary']}\n"

        # The pages contain emoji, so the encoding must not depend on the locale.
        with open(os.path.join(V2_DIR, "index.md"), "w", encoding="utf-8") as f:
            f.write(index_md)

        for dim, content in data.items():
            if not content["categories"]:
                continue
            md = f"# {dim}\n\n"
            md += f"!!! info \"Architectural Context\"\n    {content['summary']}\n\n"
            for cat, links in content["categories"].items():
                md += f"## {cat}\n"
                for l in links:
                    year, stars_val = l.get("year", "N/A"), l.get("stars", 0)
                    stars = ("🌟" * stars_val) if stars_val > 0 else ""
                    tag = l.get("tag", "[ENTERPRISE-STABLE]")

                    # Determine color mapping for new tags
                    if "STANDARD" in tag or "FOUNDATIONAL" in tag:
                        color = "success"
                    elif "EMERGING" in tag:
                        color = "warning"
                    elif "LEGACY" in tag:
                        color = "critical"
                    elif "STABLE" in tag:
                        color = "info"
                    else:
                        color = "primary"

                    title_clean = l['title'].replace("==", "")
                    if stars_val >= 3 or "STANDARD" in tag:
                        title_display = f"**=={title_clean}==**"
                    elif stars_val == 2:
                        title_display = f"**{title_clean}**"
                    else:
                        title_display = title_clean

                    year_prefix = f"**({year})** " if year and year != "N/A" else ""

                    gh_info = f" <span class='md-tag md-tag--info'>⭐ {l['gh_stars']}</span>" if "gh_stars" in l else ""
                    icon = " 🎥" if l.get("is_video") else ""
                    md += f" - {year_prefix}[{title_display}]({l['url']}){icon}{gh_info} {stars} <span class='md-tag md-tag--{color}'>{tag}</span>\n"
                    desc = l.get('description')
                    if desc:
                        if "\n" in desc:
                            md += "\n" + "\n".join([" " + line for line in desc.split("\n")]) + "\n\n"
                        else:
                            md += f" {desc}\n"
                md += "\n"
            with open(os.path.join(V2_DIR, f"{self._slugify(dim)}.md"), "w", encoding="utf-8") as f:
                f.write(md)

    async def _sync_enterprise_navigation(self, data: Dict[str, Dict]):
        """Rewrite the ``nav:`` section of ``v2-mkdocs.yml`` for the generated pages.

        Everything from the first ``nav:`` to the end of the file is replaced.
        I/O failures are logged and otherwise ignored.
        """
        nav_items = [
            "nav:",
            " - \"🔙 Back to V1 (Exhaustive)\": https://nubenetes.com/",
            " - \"The 2026 Vision\": index.md"
        ]
        for dim, content in data.items():
            if not content["categories"]:
                continue
            nav_items.append(f" - \"{dim}\": {self._slugify(dim)}.md")
        new_nav = "\n".join(nav_items)

        try:
            with open("v2-mkdocs.yml", "r", encoding="utf-8") as f:
                config_text = f.read()
            # A function replacement keeps backslashes in titles literal.
            updated_content = re.sub(r'nav:.*', lambda _match: new_nav, config_text, flags=re.DOTALL)
            with open("v2-mkdocs.yml", "w", encoding="utf-8") as f:
                f.write(updated_content)
        except OSError as exc:
            log_event(f"[!] Could not update v2-mkdocs.yml navigation: {exc}")


# Script entry point: build the engine and run the whole pipeline once.
if __name__ == "__main__":
    asyncio.run(V2VisionEngine().analyze_and_cluster())