Files
awesome-kubernetes/src/v2_optimizer.py

551 lines
30 KiB
Python

import asyncio
import json
import os
import re
from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple

import httpx
import yaml

from src.config import GEMINI_API_KEYS, GH_TOKEN, TARGET_REPO, MADRID_TZ
from src.gemini_utils import call_gemini_with_retry
from src.logger import log_event
def normalize_url(url: str) -> str:
    """Return a canonical, lower-cased form of *url* for use as a lookup key.

    The fragment, the query string and any trailing slashes are dropped, the
    result is lower-cased and an ``http://`` scheme is rewritten to
    ``https://``.

    The scheme check runs on the lower-cased text so that ``HTTP://host`` and
    ``https://host`` collapse to the same key; checking before lower-casing
    left upper-case schemes un-upgraded.

    NOTE(review): dropping the query string makes URLs that differ only by
    query (e.g. video watch links) share one key - confirm this is intended.
    """
    url = url.split("#")[0].split("?")[0].rstrip("/").lower()
    if url.startswith("http://"):
        url = "https://" + url[len("http://"):]
    return url
# Root of the V1 markdown archive that is walked for links, and the output
# directory that receives the generated V2 pages.
V1_DIR, V2_DIR = "docs", "v2-docs"

# YAML files used to persist state between runs.
_DATA_DIR = "data"
INVENTORY_PATH = f"{_DATA_DIR}/inventory.yaml"
STRUCTURE_MAP_PATH = f"{_DATA_DIR}/structure_map.yaml"
class V2VisionEngine:
    """Generate the curated "V2" documentation portal from the V1 markdown archive.

    ``analyze_and_cluster`` drives the pipeline: harvest links from ``V1_DIR``,
    health-check them, score them with Gemini, group them into dimensions and
    write the markdown pages plus the MkDocs navigation.

    ``self.inventory`` is a URL-keyed cache persisted to ``INVENTORY_PATH``.
    This class writes entries under the raw link URL; reads also honour an
    entry stored under ``normalize_url(url)``.
    """

    # Python-Markdown only treats text as the body of an admonition, or as a
    # paragraph nested under a list item, when it is indented by four spaces.
    _MD_INDENT = "    "

    def __init__(self):
        """Define the taxonomy and the AI rubric, then load the on-disk caches."""
        # 100% Comprehensive 2026 Taxonomy
        # Maps each V2 dimension to the V1 file stems it absorbs. A stem listed under
        # two dimensions (e.g. "aws-security", "cloudflare") lands in the later one,
        # because _rebuild_structure fills its reverse map in insertion order.
        self.dimensions = {
            "Intelligent Control Plane": ["ai", "ai-agents-mcp", "chatgpt", "mlops"],
            "Architectural Foundations": ["introduction", "faq", "kubernetes", "linux", "git", "cloud-arch-diagrams", "matrix-table", "other-awesome-lists", "about"],
            "Platform & Site Reliability": ["sre", "devops", "developerportals", "scaffolding", "finops", "chaos-engineering", "performance-testing-with-jenkins-and-jmeter", "project-management-methodology", "project-management-tools", "qa", "test-automation-frameworks", "testops"],
            "Hardened Infrastructure": ["iac", "terraform", "pulumi", "crossplane", "ansible", "securityascode", "kubernetes-security", "aws-security", "oauth", "devsecops", "kustomize", "liquibase", "chef"],
            "Cloud Providers (Hyperscalers)": ["aws", "azure", "GoogleCloudPlatform", "ibm_cloud", "oraclecloud", "digitalocean", "cloudflare", "scaleway", "managed-kubernetes-in-public-cloud", "public-cloud-solutions", "private-cloud-solutions", "edge-computing", "aws-architecture", "aws-security", "aws-networking", "aws-databases", "aws-storage", "aws-monitoring", "aws-iac", "aws-tools-scripts", "aws-messaging", "aws-data", "aws-devops", "aws-serverless", "aws-containers", "aws-backup", "aws-training", "aws-newfeatures", "aws-miscellaneous", "aws-pricing", "aws-spain"],
            "Networking & Service Mesh": ["networking", "kubernetes-networking", "servicemesh", "istio", "caching", "web-servers", "cloudflare"],
            "The Container Stack": ["docker", "container-managers", "serverless", "kubernetes-autoscaling", "kubernetes-operators-controllers", "kubernetes-storage", "kubernetes-monitoring", "kubernetes-troubleshooting", "kubernetes-backup-migrations", "kubernetes-on-premise", "kubernetes-bigdata", "kubernetes-client-libraries", "kubernetes-releases", "kubernetes-based-devel", "kubernetes-alternatives", "kubectl-commands", "rancher", "openshift", "ocp3", "ocp4", "noops"],
            "Data & Advanced Analytics": ["databases", "nosql", "newsql", "message-queue", "crunchydata", "yaml", "bigdata"],
            "Engineering Pipeline": ["cicd", "gitops", "argo", "flux", "tekton", "jenkins", "jenkins-alternatives", "openshift-pipelines", "sonarqube", "registries", "keptn", "stackstorm", "cicd-kubernetes-plugins"],
            "Developer Ecosystem": ["visual-studio", "javascript", "golang", "python", "java_frameworks", "java_app_servers", "java-and-java-performance-optimization", "dotnet", "angular", "react", "web3", "api", "swagger-code-generator-for-rest-apis", "postman", "lowcode-nocode", "devel-sites", "dom", "linux-dev-env", "ChromeDevTools", "xamarin", "jvm-parameters-matrix-table", "maven-gradle", "embedded-servlet-containers"],
            "Career & Industry": ["recruitment", "hr", "freelancing", "remote-tech-jobs", "workfromhome", "interview-questions", "elearning", "digital-money", "appointment-scheduling", "newsfeeds"]
        }
        # Rubric prepended to every scoring prompt sent to Gemini.
        self.library_criteria = (
            "You are a Technical Librarian in 2026. Your mission is to build a high-density, professional reference library.\n"
            "PHASE 1: TECHNICAL PRESERVATION (HIGH INCLUSIVITY)\n"
            "- KEEP >90% of technical resources.\n"
            "PHASE 2: SOPHISTICATED SYNTHESIS & DATING\n"
            "- Extract precise PUBLICATION DATE (YYYY-MM-DD or YYYY): Look for dates in the URL, Twitter/X post dates, or text context. Return 'N/A' if truly unknown.\n"
            "- Assign QUALITY level (0-5 stars):\n"
            " * 0 stars: Good technical resource (Baseline).\n"
            " * 1 star (🌟): High-quality technical guide or tool.\n"
            " * 2 stars (🌟🌟): Exceptional, enterprise-grade resource.\n"
            " * 3 stars (🌟🌟🌟): Elite Gem. Recommended for all architects.\n"
            " * 4 stars (🌟🌟🌟🌟): Masterclass content or Essential Industry Tool.\n"
            " * 5 stars (🌟🌟🌟🌟🌟): Legendary Resource (e.g., K8s Official Docs, Foundations like Prometheus/Envoy).\n"
            "- Assign a MATURITY TAG based on content type/status.\n"
            "PHASE 3: MANDATORY DESCRIPTIONS (V1 PRIORITY)\n"
            "- If 'Current Desc' is already provided and descriptive, DO NOT CHANGE IT.\n"
            "- If 'Current Desc' is empty, too short, or non-descriptive, generate a professional 1-2 sentence summary.\n"
            "- Style: Technical, neutral, and informative. Language: English only.\n"
        )
        self.inventory = self._load_inventory()
        self.structure_map = self._load_structure_map()

    # ------------------------------------------------------------------
    # Persistence helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _load_yaml(path: str) -> Dict:
        """Return the mapping stored at *path*, or ``{}`` if it is missing or unusable."""
        if not os.path.exists(path):
            return {}
        try:
            with open(path, "r", encoding="utf-8") as f:
                loaded = yaml.safe_load(f)
        except (OSError, UnicodeError, yaml.YAMLError) as exc:
            # Best effort: an unreadable cache must not stop the run, but it is
            # worth a log line because the next save will overwrite the file.
            log_event(f"[!] Could not load {path}: {exc}")
            return {}
        # The rest of the class indexes the result by key, so anything that is
        # not a mapping (empty file, top-level list) is treated as empty.
        return loaded if isinstance(loaded, dict) else {}

    @staticmethod
    def _save_yaml(path: str, payload: Dict) -> None:
        """Write *payload* to *path* as UTF-8 YAML, creating the parent directory."""
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            yaml.dump(payload, f, sort_keys=False, allow_unicode=True)

    def _load_inventory(self) -> Dict:
        """Load the URL-keyed inventory cache from ``INVENTORY_PATH``."""
        return self._load_yaml(INVENTORY_PATH)

    def _save_inventory(self):
        """Persist the inventory cache to ``INVENTORY_PATH``."""
        self._save_yaml(INVENTORY_PATH, self.inventory)

    def _load_structure_map(self) -> dict:
        """Load the structure map from ``STRUCTURE_MAP_PATH``."""
        return self._load_yaml(STRUCTURE_MAP_PATH)

    def _save_structure_map(self):
        """Persist the structure map to ``STRUCTURE_MAP_PATH``."""
        self._save_yaml(STRUCTURE_MAP_PATH, self.structure_map)

    def _cached_entry(self, url: str) -> Optional[Dict]:
        """Return the inventory record for *url*, or ``None`` when it is not cached.

        A link counts as cached only when its raw URL is a key. The record stored
        under the normalised key is preferred when present; otherwise the raw-key
        record is used. Reading the normalised key unconditionally raised
        ``KeyError`` for any URL whose normalised form differs from the raw one.
        """
        if url not in self.inventory:
            return None
        entry = self.inventory.get(normalize_url(url))
        if entry is None:
            entry = self.inventory[url]
        return entry if isinstance(entry, dict) else None

    def _inventory_record(self, url: str) -> Dict:
        """Return the mutable inventory record for *url*, creating it if needed."""
        record = self.inventory.get(url)
        if not isinstance(record, dict):
            record = {}
            self.inventory[url] = record
        return record

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _slugify(dim: str) -> str:
        """Turn a dimension title into the file stem used for its page and nav entry."""
        return dim.lower().replace(" ", "-").replace("&", "and").replace("(", "").replace(")", "")

    @staticmethod
    def _year_rank(item: Dict) -> int:
        """Return the item's year as an int for sorting; 0 when it is not numeric."""
        year = str(item.get("year", ""))
        return int(year) if year.isdecimal() else 0

    @staticmethod
    def _year_from_github(gh_meta: Dict) -> Optional[str]:
        """Return the year of the repository's last update, if the metadata has one.

        ``_fetch_github_metadata`` reports the last push as ``gh_pushed``;
        ``gh_updated`` is accepted too for metadata produced under that name.
        """
        stamp = gh_meta.get("gh_updated") or gh_meta.get("gh_pushed")
        return stamp.split("-")[0] if stamp else None

    @staticmethod
    def _absolutize_v1_link(url: str) -> str:
        """Rewrite a relative ``.md`` link to its absolute V1 page URL.

        Absolute, ``mailto:`` and pure-anchor links are returned unchanged, as
        are other relative targets such as ``images/...``. Only the ``.md``
        suffix is rewritten, and a ``#anchor`` is carried over.
        """
        if url.startswith(("http://", "https://", "mailto:", "#")):
            return url
        path, hash_sep, fragment = url.partition("#")
        if path.endswith(".md"):
            return f"https://nubenetes.com/{path[:-len('.md')]}/{hash_sep}{fragment}"
        return url

    @staticmethod
    def _trending_date(meta: Dict) -> str:
        """Return the entry's publication date, else its post date, else ``""``."""
        for field in ("pub_date", "post_date"):
            value = meta.get(field)
            if value and value != "N/A":
                # str() because YAML may have parsed an unquoted date into a date object.
                return str(value)
        return ""

    @staticmethod
    def _trending_sort_key(meta: Dict):
        """Sort key for the pulse list; use with ``reverse=True`` (newest, then most stars)."""
        return (V2VisionEngine._trending_date(meta) or "0000", meta.get("stars") or 0)

    # ------------------------------------------------------------------
    # Pipeline
    # ------------------------------------------------------------------
    async def analyze_and_cluster(self):
        """Run the full pipeline: gather, health-check, score, cluster, write, persist."""
        log_event("STARTING V2 HIGH-DENSITY CHRONOLOGICAL LIBRARY GENERATION", section_break=True)
        all_v1_links, mosaic_html, videos_html = await self._gather_all_v1_content()
        log_event(f"[*] Discovery: Found {len(all_v1_links)} resources in V1 archive.")

        log_event("[*] Phase 1: Health Check & Metadata Enrichment...")
        # Rapid Async Health Check
        health_inventory = await self._verify_link_health(all_v1_links)
        log_event(f"[*] Health Check Complete. {len(health_inventory)}/{len(all_v1_links)} links are online.")

        log_event("[*] Phase 2: Library Evaluation, Year Extraction & Quality Scoring...")
        library_inventory = await self._evaluate_and_score_resources(health_inventory)
        log_event(f"[*] Inventory Refined: {len(library_inventory)} resources kept.")

        log_event("[*] Phase 3: Dimensional Clustering & Chronological Sorting...")
        v2_data = await self._rebuild_structure(library_inventory)

        log_event("[*] Phase 4: Generating Premium Portal Pages...")
        os.makedirs(V2_DIR, exist_ok=True)
        await self._write_premium_files(v2_data, mosaic_html, videos_html)
        await self._sync_enterprise_navigation(v2_data)

        self._save_inventory()
        self._save_structure_map()
        log_event("V2 LIBRARY GENERATION COMPLETED.", section_break=True)

    @staticmethod
    def _extract_index_fragments() -> Tuple[str, str]:
        """Pull the image mosaic and the videos section out of ``docs/index.md``.

        Returns ``(mosaic_html, videos_html)``; either is ``""`` when not found.
        """
        if not os.path.exists("docs/index.md"):
            return "", ""
        with open("docs/index.md", "r", encoding="utf-8") as f:
            idx_content = f.read()
        # Find the BIG mosaic (the one with many images)
        # Support both old <center> and new <div style="text-align: center;" markdown="1">
        mosaics = re.findall(r'<(?:div style="text-align: center;" markdown="1"|center markdown="1"|center)>\s*(.*?)\s*</(?:div|center)>', idx_content, re.DOTALL)
        # The mosaic is the first centred block holding more than five image links.
        mosaic_html = next((m for m in mosaics if m.count("[![") > 5), "")
        videos_match = re.search(r'\?\?\? note "Top Videos & Clips.*?\n(.*?\n)\s*</center>', idx_content, re.DOTALL)
        videos_html = videos_match.group(1) if videos_match else ""
        return mosaic_html, videos_html

    async def _gather_all_v1_content(self) -> Tuple[List[Dict], str, str]:
        """Collect every bullet link from the V1 markdown files.

        Returns:
            ``(links, mosaic_html, videos_html)`` where each link is a dict with
            ``title``, ``url``, ``description`` and ``original_file`` (the bare
            file name). ``index.md`` files are not scanned for links.
        """
        mosaic_html, videos_html = self._extract_index_fragments()
        # A bullet "- [title](url) trailing text" plus any following lines that
        # are indented by two or more whitespace characters.
        link_pattern = re.compile(r'^\s*-\s*\[([^\]]+)\]\(([^\)]+)\)(.*?(?:\n\s{2,}.*)*)', re.MULTILINE)
        all_links: List[Dict] = []
        for root, _, files in os.walk(V1_DIR):
            for file in files:
                if not file.endswith(".md") or file == "index.md":
                    continue
                with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                    content = f.read()
                for m in link_pattern.finditer(content):
                    title, url, full_desc = m.groups()
                    all_links.append({
                        "title": title,
                        # Relative .md links become absolute V1 links for cross-edition stability.
                        "url": self._absolutize_v1_link(url),
                        "description": full_desc.strip(),
                        "original_file": file,
                    })
        return all_links, mosaic_html, videos_html

    async def _verify_link_health(self, links: List[Dict]) -> List[Dict]:
        """Health-check *links* in concurrent batches and return those that were kept.

        Links that answer 404 are dropped; everything else (online, cached or
        uncertain) is returned, in input order.
        """
        online_links: List[Dict] = []
        BATCH_SIZE = 50  # Smaller batches for stability
        # User-Agent rotation to mimic real browsers
        user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0"
        ]
        # NOTE(review): verify=False disables TLS certificate validation for the
        # health checks - confirm this is a deliberate trade-off.
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True, verify=False) as client:
            for start in range(0, len(links), BATCH_SIZE):
                batch = links[start:start + BATCH_SIZE]
                # Rotate by the link's global position. Indexing by the batch
                # offset alone (a multiple of 50) only ever selected two agents.
                tasks = [
                    self._check_single_link_resilient(client, link, user_agents[(start + offset) % len(user_agents)])
                    for offset, link in enumerate(batch)
                ]
                results = await asyncio.gather(*tasks)
                online_links.extend(r for r in results if r is not None)
                if start % 500 == 0:
                    log_event(f" [Resilient Health] Verified {start}/{len(links)} links...")
                # Brief pause to avoid triggering Rate Limits
                await asyncio.sleep(0.1)
        return online_links

    async def _check_single_link_resilient(self, client, link: Dict, ua: str, attempts: int = 3) -> Optional[Dict]:
        """Check one link, retrying with backoff.

        Args:
            client: the shared ``httpx.AsyncClient``.
            link: link dict; ``health_status`` (and possibly ``warning``) is set on it.
            ua: User-Agent header value for this link.
            attempts: maximum number of GET attempts.

        Returns:
            ``None`` for a definitive 404, otherwise *link*. A link whose every
            attempt ended in a non-404 error status is returned unmodified.
        """
        url = link["url"]
        # NOTE: All domains must be checked for validity.
        # Cached Health
        cached = self._cached_entry(url)
        if cached is not None and cached.get("status") == "online":
            link["health_status"] = "cached"
            return link

        # Multi-Attempt Verification with Identity Rotation
        headers = {
            "User-Agent": ua,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Referer": "https://www.google.com/"
        }
        for attempt in range(attempts):
            is_last = attempt == attempts - 1
            try:
                # Use GET instead of HEAD as many sites block HEAD or return 405
                resp = await client.get(url, headers=headers, timeout=10.0)
            except Exception:
                # Any transport/URL error counts as a failed attempt. After the
                # final one the link is soft-flagged instead of removed.
                if is_last:
                    link["health_status"] = "uncertain"
                    link["warning"] = "offline"
                    return link
            else:
                if resp.status_code < 400:
                    self._inventory_record(url)["status"] = "online"
                    link["health_status"] = "online"
                    return link
                # If 404, it's a definitive fail
                if resp.status_code == 404:
                    log_event(f" [Health] Definitive 404: {url}")
                    return None
            # Backoff before retry (nothing to wait for after the final attempt).
            if not is_last:
                await asyncio.sleep(0.5 * (attempt + 1))
        return link

    async def _apply_github_metadata(self, item: Dict) -> Optional[str]:
        """Merge GitHub repo metadata into *item* and override its year.

        Returns the year taken from the repository's last push, or ``None``
        when no metadata (or no push date) was available.
        """
        gh_meta = await self._fetch_github_metadata(item["url"])
        item.update(gh_meta)
        gh_year = self._year_from_github(gh_meta)
        if gh_year:
            item["year"] = gh_year
        return gh_year

    async def _evaluate_and_score_resources(self, links: List[Dict]) -> List[Dict]:
        """Attach year, stars, tag and description to the links.

        Links that already have a description are tagged locally, reusing cached
        AI scores unless ``FORCE_EVAL=true``. Links without one are sent to
        Gemini in batches; links Gemini returns no usable result for are not
        included in the output. If a whole batch call fails, its links are kept
        with neutral defaults.
        """
        refined: List[Dict] = []
        to_evaluate: List[Dict] = []
        force_eval = os.getenv("FORCE_EVAL", "false").lower() == "true"

        # Tags and GitHub years are always recomputed; cached AI stars are
        # reused when available to save cost.
        for link in links:
            url = link["url"]
            item = link.copy()
            cached = None if force_eval else self._cached_entry(url)
            if cached is not None and "stars" in cached:
                item.update(cached)
                # If cache has a generated description and item is missing one, use it
                if "ai_summary" in cached and not item["description"]:
                    item["description"] = cached["ai_summary"]
            # Re-evaluate if description is still missing even after cache check
            if not item.get("description"):
                to_evaluate.append(item)
                continue
            # Re-apply GitHub metadata and maturity tagging
            if "github.com" in url:
                await self._apply_github_metadata(item)
            item["tag"] = self._calculate_tag(item)
            refined.append(item)

        if not to_evaluate:
            return refined

        BATCH_SIZE = 50
        for start in range(0, len(to_evaluate), BATCH_SIZE):
            batch = to_evaluate[start:start + BATCH_SIZE]
            batch_num = start // BATCH_SIZE + 1
            log_event(f" [>] Processing Batch {batch_num} with AI (Mandatory Descriptions)...")
            prompt = (
                f"{self.library_criteria}\n"
                "Respond ONLY with a JSON object: {\"results\": [{\"idx\": int, \"year\": \"YYYY\", \"stars\": int, \"is_video\": bool, \"tag\": \"[TAG]\", \"summary\": \"1-2 sentences description\"}, ...]}\n\n"
                "LINKS:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']}) - Current Desc: {l['description'][:50]}" for idx, l in enumerate(batch)])
            )
            try:
                data = await call_gemini_with_retry(prompt, prefer_flash=True)
                results = data.get("results", [])
                seen = set()
                for res in results:
                    try:
                        idx = int(res["idx"])
                        # Ignore out-of-range indices (a negative one would wrap
                        # around) and repeated answers for the same link.
                        if not 0 <= idx < len(batch) or idx in seen:
                            continue
                        seen.add(idx)
                        item = batch[idx].copy()
                        eval_data = {
                            "year": str(res.get("year", "N/A")),
                            "stars": min(max(int(res.get("stars", 0)), 0), 5),
                            "is_video": res.get("is_video", False),
                            "tag": res.get("tag", "[ENTERPRISE-STABLE]"),
                            "ai_summary": res.get("summary", "")
                        }
                        item.update(eval_data)
                        if not item["description"] and item["ai_summary"]:
                            item["description"] = item["ai_summary"]
                        # GitHub overrides
                        if "github.com" in item["url"]:
                            gh_year = await self._apply_github_metadata(item)
                            if gh_year:
                                eval_data["year"] = gh_year
                        item["tag"] = self._calculate_tag(item)
                        eval_data["tag"] = item["tag"]
                        # Save to cache. Merge instead of replacing so fields such
                        # as the health-check "status" survive.
                        self._inventory_record(item["url"]).update(eval_data)
                        refined.append(item)
                    except Exception as exc:
                        log_event(f" [!] Skipping malformed AI result in batch {batch_num}: {exc}")
                        continue
            except Exception as exc:
                # The AI call (or its envelope) failed: keep the links with neutral defaults.
                log_event(f" [!] AI evaluation failed for batch {batch_num}: {exc}")
                for link in batch:
                    item = link.copy()
                    item["year"], item["stars"], item["is_video"] = "N/A", 0, "youtube" in link["url"]
                    item["tag"] = self._calculate_tag(item)
                    refined.append(item)
            await asyncio.sleep(0.3)
        return refined

    def _calculate_tag(self, item: Dict) -> str:
        """Return the maturity tag for *item*.

        GitHub/GitLab links are tagged from ``gh_stars`` and ``year``; other
        links from title keywords, then from an already-present valid ``tag``,
        else ``[EXPERT-ARTICLE]``. A non-numeric year is treated as 2024.
        """
        # Dynamic Evolutionary Tagging (Automatic Project Growth Detection)
        url = str(item.get("url") or "").lower()
        stars = item.get("gh_stars") or 0
        year_str = str(item.get("year", "2024"))
        year = int(year_str) if year_str.isdecimal() else 2024

        if "github.com" in url or "gitlab.com" in url:
            if stars > 15000:
                return "[DE FACTO STANDARD]"
            if stars > 3000:
                return "[ENTERPRISE-STABLE]"
            if stars > 500 and year >= 2025:
                return "[HIGH-GROWTH / EMERGING]"
            if year <= 2021 and stars < 100:
                return "[LEGACY / MAINTENANCE]"
            return "[COMMUNITY-TOOL]"

        # Article/Guide Logic
        title = str(item.get("title") or "").lower()
        if "awesome" in title:
            return "[FOUNDATIONAL]"
        if "guide" in title or "architecture" in title:
            return "[ARCHITECTURE-GUIDE]"
        if "deep dive" in title or "internals" in title:
            return "[TECHNICAL-DEEP-DIVE]"
        if "how to" in title or "tutorial" in title:
            return "[CASE-STUDY]"

        # Fallback to AI's tag or defaults. The AI may send null, hence the `or ""`.
        tag = str(item.get("tag") or "").upper()
        valid_tags = ["[DE FACTO STANDARD]", "[ENTERPRISE-STABLE]", "[EMERGING / INNOVATION]", "[LEGACY / MAINTENANCE]", "[ARCHITECTURE-GUIDE]", "[TOOLING]", "[CASE-STUDY]", "[CHEATSHEET]"]
        if tag in valid_tags:
            return tag
        return "[EXPERT-ARTICLE]"

    async def _fetch_github_metadata(self, url: str) -> Dict:
        """Fetch star count, last-push date and creation date for a GitHub repo URL.

        Returns ``{}`` when *url* has no ``owner/repo`` part, the API does not
        answer 200, or the request fails. Dates are ``YYYY-MM-DD`` strings
        (empty when the API field is missing or null).
        """
        match = re.search(r'github\.com/([^/]+)/([^/]+)', url)
        if not match:
            return {}
        owner, repo = match.groups()
        repo = repo.split("#")[0].split("?")[0]  # Clean up
        headers = {"Authorization": f"token {GH_TOKEN}"} if GH_TOKEN else {}
        api_url = f"https://api.github.com/repos/{owner}/{repo}"
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(api_url, headers=headers)
                if resp.status_code != 200:
                    return {}
                data = resp.json()
        except Exception:
            # Best effort: metadata is optional enrichment.
            return {}
        if not isinstance(data, dict):
            return {}
        return {
            "gh_stars": data.get("stargazers_count", 0),
            "gh_pushed": (data.get("pushed_at") or "").split("T")[0],
            "gh_created": (data.get("created_at") or "").split("T")[0],
        }

    async def _rebuild_structure(self, inventory: List[Dict]) -> Dict[str, Dict]:
        """Group items by dimension and category, sort them, and add AI summaries.

        Returns ``{dimension: {"summary": str, "categories": {name: [item, ...]}}}``.
        Items whose source file is not in the taxonomy go to
        "Architectural Foundations".
        """
        v2_structure = {dim: {"summary": "", "categories": {}} for dim in self.dimensions}
        file_to_dim = {}
        for dim, stems in self.dimensions.items():
            for stem in stems:
                file_to_dim[stem + ".md"] = dim

        for item in inventory:
            dim = file_to_dim.get(item["original_file"], "Architectural Foundations")
            cat_name = item["original_file"].replace(".md", "").capitalize()
            v2_structure[dim]["categories"].setdefault(cat_name, []).append(item)

        for dim, content in v2_structure.items():
            if not content["categories"]:
                continue
            for cat_links in content["categories"].values():
                # Sort by: 1. Stars (DESC), 2. Year (DESC, N/A at the end).
                # Items that were never scored rank alongside 1-star items.
                cat_links.sort(key=lambda x: (-x.get("stars", 1), -self._year_rank(x)))
            prompt = f"Write a professional 2026 executive summary for '{dim}'. Focus on high-density value. 1 sentence only."
            try:
                content["summary"] = await call_gemini_with_retry(prompt, response_format="text", prefer_flash=True)
            except Exception:
                content["summary"] = f"Impact-driven reference library for {dim}."
        return v2_structure

    def _build_pulse_section(self, data: Dict[str, Dict]) -> str:
        """Render the "Agentic Pulse" list: the five newest inventory entries with 3+ stars."""
        # Inventory records written by this class carry no title, so fall back
        # to the title of the matching rendered link, then to the URL itself.
        known_titles = {
            link["url"]: link["title"]
            for dim in data.values()
            for cat_links in dim["categories"].values()
            for link in cat_links
        }
        trending_pool = []
        for url, meta in self.inventory.items():
            if isinstance(meta, dict) and (meta.get("stars") or 0) >= 3:
                entry = meta.copy()
                entry["url"] = url
                trending_pool.append(entry)
        # Sort by: 1. Pub/Post Date (DESC), 2. Stars (DESC)
        trending_pool.sort(key=self._trending_sort_key, reverse=True)

        parts = [
            "## ⚡ The Agentic Pulse: Trending Excellence\n",
            "Directly from the latest 2026 curation surges. High-impact technical depth recently added.\n\n",
        ]
        for entry in trending_pool[:5]:
            stars = "🌟" * entry.get("stars", 3)
            date = self._trending_date(entry)
            date_prefix = f"**({date[:10]})** " if date else ""
            title = entry.get("title") or known_titles.get(entry["url"]) or entry["url"]
            parts.append(f"- {date_prefix}[**=={title}==**]({entry['url']}) {stars}\n")
        return "".join(parts)

    def _render_index_page(self, data: Dict[str, Dict], mosaic_html: str, videos_html: str) -> str:
        """Render the portal landing page (banner, pulse, taxonomy, master selection, dimensions)."""
        indent = self._MD_INDENT
        master_selection = [
            link
            for dim in data.values()
            for cat_links in dim["categories"].values()
            for link in cat_links
            if link.get("stars", 0) >= 3
        ]
        # Sort master selection by Stars (DESC), then Year (DESC), then Title (ASC)
        master_selection.sort(key=lambda x: (-x.get("stars", 0), -self._year_rank(x), x["title"]))

        pulse_md = self._build_pulse_section(data)
        parts = [
            "# Nubenetes V2 | The High-Density Library (2026)\n\n"
            "![Banner](images/kubernetes_logo.jpg)\n\n"
            "!!! quote \"The Library of 2026\"\n"
            f"{indent}A meticulously curated reference of over 15,000 resources. This V2 portal preserves technical depth while providing "
            " impact-driven synthesis and expert quality classification.\n\n"
            f"<center markdown=\"1\">\n{mosaic_html}\n</center>\n\n"
            f"{pulse_md}\n"
            "## 🛡️ V2 Taxonomy & Elite Quality Tiers\n"
            "To maximize technical clarity, V2 resources are classified by maturity rather than subjective quality:\n\n"
            "- <span class='md-tag md-tag--success'>[DE FACTO STANDARD]</span>: Foundational industry tools with massive adoption (>10k GitHub stars).\n"
            "- <span class='md-tag md-tag--info'>[ENTERPRISE-STABLE]</span>: Production-ready tools actively maintained.\n"
            "- <span class='md-tag md-tag--warning'>[EMERGING / INNOVATION]</span>: High-growth technologies released or heavily updated recently (≥2025).\n"
            "- <span class='md-tag md-tag--critical'>[LEGACY / MAINTENANCE]</span>: Proven solutions with no major updates since 2022. Use with caution.\n"
            "- <span class='md-tag md-tag--primary'>[ARCHITECTURE-GUIDE]</span> / <span class='md-tag md-tag--primary'>[CASE-STUDY]</span>: High-value reading material and use cases.\n\n"
            "## 🌟 Master Selection (Top-Tier Gems)\n"
            "A global selection of the most impactful resources across all dimensions.\n\n"
        ]
        for link in master_selection[:100]:
            gh_info = f" `[⭐ {link['gh_stars']}]`" if "gh_stars" in link else ""
            year_prefix = f"**({link['year']})** " if link.get("year") and link["year"] != "N/A" else ""
            title_clean = link['title'].replace("==", "")
            # Master selection links are 3-5 stars, so we highlight
            title_display = f"**=={title_clean}==**"
            stars_str = "🌟" * link.get("stars", 3)
            parts.append(f"- {year_prefix}[{title_display}]({link['url']}){gh_info} {stars_str}\n")

        parts.append("\n??? note \"Elite Video Selection - Click to expand!\"\n")
        parts.append(f"{indent}<center markdown=\"1\">\n{videos_html}\n{indent}</center>\n\n")
        parts.append("## Strategic Dimensions\n")
        for dim, content in data.items():
            if not content["categories"]:
                continue
            parts.append(f"- **[{dim}](./{self._slugify(dim)}.md)**: {content['summary']}\n")
        return "".join(parts)

    def _render_dimension_page(self, dim: str, content: Dict) -> str:
        """Render the markdown page for one dimension (summary box plus one section per category)."""
        indent = self._MD_INDENT
        parts = [
            f"# {dim}\n\n",
            f"!!! info \"Architectural Context\"\n{indent}{content['summary']}\n\n",
        ]
        for cat, links in content["categories"].items():
            parts.append(f"## {cat}\n")
            for link in links:
                year, stars_val = link.get("year", "N/A"), link.get("stars", 0)
                stars = ("🌟" * stars_val) if stars_val > 0 else ""
                tag = link.get("tag", "[ENTERPRISE-STABLE]")
                # Determine color mapping for new tags
                if "STANDARD" in tag or "FOUNDATIONAL" in tag:
                    color = "success"
                elif "EMERGING" in tag:
                    color = "warning"
                elif "LEGACY" in tag:
                    color = "critical"
                elif "STABLE" in tag:
                    color = "info"
                else:
                    color = "primary"
                title_clean = link['title'].replace("==", "")
                if stars_val >= 3 or "STANDARD" in tag:
                    title_display = f"**=={title_clean}==**"
                elif stars_val == 2:
                    title_display = f"**{title_clean}**"
                else:
                    title_display = title_clean
                year_prefix = f"**({year})** " if year and year != "N/A" else ""
                gh_info = f" <span class='md-tag md-tag--info'>⭐ {link['gh_stars']}</span>" if "gh_stars" in link else ""
                icon = " 🎥" if link.get("is_video") else ""
                parts.append(f" - {year_prefix}[{title_display}]({link['url']}){icon}{gh_info} {stars} <span class='md-tag md-tag--{color}'>{tag}</span>\n")
                desc = link['description']
                if desc:
                    if "\n" in desc:
                        parts.append("\n" + "\n".join([indent + line for line in desc.split("\n")]) + "\n\n")
                    else:
                        parts.append(f"{indent}{desc}\n")
            parts.append("\n")
        return "".join(parts)

    async def _write_premium_files(self, data: Dict[str, Dict], mosaic_html: str, videos_html: str):
        """Write ``index.md`` and one page per non-empty dimension into ``V2_DIR``.

        Mosaic image paths are written through unchanged.
        NOTE(review): they are relative ``images/...`` paths, which presumably
        resolve via a symlink from the V2 tree to the V1 images - confirm.
        """
        with open(os.path.join(V2_DIR, "index.md"), "w", encoding="utf-8") as f:
            f.write(self._render_index_page(data, mosaic_html, videos_html))
        for dim, content in data.items():
            if not content["categories"]:
                continue
            with open(os.path.join(V2_DIR, f"{self._slugify(dim)}.md"), "w", encoding="utf-8") as f:
                f.write(self._render_dimension_page(dim, content))

    async def _sync_enterprise_navigation(self, data: Dict[str, Dict]):
        """Rewrite the ``nav:`` section of ``v2-mkdocs.yml`` to list the generated pages.

        Everything from the first line that starts with ``nav:`` to the end of
        the file is replaced. I/O failures are logged and otherwise ignored.
        """
        nav_items = [
            "nav:",
            " - \"🔙 Back to V1 (Exhaustive)\": https://nubenetes.com/",
            " - \"The 2026 Vision\": index.md"
        ]
        for dim, content in data.items():
            if not content["categories"]:
                continue
            nav_items.append(f" - \"{dim}\": {self._slugify(dim)}.md")
        new_nav = "\n".join(nav_items)
        try:
            with open("v2-mkdocs.yml", "r", encoding="utf-8") as f:
                current = f.read()
            # A callable replacement keeps backslashes in titles literal; the ^
            # anchor avoids matching "nav:" in the middle of another line.
            updated_content = re.sub(r'^nav:.*', lambda _m: new_nav, current, flags=re.DOTALL | re.MULTILINE)
            with open("v2-mkdocs.yml", "w", encoding="utf-8") as f:
                f.write(updated_content)
        except (OSError, UnicodeError) as exc:
            log_event(f"[!] Could not update v2-mkdocs.yml navigation: {exc}")
if __name__ == "__main__":
    # Script entry point: build the engine and run the whole pipeline once.
    asyncio.run(V2VisionEngine().analyze_and_cluster())