mirror of
https://github.com/nubenetes/awesome-kubernetes.git
synced 2026-05-26 19:18:58 +00:00
952 lines
56 KiB
Python
952 lines
56 KiB
Python
import os
|
|
import re
|
|
import json
|
|
import asyncio
|
|
import yaml
|
|
import httpx
|
|
from datetime import datetime
|
|
from typing import List, Dict, Set, Any, Tuple
|
|
from src.config import GEMINI_API_KEYS, GH_TOKEN, TARGET_REPO, MADRID_TZ, INVENTORY_PATH
|
|
from src.gemini_utils import call_gemini_with_retry, normalize_url, clean_toc_text, get_github_activity, fetch_youtube_metadata
|
|
from src.logger import log_event
|
|
|
|
def nuclear_strip(text: str) -> str:
    """Mandate 30 (MD039): trim edge whitespace of every flavour and drop ``==`` markers.

    Leading and trailing runs of regular whitespace plus the listed Unicode
    space/separator code points (non-breaking, zero-width, thin, ideographic,
    ...) are removed. Every ``==`` pair is then deleted wherever it occurs in
    the remaining text. Falsy input (``None``, ``""``) yields ``""``.
    """
    if not text:
        return ""
    # One shared character class, anchored once at the start and once at the end.
    edge_run = r'[\s\u00a0\u200b\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000]+'
    for anchored in ('^' + edge_run, edge_run + '$'):
        text = re.sub(anchored, '', text)
    return text.replace("==", "")
|
|
|
|
# Root of the V1 Markdown tree; it is walked recursively to harvest links.
V1_DIR = "docs"
# Output directory of the generated V2 portal; created on demand and pruned of
# ".md" files that a run did not generate.
V2_DIR = "v2-docs"
|
|
|
|
class V2VisionEngine:
|
|
def __init__(self, render_only: bool = False):
    """Set up configuration, taxonomy, prompt text and the cached inventory.

    Args:
        render_only: Stored on the instance. When true, the pipeline trusts the
            ``status`` already recorded in the inventory instead of checking
            links over the network, and skips GitHub metadata fetches and AI
            evaluation.
    """
    self.render_only = render_only
    # Load Config & Policy
    self.special_assets_rules = self._load_special_assets()
    self.link_rules = self._load_link_rules()
    # Maximum number of hierarchy levels kept per link when the page tree is built.
    self.max_depth = self.link_rules.get("hierarchy_rules", {}).get("max_depth", 10)

    # 100% Comprehensive 2026 Taxonomy
    # Maps a dimension title to V1 page stems (file names without ".md").
    # NOTE(review): "finops", "aws-security" and "cloudflare" are each listed under
    # two dimensions. The file -> dimension lookup is built with a dict
    # comprehension, so the later listing wins; confirm that is intended.
    self.dimensions = {
        "AI and Artificial Intelligence": ["ai", "ai-agents-mcp", "chatgpt", "mlops"],
        "Architectural Foundations": ["introduction", "faq", "kubernetes", "linux", "git", "cloud-arch-diagrams", "matrix-table", "other-awesome-lists", "about"],
        "Platform & Site Reliability": ["sre", "devops", "developerportals", "scaffolding", "finops", "chaos-engineering", "performance-testing-with-jenkins-and-jmeter", "project-management-methodology", "project-management-tools", "qa", "test-automation-frameworks", "testops"],
        "Hardened Infrastructure": ["iac", "terraform", "pulumi", "crossplane", "ansible", "securityascode", "kubernetes-security", "aws-security", "oauth", "devsecops", "kustomize", "liquibase", "chef"],
        "Cloud Providers (Hyperscalers)": ["aws", "azure", "GoogleCloudPlatform", "ibm_cloud", "oraclecloud", "digitalocean", "cloudflare", "scaleway", "managed-kubernetes-in-public-cloud", "public-cloud-solutions", "private-cloud-solutions", "edge-computing", "aws-architecture", "aws-security", "aws-networking", "aws-databases", "aws-storage", "aws-monitoring", "aws-iac", "aws-tools-scripts", "aws-messaging", "aws-data", "aws-devops", "aws-serverless", "aws-containers", "aws-backup", "aws-training", "aws-newfeatures", "aws-miscellaneous", "aws-pricing", "aws-spain"],
        "Networking & Service Mesh": ["networking", "kubernetes-networking", "servicemesh", "istio", "caching", "web-servers", "cloudflare"],
        "The Container Stack": ["docker", "container-managers", "serverless", "kubernetes-autoscaling", "kubernetes-operators-controllers", "kubernetes-storage", "kubernetes-monitoring", "kubernetes-troubleshooting", "kubernetes-backup-migrations", "kubernetes-on-premise", "kubernetes-bigdata", "kubernetes-client-libraries", "kubernetes-releases", "kubernetes-based-devel", "kubernetes-alternatives", "kubectl-commands", "rancher", "openshift", "ocp3", "ocp4", "noops"],
        "Data & Advanced Analytics": ["databases", "nosql", "newsql", "message-queue", "crunchydata", "yaml", "bigdata"],
        "Engineering Pipeline": ["cicd", "gitops", "argo", "flux", "tekton", "jenkins", "jenkins-alternatives", "openshift-pipelines", "sonarqube", "registries", "keptn", "stackstorm", "cicd-kubernetes-plugins"],
        "Developer Ecosystem": ["visual-studio", "javascript", "golang", "python", "java_frameworks", "java_app_servers", "java-and-java-performance-optimization", "dotnet", "angular", "react", "web3", "api", "swagger-code-generator-for-rest-apis", "postman", "lowcode-nocode", "devel-sites", "dom", "linux-dev-env", "ChromeDevTools", "xamarin", "jvm-parameters-matrix-table", "maven-gradle", "embedded-servlet-containers"],
        "Career & Industry": ["recruitment", "hr", "finops", "freelancing", "remote-tech-jobs", "workfromhome", "interview-questions", "elearning", "digital-money", "appointment-scheduling", "newsfeeds"]
    }

    # Shared instruction text embedded in the analyst prompts.
    self.library_criteria = (
        "You are a Senior Technical Architect in 2026. Your mission is to organize a high-density technical reference portal "
        "structured like a professional technical book (O'Reilly style).\n"
        "PHASE 1: TECHNICAL PRESERVATION & CURATION\n"
        "- KEEP >90% of technical resources (except for 'introduction.md' where only high-impact links are kept).\n"
        "PHASE 2: SOPHISTICATED HIERARCHICAL CLASSIFICATION\n"
        "- Identify TECHNICAL_HIERARCHY: A list of strings (max 10) representing Area > Topic > Subtopics.\n"
        "- For 'introduction.md', identify links related to MICROSERVICES for extraction.\n"
        "PHASE 3: KNOWLEDGE ASSIMILATION FLOW\n"
        "- Order hierarchy to facilitate a structured learning journey.\n"
        "PHASE 4: HIGH-DENSITY TECHNICAL SUMMARIES (Double-Evidence Synthesis)\n"
        "- Generate professional, neutral, and advanced technical summaries. Style: O'Reilly technical.\n"
        "- PROTOCOL: Contrast 'Curator Insight' (from source) with 'Live Grounding' (from search).\n"
        "- If discrepancies are found (e.g. project is archived but source says it's new), PRIORITIZE live engineering truth.\n"
        "- Summaries MUST be high-density: Include architectural value, key features, and technical significance.\n"
        "- Format: Use paragraphs and bullet points for complex tools. Aim for 2-5 sentences of depth.\n"
        "PHASE 5: ADVANCED MATURITY TAGGING\n"
        "- Assign 1 to 3 tags from: [DE FACTO STANDARD], [ENTERPRISE-STABLE], [EMERGING], [GUIDE], [CASE STUDY], [COMMUNITY-TOOL], [LEGACY].\n"
    )
    # Per-link cache, looked up throughout this class by normalized URL.
    self.inventory = self._load_inventory()
    # Rows appended while the page structure is rebuilt (one per processed link).
    self.maturity_audit = []
|
|
|
|
def _load_special_assets(self) -> Dict:
    """Load ``data/special_assets.yaml`` as a dict (best effort).

    Returns:
        The parsed mapping, or ``{}`` when the file is missing, empty,
        unreadable, not valid YAML, or does not have a mapping at its root.
    """
    path = "data/special_assets.yaml"
    if not os.path.exists(path):
        return {}
    try:
        # The context manager closes the handle even when parsing fails.
        with open(path, "r", encoding="utf-8") as fh:
            data = yaml.safe_load(fh)
    except Exception as e:
        # Configuration is optional: report the problem and fall back to defaults.
        log_event(f" [!] Could not load {path}: {e}")
        return {}
    # Callers use .get() on the result, so anything but a mapping is treated as empty.
    return data if isinstance(data, dict) else {}
|
|
|
|
def _load_link_rules(self) -> Dict:
    """Load ``data/link_rules.yaml`` as a dict (best effort).

    Returns:
        The parsed mapping, or ``{}`` when the file is missing, empty,
        unreadable, not valid YAML, or does not have a mapping at its root.
    """
    path = "data/link_rules.yaml"
    if not os.path.exists(path):
        return {}
    try:
        # The context manager closes the handle even when parsing fails.
        with open(path, "r", encoding="utf-8") as fh:
            data = yaml.safe_load(fh)
    except Exception as e:
        # Configuration is optional: report the problem and fall back to defaults.
        log_event(f" [!] Could not load {path}: {e}")
        return {}
    # Callers use .get() on the result, so anything but a mapping is treated as empty.
    return data if isinstance(data, dict) else {}
|
|
|
|
def _load_inventory(self) -> Dict:
    """Return the persisted link inventory as provided by ``src.inventory_manager``."""
    # Resolved at call time rather than at module import.
    from src.inventory_manager import load_inventory as read_inventory
    inventory = read_inventory()
    return inventory
|
|
|
|
def _save_inventory(self):
    """Hand the in-memory inventory to ``src.inventory_manager`` for persistence."""
    # Resolved at call time rather than at module import.
    from src.inventory_manager import save_inventory as write_inventory
    write_inventory(self.inventory)
|
|
|
|
async def analyze_and_cluster(self):
    """Run the end-to-end V2 portal build.

    Steps, in order: sanitize inventory titles, sync mandates (best effort),
    gather V1 links, health-check them (or trust the cached inventory status in
    render-only mode), evaluate and score, rebuild the page hierarchy, write the
    pages and navigation, prune ``.md`` files in ``V2_DIR`` that this run did
    not generate, persist the inventory and write a safety report.
    """
    log_event("STARTING V2 HIGH-DENSITY O'REILLY LIBRARY GENERATION", section_break=True)

    # Mandate 30: MD039 - Global Data Sanitization (Purge all whitespace/hidden chars from titles)
    for url in list(self.inventory.keys()):
        entry = self.inventory[url]
        if not isinstance(entry, dict):
            continue
        t = entry.get("title")
        # Only string titles can be trimmed; a missing or None title is left untouched
        # instead of raising TypeError inside re.sub.
        if not isinstance(t, str):
            continue
        # Purge all known whitespace characters (standard, non-breaking, thin, etc.)
        t = re.sub(r'^[\s\u00a0\u200b\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000]+', '', t)
        t = re.sub(r'[\s\u00a0\u200b\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000]+$', '', t)
        entry["title"] = t

    # 0. Mandate Sync (best effort: a failure is reported but never aborts the run)
    try:
        from src.mandate_ingestor import MandateIngestor
        MandateIngestor().save_system_instructions()
    except Exception as e:
        log_event(f" [!] Mandate sync skipped: {e}")

    all_v1_links, mosaic_html, videos_html = await self._gather_all_v1_content()

    log_event(f"[*] Discovery: Found {len(all_v1_links)} resources to process.")

    log_event("[*] Phase 1: Health Check...")
    if self.render_only:
        # No network access: keep only links the inventory already marks as online.
        health_inventory = [l for l in all_v1_links if self.inventory.get(normalize_url(l["url"]), {}).get("status") == "online"]
    else:
        health_inventory = await self._verify_link_health(all_v1_links)

    log_event("[*] Phase 2: Evaluation & Deep Indexing (Semantic Dedup)...")
    library_inventory = await self._evaluate_and_score_resources(health_inventory)

    log_event("[*] Phase 3: Recursive Hierarchy Construction...")
    v2_data = await self._rebuild_structure(library_inventory)

    log_event("[*] Phase 4: Generating Premium Portal Hubs...")
    os.makedirs(V2_DIR, exist_ok=True)

    # --- SURGICAL GARBAGE COLLECTION ---
    # Track every file we generate
    generated_files = {"index.md", "audit-log.md", "videos.md"}
    generated_files.update(v2_data.keys())

    await self._write_premium_files(v2_data, mosaic_html, videos_html)
    await self._sync_enterprise_navigation(v2_data)

    # Delete only orphaned files
    log_event("[*] Phase 5: Pruning Orphaned V2 Assets...")
    for f in os.listdir(V2_DIR):
        if f.endswith(".md") and f not in generated_files:
            log_event(f" [DEL] Pruning obsolete V2 page: {f}")
            os.remove(os.path.join(V2_DIR, f))

    self._save_inventory()

    # --- FINAL SAFETY AUDIT ---
    try:
        from src.safety_guard import SafetyGuard
        guard = SafetyGuard()
        report = guard.generate_audit_report()
        # Explicit UTF-8 so the report does not depend on the platform locale.
        with open("v2_safety_report.md", "w", encoding="utf-8") as f:
            f.write(report)
    except Exception as e:
        log_event(f" [!] V2 Safety Audit Error: {e}")

    log_event("V2 ELITE PORTAL GENERATED SUCCESSFULLY.")
|
|
|
|
async def _gather_all_v1_content(self):
    """Collect every bullet link from the V1 Markdown tree plus two index fragments.

    Returns:
        A tuple ``(all_links, mosaic_html, videos_html)``:

        * ``all_links``: list of dicts with ``title``, ``url``, ``description``
          and ``original_file`` (base name of the page the link came from).
        * ``mosaic_html``: first ``<center markdown="1">`` block of the V1 index
          containing more than five ``[![`` image links, or ``""``.
        * ``videos_html``: body of the "Top Videos & Clips" note of the V1
          index, or ``""``.
    """
    all_links, mosaic_html, videos_html = [], "", ""

    index_path = os.path.join(V1_DIR, "index.md")
    if os.path.exists(index_path):
        # Explicit UTF-8: the pages carry emoji and non-ASCII text, so decoding
        # must not depend on the platform locale.
        with open(index_path, "r", encoding="utf-8") as f:
            idx_content = f.read()
        mosaics = re.findall(r'<center markdown="1">\s*\n(.*?)\n\s*</center>', idx_content, re.DOTALL)
        mosaic_html = next((m for m in mosaics if m.count("[![") > 5), "")
        videos_match = re.search(r'\?\?\? note "Top Videos & Clips.*?\n(.*?\n)\s*</center>', idx_content, re.DOTALL)
        if videos_match:
            videos_html = videos_match.group(1)

    # "- [title](url) trailing text", followed by any continuation lines that are
    # indented by two or more whitespace characters. Compiled once for all files.
    link_pattern = re.compile(r'^\s*-\s*\[([^\]]+)\]\(([^\)]+)\)(.*?(?:\n\s{2,}.*)*)', re.MULTILINE)

    for root, _, files in os.walk(V1_DIR):
        for file in files:
            if not file.endswith(".md") or file == "index.md":
                continue
            path = os.path.join(root, file)
            with open(path, "r", encoding="utf-8") as f:
                content = f.read()
            for m in link_pattern.finditer(content):
                title, url, full_desc = m.groups()
                if not url.startswith(("http", "mailto", "#")):
                    # Site-relative page reference: rewrite to the published URL.
                    url = f"https://nubenetes.com/{url.replace('.md', '/')}"
                # Mandate 30: MD039 - Strip all whitespace (including non-breaking space) from link text
                all_links.append({"title": nuclear_strip(title), "url": url.strip(), "description": full_desc.strip(), "original_file": file})
    return all_links, mosaic_html, videos_html
|
|
|
|
async def _verify_link_health(self, links: List[Dict]):
    """Return the links considered online, probing the network only where needed.

    Links whose inventory status is ``review_required`` are dropped. Links
    already marked ``online`` are accepted without a request unless the
    ``FORCE_FULL_CHECK`` environment variable is ``true``. Everything else is
    probed in batches of 50 through ``_check_single_link_resilient``.
    """
    full_recheck = os.getenv("FORCE_FULL_CHECK", "false").lower() == "true"
    trusted, pending = [], []

    for link in links:
        status = self.inventory.get(normalize_url(link["url"]), {}).get("status")
        # Mandate 32: skip links under review
        if status == "review_required":
            continue
        if status == "online" and not full_recheck:
            trusted.append(link)
        else:
            pending.append(link)

    if not pending:
        return trusted

    log_event(f" [>] Fast-Track Health: {len(trusted)} | Network-Check: {len(pending)}")

    verified = list(trusted)
    pending_total = len(pending)
    async with httpx.AsyncClient(timeout=15.0, follow_redirects=True, verify=False) as client:
        for offset in range(0, pending_total, 50):
            chunk = pending[offset:offset + 50]
            outcomes = await asyncio.gather(*(self._check_single_link_resilient(client, link) for link in chunk))
            # A probe yields the (possibly URL-updated) link, or None when it failed.
            verified.extend(outcome for outcome in outcomes if outcome is not None)
            if offset % 100 == 0:
                log_event(f" [>] Progress: [{offset}/{pending_total}] links validated over network...")
            # NOTE(review): indentation was lost in this copy; the pause is assumed
            # to run after every batch rather than only when progress is logged.
            await asyncio.sleep(0.1)
    return verified
|
|
|
|
async def _check_single_link_resilient(self, client, link: Dict):
    """Probe one link and record it as online in the inventory when it responds.

    Args:
        client: HTTP client exposing an awaitable ``get(url, timeout=...)``.
        link: Link dict; its ``url`` is replaced by the final URL after
            redirects and trailing-slash sanitization when that differs.

    Returns:
        ``link`` when it is already known to be online (and no forced re-check
        is requested) or when the request returns a status below 400;
        ``None`` when the link is under review, answers with an error status,
        or the probe fails.
    """
    url = link["url"]
    norm_url = normalize_url(url)
    entry = self.inventory.get(norm_url, {})

    # Mandate 31: Skip links under review for V2 Elite
    if entry.get("status") == "review_required":
        log_event(f" [-] SKIPPING V2: {url} is under Review.")
        return None

    if entry.get("status") == "online" and os.getenv("FORCE_FULL_CHECK", "false").lower() != "true":
        return link

    try:
        resp = await client.get(url, timeout=10.0)
        if resp.status_code < 400:
            final_url = str(resp.url)
            from src.gemini_utils import sanitize_trailing_slashes
            final_url = sanitize_trailing_slashes(final_url)

            # Update URL if it was redirected/normalized
            if final_url != url:
                link["url"] = final_url

            final_key = normalize_url(final_url)
            record = self.inventory.setdefault(final_key, {})
            record["status"] = "online"
            # Mandate 22: Update last_checked for the inventory entry
            record["last_checked"] = datetime.now().timestamp()
            return link
    except Exception:
        # Dead or unreachable links are expected here, so failures stay silent.
        # `Exception` (not a bare except) lets task cancellation and
        # KeyboardInterrupt propagate instead of being reported as a dead link.
        pass
    return None
|
|
|
|
async def _evaluate_and_score_resources(self, links: List[Dict]):
    """Score, classify and de-duplicate healthy links into one record per project.

    Pipeline:
      1. Fetch GitHub activity metadata for GitHub links (skipped in render-only mode).
      2. Reuse cached evaluations from the inventory; queue everything else.
      3. Unless render-only: evaluate queued links with the model in two tracks
         (fast, ungrounded batches of 50 / grounded batches of 15), then have
         an auditor re-verify links tagged [DE FACTO STANDARD] or
         [ENTERPRISE-STABLE], and finally persist results to the inventory.

    Links pointing at the same GitHub ``owner/repo`` share one registry slot;
    other links are keyed by their normalized URL.

    Args:
        links: Link dicts carrying at least ``url`` and ``title``.

    Returns:
        The registry values: one dict per project.

    NOTE(review): block nesting was reconstructed from a copy of the file that
    lost its indentation; the placements marked below should be confirmed
    against the canonical source.
    """
    to_evaluate = []
    project_registry = {}
    force_eval = os.getenv("FORCE_EVAL", "false").lower() == "true"
    force_full_check = os.getenv("FORCE_FULL_CHECK", "false").lower() == "true"
    # Bypassing GitHub UI limitation: If force_eval or force_full_check is ON, we must enrich metadata
    enrich_metadata = os.getenv("ENRICH_METADATA", "false").lower() == "true" or force_eval or force_full_check
    special_files = [sa["file"] for sa in self.special_assets_rules.get("special_assets", [])]

    # Mandate 47: Zero-Redundancy & Smart Grounding
    from src.mandate_ingestor import get_system_mandates
    dynamic_mandates = get_system_mandates()

    # Mandate 15: Proactive Enrichment for V2 (GitHub metadata is critical for tags)
    # To avoid duplicate logs and redundant API calls, we deduplicate unique GitHub repos first
    processed_gh_metadata = set()
    gh_fetch_count = 0
    for l in links:
        norm_url = normalize_url(l["url"])
        if "github.com" not in norm_url or self.render_only:
            continue

        cached = self.inventory.get(norm_url, {})
        # Mandate 43: Always ensure GH metadata for GitHub links in V2 to power [DE FACTO STANDARD] logic
        if (enrich_metadata or not cached.get("gh_stars")) and norm_url not in processed_gh_metadata:
            log_event(f" [METADATA] V2 Pulse: Fetching GH Activity for {norm_url}")
            processed_gh_metadata.add(norm_url)  # Add BEFORE await to block any (even theoretical) parallelism
            gh_data = await get_github_activity(norm_url)
            if gh_data:
                if norm_url not in self.inventory:
                    self.inventory[norm_url] = {}
                self.inventory[norm_url].update(gh_data)

            # NOTE(review): assumed to count fetch attempts only (inside the fetch branch).
            gh_fetch_count += 1
            if gh_fetch_count % 500 == 0:
                log_event(f" [💾] Periodic Save: Persisting inventory after {gh_fetch_count} metadata fetches...")
                from src.inventory_manager import save_inventory
                save_inventory(self.inventory)

    for l in links:
        item = l.copy()
        norm_url = normalize_url(l["url"])
        orig_file = l.get("original_file", "unknown.md")
        is_special = orig_file in special_files
        item["is_special"] = is_special
        project_id = norm_url
        if "github.com" in norm_url:
            match = re.search(r'github\.com/([^/]+/[^/]+)', norm_url)
            if match:
                project_id = match.group(1).lower()

        # Reuse enriched metadata from inventory
        if "github.com" in norm_url:
            item.update(self.inventory.get(norm_url, {}))

        if not force_eval and norm_url in self.inventory and "stars" in self.inventory[norm_url]:
            cached = self.inventory[norm_url]
            item.update(cached)
            if is_special:
                item["is_special"] = True
            if cached.get("hierarchy"):
                if project_id not in project_registry:
                    project_registry[project_id] = item
                else:
                    existing = project_registry[project_id]
                    if item.get("is_special"):
                        existing["is_special"] = True
                    if "github.com" not in norm_url or item.get("stars", 0) > existing.get("stars", 0):
                        # The newcomer replaces the registered record and remembers it as an alias.
                        item.setdefault("aliases", []).append(existing["url"])
                        if existing.get("is_special"):
                            item["is_special"] = True
                        project_registry[project_id] = item
                    else:
                        existing.setdefault("aliases", []).append(l["url"])
                # NOTE(review): assumed to sit inside the `hierarchy` check, so a cached
                # entry without a hierarchy falls through to re-evaluation.
                continue
        to_evaluate.append(item)

    if to_evaluate and not self.render_only:
        # Mandate 47: Zero-Redundancy & Smart Grounding
        # Fast-Track (Metadata/Desc present) vs Grounded-Track (Needs deep search)
        fast_track = []
        grounded_track = []

        for l in to_evaluate:
            nu = normalize_url(l["url"])

            # Fast-Track Eligibility:
            # 1. Has AI summary (previous run)
            # 2. Is GitHub and has stars (metadata present)
            # 3. Has decent manual description (> 40 chars)
            # 4. Is already in inventory (we have title/category context)
            has_ai_summary = l.get("ai_summary") is not None and len(l.get("ai_summary")) > 50
            has_stars = l.get("gh_stars") is not None
            has_desc = len(l.get("description", "")) > 40
            is_known = nu in self.inventory

            if has_ai_summary or has_stars or has_desc or is_known:
                fast_track.append(l)
            else:
                # Grounded-Track is ONLY for "Unknown" resources with zero context
                grounded_track.append(l)

        log_event(f"[*] Agent Phase 1: Analyst Evaluation ({len(to_evaluate)} resources)...")
        log_event(f" [>] Fast-Track: {len(fast_track)} | Grounded-Track: {len(grounded_track)}")

        analyst_results = []

        # 1.1 Fast-Track: Large Batches, NO GROUNDING (Fast)
        BATCH_SIZE_FAST = 50  # Balanced "Sweet Spot" for RPM/TPM and timeout safety (2026)
        total_fast = len(fast_track)
        for i in range(0, total_fast, BATCH_SIZE_FAST):
            batch = fast_track[i:i+BATCH_SIZE_FAST]
            batch_num = (i // BATCH_SIZE_FAST) + 1
            total_batches = (total_fast + BATCH_SIZE_FAST - 1) // BATCH_SIZE_FAST
            log_event(f" [>] Fast-Track: Processing Batch {batch_num}/{total_batches}...")

            # NOTE(review): the "Respond ONLY JSON" fragment is not an f-string, so its
            # doubled braces reach the model literally as "{{" / "}}"; confirm intent.
            prompt = (
                f"You are the Nubenetes Technical Analyst (2026).\n"
                f"{dynamic_mandates}\n"
                f"{self.library_criteria}\n"
                "PHASE 5: TECHNICAL SYNTHESIS (FAST-TRACK)\n"
                "- Use provided metadata, AI summaries, and descriptions to classify maturity.\n"
                "Respond ONLY JSON: {{\"results\": [{{ \"idx\": int, \"year\": \"YYYY\", \"stars\": 0-5, \"hierarchy\": [\"Area\", \"Topic\", ...], \"tags\": [\"...\"], \"summary\": \"Synthesis...\", \"language\": \"...\", \"type\": \"...\", \"complexity\": \"...\", \"is_microservice\": bool }}, ...]}}\n\n"
                "LINKS:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']}) | Stars: {l.get('gh_stars', l.get('stars'))} | Existing Summary: {l.get('ai_summary', l.get('description'))}" for idx, l in enumerate(batch)])
            )
            # Batch positions that already produced an evaluated result.
            evaluated_idx = set()
            try:
                data = await call_gemini_with_retry(prompt, prefer_flash=True, use_grounding=False, role="Analyst-Fast")
                for res in data.get("results", []):
                    idx = int(res["idx"])
                    # Reject negative indexes too: they would silently address the wrong item.
                    if 0 <= idx < len(batch):
                        item = batch[idx].copy()
                        eval_data = {
                            "year": str(res.get("year", "N/A")), "stars": min(max(int(res.get("stars", 0)), 0), 5),
                            "ai_summary": res.get("summary", item.get("ai_summary", "")),
                            "language": res.get("language", "English"),
                            "resource_type": res.get("type", "Reference"), "complexity": res.get("complexity", "Intermediate"),
                            "hierarchy": res.get("hierarchy", ["General"]), "tags": res.get("tags", []),
                            "is_microservice": bool(res.get("is_microservice", False)),
                            "status": "online", "is_special": item.get("is_special", False)
                        }
                        item.update(eval_data)
                        analyst_results.append(item)
                        evaluated_idx.add(idx)

                        # Mandate 22: Incremental Persistence to avoid data loss
                        norm_url = normalize_url(item["url"])
                        self.inventory[norm_url] = {k:v for k,v in item.items() if k not in ["url", "title", "original_file", "is_special", "aliases"]}
                        self.inventory[norm_url]["title"] = item["title"]

            except Exception as e:
                log_event(f" [!] Fast-Track batch {batch_num} failed; keeping unevaluated links as-is: {e}")
                # Fall back to the raw links, but only for positions not evaluated yet.
                # Re-adding evaluated ones would duplicate them and overwrite their
                # inventory entry with unscored data during finalization.
                analyst_results.extend(l for pos, l in enumerate(batch) if pos not in evaluated_idx)

            # Mandate 22: Save every 20 batches to disk
            if batch_num % 20 == 0:
                log_event(f" [💾] Periodic Save: Persisting inventory at batch {batch_num}...")
                from src.inventory_manager import save_inventory
                save_inventory(self.inventory)

            await asyncio.sleep(2.0)  # Safety delay to respect TPM limits

        # 1.2 Grounded-Track: Small Batches, WITH GROUNDING (Slower but precise)
        BATCH_SIZE_GROUNDED = 15  # Increased from 5
        total_grounded = len(grounded_track)
        for i in range(0, total_grounded, BATCH_SIZE_GROUNDED):
            batch = grounded_track[i:i+BATCH_SIZE_GROUNDED]
            batch_num = (i // BATCH_SIZE_GROUNDED) + 1
            total_batches = (total_grounded + BATCH_SIZE_GROUNDED - 1) // BATCH_SIZE_GROUNDED
            log_event(f" [🌟] Grounded-Track: Processing Batch {batch_num}/{total_batches} (Grounding active)...")

            # MANDATE 25: Pre-enrich YouTube links with real metadata
            # (the batch items themselves are updated; enriched_batch holds the same dicts).
            enriched_batch = []
            for item in batch:
                url = item["url"]
                if "youtube.com" in url or "youtu.be" in url:
                    log_event(f" [YT] Pre-fetching metadata for: {url}")
                    meta = await fetch_youtube_metadata(url)
                    if meta:
                        item["description"] = f"TITLE: {meta['raw_title']}\nDESCRIPTION: {meta['raw_description']}"
                enriched_batch.append(item)

            prompt = (
                f"You are the Nubenetes Technical Analyst (2026).\n"
                f"{dynamic_mandates}\n"
                f"{self.library_criteria}\n"
                "PHASE 5: DOUBLE-EVIDENCE SYNTHESIS & RICH SUMMARY (GROUNDED)\n"
                "- Cross-reference provided title/desc with search grounding.\n"
                "Respond ONLY JSON: {{\"results\": [{{ \"idx\": int, \"year\": \"YYYY\", \"stars\": 0-5, \"hierarchy\": [\"Area\", \"Topic\", ...], \"tags\": [\"...\"], \"summary\": \"Synthesis...\", \"language\": \"...\", \"type\": \"...\", \"complexity\": \"...\", \"is_microservice\": bool }}, ...]}}\n\n"
                "LINKS:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']}) | Input Context: {l.get('description', 'N/A')}" for idx, l in enumerate(enriched_batch)])
            )
            evaluated_idx = set()
            try:
                data = await call_gemini_with_retry(prompt, prefer_flash=True, use_grounding=True, role="Analyst-Grounded")
                for res in data.get("results", []):
                    idx = int(res["idx"])
                    if 0 <= idx < len(batch):
                        item = batch[idx].copy()
                        eval_data = {
                            "year": str(res.get("year", "N/A")), "stars": min(max(int(res.get("stars", 0)), 0), 5),
                            "ai_summary": res.get("summary", ""), "language": res.get("language", "English"),
                            "resource_type": res.get("type", "Reference"), "complexity": res.get("complexity", "Intermediate"),
                            "hierarchy": res.get("hierarchy", ["General"]), "tags": res.get("tags", []),
                            "is_microservice": bool(res.get("is_microservice", False)),
                            "status": "online", "is_special": item.get("is_special", False)
                        }
                        item.update(eval_data)
                        analyst_results.append(item)
                        evaluated_idx.add(idx)
            except Exception as e:
                log_event(f" [!] Grounded-Track batch {batch_num} failed; keeping unevaluated links as-is: {e}")
                analyst_results.extend(l for pos, l in enumerate(batch) if pos not in evaluated_idx)
            await asyncio.sleep(4.0)  # Higher delay for Grounding tasks

        # --- AGENT PHASE 2: SELECTIVE AUDIT (MCP-Grounded) ---
        # Identify candidates for high-trust verification
        audit_candidates = [l for l in analyst_results if "[DE FACTO STANDARD]" in l.get("tags", []) or "[ENTERPRISE-STABLE]" in l.get("tags", [])]

        if audit_candidates:
            log_event(f"[*] Agent Phase 2: Auditor Verification ({len(audit_candidates)} high-impact candidates)...")
            # AUDIT BATCH: Very small for max grounding precision
            for i in range(0, len(audit_candidates), 5):
                # These are the same dict objects held in analyst_results, so the
                # in-place updates below are visible during finalization.
                batch = audit_candidates[i:i+5]
                audit_prompt = (
                    f"You are the Nubenetes Auditor (2026).\n"
                    f"{dynamic_mandates}\n"
                    "MISSION: Perform 'Double-Evidence' verification using your GOOGLE_SEARCH tool.\n"
                    "PROTOCOL:\n"
                    "1. SEARCH: Look for community reputation (Reddit, HN) and repo status (GitHub).\n"
                    "2. CONTRAST: Compare findings with the proposed Analyst summary.\n"
                    "3. REFINE: Correct any 'vaporware' or 'hype' claims. Ensure technical accuracy.\n"
                    "CRITERIA:\n"
                    "- [DE FACTO STANDARD]: Industry baseline, used by everyone.\n"
                    "- [ENTERPRISE-STABLE]: Proven, high-trust, supported.\n"
                    "Respond ONLY JSON: {{\"audits\": [{{ \"idx\": int, \"verified_tags\": [\"...\"], \"refined_summary\": \"Synthesized and verified technical summary...\", \"reputation_summary\": \"...\", \"reputation_penalty\": bool }}, ...]}}\n\n"
                    "RESOURCES TO AUDIT:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']}) - Proposed: {l.get('tags')}" for idx, l in enumerate(batch)])
                )
                try:
                    # AUDIT USES PRO MODEL (High Reasoning) + GROUNDING (Live Data)
                    audit_data = await call_gemini_with_retry(audit_prompt, prefer_flash=False, use_grounding=True, role="Auditor")
                    for aud in audit_data.get("audits", []):
                        idx = int(aud["idx"])
                        if 0 <= idx < len(batch):
                            # Update tags, summary and add reputation metadata (Mandate 32/33)
                            batch[idx]["tags"] = aud.get("verified_tags", batch[idx]["tags"])
                            if aud.get("refined_summary"):
                                batch[idx]["ai_summary"] = aud["refined_summary"]
                            batch[idx]["reputation_summary"] = aud.get("reputation_summary", "")
                            if aud.get("reputation_penalty"):
                                batch[idx]["stars"] = max(batch[idx].get("stars", 1) - 1, 1)
                                if "[DE FACTO STANDARD]" in batch[idx]["tags"]:
                                    batch[idx]["tags"].remove("[DE FACTO STANDARD]")
                except Exception as e:
                    # The audit is an optional refinement: keep the analyst verdicts on
                    # failure, but let cancellation/KeyboardInterrupt propagate.
                    log_event(f" [!] Auditor batch skipped: {e}")
                await asyncio.sleep(0.5)

        # Finalize Registry
        for item in analyst_results:
            norm_url = normalize_url(item["url"])
            p_id = norm_url
            if "github.com" in norm_url:
                m = re.search(r'github\.com/([^/]+/[^/]+)', norm_url)
                if m:
                    p_id = m.group(1).lower()

            # Persist to inventory
            self.inventory[norm_url] = {k:v for k,v in item.items() if k not in ["url", "title", "original_file", "is_special", "aliases"]}
            self.inventory[norm_url]["title"] = item["title"]

            if p_id not in project_registry or item.get("stars", 0) > project_registry[p_id].get("stars", 0):
                if p_id in project_registry and project_registry[p_id].get("is_special"):
                    item["is_special"] = True
                project_registry[p_id] = item

    return list(project_registry.values())
|
|
|
|
def _calculate_tags(self, item: Dict) -> List[str]:
|
|
"""
|
|
Mandate 40: Multi-Dimensional Tagging (1:N).
|
|
Merges AI-assigned tags with rule-based maturity signals to ensure high-fidelity classification.
|
|
Utilizes MCP-style grounding data (GitHub stars, resource types) to override generic defaults.
|
|
"""
|
|
# 0. Collect all possible tag sources
|
|
ai_tags = item.get("tags", [])
|
|
if isinstance(ai_tags, str): ai_tags = [ai_tags] # Resiliency
|
|
|
|
valid_set = {"[DE FACTO STANDARD]", "[ENTERPRISE-STABLE]", "[EMERGING]", "[GUIDE]", "[CASE STUDY]", "[COMMUNITY-TOOL]", "[LEGACY]"}
|
|
|
|
# Start with filtered AI tags
|
|
tags = set([t for t in ai_tags if t in valid_set])
|
|
|
|
# 1. GitHub Objective Reality (Mandate 43)
|
|
raw_gh = item.get("gh_stars", 0)
|
|
gh_stars = int(raw_gh) if str(raw_gh).isdigit() else 0
|
|
curator_stars = int(item.get("stars", 0))
|
|
|
|
if gh_stars > 15000 or curator_stars >= 5:
|
|
tags.add("[DE FACTO STANDARD]")
|
|
if "[COMMUNITY-TOOL]" in tags: tags.remove("[COMMUNITY-TOOL]")
|
|
elif gh_stars > 3000 or curator_stars >= 4:
|
|
tags.add("[ENTERPRISE-STABLE]")
|
|
if "[COMMUNITY-TOOL]" in tags: tags.remove("[COMMUNITY-TOOL]")
|
|
|
|
# 2. Type Mapping (AI based labels)
|
|
res_type = item.get("resource_type", "Reference").lower()
|
|
if any(x in res_type for x in ["guide", "tutorial", "hands-on", "learning", "course"]):
|
|
tags.add("[GUIDE]")
|
|
if any(x in res_type for x in ["case study", "report", "whitepaper", "success story", "usage"]):
|
|
tags.add("[CASE STUDY]")
|
|
|
|
# 3. Emerging / Legacy logic
|
|
ai_summary = item.get("ai_summary", "").lower()
|
|
complexity = item.get("complexity", "Intermediate")
|
|
if complexity == "Cutting Edge" or "emerging" in ai_summary or "experimental" in ai_summary or "alpha" in ai_summary:
|
|
tags.add("[EMERGING]")
|
|
if "legacy" in ai_summary or "deprecated" in ai_summary or "archived" in ai_summary or "v1-only" in ai_summary:
|
|
tags.add("[LEGACY]")
|
|
|
|
# 4. Fallback: Only use [COMMUNITY-TOOL] if no other maturity tag is present
|
|
maturity_tags = {"[DE FACTO STANDARD]", "[ENTERPRISE-STABLE]", "[EMERGING]", "[LEGACY]"}
|
|
if not (tags & maturity_tags):
|
|
tags.add("[COMMUNITY-TOOL]")
|
|
|
|
# Clean up: If we have high maturity, remove community-tool
|
|
if (tags & {"[DE FACTO STANDARD]", "[ENTERPRISE-STABLE]"}) and "[COMMUNITY-TOOL]" in tags:
|
|
tags.remove("[COMMUNITY-TOOL]")
|
|
|
|
return sorted(list(tags))
|
|
|
|
async def _rebuild_structure(self, library_inventory: List[Dict]):
    """Group evaluated resources into one nested heading tree per source page.

    For every item this recomputes its tags, mirrors tags and source page into
    the inventory entry (when one exists), appends a row to
    ``self.maturity_audit`` and files the item under its hierarchy path.

    Args:
        library_inventory: Evaluated resource dicts (``url`` is required).

    Returns:
        ``{original_file: {"dim": str, "title": str, "content": tree}}`` where
        each tree node is a dict holding a ``"__links__"`` list plus one child
        node per sub-heading. Link lists are sorted by stars, then year, both
        descending.
    """
    special_rules = {sa["file"]: sa for sa in self.special_assets_rules.get("special_assets", [])}
    v2_structure = {}

    # When a page stem is listed under several dimensions, the last listing wins.
    file_to_dim = {f + ".md": dim for dim, files in self.dimensions.items() for f in files}

    for item in library_inventory:
        # Calculate multi-tags
        item["tags"] = self._calculate_tags(item)

        # Mandate: Persist tags back to inventory for reporting & caching
        norm_url = normalize_url(item["url"])
        orig_file = item.get("original_file", "unknown.md")
        if norm_url in self.inventory:
            self.inventory[norm_url]["tags"] = item["tags"]
            # Track V2 locations for reporting (Mandate 22)
            v2_locs = self.inventory[norm_url].get("v2_locations", [])
            if orig_file not in v2_locs:
                v2_locs.append(orig_file)
            self.inventory[norm_url]["v2_locations"] = v2_locs

        dim = file_to_dim.get(orig_file, "Architectural Foundations")

        # Populate Maturity Audit for GitOps Reporting
        self.maturity_audit.append({
            "url": item["url"],
            "tag": ", ".join(item["tags"]),
            "stars": item.get("stars", 0),
            "dimension": dim,
            "v2_locations": True  # All candidates here are Elite
        })

        # Mandate: High density preservation (Keep almost everything)
        # Only the introduction page is curated: low-rated, non-microservice links are dropped.
        if orig_file == "introduction.md" and item.get("stars", 0) < 3 and not item.get("is_microservice"):
            continue

        if orig_file not in v2_structure:
            v2_structure[orig_file] = {
                "dim": dim,
                "title": orig_file.replace(".md", "").replace("-", " ").title(),
                "content": {"__links__": []}
            }

        # The hierarchy comes from model output or cache: tolerate None and a bare
        # string (which would otherwise be walked character by character).
        hierarchy = item.get("hierarchy") or []
        if isinstance(hierarchy, str):
            hierarchy = [hierarchy]
        # "__links__" is the reserved key of each node's link list and must never be
        # used as a heading; empty and non-string levels cannot be headings either.
        hierarchy = [h for h in hierarchy if isinstance(h, str) and h and h != "__links__"]

        # Skip redundant top-level labels
        if hierarchy and (hierarchy[0] == dim or hierarchy[0] == v2_structure[orig_file]["title"]):
            hierarchy = hierarchy[1:]

        current = v2_structure[orig_file]["content"]
        for h_name in hierarchy[:self.max_depth]:
            if h_name not in current:
                current[h_name] = {"__links__": []}
            current = current[h_name]
        current["__links__"].append(item)

    def sort_rec(node):
        # Best-rated first; ties are broken by the most recent (numeric) year.
        if "__links__" in node:
            node["__links__"].sort(key=lambda x: (-x.get("stars", 1), -(int(x["year"]) if str(x.get("year", "")).isdigit() else 0)))
        for k, v in node.items():
            if k != "__links__" and isinstance(v, dict):
                sort_rec(v)

    for f_name in v2_structure:
        sort_rec(v2_structure[f_name]["content"])

    return v2_structure
|
|
|
|
async def _generate_comparison_table(self, links: List[Dict]) -> str:
    """Render a collapsible Markdown comparison table for the given links.

    Args:
        links: Resource dicts; only those rated 3 stars or more are considered.

    Returns:
        ``""`` when fewer than five links qualify; otherwise a ``???`` abstract
        block with one table row for each of the first ten qualifying links.
    """
    standard_tools = [l for l in links if l.get("stars", 0) >= 3]
    if len(standard_tools) < 5:
        return ""

    def cell(value) -> str:
        # A raw pipe would end the table cell early, so escape it.
        return str(value).replace("|", "\\|")

    # Content of a "???" block must be indented by four spaces to belong to it.
    indent = "    "
    table = "\n??? abstract \"Architect's Technical Comparison Table\"\n"
    table += f"{indent}| Solution | Maturity | Primary Focus | Language | Stars |\n"
    table += f"{indent}| :--- | :--- | :--- | :--- | :--- |\n"
    for l in standard_tools[:10]:
        stars = "🌟" * l.get("stars", 0)
        # An empty or missing hierarchy falls back to "General" instead of raising IndexError.
        focus = l.get("topic") or (l.get("hierarchy") or ["General"])[-1]
        # Items carry their maturity labels in the "tags" list; a "tag" string is
        # still honoured when present.
        maturity = l.get("tag") or ", ".join(l.get("tags") or [])
        maturity = maturity.replace('[', '').replace(']', '')
        # Mandate 30: MD039 - Strip all whitespace (including non-breaking space) from link text
        clean_title = nuclear_strip(l['title'])
        table += f"{indent}| [{cell(clean_title)}]({l['url'].strip()}) | {cell(maturity)} | {cell(focus)} | {cell(l.get('language', 'English'))} | {stars} |\n"
    return table + "\n"
|
|
|
|
async def _render_single_link(self, l: Dict, is_intro: bool) -> str:
    """Render one link record as Markdown.

    On the introduction page, links rated four stars or more are rendered as
    a collapsible "gold" card (``??? note``). Every other link becomes a
    single bullet carrying its badges, star rating, maturity tags and, when
    available, an inline summary.

    Args:
        l: Link record. ``title`` and ``url`` are required; all other keys
            read here are optional.
        is_intro: True when rendering the introduction page.

    Returns:
        The Markdown fragment for this link, terminated by a newline.
    """
    def field(key, default):
        # A key stored as None or "" is treated like a missing key, so the
        # .lower()/.upper() calls below never run on None.
        return l.get(key) or default

    title = nuclear_strip(l['title'])
    url = l['url'].strip()
    raw_stars = l.get('stars', 0)
    # Fall back to the description when ai_summary is absent *or* empty.
    summary = l.get('ai_summary') or l.get('description') or ""

    if is_intro and raw_stars >= 4:
        # Body lines of a `???` block must be indented four spaces to stay inside it.
        pad = "    "
        # Single quotes inside the f-string keep this valid on Python < 3.12.
        img = f"{pad}![{title}]({l['social_preview_url']})\n" if l.get('social_preview_url') else ""
        return (
            f"??? note \"{title}\"\n"
            f"{img}"
            f"{pad}**[Access Resource]({url})** {'🌟' * raw_stars} | Level: {field('complexity', 'Beginner')}\n"
            "\n"
            f"{pad}{summary}\n\n"
        )

    year = l.get('year')
    year_prefix = f"**({year})** " if year and year != 'N/A' else ""
    gh_info = f" <span class='md-tag md-tag--info'>⭐ {l.get('gh_stars',0)}</span>" if l.get('gh_stars') else ""
    icon = " 🎥" if l.get("is_video") else ""

    lang = field("language", "English")
    lang_tag = f" <span class='md-tag md-tag--warning'>[{lang.upper()} CONTENT]</span>" if lang.lower() != "english" else ""
    comp = field("complexity", "Intermediate")
    level_tag = f" <span class='md-tag md-tag--critical'>[{comp.upper()} LEVEL]</span>" if comp.lower() in ("architect", "advanced") else ""
    res_type = field("resource_type", "Reference")
    type_tag = f" <span class='md-tag md-tag--primary'>[{res_type.upper()}]</span>" if res_type.lower() in ("case study", "guide", "documentation") else ""

    rich_parts = []
    if l.get("author"):
        rich_parts.append(f" <small>by **{l['author']}**</small>")
    if l.get("duration"):
        rich_parts.append(f" <span class='md-tag md-tag--info'>⏱️ {l['duration']}</span>")
    if l.get("reading_time"):
        rich_parts.append(f" <span class='md-tag md-tag--info'>📖 {l['reading_time']}</span>")
    rich = "".join(rich_parts)

    tag_html = ""
    for tag in l.get("tags", ["[COMMUNITY-TOOL]"]):
        if "STANDARD" in tag:
            color = "success"
        elif "EMERGING" in tag:
            color = "warning"
        elif "CASE STUDY" in tag or "GUIDE" in tag:
            color = "secondary"
        else:
            color = "info"
        tag_html += f" <span class='md-tag md-tag--{color}'>{tag}</span>"

    # Apply Visual Highlighting based on stars
    if raw_stars >= 5:
        link_content = f"=={title}=="
    elif raw_stars >= 4:
        link_content = f"**{title}**"
    else:
        link_content = title

    md = f" - {year_prefix}[{link_content}]({url}){icon}{gh_info}{lang_tag}{level_tag}{type_tag}{rich} {'🌟'*raw_stars}{tag_html}"

    # Layer 2: High-Density Technical Summary (Always Visible Inline),
    # appended to the same line after a separator.
    if summary:
        md += f" — {summary.strip()}\n"
    else:
        md += "\n"
    return md
|
|
|
|
|
|
async def _write_premium_files(self, data: Dict[str, Dict], mosaic_html: str, videos_html: str):
    """Write the V2 landing page and one Markdown page per entry in ``data``.

    ``index.md`` receives the coverage statistics computed from
    ``self.inventory``, the supplied mosaic HTML, the five most recent
    resources rated four stars or more ("Agentic Pulse"), a per-dimension
    page index and the static maturity / impact legends. Every entry of
    ``data`` is then rendered from its nested ``content`` tree and stored
    under ``V2_DIR``. Files are only rewritten when their text changed.

    Args:
        data: Maps an output file name to a dict with ``dim``, ``title`` and
            ``content`` (a nested dict whose ``"__links__"`` lists hold link
            records).
        mosaic_html: HTML fragment embedded, centred, in the landing page.
        videos_html: Accepted for interface compatibility; not used here.
    """
    # Admonition / details bodies must be indented four spaces to stay inside the block.
    pad = "    "

    def write_if_changed(path, text):
        """Store ``text`` at ``path`` as UTF-8 unless the file already holds exactly that text."""
        try:
            with open(path, "r", encoding="utf-8") as fh:
                if fh.read() == text:
                    return
        except (OSError, UnicodeDecodeError):
            pass  # Missing or unreadable file: fall through and (re)write it.
        with open(path, "w", encoding="utf-8") as fh:
            fh.write(text)

    # 1. Update Index with Pulse
    # Newest first, then most stars first. With reverse=True the stars must
    # NOT be negated, otherwise ties on the date put the lower rating first.
    trending_pool = sorted(
        (dict(meta, url=url) for url, meta in self.inventory.items()
         if isinstance(meta, dict) and meta.get("stars", 0) >= 4),
        key=lambda x: (x.get("pub_date") or "0000", x.get("stars", 0)),
        reverse=True,
    )
    pulse_lines = [
        f"- **({(l.get('pub_date') or 'N/A')[:10]})** [**=={nuclear_strip(l['title'])}==**]({l['url'].strip()}) {'🌟'*l.get('stars',3)}"
        for l in trending_pool[:5]
    ]
    pulse_md = "## The Agentic Pulse\n" + "\n".join(pulse_lines)

    # Calculate coverage for the index
    total_v1 = len(self.inventory)
    v2_links_all = [dict(meta, url=url) for url, meta in self.inventory.items() if isinstance(meta, dict) and meta.get("v2_locations")]
    total_v2 = len(v2_links_all)
    v2_efficiency = round((total_v2 / total_v1) * 100, 2) if total_v1 > 0 else 0
    enriched = sum(1 for l in v2_links_all if l.get('hierarchy') or l.get('ai_summary'))
    coverage_pct = round((enriched / total_v2) * 100, 2) if total_v2 > 0 else 0

    # GitHub Metadata Coverage for index
    gh_links = [l for l in v2_links_all if "github.com" in str(l.get('url', ''))]
    total_gh = len(gh_links)
    gh_meta = sum(1 for l in gh_links if l.get('gh_stars') is not None)
    gh_coverage = round((gh_meta / total_gh) * 100, 2) if total_gh > 0 else 0

    coverage_info = (
        "\n??? info \"Knowledge Architecture and AI Coverage Status\"\n"
        f"{pad}The Nubenetes Elite Portal operates on a dual-layer knowledge architecture:\n"
        f"{pad}1. **Elite Layer (AI-Enriched)**: Resources individually analyzed by our Agentic AI with high-density summaries and hierarchical indexing.\n"
        f"{pad}2. **Standard Layer (Mapped)**: Resources identified as candidates for Elite status but pending deep AI analysis.\n\n"
        f"{pad}**Current Inventory Coverage:**\n"
        f"{pad}- **V1 Base Inventory**: {total_v1} total resources analyzed.\n"
        f"{pad}- **V2 Elite Selection**: {total_v2} candidates identified ({v2_efficiency}% density ratio).\n"
        f"{pad}- **AI Enrichment Coverage**: {enriched} / {total_v2} ({coverage_pct}%)\n"
        f"{pad}- **GitHub Metadata Coverage**: {gh_meta} / {total_gh} ({gh_coverage}%) - *Critical for Maturity Tagging*\n"
        f"{pad}- **Status**: The system is incrementally processing pending resources to complete the knowledge graph.\n"
    )

    index_parts = [
        "# Nubenetes Elite Portal (V2) | Awesome Kubernetes & Cloud [![Awesome](https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg)](https://github.com/sindresorhus/awesome)\n\n",
        "<center markdown=\"1\">\n",
        "[![Kubernetes](images/kubernetes_logo.png){: style=\"width:20%\"}](https://kubernetes.io)\n",
        "</center>\n\n",
        "\"I do not believe you can do today's job with yesterday's methods and be in business tomorrow\" ([Horatio Nelson Jackson](https://en.wikipedia.org/wiki/Horatio_Nelson_Jackson))\n",
        "<center markdown=\"1\">\n\n",
        "[![Certified Kubernetes](images/certified_kubernetes_color-222x300.png)](https://www.cncf.io/certification/software-conformance) <br/>\n\n",
        "</center>\n\n",
        "!!! abstract \"The High-Density Vision\"\n",
        f"{pad}The V2 Edition is a curated, high-density version of the Nubenetes archive. Using **Agentic AI Orchestration**, ",
        "the system selects only the most relevant, stable, and impactful resources for the modern Cloud Native ecosystem (2026 and beyond).\n\n",
        f"{coverage_info}\n\n",
        f"<center markdown=\"1\">\n{mosaic_html}\n</center>\n\n",
        f"{pulse_md}\n\n",
        "## Strategic Dimensions\n",
        "- **[🎥 Agentic Video Hub (Architectural Summary)](./videos.md)**\n\n",
    ]

    # Group by dimension for index
    dim_groups = {}
    for f_name, info in data.items():
        dim_groups.setdefault(info["dim"], []).append(f_name)

    for dim in sorted(self.dimensions.keys()):
        if dim not in dim_groups:
            continue
        index_parts.append(f"### {dim}\n")
        for f in sorted(dim_groups[dim]):
            index_parts.append(f"- **[{data[f]['title']}](./{f})**\n")

    index_parts.extend([
        "\n---\n\n",
        "## The Maturity Taxonomy\n\n",
        "To ensure industrial-grade precision, every resource in V2 is classified using our proprietary 5-tier maturity system:\n\n",
        "| Tag | Description | Engineering Context |\n",
        "| :--- | :--- | :--- |\n",
        "| **`[DE FACTO STANDARD]`** | The industry baseline. | Tools like Kubernetes, Terraform, or Prometheus that define the current architecture. |\n",
        "| **`[ENTERPRISE-STABLE]`** | Battle-tested and reliable. | Proven solutions with strong community and commercial support. |\n",
        "| **`[EMERGING]`** | The cutting edge. | High-potential tools and patterns (e.g., AI Agents, MCP) shaping the future. |\n",
        "| **`[GUIDE]`** | Strategic knowledge. | High-quality tutorials, architectural deep-dives, and decision matrices. |\n",
        "| **`[LEGACY]`** | Historical context. | Established tools that are being replaced or are primarily for maintaining older stacks. |\n\n",
        "## Technical Impact (Relevance Score)\n\n",
        "The stars accompanying each resource represent its **Technical Impact** and **Architectural Relevance** for a 2026 Senior Architect:\n\n",
        "| Impact | Level | Meaning | Visual Code |\n",
        "| :---: | :--- | :--- | :--- |\n",
        "| 🌟🌟🌟🌟🌟 | **Platinum Standard** | Critical industry foundation. Essential knowledge for any Cloud Native architecture. | `==[Highlighted Link]==` |\n",
        "| 🌟🌟🌟🌟 | **Gold Standard** | Highly recommended. Proven value and significant community/enterprise momentum. | `**[Bold Link]**` |\n",
        "| 🌟🌟🌟 | **Silver Standard** | Solid technical reference. Useful for specific use cases or established patterns. | Standard Link |\n",
        "| 🌟🌟 | **Bronze Standard** | Interesting alternative or niche tool. Good to have in the toolkit for specific scenarios. | Standard Link |\n",
        "| 🌟 | **Reference Only** | Basic documentation or historical reference without major current impact. | Standard Link |\n",
    ])

    write_if_changed(os.path.join(V2_DIR, "index.md"), "".join(index_parts))

    async def render_node(node, depth, used_headers, is_intro=False):
        """Render a hierarchy node to Markdown, recursing into its child sections.

        ``depth`` is -1 for the page root. ``used_headers`` is the set of
        heading texts already emitted on the page and is updated in place.
        """
        out = []
        links = node.get("__links__", [])

        # Mandate: links sitting directly on the page root have no further
        # hierarchy (orphans); they are listed FIRST under their own heading.
        if depth == -1 and links:
            out.append("## Standard Reference\n\n")
            for l in links:
                out.append(await self._render_single_link(l, is_intro))
            out.append("\n")

        for name in sorted(key for key in node if key != "__links__"):
            subnode = node[name]
            clean_name = clean_toc_text(name)

            # Mandate 30: MD024 - Deduplicate headings to prevent Linter errors
            h_name = clean_name
            counter = 1
            while h_name in used_headers:
                h_name = f"{clean_name} ({counter})"
                counter += 1
            used_headers.add(h_name)

            # MD025: Ensure only one H1. The page title is the H1, so section
            # headings start at H2 (depth + 3) and are capped at H6.
            header_level = min(6, depth + 3)
            out.append(f"{'#' * header_level} {h_name}\n\n")

            if depth == 1 and "__links__" in subnode:
                out.append(await self._generate_comparison_table(subnode["__links__"]))

            out.append(await render_node(subnode, depth + 1, used_headers, is_intro))

        # Below the root, a section's own links follow its sub-sections.
        if depth >= 0:
            for l in links:
                out.append(await self._render_single_link(l, is_intro))
        return "".join(out)

    for f_name, info in data.items():
        is_intro = f_name == "introduction.md"
        used_headers = {info['title']}  # Mandate 30: MD024 - Pre-populate with H1 to avoid duplicates
        md = f"# {info['title']}\n\n!!! info \"Architectural Context\"\n{pad}Detailed reference for {info['title']} in the context of {info['dim']}.\n\n"

        if is_intro:
            md += (
                "## Vision 2026\n\n!!! quote \"The Evolution of Autonomy\"\n"
                f"{pad}From manual curation to agentic intelligence.\n\n"
                "### Ecosystem Map\n\n\n```mermaid\ngraph TD\n A[Foundations] --> B[AI & Intelligence]\n A --> C[Hardened Infra]\n B --> D[Agentic Curation]\n C --> E[Enterprise Stability]\n D --> F[Nubenetes Portal]\n E --> F\n```\n\n\n"
            )

        md += await render_node(info["content"], -1, used_headers, is_intro=is_intro)

        # Add Semantic "See Also" ONLY ONCE at the end of the page
        related = [f"[{data[f]['title']}](./{f})" for f in data if f != f_name and data[f]["dim"] == info["dim"]]
        if related:
            md += f"\n---\n💡 **Explore Related:** {' | '.join(related[:3])}\n\n"

        # Smart Write: Only update disk if content changed
        write_if_changed(os.path.join(V2_DIR, f_name), md)
|
|
|
|
async def _sync_enterprise_navigation(self, data: Dict[str, Dict]):
    """Regenerate the ``nav:`` section of ``v2-mkdocs.yml`` from the rendered pages.

    The navigation always starts with three fixed entries, followed by one
    group per dimension of ``self.dimensions`` that has at least one page in
    ``data``. Dimensions and the pages inside each one are sorted by name.
    Everything from the top-level ``nav:`` key to the end of the file is
    replaced; when the file has no ``nav:`` key the section is appended.

    The sync is best-effort: any failure is logged as a warning and the
    method returns normally.

    Args:
        data: Maps a page file name to a dict providing ``dim`` and ``title``.
    """
    import logging  # Function-scope import: keeps this block self-contained.

    config_path = "v2-mkdocs.yml"
    try:
        nav = [
            "nav:",
            " - \"🔙 Back to V1 (Exhaustive)\": https://nubenetes.com/",
            " - \"The 2026 Vision\": index.md",
            " - \"Agentic Video Hub\": videos.md",
        ]

        # Group files by dimension
        dim_groups = {}
        for f_name, info in data.items():
            dim_groups.setdefault(info["dim"], []).append(f_name)

        for dim in sorted(self.dimensions.keys()):
            if dim not in dim_groups:
                continue
            nav.append(f" - \"{dim}\":")
            for f in sorted(dim_groups[dim]):
                # Pages must be indented deeper than their dimension entry,
                # otherwise YAML reads them as siblings instead of children.
                nav.append(f"     - \"{data[f]['title']}\": {f}")
        nav_text = "\n".join(nav)

        # The nav contains non-ASCII characters, so never rely on the locale encoding.
        with open(config_path, "r", encoding="utf-8") as fh:
            content = fh.read()

        # Anchor to a line start so "nav:" inside a comment or a longer key is
        # not mistaken for the section. A callable replacement keeps any
        # backslashes in titles literal instead of treating them as escapes.
        updated, replaced = re.subn(
            r"^nav:.*", lambda _match: nav_text, content, flags=re.MULTILINE | re.DOTALL
        )
        if not replaced:
            updated = content.rstrip("\n") + "\n\n" + nav_text

        with open(config_path, "w", encoding="utf-8") as fh:
            fh.write(updated)
    except Exception as exc:  # Best-effort: a failed nav sync must not abort the run.
        logging.getLogger(__name__).warning("Could not sync navigation in %s: %s", config_path, exc)
|
|
|
|
import argparse
|
|
if __name__ == "__main__":
    # Script entry point: run the engine, then write three Markdown reports
    # (pr_description.md, v2_file_audit.md, v2_decision_matrix.md) built from
    # the engine's inventory and maturity audit.
    parser = argparse.ArgumentParser()
    parser.add_argument("--render-only", action="store_true")
    args = parser.parse_args()

    engine = V2VisionEngine(render_only=args.render_only)
    asyncio.run(engine.analyze_and_cluster())

    # --- PLATINUM GITOPS REPORTING (Multi-Comment) ---
    from src.gitops_manager import RepositoryController
    from src.config import TARGET_REPO
    from src.gemini_utils import SESSION_TRACKER

    # 1. High-Density Metrics Calculation
    total_v1_links = len(engine.inventory)
    v2_links_all = [dict(meta, url=url) for url, meta in engine.inventory.items() if isinstance(meta, dict) and meta.get("v2_locations")]
    total_v2_links = len(v2_links_all)

    # Coverage Metrics (Mandate: Transparency in Knowledge Discovery)
    enriched_v2 = [l for l in v2_links_all if l.get('hierarchy') or l.get('ai_summary')]
    total_enriched = len(enriched_v2)
    coverage_pct = round((total_enriched / total_v2_links) * 100, 2) if total_v2_links > 0 else 0

    # GitHub Metadata Coverage
    gh_links = [l for l in v2_links_all if "github.com" in str(l.get('url', ''))]
    total_gh = len(gh_links)
    gh_with_metadata = len([l for l in gh_links if l.get('gh_stars') is not None])
    gh_coverage_pct = round((gh_with_metadata / total_gh) * 100, 2) if total_gh > 0 else 0

    # Delta & Efficiency
    density_ratio = round((total_v2_links / total_v1_links) * 100, 2) if total_v1_links > 0 else 0
    reduction_delta = total_v1_links - total_v2_links

    # Maturity Distribution (insertion order is kept for the pie chart below)
    maturity_counts = {}
    for l in v2_links_all:
        for tag in l.get('tags', ['[COMMUNITY-TOOL]']):
            maturity_counts[tag] = maturity_counts.get(tag, 0) + 1

    # 2. Document Architecture Audit
    v2_files = sorted(f for f in os.listdir(V2_DIR) if f.endswith(".md"))
    file_list_md = "| # | Document Name | Description |\n| :--- | :--- | :--- |\n"
    for i, f in enumerate(v2_files, 1):
        # Use the document's leading H1 as its description when there is one.
        title = "Elite Category"
        try:
            with open(os.path.join(V2_DIR, f), "r", encoding="utf-8") as doc:
                line = doc.readline()
        except (OSError, UnicodeDecodeError):
            line = ""  # Best-effort: keep the placeholder title for unreadable files.
        if line.startswith("# "):
            # Drop only the heading marker; replace() would also eat any
            # later "# " inside the title itself (e.g. "C# Tools").
            title = line[2:].strip()
        file_list_md += f"| {i} | `{f}` | {title} |\n"

    # 3. Decision Matrix (Maturity Audit)
    matrix_rows = []
    header_table = "| # | Status | Maturity | Stars | Dimension | Resource |\n| :--- | :--- | :--- | :---: | :--- | :--- |\n"
    for idx, entry in enumerate(engine.maturity_audit, 1):
        status = "💎 ELITE" if entry.get('v2_locations') else "📦 ARCHIVE"
        row = f"| {idx} | {status} | {entry.get('tag', 'N/A')} | {'🌟'*entry.get('stars',0)} | {entry.get('dimension', 'N/A')} | {entry.get('url', 'N/A')} |\n"
        matrix_rows.append(row)

    # 4. Generate PR Body (Main Report)
    # The reports contain emoji, so the encoding is fixed to UTF-8 rather than
    # left to the platform default.
    with open("pr_description.md", "w", encoding="utf-8") as f:
        f.write("## 🏆 V2 Elite: Agentic Optimization Sync (2026)\n\n")
        f.write("The V2 Portal has been synchronized with the latest V1 changes. This update enforces the **Minimum Viable Quality (MVQ)** and O'Reilly-style architectural standards.\n\n")

        f.write("### 📊 High-Density Efficiency\n")
        f.write("| Metric | V1 Archive | V2 Elite | Delta / Efficiency |\n")
        f.write("| :--- | :---: | :---: | :---: |\n")
        f.write(f"| **Total Resources** | {total_v1_links} | {total_v2_links} | -{reduction_delta} ({density_ratio}% Density) |\n")
        f.write(f"| **AI Enrichment** | N/A | {total_enriched} / {total_v2_links} | {coverage_pct}% Coverage |\n")
        f.write(f"| **GitHub Metadata** | N/A | {gh_with_metadata} / {total_gh} | {gh_coverage_pct}% Coverage |\n")
        f.write("| **Maturity Tagging** | Manual | AI-Vetted | 100% Coverage |\n")
        f.write(f"| **Hierarchical Depth** | Flat | Recursive | Max Depth: {engine.max_depth} |\n\n")

        f.write("### 🏗️ Evidence of Elite Status\n")
        f.write("<details><summary>📊 Clic para ver Gráfico de Distribución</summary>\n\n")
        f.write("```mermaid\npie title V2 Maturity Distribution\n")
        for tag, count in maturity_counts.items():
            tag_name = tag.replace('[','').replace(']','')
            f.write(f" \"{tag_name}\" : {count}\n")
        f.write("```\n\n</details>\n\n")

        f.write(SESSION_TRACKER.get_intelligence_report())
        f.write("\n\n---\n**Detailed Architectural Audit and Decision Matrix follow in comments.**\n")

    # 5. Save Supplementary Reports for Workflow/GitOps
    with open("v2_file_audit.md", "w", encoding="utf-8") as f:
        f.write("### 📜 V2 Document Architecture\n")
        f.write(f"Exhaustive list of {len(v2_files)} generated elite documents.\n\n")
        f.write(file_list_md)

    with open("v2_decision_matrix.md", "w", encoding="utf-8") as f:
        f.write("### 📋 Elite Decision Matrix\n")
        f.write("Detailed logs of maturity promotions and elite selections.\n\n")
        f.write(header_table)
        f.writelines(matrix_rows)
|