# awesome-kubernetes/src/v2_optimizer.py

import os
import re
import json
import asyncio
import yaml
import httpx
from datetime import datetime
from typing import List, Dict, Set, Any, Tuple
from src.config import GEMINI_API_KEYS, GH_TOKEN, TARGET_REPO, MADRID_TZ, INVENTORY_PATH
from src.gemini_utils import call_gemini_with_retry, normalize_url, clean_toc_text, get_github_activity, fetch_youtube_metadata
from src.logger import log_event

def nuclear_strip(text: str) -> str:
    """Mandate 30: MD039 - Return ``text`` without ``==`` markers or edge whitespace.

    Every ``==`` sequence is deleted first and the result is then stripped of
    leading/trailing whitespace, including the hidden unicode space characters
    listed in the character class below. Doing the removal before the strip
    matters: ``"== Title =="`` must come back as ``"Title"``, not ``" Title "``.

    Args:
        text: Raw link text; ``None`` or an empty string is accepted.

    Returns:
        The cleaned text, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""
    # All known whitespace characters (standard, non-breaking, thin, etc.)
    edge_ws = r'[\s\u00a0\u200b\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000]+'
    # Drop the markers BEFORE stripping so whitespace they were shielding is purged too.
    text = text.replace("==", "")
    text = re.sub('^' + edge_ws, '', text)
    return re.sub(edge_ws + '$', '', text)

# Directory tree of the original (V1) markdown pages; walked when gathering links.
V1_DIR = "docs"
# Output directory for the generated V2 pages; created on demand and pruned of orphaned .md files.
V2_DIR = "v2-docs"

class V2VisionEngine:
    def __init__(self, render_only: bool = False):
        """Load policy files, the taxonomy, the shared LLM prompt preamble and the inventory.

        Args:
            render_only: Stored on the instance; other methods of this class
                consult it to skip network health checks, GitHub metadata
                fetches and LLM evaluation.
        """
        self.render_only = render_only
        # Load Config & Policy
        self.special_assets_rules = self._load_special_assets()
        self.link_rules = self._load_link_rules()
        # Falls back to 10 when link_rules has no hierarchy_rules.max_depth entry.
        self.max_depth = self.link_rules.get("hierarchy_rules", {}).get("max_depth", 10)

        # 100% Comprehensive 2026 Taxonomy
        # Maps a dimension title to the V1 page slugs (file names without ".md") it groups.
        # NOTE(review): "aws-security", "cloudflare" and "finops" are listed under two
        # dimensions each; when this table is inverted into a file -> dimension dict
        # (as _rebuild_structure does) the dimension declared later wins - confirm intended.
        self.dimensions = {
            "AI and Artificial Intelligence": ["ai", "ai-agents-mcp", "chatgpt", "mlops"],
            "Architectural Foundations": ["introduction", "faq", "kubernetes", "linux", "git", "cloud-arch-diagrams", "matrix-table", "other-awesome-lists", "about"],
            "Platform & Site Reliability": ["sre", "devops", "developerportals", "scaffolding", "finops", "chaos-engineering", "performance-testing-with-jenkins-and-jmeter", "project-management-methodology", "project-management-tools", "qa", "test-automation-frameworks", "testops"],
            "Hardened Infrastructure": ["iac", "terraform", "pulumi", "crossplane", "ansible", "securityascode", "kubernetes-security", "aws-security", "oauth", "devsecops", "kustomize", "liquibase", "chef"],
            "Cloud Providers (Hyperscalers)": ["aws", "azure", "GoogleCloudPlatform", "ibm_cloud", "oraclecloud", "digitalocean", "cloudflare", "scaleway", "managed-kubernetes-in-public-cloud", "public-cloud-solutions", "private-cloud-solutions", "edge-computing", "aws-architecture", "aws-security", "aws-networking", "aws-databases", "aws-storage", "aws-monitoring", "aws-iac", "aws-tools-scripts", "aws-messaging", "aws-data", "aws-devops", "aws-serverless", "aws-containers", "aws-backup", "aws-training", "aws-newfeatures", "aws-miscellaneous", "aws-pricing", "aws-spain"],
            "Networking & Service Mesh": ["networking", "kubernetes-networking", "servicemesh", "istio", "caching", "web-servers", "cloudflare"],
            "The Container Stack": ["docker", "container-managers", "serverless", "kubernetes-autoscaling", "kubernetes-operators-controllers", "kubernetes-storage", "kubernetes-monitoring", "kubernetes-troubleshooting", "kubernetes-backup-migrations", "kubernetes-on-premise", "kubernetes-bigdata", "kubernetes-client-libraries", "kubernetes-releases", "kubernetes-based-devel", "kubernetes-alternatives", "kubectl-commands", "rancher", "openshift", "ocp3", "ocp4", "noops"],
            "Data & Advanced Analytics": ["databases", "nosql", "newsql", "message-queue", "crunchydata", "yaml", "bigdata"],
            "Engineering Pipeline": ["cicd", "gitops", "argo", "flux", "tekton", "jenkins", "jenkins-alternatives", "openshift-pipelines", "sonarqube", "registries", "keptn", "stackstorm", "cicd-kubernetes-plugins"],
            "Developer Ecosystem": ["visual-studio", "javascript", "golang", "python", "java_frameworks", "java_app_servers", "java-and-java-performance-optimization", "dotnet", "angular", "react", "web3", "api", "swagger-code-generator-for-rest-apis", "postman", "lowcode-nocode", "devel-sites", "dom", "linux-dev-env", "ChromeDevTools", "xamarin", "jvm-parameters-matrix-table", "maven-gradle", "embedded-servlet-containers"],
            "Career & Industry": ["recruitment", "hr", "finops", "freelancing", "remote-tech-jobs", "workfromhome", "interview-questions", "elearning", "digital-money", "appointment-scheduling", "newsfeeds"]
        }

        # Shared prompt preamble; interpolated into the analyst prompts built in
        # _evaluate_and_score_resources.
        self.library_criteria = (
            "You are a Senior Technical Architect in 2026. Your mission is to organize a high-density technical reference portal "
            "structured like a professional technical book (O'Reilly style).\n"
            "PHASE 1: TECHNICAL PRESERVATION & CURATION\n"
            "- KEEP >90% of technical resources (except for 'introduction.md' where only high-impact links are kept).\n"
            "PHASE 2: SOPHISTICATED HIERARCHICAL CLASSIFICATION\n"
            "- Identify TECHNICAL_HIERARCHY: A list of strings (max 10) representing Area > Topic > Subtopics.\n"
            "- For 'introduction.md', identify links related to MICROSERVICES for extraction.\n"
            "PHASE 3: KNOWLEDGE ASSIMILATION FLOW\n"
            "- Order hierarchy to facilitate a structured learning journey.\n"
            "PHASE 4: HIGH-DENSITY TECHNICAL SUMMARIES (Double-Evidence Synthesis)\n"
            "- Generate professional, neutral, and advanced technical summaries. Style: O'Reilly technical.\n"
            "- PROTOCOL: Contrast 'Curator Insight' (from source) with 'Live Grounding' (from search).\n"
            "- If discrepancies are found (e.g. project is archived but source says it's new), PRIORITIZE live engineering truth.\n"
            "- Summaries MUST be high-density: Include architectural value, key features, and technical significance.\n"
            "- Format: Use paragraphs and bullet points for complex tools. Aim for 2-5 sentences of depth.\n"
            "PHASE 5: ADVANCED MATURITY TAGGING\n"
            "- Assign 1 to 3 tags from: [DE FACTO STANDARD], [ENTERPRISE-STABLE], [EMERGING], [GUIDE], [CASE STUDY], [COMMUNITY-TOOL], [LEGACY].\n"
        )
        # URL-keyed cache of per-link records (status, title, evaluation fields, ...).
        self.inventory = self._load_inventory()
        # Initialised empty here; not written to anywhere in the visible part of this class.
        self.maturity_audit = []

    def _load_special_assets(self) -> Dict:
        """Load the special-assets policy from ``data/special_assets.yaml``.

        Loading is best-effort: a failure is logged and never raised.

        Returns:
            The parsed YAML mapping, or an empty dict when the file is missing,
            empty, unreadable, malformed, or its top level is not a mapping.
        """
        path = "data/special_assets.yaml"
        if not os.path.exists(path):
            return {}
        try:
            # The context manager closes the handle even when parsing fails.
            with open(path, "r", encoding="utf-8") as fh:
                data = yaml.safe_load(fh)
        except Exception as exc:
            log_event(f"  [!] Could not load {path}: {exc}")
            return {}
        # Callers use .get() on the result, so anything but a mapping is treated as "no rules".
        return data if isinstance(data, dict) else {}

    def _load_link_rules(self) -> Dict:
        """Load the link policy from ``data/link_rules.yaml``.

        Loading is best-effort: a failure is logged and never raised.

        Returns:
            The parsed YAML mapping, or an empty dict when the file is missing,
            empty, unreadable, malformed, or its top level is not a mapping.
        """
        path = "data/link_rules.yaml"
        if not os.path.exists(path):
            return {}
        try:
            # The context manager closes the handle even when parsing fails.
            with open(path, "r", encoding="utf-8") as fh:
                data = yaml.safe_load(fh)
        except Exception as exc:
            log_event(f"  [!] Could not load {path}: {exc}")
            return {}
        # Callers use .get() on the result, so anything but a mapping is treated as "no rules".
        return data if isinstance(data, dict) else {}

    def _load_inventory(self) -> Dict:
        """Return the inventory produced by ``src.inventory_manager.load_inventory``."""
        # Imported at call time rather than at module import
        # (presumably to avoid a circular import - TODO confirm).
        from src.inventory_manager import load_inventory
        return load_inventory()

    def _save_inventory(self):
        """Hand ``self.inventory`` to ``src.inventory_manager.save_inventory``."""
        # Imported at call time rather than at module import
        # (presumably to avoid a circular import - TODO confirm).
        from src.inventory_manager import save_inventory
        save_inventory(self.inventory)

    async def analyze_and_cluster(self):
        """Run the V2 generation pipeline end to end.

        Steps, in order: sanitize inventory titles, sync mandates (best-effort),
        gather V1 links, health-check them (in render-only mode the cached
        ``online`` status is trusted instead), evaluate and score, rebuild the
        hierarchy, write the portal files and navigation, prune ``.md`` files in
        ``V2_DIR`` that were not generated this run, persist the inventory and
        write the safety audit report (best-effort).
        """
        log_event("STARTING V2 HIGH-DENSITY O'REILLY LIBRARY GENERATION", section_break=True)

        # Mandate 30: MD039 - Global Data Sanitization (Purge all whitespace/hidden chars from titles)
        # Purge all known whitespace characters (standard, non-breaking, thin, etc.)
        edge_ws = r'[\s\u00a0\u200b\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000]+'
        for entry in self.inventory.values():
            # Non-dict entries and non-string titles are left untouched instead of crashing re.sub.
            if not isinstance(entry, dict) or not isinstance(entry.get("title"), str):
                continue
            cleaned = re.sub('^' + edge_ws, '', entry["title"])
            entry["title"] = re.sub(edge_ws + '$', '', cleaned)

        # 0. Mandate Sync (best-effort: a failure is reported but never aborts the run)
        try:
            from src.mandate_ingestor import MandateIngestor
            MandateIngestor().save_system_instructions()
        except Exception as e:
            log_event(f"  [!] Mandate sync skipped: {e}")

        all_v1_links, mosaic_html, videos_html = await self._gather_all_v1_content()

        log_event(f"[*] Discovery: Found {len(all_v1_links)} resources to process.")

        log_event("[*] Phase 1: Health Check...")
        if self.render_only:
            health_inventory = [l for l in all_v1_links if self.inventory.get(normalize_url(l["url"]), {}).get("status") == "online"]
        else:
            health_inventory = await self._verify_link_health(all_v1_links)

        log_event("[*] Phase 2: Evaluation & Deep Indexing (Semantic Dedup)...")
        library_inventory = await self._evaluate_and_score_resources(health_inventory)

        log_event("[*] Phase 3: Recursive Hierarchy Construction...")
        v2_data = await self._rebuild_structure(library_inventory)

        log_event("[*] Phase 4: Generating Premium Portal Hubs...")
        os.makedirs(V2_DIR, exist_ok=True)

        # --- SURGICAL GARBAGE COLLECTION ---
        # Track every file we generate (snapshot taken before the writers run)
        generated_files = {"index.md", "audit-log.md", "videos.md"}
        generated_files.update(v2_data.keys())

        await self._write_premium_files(v2_data, mosaic_html, videos_html)
        await self._sync_enterprise_navigation(v2_data)

        # Delete only orphaned files
        log_event("[*] Phase 5: Pruning Orphaned V2 Assets...")
        for f in os.listdir(V2_DIR):
            if f.endswith(".md") and f not in generated_files:
                log_event(f"  [DEL] Pruning obsolete V2 page: {f}")
                os.remove(os.path.join(V2_DIR, f))

        self._save_inventory()

        # --- FINAL SAFETY AUDIT ---
        try:
            from src.safety_guard import SafetyGuard
            guard = SafetyGuard()
            report = guard.generate_audit_report()
            # Explicit encoding so the report does not depend on the platform default.
            with open("v2_safety_report.md", "w", encoding="utf-8") as f:
                f.write(report)
        except Exception as e:
            log_event(f"  [!] V2 Safety Audit Error: {e}")

        log_event("V2 ELITE PORTAL GENERATED SUCCESSFULLY.")

    async def _gather_all_v1_content(self):
        """Collect every markdown list link under ``V1_DIR`` plus two fragments of the V1 index.

        Returns:
            A ``(all_links, mosaic_html, videos_html)`` tuple where

            * ``all_links`` is a list of dicts with the keys ``title``, ``url``,
              ``description`` and ``original_file`` (one per ``- [title](url)``
              list item found in any ``.md`` file other than ``index.md``);
            * ``mosaic_html`` is the first ``<center markdown="1">`` block of the
              index containing more than five ``[![`` image links, or ``""``;
            * ``videos_html`` is the body captured after the
              ``??? note "Top Videos & Clips`` marker of the index, or ``""``.
        """
        all_links, mosaic_html, videos_html = [], "", ""
        index_path = os.path.join(V1_DIR, "index.md")
        if os.path.exists(index_path):
            # Explicit UTF-8: the pages must read the same regardless of the platform locale.
            with open(index_path, "r", encoding="utf-8") as f:
                idx_content = f.read()
            mosaics = re.findall(r'<center markdown="1">\s*\n(.*?)\n\s*</center>', idx_content, re.DOTALL)
            for m in mosaics:
                if m.count("[![") > 5:
                    mosaic_html = m
                    break
            videos_match = re.search(r'\?\?\? note "Top Videos & Clips.*?\n(.*?\n)\s*</center>', idx_content, re.DOTALL)
            if videos_match:
                videos_html = videos_match.group(1)

        # Compiled once instead of once per file. Groups: link text, target, and the
        # rest of the item including continuation lines indented by two or more spaces.
        link_pattern = re.compile(r'^\s*-\s*\[([^\]]+)\]\(([^\)]+)\)(.*?(?:\n\s{2,}.*)*)', re.MULTILINE)

        for root, _, files in os.walk(V1_DIR):
            for file in files:
                if not file.endswith(".md") or file == "index.md":
                    continue
                path = os.path.join(root, file)
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
                for m in link_pattern.finditer(content):
                    title, url, full_desc = m.groups()
                    # Strip before the scheme test so "( https://...)" is not mistaken for a relative path.
                    url = url.strip()
                    if not url.startswith(("http", "mailto", "#")):
                        url = f"https://nubenetes.com/{url.replace('.md', '/')}"
                    # Mandate 30: MD039 - Strip all whitespace (including non-breaking space) from link text
                    all_links.append({"title": nuclear_strip(title), "url": url, "description": full_desc.strip(), "original_file": file})
        return all_links, mosaic_html, videos_html

    async def _verify_link_health(self, links: List[Dict]):
        force_full = os.getenv("FORCE_FULL_CHECK", "false").lower() == "true"
        fast_online = []
        needs_check = []

        for l in links:
            nu = normalize_url(l["url"])
            entry = self.inventory.get(nu, {})
            # Mandate 32: skip links under review
            if entry.get("status") == "review_required": continue

            if not force_full and entry.get("status") == "online":
                fast_online.append(l)
            else:
                needs_check.append(l)

        if not needs_check: return fast_online

        log_event(f"    [>] Fast-Track Health: {len(fast_online)} | Network-Check: {len(needs_check)}")

        online_links = list(fast_online)
        total_needs = len(needs_check)
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True, verify=False) as client:
            for i in range(0, total_needs, 50):
                batch = needs_check[i:i+50]
                tasks = [self._check_single_link_resilient(client, l) for l in batch]
                results = await asyncio.gather(*tasks)
                online_links.extend([r for r in results if r is not None])
                if i % 100 == 0:
                    log_event(f"    [>] Progress: [{i}/{total_needs}] links validated over network...")
                await asyncio.sleep(0.1)
        return online_links

    async def _check_single_link_resilient(self, client, link: Dict):
        """Probe one link and return it when it is usable, else ``None``.

        Args:
            client: HTTP client exposing an awaitable ``get(url, timeout=...)``
                whose response has ``status_code`` and ``url``.
            link: Link dict; its ``url`` is rewritten in place when the request
                ended on a different (redirected / slash-sanitized) address.

        Returns:
            ``link`` when the inventory already marks it ``online`` (and
            ``FORCE_FULL_CHECK`` is not ``true``) or when the request answers
            with a status below 400; ``None`` for links under review, error
            statuses and any request failure.
        """
        url = link["url"]
        norm_url = normalize_url(url)
        entry = self.inventory.get(norm_url, {})

        # Mandate 31: Skip links under review for V2 Elite
        if entry.get("status") == "review_required":
            log_event(f"  [-] SKIPPING V2: {url} is under Review.")
            return None

        if entry.get("status") == "online" and os.getenv("FORCE_FULL_CHECK", "false").lower() != "true":
            return link
        try:
            resp = await client.get(url, timeout=10.0)
            if resp.status_code < 400:
                final_url = str(resp.url)
                from src.gemini_utils import sanitize_trailing_slashes
                final_url = sanitize_trailing_slashes(final_url)

                # Update URL if it was redirected/normalized
                if final_url != url:
                    link["url"] = final_url

                record = self.inventory.setdefault(normalize_url(final_url), {})
                record["status"] = "online"
                # Mandate 22: Update last_checked for the inventory entry
                record["last_checked"] = datetime.now().timestamp()
                return link
        except Exception:
            # Any request/processing failure means "not reachable". Only Exception is
            # caught so that asyncio.CancelledError (a BaseException) still propagates
            # and the surrounding gather() can actually be cancelled.
            return None
        return None

    async def _evaluate_and_score_resources(self, links: List[Dict]):
        """Score, classify and de-duplicate ``links``; return one item per project.

        Flow:
            1. Unless ``render_only`` is set, fetch GitHub activity for GitHub
               links that lack ``gh_stars`` (or for all of them when enrichment
               is forced through ``ENRICH_METADATA`` / ``FORCE_EVAL`` /
               ``FORCE_FULL_CHECK``).
            2. Reuse cached evaluations (inventory entries having ``stars`` and a
               ``hierarchy``) and merge duplicates of the same project, recording
               the losing URLs under ``aliases``.
            3. Send everything else to the LLM: a large-batch fast track without
               grounding and a small-batch grounded track for links with no
               context at all. Skipped entirely in ``render_only`` mode, in which
               case uncached links are not part of the result.
            4. Re-verify items tagged ``[DE FACTO STANDARD]`` or
               ``[ENTERPRISE-STABLE]`` with a grounded auditor call.
            5. Persist every evaluated item to ``self.inventory`` and keep the
               highest-``stars`` item per project.

        A failed LLM batch is best-effort: the failure is logged and the items
        of that batch that were not evaluated yet are carried over unchanged.

        Returns:
            ``list`` of the item dicts kept in the project registry.
        """
        to_evaluate = []
        project_registry = {}
        force_eval = os.getenv("FORCE_EVAL", "false").lower() == "true"
        force_full_check = os.getenv("FORCE_FULL_CHECK", "false").lower() == "true"
        # Bypassing GitHub UI limitation: If force_eval or force_full_check is ON, we must enrich metadata
        enrich_metadata = os.getenv("ENRICH_METADATA", "false").lower() == "true" or force_eval or force_full_check
        special_files = [sa["file"] for sa in self.special_assets_rules.get("special_assets", [])]

        # Mandate 47: Zero-Redundancy & Smart Grounding
        from src.mandate_ingestor import get_system_mandates
        dynamic_mandates = get_system_mandates()

        # Mandate 15: Proactive Enrichment for V2 (GitHub metadata is critical for tags)
        # To avoid duplicate logs and redundant API calls, we deduplicate unique GitHub repos first
        processed_gh_metadata = set()
        gh_fetch_count = 0
        for l in links:
            norm_url = normalize_url(l["url"])
            if "github.com" not in norm_url or self.render_only: continue

            cached = self.inventory.get(norm_url, {})
            # Mandate 43: Always ensure GH metadata for GitHub links in V2 to power [DE FACTO STANDARD] logic
            if (enrich_metadata or not cached.get("gh_stars")) and norm_url not in processed_gh_metadata:
                log_event(f"  [METADATA] V2 Pulse: Fetching GH Activity for {norm_url}")
                processed_gh_metadata.add(norm_url) # Add BEFORE await to block any (even theoretical) parallelism
                gh_data = await get_github_activity(norm_url)
                if gh_data:
                    if norm_url not in self.inventory: self.inventory[norm_url] = {}
                    self.inventory[norm_url].update(gh_data)

                gh_fetch_count += 1
                if gh_fetch_count % 500 == 0:
                    log_event(f"    [💾] Periodic Save: Persisting inventory after {gh_fetch_count} metadata fetches...")
                    from src.inventory_manager import save_inventory
                    save_inventory(self.inventory)

        for l in links:
            item = l.copy()
            norm_url = normalize_url(l["url"])
            orig_file = l.get("original_file", "unknown.md")
            is_special = orig_file in special_files
            item["is_special"] = is_special
            # GitHub links are grouped per "owner/repo"; everything else per normalized URL.
            project_id = norm_url
            if "github.com" in norm_url:
                match = re.search(r'github\.com/([^/]+/[^/]+)', norm_url)
                if match: project_id = match.group(1).lower()

            # Reuse enriched metadata from inventory
            if "github.com" in norm_url:
                item.update(self.inventory.get(norm_url, {}))

            if not force_eval and norm_url in self.inventory and "stars" in self.inventory[norm_url]:
                cached = self.inventory[norm_url]
                item.update(cached)
                if is_special: item["is_special"] = True
                if cached.get("hierarchy"):
                    if project_id not in project_registry:
                        project_registry[project_id] = item
                    else:
                        existing = project_registry[project_id]
                        if item.get("is_special"): existing["is_special"] = True
                        if "github.com" not in norm_url or item.get("stars", 0) > existing.get("stars", 0):
                            item.setdefault("aliases", []).append(existing["url"])
                            if existing.get("is_special"): item["is_special"] = True
                            project_registry[project_id] = item
                        else:
                            existing.setdefault("aliases", []).append(l["url"])
                    # Only a cached entry WITH a hierarchy short-circuits re-evaluation.
                    continue
            to_evaluate.append(item)

        if to_evaluate and not self.render_only:
            # Mandate 47: Zero-Redundancy & Smart Grounding
            # Fast-Track (Metadata/Desc present) vs Grounded-Track (Needs deep search)
            fast_track = []
            grounded_track = []

            for l in to_evaluate:
                nu = normalize_url(l["url"])
                is_github = "github.com" in nu

                # Fast-Track Eligibility:
                # 1. Has AI summary (previous run)
                # 2. Is GitHub and has stars (metadata present)
                # 3. Has decent manual description (> 40 chars)
                # 4. Is already in inventory (we have title/category context)
                has_ai_summary = l.get("ai_summary") is not None and len(l.get("ai_summary")) > 50
                has_stars = l.get("gh_stars") is not None
                has_desc = len(l.get("description", "")) > 40
                is_known = nu in self.inventory

                if has_ai_summary or has_stars or has_desc or is_known:
                    fast_track.append(l)
                else:
                    # Grounded-Track is ONLY for "Unknown" resources with zero context
                    grounded_track.append(l)

            log_event(f"[*] Agent Phase 1: Analyst Evaluation ({len(to_evaluate)} resources)...")
            log_event(f"    [>] Fast-Track: {len(fast_track)} | Grounded-Track: {len(grounded_track)}")

            analyst_results = []

            # 1.1 Fast-Track: Large Batches, NO GROUNDING (Fast)
            BATCH_SIZE_FAST = 50 # Balanced "Sweet Spot" for RPM/TPM and timeout safety (2026)
            total_fast = len(fast_track)
            for i in range(0, total_fast, BATCH_SIZE_FAST):
                batch = fast_track[i:i+BATCH_SIZE_FAST]
                batch_num = (i // BATCH_SIZE_FAST) + 1
                total_batches = (total_fast + BATCH_SIZE_FAST - 1) // BATCH_SIZE_FAST
                log_event(f"    [>] Fast-Track: Processing Batch {batch_num}/{total_batches}...")

                # NOTE(review): the "Respond ONLY JSON" literal is not an f-string, so its
                # doubled braces reach the model verbatim - confirm that is intended.
                prompt = (
                    f"You are the Nubenetes Technical Analyst (2026).\n"
                    f"{dynamic_mandates}\n"
                    f"{self.library_criteria}\n"
                    "PHASE 5: TECHNICAL SYNTHESIS (FAST-TRACK)\n"
                    "- Use provided metadata, AI summaries, and descriptions to classify maturity.\n"
                    "Respond ONLY JSON: {{\"results\": [{{ \"idx\": int, \"year\": \"YYYY\", \"stars\": 0-5, \"hierarchy\": [\"Area\", \"Topic\", ...], \"tags\": [\"...\"], \"summary\": \"Synthesis...\", \"language\": \"...\", \"type\": \"...\", \"complexity\": \"...\", \"is_microservice\": bool }}, ...]}}\n\n"
                    "LINKS:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']}) | Stars: {l.get('gh_stars', l.get('stars'))} | Existing Summary: {l.get('ai_summary', l.get('description'))}" for idx, l in enumerate(batch)])
                )
                # Positions of this batch that already produced an evaluated item.
                evaluated_idx = set()
                try:
                    data = await call_gemini_with_retry(prompt, prefer_flash=True, use_grounding=False, role="Analyst-Fast")
                    for res in data.get("results", []):
                        idx = int(res["idx"])
                        # Reject negative indices (they would silently address the batch
                        # from its end) and repeated answers for the same position.
                        if 0 <= idx < len(batch) and idx not in evaluated_idx:
                            item = batch[idx].copy()
                            eval_data = {
                                "year": str(res.get("year", "N/A")), "stars": min(max(int(res.get("stars", 0)), 0), 5),
                                "ai_summary": res.get("summary", item.get("ai_summary", "")),
                                "language": res.get("language", "English"),
                                "resource_type": res.get("type", "Reference"), "complexity": res.get("complexity", "Intermediate"),
                                "hierarchy": res.get("hierarchy", ["General"]), "tags": res.get("tags", []),
                                "is_microservice": bool(res.get("is_microservice", False)),
                                "status": "online", "is_special": item.get("is_special", False)
                            }
                            item.update(eval_data)
                            analyst_results.append(item)
                            evaluated_idx.add(idx)

                            # Mandate 22: Incremental Persistence to avoid data loss
                            norm_url = normalize_url(item["url"])
                            self.inventory[norm_url] = {k:v for k,v in item.items() if k not in ["url", "title", "original_file", "is_special", "aliases"]}
                            self.inventory[norm_url]["title"] = item["title"]

                except Exception as e:
                    log_event(f"    [!] Fast-Track batch {batch_num} failed: {e}")
                    # Carry over only the items that were NOT evaluated: re-adding an already
                    # evaluated item raw would overwrite its fresh inventory entry on finalize.
                    analyst_results.extend(raw for pos, raw in enumerate(batch) if pos not in evaluated_idx)

                # Mandate 22: Save every 20 batches to disk
                if batch_num % 20 == 0:
                    log_event(f"    [💾] Periodic Save: Persisting inventory at batch {batch_num}...")
                    from src.inventory_manager import save_inventory
                    save_inventory(self.inventory)

                await asyncio.sleep(2.0) # Safety delay to respect TPM limits

            # 1.2 Grounded-Track: Small Batches, WITH GROUNDING (Slower but precise)
            BATCH_SIZE_GROUNDED = 15 # Increased from 5
            total_grounded = len(grounded_track)
            for i in range(0, total_grounded, BATCH_SIZE_GROUNDED):
                batch = grounded_track[i:i+BATCH_SIZE_GROUNDED]
                batch_num = (i // BATCH_SIZE_GROUNDED) + 1
                total_batches = (total_grounded + BATCH_SIZE_GROUNDED - 1) // BATCH_SIZE_GROUNDED
                log_event(f"    [🌟] Grounded-Track: Processing Batch {batch_num}/{total_batches} (Grounding active)...")

                # MANDATE 25: Pre-enrich YouTube links with real metadata
                # (items are updated in place, so `batch` sees the new description too)
                enriched_batch = []
                for item in batch:
                    url = item["url"]
                    if "youtube.com" in url or "youtu.be" in url:
                        log_event(f"      [YT] Pre-fetching metadata for: {url}")
                        meta = await fetch_youtube_metadata(url)
                        if meta:
                            item["description"] = f"TITLE: {meta['raw_title']}\nDESCRIPTION: {meta['raw_description']}"
                    enriched_batch.append(item)

                prompt = (
                    f"You are the Nubenetes Technical Analyst (2026).\n"
                    f"{dynamic_mandates}\n"
                    f"{self.library_criteria}\n"
                    "PHASE 5: DOUBLE-EVIDENCE SYNTHESIS & RICH SUMMARY (GROUNDED)\n"
                    "- Cross-reference provided title/desc with search grounding.\n"
                    "Respond ONLY JSON: {{\"results\": [{{ \"idx\": int, \"year\": \"YYYY\", \"stars\": 0-5, \"hierarchy\": [\"Area\", \"Topic\", ...], \"tags\": [\"...\"], \"summary\": \"Synthesis...\", \"language\": \"...\", \"type\": \"...\", \"complexity\": \"...\", \"is_microservice\": bool }}, ...]}}\n\n"
                    "LINKS:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']}) | Input Context: {l.get('description', 'N/A')}" for idx, l in enumerate(enriched_batch)])
                )
                # Positions of this batch that already produced an evaluated item.
                evaluated_idx = set()
                try:
                    data = await call_gemini_with_retry(prompt, prefer_flash=True, use_grounding=True, role="Analyst-Grounded")
                    for res in data.get("results", []):
                        idx = int(res["idx"])
                        if 0 <= idx < len(batch) and idx not in evaluated_idx:
                            item = batch[idx].copy()
                            eval_data = {
                                "year": str(res.get("year", "N/A")), "stars": min(max(int(res.get("stars", 0)), 0), 5),
                                "ai_summary": res.get("summary", ""), "language": res.get("language", "English"),
                                "resource_type": res.get("type", "Reference"), "complexity": res.get("complexity", "Intermediate"),
                                "hierarchy": res.get("hierarchy", ["General"]), "tags": res.get("tags", []),
                                "is_microservice": bool(res.get("is_microservice", False)),
                                "status": "online", "is_special": item.get("is_special", False)
                            }
                            item.update(eval_data)
                            analyst_results.append(item)
                            evaluated_idx.add(idx)
                except Exception as e:
                    log_event(f"    [!] Grounded-Track batch {batch_num} failed: {e}")
                    # Same rule as the fast track: never re-add an already evaluated item raw.
                    analyst_results.extend(raw for pos, raw in enumerate(batch) if pos not in evaluated_idx)
                await asyncio.sleep(4.0) # Higher delay for Grounding tasks

            # --- AGENT PHASE 2: SELECTIVE AUDIT (MCP-Grounded) ---
            # Identify candidates for high-trust verification
            audit_candidates = [l for l in analyst_results if "[DE FACTO STANDARD]" in l.get("tags", []) or "[ENTERPRISE-STABLE]" in l.get("tags", [])]

            if audit_candidates:
                log_event(f"[*] Agent Phase 2: Auditor Verification ({len(audit_candidates)} high-impact candidates)...")
                # AUDIT BATCH: Very small for max grounding precision
                for i in range(0, len(audit_candidates), 5):
                    # The slice holds the same dict objects as analyst_results, so the
                    # in-place updates below are visible when the registry is finalized.
                    batch = audit_candidates[i:i+5]
                    audit_prompt = (
                        f"You are the Nubenetes Auditor (2026).\n"
                        f"{dynamic_mandates}\n"
                        "MISSION: Perform 'Double-Evidence' verification using your GOOGLE_SEARCH tool.\n"
                        "PROTOCOL:\n"
                        "1. SEARCH: Look for community reputation (Reddit, HN) and repo status (GitHub).\n"
                        "2. CONTRAST: Compare findings with the proposed Analyst summary.\n"
                        "3. REFINE: Correct any 'vaporware' or 'hype' claims. Ensure technical accuracy.\n"
                        "CRITERIA:\n"
                        "- [DE FACTO STANDARD]: Industry baseline, used by everyone.\n"
                        "- [ENTERPRISE-STABLE]: Proven, high-trust, supported.\n"
                        "Respond ONLY JSON: {{\"audits\": [{{ \"idx\": int, \"verified_tags\": [\"...\"], \"refined_summary\": \"Synthesized and verified technical summary...\", \"reputation_summary\": \"...\", \"reputation_penalty\": bool }}, ...]}}\n\n"
                        "RESOURCES TO AUDIT:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']}) - Proposed: {l.get('tags')}" for idx, l in enumerate(batch)])
                    )
                    try:
                        # AUDIT USES PRO MODEL (High Reasoning) + GROUNDING (Live Data)
                        audit_data = await call_gemini_with_retry(audit_prompt, prefer_flash=False, use_grounding=True, role="Auditor")
                        for aud in audit_data.get("audits", []):
                            idx = int(aud["idx"])
                            if 0 <= idx < len(batch):
                                # Update tags, summary and add reputation metadata (Mandate 32/33)
                                batch[idx]["tags"] = aud.get("verified_tags", batch[idx]["tags"])
                                if aud.get("refined_summary"): batch[idx]["ai_summary"] = aud["refined_summary"]
                                batch[idx]["reputation_summary"] = aud.get("reputation_summary", "")
                                if aud.get("reputation_penalty"):
                                    batch[idx]["stars"] = max(batch[idx].get("stars", 1) - 1, 1)
                                    if "[DE FACTO STANDARD]" in batch[idx]["tags"]: batch[idx]["tags"].remove("[DE FACTO STANDARD]")
                    except Exception as e:
                        # Best-effort: the analyst verdict stands when the audit fails. Only
                        # Exception is caught so task cancellation still propagates.
                        log_event(f"    [!] Auditor batch failed: {e}")
                    await asyncio.sleep(0.5)

            # Finalize Registry
            for item in analyst_results:
                norm_url = normalize_url(item["url"])
                p_id = norm_url
                if "github.com" in norm_url:
                    m = re.search(r'github\.com/([^/]+/[^/]+)', norm_url)
                    if m: p_id = m.group(1).lower()

                # Persist to inventory
                self.inventory[norm_url] = {k:v for k,v in item.items() if k not in ["url", "title", "original_file", "is_special", "aliases"]}
                self.inventory[norm_url]["title"] = item["title"]

                if p_id not in project_registry or item.get("stars", 0) > project_registry[p_id].get("stars", 0):
                    if p_id in project_registry and project_registry[p_id].get("is_special"): item["is_special"] = True
                    project_registry[p_id] = item

        return list(project_registry.values())

    def _calculate_tags(self, item: Dict) -> List[str]:
        """
        Mandate 40: Multi-Dimensional Tagging (1:N).
        Merges AI-assigned tags with rule-based maturity signals to ensure high-fidelity classification.
        Utilizes MCP-style grounding data (GitHub stars, resource types) to override generic defaults.

        Missing, ``None`` or malformed fields are tolerated: non-numeric star
        counts count as 0, and a ``None`` summary/resource type/tag list is
        treated as absent instead of raising.

        Args:
            item: Resource dict. Reads the optional keys ``tags``, ``gh_stars``,
                ``stars``, ``resource_type``, ``ai_summary`` and ``complexity``.

        Returns:
            Alphabetically sorted list of tags, always containing at least one
            of the maturity tags or ``[COMMUNITY-TOOL]``.
        """
        def to_int(value) -> int:
            # Accepts ints, floats and numeric strings; anything else counts as 0.
            try:
                return int(float(value))
            except (TypeError, ValueError, OverflowError):
                return 0

        valid_set = {"[DE FACTO STANDARD]", "[ENTERPRISE-STABLE]", "[EMERGING]", "[GUIDE]", "[CASE STUDY]", "[COMMUNITY-TOOL]", "[LEGACY]"}
        maturity_tags = {"[DE FACTO STANDARD]", "[ENTERPRISE-STABLE]", "[EMERGING]", "[LEGACY]"}
        high_maturity = {"[DE FACTO STANDARD]", "[ENTERPRISE-STABLE]"}

        # 0. Collect all possible tag sources
        ai_tags = item.get("tags") or []
        if isinstance(ai_tags, str):
            ai_tags = [ai_tags]  # Resiliency
        elif not isinstance(ai_tags, (list, tuple, set, frozenset)):
            ai_tags = []

        # Start with filtered AI tags (the isinstance test also skips unhashable entries)
        tags = {t for t in ai_tags if isinstance(t, str) and t in valid_set}

        # 1. GitHub Objective Reality (Mandate 43)
        gh_stars = to_int(item.get("gh_stars"))
        curator_stars = to_int(item.get("stars"))

        if gh_stars > 15000 or curator_stars >= 5:
            tags.add("[DE FACTO STANDARD]")
        elif gh_stars > 3000 or curator_stars >= 4:
            tags.add("[ENTERPRISE-STABLE]")

        # 2. Type Mapping (AI based labels)
        res_type = str(item.get("resource_type") or "Reference").lower()
        if any(x in res_type for x in ["guide", "tutorial", "hands-on", "learning", "course"]):
            tags.add("[GUIDE]")
        if any(x in res_type for x in ["case study", "report", "whitepaper", "success story", "usage"]):
            tags.add("[CASE STUDY]")

        # 3. Emerging / Legacy logic (plain substring matching on the summary)
        ai_summary = str(item.get("ai_summary") or "").lower()
        complexity = item.get("complexity", "Intermediate")
        if complexity == "Cutting Edge" or any(x in ai_summary for x in ["emerging", "experimental", "alpha"]):
            tags.add("[EMERGING]")
        if any(x in ai_summary for x in ["legacy", "deprecated", "archived", "v1-only"]):
            tags.add("[LEGACY]")

        # 4. Fallback: Only use [COMMUNITY-TOOL] if no other maturity tag is present
        if not (tags & maturity_tags):
            tags.add("[COMMUNITY-TOOL]")

        # Clean up: If we have high maturity, remove community-tool
        if tags & high_maturity:
            tags.discard("[COMMUNITY-TOOL]")

        return sorted(tags)

    async def _rebuild_structure(self, library_inventory: List[Dict]):
        special_rules = {sa["file"]: sa for sa in self.special_assets_rules.get("special_assets", [])}
        v2_structure = {}

        file_to_dim = {f + ".md": dim for dim, files in self.dimensions.items() for f in files}

        for item in library_inventory:
            # Calculate multi-tags
            item["tags"] = self._calculate_tags(item)

            # Mandate: Persist tags back to inventory for reporting & caching
            norm_url = normalize_url(item["url"])
            orig_file = item.get("original_file", "unknown.md")
            if norm_url in self.inventory:
                self.inventory[norm_url]["tags"] = item["tags"]
                # Track V2 locations for reporting (Mandate 22)
                v2_locs = self.inventory[norm_url].get("v2_locations", [])
                if orig_file not in v2_locs:
                    v2_locs.append(orig_file)
                    self.inventory[norm_url]["v2_locations"] = v2_locs

            dim = file_to_dim.get(orig_file, "Architectural Foundations")

            # Populate Maturity Audit for GitOps Reporting
            self.maturity_audit.append({
                "url": item["url"],
                "tag": ", ".join(item["tags"]),
                "stars": item.get("stars", 0),
                "dimension": dim,
                "v2_locations": True # All candidates here are Elite
            })

            # Mandate: High density preservation (Keep almost everything)
            is_special = item.get("is_special", False) or orig_file in special_rules
            if orig_file == "introduction.md" and item.get("stars", 0) < 3 and not item.get("is_microservice"): continue

            if orig_file not in v2_structure:
                v2_structure[orig_file] = {
                    "dim": dim,
                    "title": orig_file.replace(".md", "").replace("-", " ").title(),
                    "content": {"__links__": []}
                }

            hierarchy = item.get("hierarchy", [])
            # Skip redundant top-level labels
            if hierarchy and (hierarchy[0] == dim or hierarchy[0] == v2_structure[orig_file]["title"]): hierarchy = hierarchy[1:]

            current = v2_structure[orig_file]["content"]
            for h_name in hierarchy[:self.max_depth]:
                if h_name not in current: current[h_name] = {"__links__": []}
                current = current[h_name]
            current["__links__"].append(item)

        def sort_rec(node):
            if "__links__" in node: node["__links__"].sort(key=lambda x: (-x.get("stars", 1), -(int(x["year"]) if str(x.get("year", "")).isdigit() else 0)))
            for k, v in node.items():
                if k != "__links__" and isinstance(v, dict): sort_rec(v)

        for f_name in v2_structure:
            sort_rec(v2_structure[f_name]["content"])

        return v2_structure

    async def _generate_comparison_table(self, links: List[Dict]) -> str:
        standard_tools = [l for l in links if l.get("stars", 0) >= 3]
        if len(standard_tools) < 5: return ""
        table = "\n??? abstract \"Architect's Technical Comparison Table\"\n"
        table += "    | Solution | Maturity | Primary Focus | Language | Stars |\n"
        table += "    | :--- | :--- | :--- | :--- | :--- |\n"
        for l in standard_tools[:10]:
            stars = "🌟" * l.get("stars", 0)
            focus = l.get("topic", l.get("hierarchy", ["General"])[-1])
            # Mandate 30: MD039 - Strip all whitespace (including non-breaking space) from link text
            clean_title = nuclear_strip(l['title'])
            table += f"    | [{clean_title}]({l['url'].strip()}) | {l.get('tag','').replace('[','').replace(']','')} | {focus} | {l.get('language','English')} | {stars} |\n"
        return table + "\n"

    async def _render_single_link(self, l: Dict, is_intro: bool) -> str:
        """
        Render one link as Markdown.

        On the introduction page, links with 4+ stars become a collapsible
        ``??? note`` block. Every other link becomes a single list item with
        inline badges, star icons, tag chips and, when available, a summary.

        Args:
            l: Link dict. ``title`` and ``url`` are required; all other keys
                are optional and ``None`` values are treated as missing.
            is_intro: True when rendering the introduction page.

        Returns:
            The Markdown fragment, always terminated by a newline.
        """
        title = nuclear_strip(l['title'])
        url = l['url'].strip()
        stars = l.get('stars') or 0
        # Fall back to the description when the AI summary is missing *or* empty.
        summary = l.get('ai_summary') or l.get('description') or ''

        if is_intro and stars >= 4:
            img = f"    ![Preview]({l.get('social_preview_url')})\n" if l.get('social_preview_url') else ""
            level = l.get('complexity') or 'Beginner'
            star_icons = '🌟' * stars
            return f"??? note \"{title}\"\n{img}    **[Access Resource]({url})** {star_icons} | Level: {level}\n    \n    {summary}\n\n"

        year = l.get('year') or 'N/A'
        year_prefix = f"**({year})** " if year != 'N/A' else ""
        gh_info = f" <span class='md-tag md-tag--info'>⭐ {l.get('gh_stars',0)}</span>" if l.get('gh_stars') else ""
        icon = " 🎥" if l.get("is_video") else ""

        lang = l.get("language") or "English"
        lang_tag = f" <span class='md-tag md-tag--warning'>[{lang.upper()} CONTENT]</span>" if lang.lower() != "english" else ""

        comp = l.get("complexity") or "Intermediate"
        level_tag = f" <span class='md-tag md-tag--critical'>[{comp.upper()} LEVEL]</span>" if comp.lower() in ["architect", "advanced"] else ""

        res_type = l.get("resource_type") or "Reference"
        type_tag = f" <span class='md-tag md-tag--primary'>[{res_type.upper()}]</span>" if res_type.lower() in ["case study", "guide", "documentation"] else ""

        rich_parts = []
        if l.get("author"):
            rich_parts.append(f" <small>by **{l['author']}**</small>")
        if l.get("duration"):
            rich_parts.append(f" <span class='md-tag md-tag--info'>⏱️ {l['duration']}</span>")
        if l.get("reading_time"):
            rich_parts.append(f" <span class='md-tag md-tag--info'>📖 {l['reading_time']}</span>")
        rich = "".join(rich_parts)

        tags = l.get("tags", ["[COMMUNITY-TOOL]"])
        if isinstance(tags, str):
            tags = [tags]
        tag_html = ""
        for tag in tags or []:
            if "STANDARD" in tag:
                color = "success"
            elif "EMERGING" in tag:
                color = "warning"
            elif "CASE STUDY" in tag or "GUIDE" in tag:
                color = "secondary"
            else:
                color = "info"
            tag_html += f" <span class='md-tag md-tag--{color}'>{tag}</span>"

        # Apply Visual Highlighting based on stars
        if stars >= 5:
            link_content = f"=={title}=="
        elif stars >= 4:
            link_content = f"**{title}**"
        else:
            link_content = title

        star_icons = '🌟' * stars
        md = f"  - {year_prefix}[{link_content}]({url}){icon}{gh_info}{lang_tag}{level_tag}{type_tag}{rich} {star_icons}{tag_html}"

        # Layer 2: High-Density Technical Summary (Always Visible Inline)
        if summary:
            # Use a separator and append summary directly to the same line
            md += f" — {summary.strip()}\n"
        else:
            md += "\n"
        return md


    async def _write_premium_files(self, data: Dict[str, Dict], mosaic_html: str, videos_html: str):
        """
        Write ``index.md`` and one Markdown page per entry of ``data`` into ``V2_DIR``.

        The index contains a coverage summary computed from ``self.inventory``,
        the mosaic HTML, a "pulse" list of up to five 4+ star resources, links
        to every page grouped by dimension, and two static legend tables.
        Each page is rendered from its heading tree; a page file is rewritten
        only when the rendered text differs from what is already on disk.

        Args:
            data: Mapping of file name -> ``{"dim", "title", "content"}``,
                where ``content`` is a nested dict of headings whose nodes
                hold their links under ``"__links__"``.
            mosaic_html: HTML fragment embedded in the index.
            videos_html: Not used by this method.
        """
        # 1. Update Index with Pulse
        # Newest publication date first; on equal dates the higher-starred
        # resource wins. A missing or None date sorts last.
        trending_pool = sorted(
            [dict(meta, url=url) for url, meta in self.inventory.items() if isinstance(meta, dict) and meta.get("stars", 0) >= 4],
            key=lambda x: (str(x.get("pub_date") or "0000"), x.get("stars", 0)),
            reverse=True,
        )
        pulse_lines = [
            f"- **({str(entry.get('pub_date') or 'N/A')[:10]})** [**=={nuclear_strip(entry['title'])}==**]({entry['url'].strip()}) {'🌟'*entry.get('stars',3)}"
            for entry in trending_pool[:5]
        ]
        pulse_md = "## The Agentic Pulse\n" + "\n".join(pulse_lines)

        # Calculate coverage for the index
        total_v1 = len(self.inventory)
        v2_links_all = [dict(meta, url=url) for url, meta in self.inventory.items() if isinstance(meta, dict) and meta.get("v2_locations")]
        total_v2 = len(v2_links_all)
        v2_efficiency = round((total_v2 / total_v1) * 100, 2) if total_v1 > 0 else 0
        enriched = len([l for l in v2_links_all if l.get('hierarchy') or l.get('ai_summary')])
        coverage_pct = round((enriched / total_v2) * 100, 2) if total_v2 > 0 else 0

        # GitHub Metadata Coverage for index
        gh_links = [l for l in v2_links_all if "github.com" in str(l.get('url', ''))]
        total_gh = len(gh_links)
        gh_meta = len([l for l in gh_links if l.get('gh_stars') is not None])
        gh_coverage = round((gh_meta / total_gh) * 100, 2) if total_gh > 0 else 0

        coverage_info = (
            "\n??? info \"Knowledge Architecture and AI Coverage Status\"\n"
            "    The Nubenetes Elite Portal operates on a dual-layer knowledge architecture:\n"
            "    1. **Elite Layer (AI-Enriched)**: Resources individually analyzed by our Agentic AI with high-density summaries and hierarchical indexing.\n"
            "    2. **Standard Layer (Mapped)**: Resources identified as candidates for Elite status but pending deep AI analysis.\n\n"
            "    **Current Inventory Coverage:**\n"
            f"    - **V1 Base Inventory**: {total_v1} total resources analyzed.\n"
            f"    - **V2 Elite Selection**: {total_v2} candidates identified ({v2_efficiency}% density ratio).\n"
            f"    - **AI Enrichment Coverage**: {enriched} / {total_v2} ({coverage_pct}%)\n"
            f"    - **GitHub Metadata Coverage**: {gh_meta} / {total_gh} ({gh_coverage}%) - *Critical for Maturity Tagging*\n"
            "    - **Status**: The system is incrementally processing pending resources to complete the knowledge graph.\n"
        )

        index_md = (
            "# Nubenetes Elite Portal (V2) | Awesome Kubernetes & Cloud [![Awesome](https://cdn.jsdelivr.net/gh/sindresorhus/awesome@d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg)](https://github.com/sindresorhus/awesome)\n\n"
            "<center markdown=\"1\">\n"
            "[![Banner](images/kubernetes_logo.jpg)](https://kubernetes.io)\n"
            "</center>\n\n"
            "\"I do not believe you can do today's job with yesterday's methods and be in business tomorrow\" ([Horatio Nelson Jackson](https://en.wikipedia.org/wiki/Horatio_Nelson_Jackson))\n"
            "<center markdown=\"1\">\n\n"
            "[![container_with_cars](images/container_with_cars_v2.png)](https://www.cncf.io/certification/software-conformance) <br/>\n\n"
            "</center>\n\n"
            "!!! abstract \"The High-Density Vision\"\n"
            "    The V2 Edition is a curated, high-density version of the Nubenetes archive. Using **Agentic AI Orchestration**, "
            "the system selects only the most relevant, stable, and impactful resources for the modern Cloud Native ecosystem (2026 and beyond).\n\n"
            f"{coverage_info}\n\n"
            f"<center markdown=\"1\">\n{mosaic_html}\n</center>\n\n"
            f"{pulse_md}\n\n"
            "## Strategic Dimensions\n"
            "- **[🎥 Agentic Video Hub (Architectural Summary)](./videos.md)**\n\n"
        )

        # Group by dimension for index
        dim_groups = {}
        for f_name, info in data.items():
            dim_groups.setdefault(info["dim"], []).append(f_name)

        for dim in sorted(self.dimensions.keys()):
            if dim in dim_groups:
                index_md += f"### {dim}\n"
                for f in sorted(dim_groups[dim]):
                    index_md += f"- **[{data[f]['title']}](./{f})**\n"

        index_md += (
            "\n---\n\n"
            "## The Maturity Taxonomy\n\n"
            "To ensure industrial-grade precision, every resource in V2 is classified using our proprietary 5-tier maturity system:\n\n"
            "| Tag | Description | Engineering Context |\n"
            "| :--- | :--- | :--- |\n"
            "| **`[DE FACTO STANDARD]`** | The industry baseline. | Tools like Kubernetes, Terraform, or Prometheus that define the current architecture. |\n"
            "| **`[ENTERPRISE-STABLE]`** | Battle-tested and reliable. | Proven solutions with strong community and commercial support. |\n"
            "| **`[EMERGING]`** | The cutting edge. | High-potential tools and patterns (e.g., AI Agents, MCP) shaping the future. |\n"
            "| **`[GUIDE]`** | Strategic knowledge. | High-quality tutorials, architectural deep-dives, and decision matrices. |\n"
            "| **`[LEGACY]`** | Historical context. | Established tools that are being replaced or are primarily for maintaining older stacks. |\n\n"
            "## Technical Impact (Relevance Score)\n\n"
            "The stars accompanying each resource represent its **Technical Impact** and **Architectural Relevance** for a 2026 Senior Architect:\n\n"
            "| Impact | Level | Meaning | Visual Code |\n"
            "| :---: | :--- | :--- | :--- |\n"
            "| 🌟🌟🌟🌟🌟 | **Platinum Standard** | Critical industry foundation. Essential knowledge for any Cloud Native architecture. | `==[Highlighted Link]==` |\n"
            "| 🌟🌟🌟🌟 | **Gold Standard** | Highly recommended. Proven value and significant community/enterprise momentum. | `**[Bold Link]**` |\n"
            "| 🌟🌟🌟 | **Silver Standard** | Solid technical reference. Useful for specific use cases or established patterns. | Standard Link |\n"
            "| 🌟🌟 | **Bronze Standard** | Interesting alternative or niche tool. Good to have in the toolkit for specific scenarios. | Standard Link |\n"
            "| 🌟 | **Reference Only** | Basic documentation or historical reference without major current impact. | Standard Link |\n"
        )

        # The content is emoji-heavy, so the encoding is pinned instead of
        # relying on the platform default.
        with open(os.path.join(V2_DIR, "index.md"), "w", encoding="utf-8") as f:
            f.write(index_md)

        async def render_node(node, depth, used_headers, is_intro=False):
            """Render ``node`` and its sub-headings; ``depth`` is -1 for a page root."""
            md = ""
            # Mandate: Process links at this level FIRST if they have NO further hierarchy
            # This handles links that are candidates but haven't been deeply classified yet (orphans)
            if depth == -1 and node.get("__links__"):
                md += "## Standard Reference\n\n"
                for link in node["__links__"]:
                    md += await self._render_single_link(link, is_intro)
                md += "\n"

            for name in sorted(node):
                if name == "__links__":
                    continue
                subnode = node[name]
                clean_name = clean_toc_text(name)

                # Mandate 30: MD024 - Deduplicate headings to prevent Linter errors
                h_name = clean_name
                counter = 1
                while h_name in used_headers:
                    h_name = f"{clean_name} ({counter})"
                    counter += 1
                used_headers.add(h_name)

                # MD025: Ensure only one H1. Main title is H1, so internal headers start at H2 (depth + 3)
                header_level = min(6, depth + 3)
                md += f"{'#' * header_level} {h_name}\n\n"

                if depth == 1 and "__links__" in subnode:
                    md += await self._generate_comparison_table(subnode["__links__"])

                md += await render_node(subnode, depth + 1, used_headers, is_intro)

            # Below the root, a node's own links come after its sub-headings.
            if depth >= 0 and "__links__" in node:
                for link in node["__links__"]:
                    md += await self._render_single_link(link, is_intro)
            return md

        for f_name, info in data.items():
            is_intro_page = f_name == "introduction.md"
            used_headers = {info['title']} # Mandate 30: MD024 - Pre-populate with H1 to avoid duplicates
            md = f"# {info['title']}\n\n!!! info \"Architectural Context\"\n    Detailed reference for {info['title']} in the context of {info['dim']}.\n\n"

            if is_intro_page:
                md += "## Vision 2026\n\n!!! quote \"The Evolution of Autonomy\"\n    From manual curation to agentic intelligence.\n\n### Ecosystem Map\n\n\n```mermaid\ngraph TD\n    A[Foundations] --> B[AI & Intelligence]\n    A --> C[Hardened Infra]\n    B --> D[Agentic Curation]\n    C --> E[Enterprise Stability]\n    D --> F[Nubenetes Portal]\n    E --> F\n```\n\n\n"

            md += await render_node(info["content"], -1, used_headers, is_intro=is_intro_page)

            # Add Semantic "See Also" ONLY ONCE at the end of the page
            related = [f"[{data[f]['title']}](./{f})" for f in data if f != f_name and data[f]["dim"] == info["dim"]]
            if related:
                md += f"\n---\n💡 **Explore Related:** {' | '.join(related[:3])}\n\n"

            # Smart Write: Only update disk if content changed
            target_path = os.path.join(V2_DIR, f_name)
            existing_content = ""
            if os.path.exists(target_path):
                with open(target_path, "r", encoding="utf-8") as f:
                    existing_content = f.read()

            if md != existing_content:
                with open(target_path, "w", encoding="utf-8") as f:
                    f.write(md)

    async def _sync_enterprise_navigation(self, data: Dict[str, Dict]):
        try:
            with open("v2-mkdocs.yml", "r") as f: content = f.read()
            nav = [
                "nav:",
                "  - \"🔙 Back to V1 (Exhaustive)\": https://nubenetes.com/",
                "  - \"The 2026 Vision\": index.md",
                "  - \"Agentic Video Hub\": videos.md"
            ]

            # Group files by dimension
            dim_groups = {}
            for f_name, info in data.items():
                dim_groups.setdefault(info["dim"], []).append(f_name)

            for dim in sorted(self.dimensions.keys()):
                if dim in dim_groups:
                    dim_nav = [f"  - \"{dim}\":"]
                    for f in sorted(dim_groups[dim]):
                        dim_nav.append(f"    - \"{data[f]['title']}\": {f}")
                    nav.extend(dim_nav)

            updated = re.sub(r'nav:.*', "\n".join(nav), content, flags=re.DOTALL)
            with open("v2-mkdocs.yml", "w") as f: f.write(updated)
        except: pass

import argparse
if __name__ == "__main__":
    # CLI entry point: run the engine, then write the Markdown reports
    # (pr_description.md, v2_file_audit.md, v2_decision_matrix.md) into the
    # current working directory.
    parser = argparse.ArgumentParser()
    parser.add_argument("--render-only", action="store_true")
    args = parser.parse_args()

    engine = V2VisionEngine(render_only=args.render_only)
    asyncio.run(engine.analyze_and_cluster())

    # --- PLATINUM GITOPS REPORTING (Multi-Comment) ---
    from src.gitops_manager import RepositoryController
    from src.config import TARGET_REPO
    from src.gemini_utils import SESSION_TRACKER

    # 1. High-Density Metrics Calculation
    total_v1_links = len(engine.inventory)
    # A link counts as "V2" when its inventory record has a non-empty v2_locations.
    v2_links_all = [dict(meta, url=url) for url, meta in engine.inventory.items() if isinstance(meta, dict) and meta.get("v2_locations")]
    total_v2_links = len(v2_links_all)

    # Coverage Metrics (Mandate: Transparency in Knowledge Discovery)
    enriched_v2 = [l for l in v2_links_all if l.get('hierarchy') or l.get('ai_summary')]
    total_enriched = len(enriched_v2)
    coverage_pct = round((total_enriched / total_v2_links) * 100, 2) if total_v2_links > 0 else 0

    # GitHub Metadata Coverage
    gh_links = [l for l in v2_links_all if "github.com" in str(l.get('url', ''))]
    total_gh = len(gh_links)
    gh_with_metadata = len([l for l in gh_links if l.get('gh_stars') is not None])
    gh_coverage_pct = round((gh_with_metadata / total_gh) * 100, 2) if total_gh > 0 else 0

    # Delta & Efficiency
    density_ratio = round((total_v2_links / total_v1_links) * 100, 2) if total_v1_links > 0 else 0
    reduction_delta = total_v1_links - total_v2_links

    # Maturity Distribution (a link with several tags is counted once per tag)
    maturity_counts = {}
    for l in v2_links_all:
        tags = l.get('tags', ['[COMMUNITY-TOOL]'])
        for tag in tags:
            maturity_counts[tag] = maturity_counts.get(tag, 0) + 1

    # 2. Document Architecture Audit
    v2_files = sorted([f for f in os.listdir(V2_DIR) if f.endswith(".md")])
    file_list_md = "| # | Document Name | Description |\n| :--- | :--- | :--- |\n"
    for i, f in enumerate(v2_files, 1):
        # Quick extract title from the first line of the file
        title = "Elite Category"
        try:
            with open(os.path.join(V2_DIR, f), "r", encoding="utf-8") as doc:
                line = doc.readline()
        except (OSError, UnicodeDecodeError):
            # Unreadable document: keep the placeholder title.
            line = ""
        if line.startswith("# "):
            # Drop only the leading marker; a later "# " inside the title
            # (e.g. "C# Tips") must survive.
            title = line[2:].strip()
        file_list_md += f"| {i} | `{f}` | {title} |\n"

    # 3. Decision Matrix (Maturity Audit)
    matrix_rows = []
    header_table = "| # | Status | Maturity | Stars | Dimension | Resource |\n| :--- | :--- | :--- | :---: | :--- | :--- |\n"
    for idx, entry in enumerate(engine.maturity_audit, 1):
        status = "💎 ELITE" if entry.get('v2_locations') else "📦 ARCHIVE"
        row = f"| {idx} | {status} | {entry.get('tag', 'N/A')} | {'🌟'*entry.get('stars',0)} | {entry.get('dimension', 'N/A')} | {entry.get('url', 'N/A')} |\n"
        matrix_rows.append(row)

    # 4. Generate PR Body (Main Report)
    # Reports contain emoji, so the encoding is pinned rather than left to
    # the platform default.
    with open("pr_description.md", "w", encoding="utf-8") as f:
        f.write("## 🏆 V2 Elite: Agentic Optimization Sync (2026)\n\n")
        f.write("The V2 Portal has been synchronized with the latest V1 changes. This update enforces the **Minimum Viable Quality (MVQ)** and O'Reilly-style architectural standards.\n\n")

        f.write("### 📊 High-Density Efficiency\n")
        f.write("| Metric | V1 Archive | V2 Elite | Delta / Efficiency |\n")
        f.write("| :--- | :---: | :---: | :---: |\n")
        f.write(f"| **Total Resources** | {total_v1_links} | {total_v2_links} | -{reduction_delta} ({density_ratio}% Density) |\n")
        f.write(f"| **AI Enrichment** | N/A | {total_enriched} / {total_v2_links} | {coverage_pct}% Coverage |\n")
        f.write(f"| **GitHub Metadata** | N/A | {gh_with_metadata} / {total_gh} | {gh_coverage_pct}% Coverage |\n")
        f.write("| **Maturity Tagging** | Manual | AI-Vetted | 100% Coverage |\n")
        f.write(f"| **Hierarchical Depth** | Flat | Recursive | Max Depth: {engine.max_depth} |\n\n")

        f.write("### 🏗️ Evidence of Elite Status\n")
        f.write("<details><summary>📊 Clic para ver Gráfico de Distribución</summary>\n\n")
        f.write("```mermaid\npie title V2 Maturity Distribution\n")
        for tag, count in maturity_counts.items():
            tag_name = tag.replace('[','').replace(']','')
            f.write(f"    \"{tag_name}\" : {count}\n")
        f.write("```\n\n</details>\n\n")

        f.write(SESSION_TRACKER.get_intelligence_report())
        f.write("\n\n---\n**Detailed Architectural Audit and Decision Matrix follow in comments.**\n")

    # 5. Save Supplementary Reports for Workflow/GitOps
    with open("v2_file_audit.md", "w", encoding="utf-8") as f:
        f.write("### 📜 V2 Document Architecture\n")
        f.write(f"Exhaustive list of {len(v2_files)} generated elite documents.\n\n")
        f.write(file_list_md)

    with open("v2_decision_matrix.md", "w", encoding="utf-8") as f:
        f.write("### 📋 Elite Decision Matrix\n")
        f.write("Detailed logs of maturity promotions and elite selections.\n\n")
        f.write(header_table)
        f.writelines(matrix_rows)