# awesome-kubernetes/src/v2_optimizer.py

import asyncio
import json
import os
import re
from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple

import httpx
import yaml

from src.config import GEMINI_API_KEYS, GH_TOKEN, TARGET_REPO, MADRID_TZ
from src.gemini_utils import call_gemini_with_retry
from src.logger import log_event

# Root of the V1 Markdown sources; walked recursively when harvesting links.
V1_DIR = "docs"
# Output directory that receives the generated V2 Markdown pages.
V2_DIR = "v2-docs"
# JSON file that persists per-URL health status and AI evaluation results between runs.
V2_CACHE_PATH = "data/v2_cache.json"

class V2VisionEngine:
    def __init__(self):
        """Set up the V2 taxonomy, the shared AI prompt preamble and the URL cache."""
        # 100% Comprehensive 2026 Taxonomy
        # Maps each V2 dimension title to the V1 page slugs (file names without
        # the ".md" suffix) that are grouped under it.
        # NOTE(review): "aws-security" and "cloudflare" each appear under two
        # dimensions. `_rebuild_structure` inverts this mapping in iteration
        # order, so the later dimension wins for those slugs.
        self.dimensions = {
            "Intelligent Control Plane": ["ai", "ai-agents-mcp", "chatgpt", "mlops"],
            "Architectural Foundations": ["introduction", "faq", "kubernetes", "linux", "git", "cloud-arch-diagrams", "matrix-table", "other-awesome-lists", "about"],
            "Platform & Site Reliability": ["sre", "devops", "developerportals", "scaffolding", "finops", "chaos-engineering", "performance-testing-with-jenkins-and-jmeter", "project-management-methodology", "project-management-tools", "qa", "test-automation-frameworks", "testops"],
            "Hardened Infrastructure": ["iac", "terraform", "pulumi", "crossplane", "ansible", "securityascode", "kubernetes-security", "aws-security", "oauth", "devsecops", "kustomize", "liquibase", "chef"],
            "Cloud Providers (Hyperscalers)": ["aws", "azure", "GoogleCloudPlatform", "ibm_cloud", "oraclecloud", "digitalocean", "cloudflare", "scaleway", "managed-kubernetes-in-public-cloud", "public-cloud-solutions", "private-cloud-solutions", "edge-computing", "aws-architecture", "aws-security", "aws-networking", "aws-databases", "aws-storage", "aws-monitoring", "aws-iac", "aws-tools-scripts", "aws-messaging", "aws-data", "aws-devops", "aws-serverless", "aws-containers", "aws-backup", "aws-training", "aws-newfeatures", "aws-miscellaneous", "aws-pricing", "aws-spain"],
            "Networking & Service Mesh": ["networking", "kubernetes-networking", "servicemesh", "istio", "caching", "web-servers", "cloudflare"],
            "The Container Stack": ["docker", "container-managers", "serverless", "kubernetes-autoscaling", "kubernetes-operators-controllers", "kubernetes-storage", "kubernetes-monitoring", "kubernetes-troubleshooting", "kubernetes-backup-migrations", "kubernetes-on-premise", "kubernetes-bigdata", "kubernetes-client-libraries", "kubernetes-releases", "kubernetes-based-devel", "kubernetes-alternatives", "kubectl-commands", "rancher", "openshift", "ocp3", "ocp4", "noops"],
            "Data & Advanced Analytics": ["databases", "nosql", "newsql", "message-queue", "crunchydata", "yaml", "bigdata"],
            "Engineering Pipeline": ["cicd", "gitops", "argo", "flux", "tekton", "jenkins", "jenkins-alternatives", "openshift-pipelines", "sonarqube", "registries", "keptn", "stackstorm", "cicd-kubernetes-plugins"],
            "Developer Ecosystem": ["visual-studio", "javascript", "golang", "python", "java_frameworks", "java_app_servers", "java-and-java-performance-optimization", "dotnet", "angular", "react", "web3", "api", "swagger-code-generator-for-rest-apis", "postman", "lowcode-nocode", "devel-sites", "dom", "linux-dev-env", "ChromeDevTools", "xamarin", "jvm-parameters-matrix-table", "maven-gradle", "embedded-servlet-containers"],
            "Career & Industry": ["recruitment", "hr", "freelancing", "remote-tech-jobs", "workfromhome", "interview-questions", "elearning", "digital-money", "appointment-scheduling", "newsfeeds"]
        }

        # Instruction preamble prepended to every batch evaluation prompt sent
        # to the AI model (see `_evaluate_and_score_resources`).
        self.library_criteria = (
            "You are a Technical Librarian in 2026. Your mission is to build a high-density, professional reference library.\n"
            "PHASE 1: TECHNICAL PRESERVATION (HIGH INCLUSIVITY)\n"
            "- KEEP >90% of technical resources.\n"
            "PHASE 2: SOPHISTICATED SYNTHESIS & DATING\n"
            "- Extract precise PUBLICATION YEAR: Look for dates in the URL, Twitter/X post dates, or text context. Return 'N/A' if truly unknown.\n"
            "- Assign QUALITY level (1-3 stars).\n"
            "- Assign a MATURITY TAG based on content type/status.\n"
            "PHASE 3: MANDATORY DESCRIPTIONS (V1 PRIORITY)\n"
            "- If 'Current Desc' is already provided and descriptive, DO NOT CHANGE IT.\n"
            "- If 'Current Desc' is empty, too short, or non-descriptive, generate a professional 1-2 sentence summary.\n"
            "- To generate the summary: Analyze the URL and title. If you recognize the technical resource, describe its core value proposition for a Cloud Architect in 2026.\n"
            "- Style: Technical, neutral, and informative. Language: English only.\n"
        )
        # URL-keyed cache of health status and AI evaluation data, loaded from disk.
        self.cache = self._load_cache()

    def _load_cache(self) -> Dict:
        """Load the persisted URL cache from ``V2_CACHE_PATH``.

        Returns:
            The decoded cache mapping, or an empty dict when the file is
            missing, unreadable, not valid JSON, or does not hold a JSON object.
        """
        try:
            with open(V2_CACHE_PATH, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            # First run: nothing cached yet.
            return {}
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError; a damaged cache is discarded rather than fatal.
            log_event(f"[!] Ignoring unreadable cache {V2_CACHE_PATH}: {exc}")
            return {}
        # The engine indexes the cache by URL, so anything other than a JSON
        # object would only fail later with a confusing error.
        return data if isinstance(data, dict) else {}

    def _save_cache(self):
        """Persist the in-memory cache to ``V2_CACHE_PATH`` as indented JSON.

        The payload is written to a sibling temporary file and then moved into
        place, so an interrupted run cannot leave a truncated cache file that
        the next load would have to discard.
        """
        cache_dir = os.path.dirname(V2_CACHE_PATH)
        if cache_dir:
            # os.makedirs("") raises, which would happen for a bare file name.
            os.makedirs(cache_dir, exist_ok=True)
        tmp_path = f"{V2_CACHE_PATH}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(self.cache, f, indent=2)
        os.replace(tmp_path, V2_CACHE_PATH)

    async def analyze_and_cluster(self):
        """Run the complete V2 generation pipeline.

        Steps, in order: harvest V1 links, check link health, evaluate and
        score the surviving links, cluster them into dimensions, write the
        Markdown pages and refresh the MkDocs navigation.

        The cache is persisted even when a phase raises, so health results and
        AI evaluations gathered before the failure are not recomputed on the
        next run. The exception itself still propagates to the caller.
        """
        log_event("STARTING V2 HIGH-DENSITY CHRONOLOGICAL LIBRARY GENERATION", section_break=True)
        try:
            all_v1_links, mosaic_html, videos_html = await self._gather_all_v1_content()
            log_event(f"[*] Discovery: Found {len(all_v1_links)} resources in V1 archive.")

            log_event("[*] Phase 1: Health Check & Metadata Enrichment...")
            # Rapid Async Health Check
            health_inventory = await self._verify_link_health(all_v1_links)
            log_event(f"[*] Health Check Complete. {len(health_inventory)}/{len(all_v1_links)} links are online.")

            log_event("[*] Phase 2: Library Evaluation, Year Extraction & Quality Scoring...")
            library_inventory = await self._evaluate_and_score_resources(health_inventory)
            log_event(f"[*] Inventory Refined: {len(library_inventory)} resources kept.")

            log_event("[*] Phase 3: Dimensional Clustering & Chronological Sorting...")
            v2_data = await self._rebuild_structure(library_inventory)

            log_event("[*] Phase 4: Generating Premium Portal Pages...")
            os.makedirs(V2_DIR, exist_ok=True)
            await self._write_premium_files(v2_data, mosaic_html, videos_html)
            await self._sync_enterprise_navigation(v2_data)
        finally:
            # Cache entries are independent per URL, so a partial cache is
            # still valid and worth keeping.
            self._save_cache()

        log_event("V2 LIBRARY GENERATION COMPLETED.", section_break=True)

    async def _gather_all_v1_content(self) -> Tuple[List[Dict], str, str]:
        """Harvest every list-item link from the V1 Markdown sources.

        Returns:
            A ``(links, mosaic_html, videos_html)`` tuple. ``links`` holds one
            dict per Markdown list link with the keys ``title``, ``url``,
            ``description`` and ``original_file`` (base name of the source
            file). ``mosaic_html`` is the first centered block of the V1 index
            page containing more than five image links, and ``videos_html`` is
            the body of its "Top Videos & Clips" block; both are empty strings
            when not found.
        """
        all_links = []
        mosaic_html = ""
        videos_html = ""

        index_path = os.path.join(V1_DIR, "index.md")
        if os.path.exists(index_path):
            with open(index_path, "r", encoding="utf-8") as f:
                idx_content = f.read()
            # Find the BIG mosaic (the one with many images)
            # Support both old <center> and new <div style="text-align: center;" markdown="1">
            mosaics = re.findall(r'<(?:div style="text-align: center;" markdown="1"|center markdown="1"|center)>\s*(.*?)\s*</(?:div|center)>', idx_content, re.DOTALL)
            mosaic_html = next((m for m in mosaics if m.count("[![") > 5), "")

            videos_match = re.search(r'\?\?\? note "Top Videos & Clips.*?\n(.*?\n)\s*</center>', idx_content, re.DOTALL)
            if videos_match:
                videos_html = videos_match.group(1)

        # A list item "- [title](url) trailing text", plus any continuation
        # lines indented by two or more whitespace characters.
        link_pattern = re.compile(r'^\s*-\s*\[([^\]]+)\]\(([^\)]+)\)(.*?(?:\n\s{2,}.*)*)', re.MULTILINE)

        for root, _, files in os.walk(V1_DIR):
            for file in files:
                if not file.endswith(".md") or file == "index.md":
                    continue
                with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                    content = f.read()
                for m in link_pattern.finditer(content):
                    title, url, full_desc = m.groups()
                    all_links.append({
                        "title": title,
                        "url": self._absolutize_v1_url(url),
                        "description": full_desc.strip(),
                        "original_file": file
                    })
        return all_links, mosaic_html, videos_html

    @staticmethod
    def _absolutize_v1_url(url: str) -> str:
        """Turn a relative V1 ``page.md`` link into its absolute nubenetes.com URL.

        Absolute, ``mailto:`` and pure-anchor links are returned unchanged, as
        are relative links that do not point at a Markdown page (for example
        ``images/...``). Only the trailing ``.md`` of the page part is
        rewritten, and an optional ``#anchor`` is carried over.
        """
        if url.startswith(("http://", "https://", "mailto:", "#")):
            return url
        page, sep, anchor = url.partition("#")
        if page.endswith(".md"):
            return f"https://nubenetes.com/{page[:-len('.md')]}/{sep}{anchor}"
        return url

    async def _verify_link_health(self, links: List[Dict]) -> List[Dict]:
        online_links = []
        BATCH_SIZE = 50  # Smaller batches for stability

        # User-Agent rotation to mimic real browsers
        user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0"
        ]

        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True, verify=False) as client:
            for i in range(0, len(links), BATCH_SIZE):
                batch = links[i:i+BATCH_SIZE]
                tasks = []
                for l in batch:
                    ua = user_agents[i % len(user_agents)]
                    tasks.append(self._check_single_link_resilient(client, l, ua))

                results = await asyncio.gather(*tasks)
                online_links.extend([r for r in results if r is not None])

                if i % 500 == 0:
                    log_event(f"    [Resilient Health] Verified {i}/{len(links)} links...")

                # Brief pause to avoid triggering Rate Limits
                await asyncio.sleep(0.1)

        return online_links

    async def _check_single_link_resilient(self, client, link: Dict, ua: str, attempts: int = 3) -> Dict:
        url = link["url"]

        # 1. Immediate Pass for Trusted / Logic-Enriched Domains
        if "github.com" in url or "awesome" in link["title"].lower():
            link["health_status"] = "trusted"
            return link

        # 2. Cached Health
        if url in self.cache and self.cache[url].get("status") == "online":
            link["health_status"] = "cached"
            return link

        # 3. Multi-Attempt Verification with Identity Rotation
        headers = {
            "User-Agent": ua,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Referer": "https://www.google.com/"
        }

        for attempt in range(attempts):
            try:
                # Use GET instead of HEAD as many sites block HEAD or return 405
                resp = await client.get(url, headers=headers, timeout=10.0)
                if resp.status_code < 400:
                    self.cache.setdefault(url, {})["status"] = "online"
                    link["health_status"] = "online"
                    return link

                # If 404, it's a definitive fail
                if resp.status_code == 404:
                    log_event(f"    [Health] Definitive 404: {url}")
                    return None

            except Exception as e:
                if attempt == attempts - 1:
                    # Final attempt failed - Soft Flagging instead of removal
                    # If it's not a 404, we keep it but with a warning
                    link["health_status"] = "uncertain"
                    link["warning"] = "offline"
                    return link

            # Backoff before retry
            await asyncio.sleep(0.5 * (attempt + 1))

        return link

    async def _evaluate_and_score_resources(self, links: List[Dict]) -> List[Dict]:
        refined = []
        to_evaluate = []
        force_eval = os.getenv("FORCE_EVAL", "false").lower() == "true"

        # We want to re-evaluate the tags and years, so we will bypass cache for tagging logic,
        # but use cache for AI stars if available to save cost.
        for l in links:
            url = l["url"]
            # To allow the new logic to apply to cached items, we re-process GitHub links
            # and re-apply the tag logic even if it's in the cache.
            item = l.copy()
            if not force_eval and url in self.cache and "stars" in self.cache[url]:
                item.update(self.cache[url])
                # If cache has a generated description and item is missing one, use it
                if "ai_summary" in self.cache[url] and not item["description"]:
                    item["description"] = self.cache[url]["ai_summary"]

            # Re-evaluate if description is still missing even after cache check
            if not item.get("description"):
                to_evaluate.append(item)
                continue

            # Re-apply GitHub metadata and mature tagging for cached items
            if "github.com" in url:
                gh_meta = await self._fetch_github_metadata(url)
                item.update(gh_meta)
                if "gh_updated" in gh_meta and gh_meta["gh_updated"]:
                    item["year"] = gh_meta["gh_updated"].split("-")[0]

            item["tag"] = self._calculate_tag(item)
            refined.append(item)

        if not to_evaluate: return refined

        BATCH_SIZE = 50
        for i in range(0, len(to_evaluate), BATCH_SIZE):
            batch = to_evaluate[i:i+BATCH_SIZE]
            batch_num = i//BATCH_SIZE + 1
            log_event(f"  [>] Processing Batch {batch_num} with AI (Mandatory Descriptions)...")

            prompt = (
                f"{self.library_criteria}\n"
                "Respond ONLY with a JSON object: {\"results\": [{\"idx\": int, \"year\": \"YYYY\", \"stars\": int, \"is_video\": bool, \"tag\": \"[TAG]\", \"summary\": \"1-2 sentences description\"}, ...]}\n\n"
                "LINKS:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']}) - Current Desc: {l['description'][:50]}" for idx, l in enumerate(batch)])
            )

            try:
                data = await call_gemini_with_retry(prompt)
                results = data.get("results", [])

                for res in results:
                    try:
                        idx = int(res["idx"])
                        if idx < len(batch):
                            item = batch[idx].copy()
                            eval_data = {
                                "year": str(res.get("year", "N/A")),
                                "stars": min(max(int(res.get("stars", 1)), 1), 3),
                                "is_video": res.get("is_video", False),
                                "tag": res.get("tag", "[ENTERPRISE-STABLE]"),
                                "ai_summary": res.get("summary", "")
                            }
                            item.update(eval_data)
                            if not item["description"] and item["ai_summary"]:
                                item["description"] = item["ai_summary"]

                            # GitHub overrides
                            if "github.com" in item["url"]:
                                gh_meta = await self._fetch_github_metadata(item["url"])
                                item.update(gh_meta)
                                if "gh_updated" in gh_meta and gh_meta["gh_updated"]:
                                    item["year"] = gh_meta["gh_updated"].split("-")[0]
                                    eval_data["year"] = item["year"]

                            item["tag"] = self._calculate_tag(item)
                            eval_data["tag"] = item["tag"]

                            # Save to cache
                            self.cache[item["url"]] = eval_data
                            refined.append(item)
                    except: continue
            except:
                for l in batch:
                    item = l.copy()
                    item["year"], item["stars"], item["is_video"] = "N/A", 1, "youtube" in l["url"]
                    item["tag"] = self._calculate_tag(item)
                    refined.append(item)
            await asyncio.sleep(0.3)
        return refined

    def _calculate_tag(self, item: Dict) -> str:
        """Return the maturity tag for a resource.

        GitHub resources that carry ``gh_stars`` are tagged from star count and
        year (an unknown year is treated as 2024). Anything else keeps its
        existing ``tag`` when that is one of the recognized tags, and otherwise
        gets a tag inferred from keywords in the title.

        Args:
            item: Resource dict; must contain ``url``. ``year``, ``tag`` and
                ``title`` may be missing, ``None`` or non-string (for example
                an integer year from an older cache file).
        """
        # Dynamic Tagging Strategy based on Maturity and Real Data
        if "github.com" in item["url"] and "gh_stars" in item:
            stars = item["gh_stars"] or 0
            year_text = str(item.get("year") or "").strip()
            # isdecimal() only accepts characters that int() can parse.
            year = int(year_text) if year_text.isdecimal() else 2024
            if stars > 10000: return "[DE FACTO STANDARD]"
            if stars > 500 and year >= 2024: return "[ENTERPRISE-STABLE]"
            if year >= 2025: return "[EMERGING / INNOVATION]"
            if year <= 2022: return "[LEGACY / MAINTENANCE]"
            return "[TOOLING]"

        # Fallback to AI's tag or defaults for articles
        tag = str(item.get("tag") or "").strip().upper()
        valid_tags = ("[DE FACTO STANDARD]", "[ENTERPRISE-STABLE]", "[EMERGING / INNOVATION]", "[LEGACY / MAINTENANCE]", "[ARCHITECTURE-GUIDE]", "[TOOLING]", "[CASE-STUDY]", "[CHEATSHEET]")
        if tag in valid_tags:
            return tag

        # Basic inference for articles
        title = str(item.get("title") or "").lower()
        if "awesome" in title: return "[FOUNDATIONAL]"
        if "guide" in title or "architecture" in title: return "[ARCHITECTURE-GUIDE]"
        if "how to" in title or "tutorial" in title: return "[CASE-STUDY]"
        return "[ENTERPRISE-STABLE]"

    async def _fetch_github_metadata(self, url: str) -> Dict:
        match = re.search(r'github\.com/([^/]+)/([^/]+)', url)
        if not match: return {}
        owner, repo = match.groups()
        repo = repo.split("#")[0].split("?")[0] # Clean up

        headers = {"Authorization": f"token {GH_TOKEN}"} if GH_TOKEN else {}
        api_url = f"https://api.github.com/repos/{owner}/{repo}"

        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(api_url, headers=headers)
                if resp.status_code == 200:
                    data = resp.json()
                    return {
                        "gh_stars": data.get("stargazers_count", 0),
                        "gh_updated": data.get("updated_at", "").split("T")[0]
                    }
        except: pass
        return {}

    async def _rebuild_structure(self, inventory: List[Dict]) -> Dict[str, Dict]:
        v2_structure = {dim: {"summary": "", "categories": {}} for dim in self.dimensions.keys()}
        file_to_dim = {}
        for dim, files in self.dimensions.items():
            for f in files: file_to_dim[f + ".md"] = dim

        for item in inventory:
            dim = file_to_dim.get(item["original_file"], "Architectural Foundations")
            cat_name = item["original_file"].replace(".md", "").capitalize()
            if cat_name not in v2_structure[dim]["categories"]:
                v2_structure[dim]["categories"][cat_name] = []
            v2_structure[dim]["categories"][cat_name].append(item)

        for dim in v2_structure.keys():
            if not v2_structure[dim]["categories"]: continue
            for cat in v2_structure[dim]["categories"]:
                # Sort by: 1. Stars (DESC), 2. Year (DESC, N/A at the end)
                v2_structure[dim]["categories"][cat].sort(
                    key=lambda x: (
                        -x.get("stars", 1),
                        -(int(x["year"]) if x.get("year", "").isdigit() else 0)
                    )
                )

            prompt = f"Write a professional 2026 executive summary for '{dim}'. Focus on high-density value. 1 sentence only."
            try:
                v2_structure[dim]["summary"] = await call_gemini_with_retry(prompt, response_format="text")
            except:
                v2_structure[dim]["summary"] = f"Impact-driven reference library for {dim}."

        return v2_structure

    async def _write_premium_files(self, data: Dict[str, Dict], mosaic_html: str, videos_html: str):
        """Render the V2 landing page and one Markdown page per dimension.

        Writes ``index.md`` and ``<slug>.md`` files into ``V2_DIR`` as UTF-8
        (the pages contain emoji, which a locale-dependent default encoding may
        not be able to encode).

        Args:
            data: Structure produced by `_rebuild_structure`; dimensions with
                no categories are skipped.
            mosaic_html: Image mosaic markup embedded in the landing page.
            videos_html: Video markup for the collapsible block; the block is
                omitted when this is blank.
        """
        def year_rank(entry: Dict) -> int:
            # Unknown or malformed years rank as 0 so they sort last.
            year_text = str(entry.get("year") or "")
            return int(year_text) if year_text.isdecimal() else 0

        os.makedirs(V2_DIR, exist_ok=True)

        published = [(dim, content) for dim, content in data.items() if content["categories"]]
        slugs = {
            dim: dim.lower().replace(" ", "-").replace("&", "and").replace("(", "").replace(")", "")
            for dim, _ in published
        }

        master_selection = [
            l
            for content in data.values()
            for cat_links in content["categories"].values()
            for l in cat_links
            if l.get("stars", 1) == 3
        ]
        # Sort master selection by Year (DESC), then Title (ASC)
        # (Relevance is already fixed at 3 stars for this list)
        master_selection.sort(key=lambda x: (-year_rank(x), x["title"]))

        index_parts = [
            "# Nubenetes V2 | The High-Density Library (2026)\n\n"
            "![Banner](images/kubernetes_logo.jpg)\n\n"
            "!!! quote \"The Library of 2026\"\n"
            "    A meticulously curated reference of over 15,000 resources. This V2 portal preserves technical depth while providing "
            "    impact-driven synthesis and expert quality classification.\n\n"
            f"<center markdown=\"1\">\n{mosaic_html}\n</center>\n\n"

            "## 🛡️ V2 Taxonomy & Maturity Tags\n"
            "To maximize technical clarity, V2 resources are classified by maturity rather than subjective quality:\n\n"
            "- <span class='md-tag md-tag--success'>[DE FACTO STANDARD]</span>: Foundational industry tools with massive adoption (>10k GitHub stars).\n"
            "- <span class='md-tag md-tag--info'>[ENTERPRISE-STABLE]</span>: Production-ready tools actively maintained.\n"
            "- <span class='md-tag md-tag--warning'>[EMERGING / INNOVATION]</span>: High-growth technologies released or heavily updated recently (≥2025).\n"
            "- <span class='md-tag md-tag--critical'>[LEGACY / MAINTENANCE]</span>: Proven solutions with no major updates since 2022. Use with caution.\n"
            "- <span class='md-tag md-tag--primary'>[ARCHITECTURE-GUIDE]</span> / <span class='md-tag md-tag--primary'>[CASE-STUDY]</span>: High-value reading material and use cases.\n\n"

            "## 🌟 Master Selection (Top-Tier Gems)\n"
            "A global selection of the most impactful resources across all dimensions.\n\n"
        ]
        for l in master_selection[:100]:
            gh_info = f" `[⭐ {l['gh_stars']}]`" if "gh_stars" in l else ""
            year_prefix = f"**({l['year']})** " if l.get("year") and l["year"] != "N/A" else ""
            title_clean = l['title'].replace("==", "")
            # Master selection links are 3 stars, so we highlight
            title_display = f"**=={title_clean}==**"
            index_parts.append(f"- {year_prefix}[{title_display}]({l['url']}){gh_info} 🌟🌟🌟\n")

        if videos_html.strip():
            index_parts.append("\n??? note \"Elite Video Selection - Click to expand!\"\n")
            index_parts.append(f"    <center markdown=\"1\">\n{videos_html}\n    </center>\n\n")
        else:
            # No videos harvested: skip the empty collapsible block but keep
            # the blank line that separates the list from the next heading.
            index_parts.append("\n")

        index_parts.append("## Strategic Dimensions\n")
        for dim, content in published:
            index_parts.append(f"- **[{dim}](./{slugs[dim]}.md)**: {content['summary']}\n")

        with open(os.path.join(V2_DIR, "index.md"), "w", encoding="utf-8") as f:
            f.write("".join(index_parts))

        for dim, content in published:
            page = [
                f"# {dim}\n\n",
                f"!!! info \"Architectural Context\"\n    {content['summary']}\n\n",
            ]
            for cat, links in content["categories"].items():
                page.append(f"## {cat}\n")
                for l in links:
                    year, stars_val = l.get("year", "N/A"), l.get("stars", 1)
                    stars = "🌟" * stars_val
                    tag = l.get("tag", "[ENTERPRISE-STABLE]")

                    # Determine color mapping for new tags
                    if "STANDARD" in tag or "FOUNDATIONAL" in tag: color = "success"
                    elif "EMERGING" in tag: color = "warning"
                    elif "LEGACY" in tag: color = "critical"
                    elif "STABLE" in tag: color = "info"
                    else: color = "primary"

                    title_clean = l['title'].replace("==", "")
                    if stars_val == 3 or "STANDARD" in tag:
                        title_display = f"**=={title_clean}==**"
                    elif stars_val == 2:
                        title_display = f"**{title_clean}**"
                    else:
                        title_display = title_clean

                    year_prefix = f"**({year})** " if year and year != "N/A" else ""

                    gh_info = f" <span class='md-tag md-tag--info'>⭐ {l['gh_stars']}</span>" if "gh_stars" in l else ""
                    icon = " 🎥" if l.get("is_video") else ""
                    page.append(f"  - {year_prefix}[{title_display}]({l['url']}){icon}{gh_info} {stars} <span class='md-tag md-tag--{color}'>{tag}</span>\n")

                    desc = l['description']
                    if desc:
                        if "\n" in desc:
                            # Multi-line descriptions are re-indented under the
                            # list item and set off by blank lines.
                            page.append("\n" + "\n".join("      " + line for line in desc.split("\n")) + "\n\n")
                        else:
                            page.append(f"      {desc}\n")
                page.append("\n")
            with open(os.path.join(V2_DIR, f"{slugs[dim]}.md"), "w", encoding="utf-8") as f:
                f.write("".join(page))

    async def _sync_enterprise_navigation(self, data: Dict[str, Dict]):
        try:
            with open("v2-mkdocs.yml", "r") as f: content = f.read()
            nav_items = [
                "nav:",
                "  - \"🔙 Back to V1 (Exhaustive)\": https://nubenetes.com/",
                "  - \"The 2026 Vision\": index.md"
            ]
            for dim in data.keys():
                if not data[dim]["categories"]: continue
                slug = dim.lower().replace(" ", "-").replace("&", "and").replace("(", "").replace(")", "").replace(" ", "-")
                nav_items.append(f"  - \"{dim}\": {slug}.md")
            new_nav = "\n".join(nav_items)
            updated_content = re.sub(r'nav:.*', new_nav, content, flags=re.DOTALL)
            with open("v2-mkdocs.yml", "w") as f: f.write(updated_content)
        except: pass

if __name__ == "__main__":
    # Script entry point: build the engine and drive the async pipeline to completion.
    asyncio.run(V2VisionEngine().analyze_and_cluster())