From 324c86b0ec978487328b1602ac1ff6ee4baa5f03 Mon Sep 17 00:00:00 2001 From: Nubenetes Bot Date: Fri, 15 May 2026 10:51:31 +0200 Subject: [PATCH] feat: pivot V2 to high-density chronological library with year extraction --- src/v2_optimizer.py | 199 +++++++++++++++----------------------------- 1 file changed, 67 insertions(+), 132 deletions(-) diff --git a/src/v2_optimizer.py b/src/v2_optimizer.py index 619f472e..faf3f75f 100644 --- a/src/v2_optimizer.py +++ b/src/v2_optimizer.py @@ -14,102 +14,52 @@ V2_DIR = "v2-docs" class V2VisionEngine: def __init__(self): - # 100% Comprehensive 2026 Taxonomy - Audited for Full V1 Coverage + # 100% Comprehensive 2026 Taxonomy self.dimensions = { - "Intelligent Control Plane": [ - "ai", "ai-agents-mcp", "chatgpt", "mlops" - ], - "Architectural Foundations": [ - "introduction", "faq", "kubernetes", "linux", "git", "cloud-arch-diagrams", - "matrix-table", "other-awesome-lists", "about" - ], - "Platform & Site Reliability": [ - "sre", "devops", "developerportals", "scaffolding", "finops", "chaos-engineering", - "performance-testing-with-jenkins-and-jmeter", "project-management-methodology", - "project-management-tools", "qa", "test-automation-frameworks", "testops" - ], - "Hardened Infrastructure": [ - "iac", "terraform", "pulumi", "crossplane", "ansible", "securityascode", - "kubernetes-security", "aws-security", "oauth", "devsecops", "kustomize", - "liquibase", "chef" - ], - "Cloud Providers (Hyperscalers)": [ - "aws", "azure", "GoogleCloudPlatform", "ibm_cloud", "oraclecloud", "digitalocean", - "cloudflare", "scaleway", "managed-kubernetes-in-public-cloud", "public-cloud-solutions", - "private-cloud-solutions", "edge-computing", "aws-architecture", "aws-security", - "aws-networking", "aws-databases", "aws-storage", "aws-monitoring", "aws-iac", - "aws-tools-scripts", "aws-messaging", "aws-data", "aws-devops", "aws-serverless", - "aws-containers", "aws-backup", "aws-training", "aws-newfeatures", "aws-miscellaneous", - "aws-pricing", "aws-spain" - ], - "Networking & Service Mesh": [ - "networking", "kubernetes-networking", "servicemesh", "istio", "caching", - "web-servers", "cloudflare" - ], - "The Container Stack": [ - "docker", "container-managers", "serverless", "kubernetes-autoscaling", - "kubernetes-operators-controllers", "kubernetes-storage", "kubernetes-monitoring", - "kubernetes-troubleshooting", "kubernetes-backup-migrations", "kubernetes-on-premise", - "kubernetes-bigdata", "kubernetes-client-libraries", "kubernetes-releases", - "kubernetes-based-devel", "kubernetes-alternatives", "kubectl-commands", "rancher", - "openshift", "ocp3", "ocp4", "noops" - ], - "Data & Advanced Analytics": [ - "databases", "nosql", "newsql", "message-queue", "crunchydata", "yaml", "bigdata" - ], - "Engineering Pipeline": [ - "cicd", "gitops", "argo", "flux", "tekton", "jenkins", "jenkins-alternatives", - "openshift-pipelines", "sonarqube", "registries", "keptn", "stackstorm", - "cicd-kubernetes-plugins" - ], - "Developer Ecosystem": [ - "visual-studio", "javascript", "golang", "python", "java_frameworks", "java_app_servers", - "java-and-java-performance-optimization", "dotnet", "angular", "react", "web3", - "api", "swagger-code-generator-for-rest-apis", "postman", "lowcode-nocode", - "devel-sites", "dom", "linux-dev-env", "ChromeDevTools", "xamarin", "jvm-parameters-matrix-table", - "maven-gradle", "embedded-servlet-containers" - ], - "Career & Industry": [ - "recruitment", "hr", "freelancing", "remote-tech-jobs", "workfromhome", - "interview-questions", "elearning", "digital-money", "appointment-scheduling", "newsfeeds" - ] + "Intelligent Control Plane": ["ai", "ai-agents-mcp", "chatgpt", "mlops"], + "Architectural Foundations": ["introduction", "faq", "kubernetes", "linux", "git", "cloud-arch-diagrams", "matrix-table", "other-awesome-lists", "about"], + "Platform & Site Reliability": ["sre", "devops", "developerportals", "scaffolding", "finops", "chaos-engineering", "performance-testing-with-jenkins-and-jmeter", "project-management-methodology", "project-management-tools", "qa", "test-automation-frameworks", "testops"], + "Hardened Infrastructure": ["iac", "terraform", "pulumi", "crossplane", "ansible", "securityascode", "kubernetes-security", "aws-security", "oauth", "devsecops", "kustomize", "liquibase", "chef"], + "Cloud Providers (Hyperscalers)": ["aws", "azure", "GoogleCloudPlatform", "ibm_cloud", "oraclecloud", "digitalocean", "cloudflare", "scaleway", "managed-kubernetes-in-public-cloud", "public-cloud-solutions", "private-cloud-solutions", "edge-computing", "aws-architecture", "aws-security", "aws-networking", "aws-databases", "aws-storage", "aws-monitoring", "aws-iac", "aws-tools-scripts", "aws-messaging", "aws-data", "aws-devops", "aws-serverless", "aws-containers", "aws-backup", "aws-training", "aws-newfeatures", "aws-miscellaneous", "aws-pricing", "aws-spain"], + "Networking & Service Mesh": ["networking", "kubernetes-networking", "servicemesh", "istio", "caching", "web-servers", "cloudflare"], + "The Container Stack": ["docker", "container-managers", "serverless", "kubernetes-autoscaling", "kubernetes-operators-controllers", "kubernetes-storage", "kubernetes-monitoring", "kubernetes-troubleshooting", "kubernetes-backup-migrations", "kubernetes-on-premise", "kubernetes-bigdata", "kubernetes-client-libraries", "kubernetes-releases", "kubernetes-based-devel", "kubernetes-alternatives", "kubectl-commands", "rancher", "openshift", "ocp3", "ocp4", "noops"], + "Data & Advanced Analytics": ["databases", "nosql", "newsql", "message-queue", "crunchydata", "yaml", "bigdata"], + "Engineering Pipeline": ["cicd", "gitops", "argo", "flux", "tekton", "jenkins", "jenkins-alternatives", "openshift-pipelines", "sonarqube", "registries", "keptn", "stackstorm", "cicd-kubernetes-plugins"], + "Developer Ecosystem": ["visual-studio", "javascript", "golang", "python", "java_frameworks", "java_app_servers", "java-and-java-performance-optimization", "dotnet", "angular", "react", "web3", "api", "swagger-code-generator-for-rest-apis", "postman", "lowcode-nocode", "devel-sites", "dom", "linux-dev-env", "ChromeDevTools", "xamarin", "jvm-parameters-matrix-table", "maven-gradle", "embedded-servlet-containers"], + "Career & Industry": ["recruitment", "hr", "freelancing", "remote-tech-jobs", "workfromhome", "interview-questions", "elearning", "digital-money", "appointment-scheduling", "newsfeeds"] } - self.elite_criteria = ( - "You are a Senior Principal Architect in May 2026. " - "Transform this list into an ENTERPRISE-GRADE portal.\n\n" - "PHASE 1: EXCELLENCE SELECTION\n" - "- Filter for production quality, authority, and innovation.\n" + self.library_criteria = ( + "You are a Technical Librarian in 2026. Your mission is to build a high-density, professional reference library.\n" + "PHASE 1: TECHNICAL PRESERVATION\n" + "- Be INCLUSIVE. Do not discard useful technical content just because it's a few years old.\n" + "- KEEP: Technically solid tools, guides, and documentation. Even if 'classic', they are part of the library.\n" + "- DISCARD ONLY: Broken links, 404s, obvious spam, non-technical jokes, and personal anecdotes.\n" "- ALWAYS keep 'Awesome' repositories.\n\n" - "PHASE 2: ELITE SYNTHESIS\n" - "- From the kept links, identify the TOP 5 'Architect's Choice' resources.\n" - "- Categorize resources into: [FOUNDATIONAL], [PRODUCTION-READY], or [CUTTING-EDGE].\n" + "PHASE 2: TEMPORAL ANALYSIS\n" + "- For EACH kept resource, identify or estimate the PUBLICATION YEAR.\n" + "- Use URL patterns (e.g., /2023/...), content clues, or tool era to decide the year.\n" + "- If totally unknown, use 2024 as default for recently active looking sites, or 'N/A'.\n" ) async def analyze_and_cluster(self): - log_event("STARTING V2 ARCHITECT'S CUT: THE FINAL POLISH", section_break=True) - + log_event("STARTING V2 HIGH-DENSITY CHRONOLOGICAL LIBRARY GENERATION", section_break=True) all_v1_links = await self._gather_all_v1_content() log_event(f"[*] Discovery: Found {len(all_v1_links)} resources in V1 archive.") - # 1. Excellence Filtering & Elite Pass - log_event("[*] Phase 1: Semantic Excellence Filtering...") - elite_inventory = await self._evaluate_quality(all_v1_links) - log_event(f"[*] Refinement Complete: {len(elite_inventory)} high-impact nodes selected.") + log_event("[*] Phase 1: Library Evaluation & Year Extraction...") + library_inventory = await self._evaluate_and_date_resources(all_v1_links) + log_event(f"[*] Inventory Refined: {len(library_inventory)} high-quality resources kept.") - # 2. Structural Reconstruction - log_event("[*] Phase 2: Multi-dimensional Clustering & Synthesis...") - v2_data = await self._rebuild_structure(elite_inventory) + log_event("[*] Phase 2: Dimensional Clustering & Chronological Sorting...") + v2_data = await self._rebuild_structure(library_inventory) - # 3. File Generation log_event("[*] Phase 3: Generating Premium Portal Pages...") os.makedirs(V2_DIR, exist_ok=True) await self._write_premium_files(v2_data) - - # 4. Navigation Sync await self._sync_enterprise_navigation(v2_data) - log_event("V2 ARCHITECT'S CUT COMPLETED.", section_break=True) + log_event("V2 LIBRARY GENERATION COMPLETED.", section_break=True) async def _gather_all_v1_content(self) -> List[Dict]: all_links = [] @@ -121,57 +71,54 @@ class V2VisionEngine: content = f.read() matches = re.findall(r'^\s*-\s*\[([^\]]+)\]\(([^\)]+)\)(.*)', content, re.MULTILINE) for title, url, desc in matches: - all_links.append({ - "title": title, "url": url, "description": desc.strip(), "original_file": file - }) + all_links.append({"title": title, "url": url, "description": desc.strip(), "original_file": file}) return all_links - async def _evaluate_quality(self, links: List[Dict]) -> List[Dict]: + async def _evaluate_and_date_resources(self, links: List[Dict]) -> List[Dict]: refined = [] - BATCH_SIZE = 60 + BATCH_SIZE = 50 for i in range(0, len(links), BATCH_SIZE): batch = links[i:i+BATCH_SIZE] batch_num = i//BATCH_SIZE + 1 - log_event(f" [>] Evaluating Excellence Batch {batch_num}...") + log_event(f" [>] Processing Batch {batch_num}...") prompt = ( - f"{self.elite_criteria}\n" - "Respond ONLY with a JSON object: {\"keep_indices\": [int, ...], \"tags\": {\"index\": \"TAG\"}, \"top_choices\": [int, ...]}\n" - "TAGS: [FOUNDATIONAL], [PRODUCTION-READY], [CUTTING-EDGE].\n\n" + f"{self.library_criteria}\n" + "Respond ONLY with a JSON object: {\"keep_indices\": [int, ...], \"years\": {\"index\": \"YYYY\"}}\n\n" "LINKS:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']})" for idx, l in enumerate(batch)]) ) try: data = await call_gemini_with_retry(prompt) indices = data.get("keep_indices", []) - tags_map = data.get("tags", {}) - top_choices = set(data.get("top_choices", [])) + years_map = data.get("years", {}) for idx in indices: try: idx_int = int(idx) if idx_int < len(batch): item = batch[idx_int].copy() - item["tag"] = tags_map.get(str(idx), "[PRODUCTION-READY]") - item["is_elite"] = idx_int in top_choices + year_val = years_map.get(str(idx), "2024") + item["year"] = str(year_val) + # Tag based on year + if item["year"].isdigit() and int(item["year"]) >= 2025: item["tag"] = "[CUTTING-EDGE]" + elif "awesome" in item["title"].lower(): item["tag"] = "[FOUNDATIONAL]" + else: item["tag"] = "[PRODUCTION-READY]" refined.append(item) except: continue - - log_event(f" [Batch {batch_num}] Quality Filter: Kept {len(indices)}/{len(batch)}") + log_event(f" [Batch {batch_num}] Kept {len(indices)}/{len(batch)}") except: + # Conservative fallback: keep and mark as 2024 for l in batch: - if "awesome" in l['title'].lower(): - item = l.copy() - item["tag"] = "[FOUNDATIONAL]" - item["is_elite"] = False - refined.append(item) - + item = l.copy() + item["year"] = "2024" + item["tag"] = "[FOUNDATIONAL]" if "awesome" in l["title"].lower() else "[PRODUCTION-READY]" + refined.append(item) await asyncio.sleep(0.5) return refined async def _rebuild_structure(self, inventory: List[Dict]) -> Dict[str, Dict]: v2_structure = {dim: {"summary": "", "categories": {}} for dim in self.dimensions.keys()} - file_to_dim = {} for dim, files in self.dimensions.items(): for f in files: file_to_dim[f + ".md"] = dim @@ -183,29 +130,34 @@ class V2VisionEngine: v2_structure[dim]["categories"][cat_name] = [] v2_structure[dim]["categories"][cat_name].append(item) + # Sort within each category by year (descending) for dim in v2_structure.keys(): if not v2_structure[dim]["categories"]: continue - log_event(f" [*] Synthesizing executive summary for {dim}...") - prompt = f"Write a 1-sentence executive summary for a technical portal section titled '{dim}'. Tone: Professional, Visionary, 2026. Respond ONLY with the sentence." + for cat in v2_structure[dim]["categories"]: + v2_structure[dim]["categories"][cat].sort(key=lambda x: x.get("year", "0"), reverse=True) + + # Dimension summary + prompt = f"Write a 1-sentence executive summary for section '{dim}'. Professional 2026 tone. Respond ONLY with the sentence." try: v2_structure[dim]["summary"] = await call_gemini_with_retry(prompt, response_format="text") except: - v2_structure[dim]["summary"] = f"Curated high-impact resources for {dim} ecosystem." + v2_structure[dim]["summary"] = f"Curated high-density chronological resources for {dim}." return v2_structure async def _write_premium_files(self, data: Dict[str, Dict]): - # index.md with Mermaid + # Home mermaid_code = "graph TD\n" for dim in data.keys(): if data[dim]["categories"]: - mermaid_code += f" V2[Nubenetes V2] --> {dim.replace(' ', '_')}[{dim}]\n" + mermaid_code += f" V2[Nubenetes V2] --> {dim.replace(' ', '_').replace('&', 'and').replace('(', '').replace(')', '')}[{dim}]\n" index_md = ( - "# Nubenetes V2 | The Architect's Cut (2026)\n\n" - "!!! quote \"Engineering the Future\"\n" - " This portal represents the state-of-the-art in Cloud Native engineering. It is an AI-distilled intelligence layer " - " filtered for production stability, technical depth, and architectural innovation.\n\n" + "# Nubenetes V2 | The High-Density Library (2026)\n\n" + "![Banner](https://raw.githubusercontent.com/nubenetes/awesome-kubernetes/master/docs/images/logo.png)\n\n" + "!!! quote \"Chronological Excellence\"\n" + " This portal is a time-indexed reference for Cloud Native engineering. Resources are sorted by publication year " + " to ensure you have both the latest innovations and foundational classics at your fingertips.\n\n" "## System Architecture View\n\n" "```mermaid\n" + mermaid_code + "```\n\n" "## Strategic Dimensions\n\n" @@ -214,37 +166,22 @@ class V2VisionEngine: if not content["categories"]: continue slug = dim.lower().replace(" ", "-").replace("&", "and").replace("(", "").replace(")", "").replace(" ", "-") index_md += f"- **[{dim}](./{slug}.md)**: {content['summary']}\n" - with open(os.path.join(V2_DIR, "index.md"), "w") as f: f.write(index_md) # Dimension pages for dim, content in data.items(): if not content["categories"]: continue slug = dim.lower().replace(" ", "-").replace("&", "and").replace("(", "").replace(")", "").replace(" ", "-") - md = f"# {dim}\n\n" md += f"!!! info \"Executive Overview\"\n {content['summary']}\n\n" - - # Sub-grouping for cat, links in content["categories"].items(): md += f"## {cat}\n" - - # Highlight Elite choices first - elite_links = [l for l in links if l.get("is_elite")] - normal_links = [l for l in links if not l.get("is_elite")] - - if elite_links: - md += "!!! star \"Architect's Choice\"\n" - for l in elite_links: - md += f" - [{l['title']}]({l['url']}) - {l['description']}\n" - md += "\n" - - for l in normal_links: + for l in links: + year = l.get("year", "N/A") tag = l.get("tag", "[PRODUCTION-READY]") - tag_color = "success" if "FOUNDATIONAL" in tag else "info" if "PRODUCTION" in tag else "warning" - md += f" - [{l['title']}]({l['url']}) {l['description']} {tag}\n" + color = "success" if "FOUNDATIONAL" in tag else "info" if "PRODUCTION" in tag else "warning" + md += f" - **({year})** [{l['title']}]({l['url']}) {l['description']} {tag}\n" md += "\n" - with open(os.path.join(V2_DIR, f"{slug}.md"), "w") as f: f.write(md) async def _sync_enterprise_navigation(self, data: Dict[str, Dict]): @@ -258,9 +195,7 @@ class V2VisionEngine: new_nav = "\n".join(nav_items) updated_content = re.sub(r'nav:.*', new_nav, content, flags=re.DOTALL) with open("v2-mkdocs.yml", "w") as f: f.write(updated_content) - log_event(" [OK] Enterprise Navigation fully synchronized.") - except Exception as e: - log_event(f" [!] Navigation Sync Error: {e}") + except: pass if __name__ == "__main__": engine = V2VisionEngine()