feat: pivot V2 to high-density chronological library with year extraction

This commit is contained in:
Nubenetes Bot
2026-05-15 10:51:31 +02:00
parent 544f37ebfb
commit 324c86b0ec

View File

@@ -14,102 +14,52 @@ V2_DIR = "v2-docs"
class V2VisionEngine:
def __init__(self):
# NOTE(review): this text is a rendered commit diff, not a plain source file. Indentation
# is stripped and removed/added lines sit next to each other without +/- markers. The
# multi-line dict entries and `self.elite_criteria` look like the pre-commit text; the
# one-line-per-dimension entries and `self.library_criteria` look like the post-commit
# text. Confirm against the repository before relying on either reading.
# 100% Comprehensive 2026 Taxonomy - Audited for Full V1 Coverage
# 100% Comprehensive 2026 Taxonomy
# Maps each dimension title to a list of page names. _rebuild_structure appends ".md" to
# every name and builds a file -> dimension lookup from this dict.
# NOTE(review): "aws-security" is listed under both "Hardened Infrastructure" and
# "Cloud Providers (Hyperscalers)", and "cloudflare" under both "Cloud Providers
# (Hyperscalers)" and "Networking & Service Mesh". The lookup built in _rebuild_structure
# stores one dimension per file name, so the dimension listed later overwrites the earlier one.
self.dimensions = {
"Intelligent Control Plane": [
"ai", "ai-agents-mcp", "chatgpt", "mlops"
],
"Architectural Foundations": [
"introduction", "faq", "kubernetes", "linux", "git", "cloud-arch-diagrams",
"matrix-table", "other-awesome-lists", "about"
],
"Platform & Site Reliability": [
"sre", "devops", "developerportals", "scaffolding", "finops", "chaos-engineering",
"performance-testing-with-jenkins-and-jmeter", "project-management-methodology",
"project-management-tools", "qa", "test-automation-frameworks", "testops"
],
"Hardened Infrastructure": [
"iac", "terraform", "pulumi", "crossplane", "ansible", "securityascode",
"kubernetes-security", "aws-security", "oauth", "devsecops", "kustomize",
"liquibase", "chef"
],
"Cloud Providers (Hyperscalers)": [
"aws", "azure", "GoogleCloudPlatform", "ibm_cloud", "oraclecloud", "digitalocean",
"cloudflare", "scaleway", "managed-kubernetes-in-public-cloud", "public-cloud-solutions",
"private-cloud-solutions", "edge-computing", "aws-architecture", "aws-security",
"aws-networking", "aws-databases", "aws-storage", "aws-monitoring", "aws-iac",
"aws-tools-scripts", "aws-messaging", "aws-data", "aws-devops", "aws-serverless",
"aws-containers", "aws-backup", "aws-training", "aws-newfeatures", "aws-miscellaneous",
"aws-pricing", "aws-spain"
],
"Networking & Service Mesh": [
"networking", "kubernetes-networking", "servicemesh", "istio", "caching",
"web-servers", "cloudflare"
],
"The Container Stack": [
"docker", "container-managers", "serverless", "kubernetes-autoscaling",
"kubernetes-operators-controllers", "kubernetes-storage", "kubernetes-monitoring",
"kubernetes-troubleshooting", "kubernetes-backup-migrations", "kubernetes-on-premise",
"kubernetes-bigdata", "kubernetes-client-libraries", "kubernetes-releases",
"kubernetes-based-devel", "kubernetes-alternatives", "kubectl-commands", "rancher",
"openshift", "ocp3", "ocp4", "noops"
],
"Data & Advanced Analytics": [
"databases", "nosql", "newsql", "message-queue", "crunchydata", "yaml", "bigdata"
],
"Engineering Pipeline": [
"cicd", "gitops", "argo", "flux", "tekton", "jenkins", "jenkins-alternatives",
"openshift-pipelines", "sonarqube", "registries", "keptn", "stackstorm",
"cicd-kubernetes-plugins"
],
"Developer Ecosystem": [
"visual-studio", "javascript", "golang", "python", "java_frameworks", "java_app_servers",
"java-and-java-performance-optimization", "dotnet", "angular", "react", "web3",
"api", "swagger-code-generator-for-rest-apis", "postman", "lowcode-nocode",
"devel-sites", "dom", "linux-dev-env", "ChromeDevTools", "xamarin", "jvm-parameters-matrix-table",
"maven-gradle", "embedded-servlet-containers"
],
"Career & Industry": [
"recruitment", "hr", "freelancing", "remote-tech-jobs", "workfromhome",
"interview-questions", "elearning", "digital-money", "appointment-scheduling", "newsfeeds"
]
"Intelligent Control Plane": ["ai", "ai-agents-mcp", "chatgpt", "mlops"],
"Architectural Foundations": ["introduction", "faq", "kubernetes", "linux", "git", "cloud-arch-diagrams", "matrix-table", "other-awesome-lists", "about"],
"Platform & Site Reliability": ["sre", "devops", "developerportals", "scaffolding", "finops", "chaos-engineering", "performance-testing-with-jenkins-and-jmeter", "project-management-methodology", "project-management-tools", "qa", "test-automation-frameworks", "testops"],
"Hardened Infrastructure": ["iac", "terraform", "pulumi", "crossplane", "ansible", "securityascode", "kubernetes-security", "aws-security", "oauth", "devsecops", "kustomize", "liquibase", "chef"],
"Cloud Providers (Hyperscalers)": ["aws", "azure", "GoogleCloudPlatform", "ibm_cloud", "oraclecloud", "digitalocean", "cloudflare", "scaleway", "managed-kubernetes-in-public-cloud", "public-cloud-solutions", "private-cloud-solutions", "edge-computing", "aws-architecture", "aws-security", "aws-networking", "aws-databases", "aws-storage", "aws-monitoring", "aws-iac", "aws-tools-scripts", "aws-messaging", "aws-data", "aws-devops", "aws-serverless", "aws-containers", "aws-backup", "aws-training", "aws-newfeatures", "aws-miscellaneous", "aws-pricing", "aws-spain"],
"Networking & Service Mesh": ["networking", "kubernetes-networking", "servicemesh", "istio", "caching", "web-servers", "cloudflare"],
"The Container Stack": ["docker", "container-managers", "serverless", "kubernetes-autoscaling", "kubernetes-operators-controllers", "kubernetes-storage", "kubernetes-monitoring", "kubernetes-troubleshooting", "kubernetes-backup-migrations", "kubernetes-on-premise", "kubernetes-bigdata", "kubernetes-client-libraries", "kubernetes-releases", "kubernetes-based-devel", "kubernetes-alternatives", "kubectl-commands", "rancher", "openshift", "ocp3", "ocp4", "noops"],
"Data & Advanced Analytics": ["databases", "nosql", "newsql", "message-queue", "crunchydata", "yaml", "bigdata"],
"Engineering Pipeline": ["cicd", "gitops", "argo", "flux", "tekton", "jenkins", "jenkins-alternatives", "openshift-pipelines", "sonarqube", "registries", "keptn", "stackstorm", "cicd-kubernetes-plugins"],
"Developer Ecosystem": ["visual-studio", "javascript", "golang", "python", "java_frameworks", "java_app_servers", "java-and-java-performance-optimization", "dotnet", "angular", "react", "web3", "api", "swagger-code-generator-for-rest-apis", "postman", "lowcode-nocode", "devel-sites", "dom", "linux-dev-env", "ChromeDevTools", "xamarin", "jvm-parameters-matrix-table", "maven-gradle", "embedded-servlet-containers"],
"Career & Industry": ["recruitment", "hr", "freelancing", "remote-tech-jobs", "workfromhome", "interview-questions", "elearning", "digital-money", "appointment-scheduling", "newsfeeds"]
}
# Prompt preamble: the batch evaluation method interpolates this text at the top of every
# prompt it sends to the model. The lines of the two variants (elite vs library criteria)
# are interleaved below exactly as the diff rendered them.
self.elite_criteria = (
"You are a Senior Principal Architect in May 2026. "
"Transform this list into an ENTERPRISE-GRADE portal.\n\n"
"PHASE 1: EXCELLENCE SELECTION\n"
"- Filter for production quality, authority, and innovation.\n"
self.library_criteria = (
"You are a Technical Librarian in 2026. Your mission is to build a high-density, professional reference library.\n"
"PHASE 1: TECHNICAL PRESERVATION\n"
"- Be INCLUSIVE. Do not discard useful technical content just because it's a few years old.\n"
"- KEEP: Technically solid tools, guides, and documentation. Even if 'classic', they are part of the library.\n"
"- DISCARD ONLY: Broken links, 404s, obvious spam, non-technical jokes, and personal anecdotes.\n"
"- ALWAYS keep 'Awesome' repositories.\n\n"
"PHASE 2: ELITE SYNTHESIS\n"
"- From the kept links, identify the TOP 5 'Architect's Choice' resources.\n"
"- Categorize resources into: [FOUNDATIONAL], [PRODUCTION-READY], or [CUTTING-EDGE].\n"
"PHASE 2: TEMPORAL ANALYSIS\n"
"- For EACH kept resource, identify or estimate the PUBLICATION YEAR.\n"
"- Use URL patterns (e.g., /2023/...), content clues, or tool era to decide the year.\n"
"- If totally unknown, use 2024 as default for recently active looking sites, or 'N/A'.\n"
)
async def analyze_and_cluster(self):
# Runs the V2 generation pipeline end to end: gather the links from the V1 content,
# evaluate them, rebuild the per-dimension structure, create V2_DIR and write the pages,
# then sync the navigation.
# NOTE(review): rendered diff. Where two near-identical statements follow each other
# (the paired log_event calls, `elite_inventory` vs `library_inventory`), the first looks
# like the removed line and the second like its replacement. Whether the numbered step
# comments survive the commit cannot be determined from this view.
log_event("STARTING V2 ARCHITECT'S CUT: THE FINAL POLISH", section_break=True)
log_event("STARTING V2 HIGH-DENSITY CHRONOLOGICAL LIBRARY GENERATION", section_break=True)
all_v1_links = await self._gather_all_v1_content()
log_event(f"[*] Discovery: Found {len(all_v1_links)} resources in V1 archive.")
# 1. Excellence Filtering & Elite Pass
log_event("[*] Phase 1: Semantic Excellence Filtering...")
elite_inventory = await self._evaluate_quality(all_v1_links)
log_event(f"[*] Refinement Complete: {len(elite_inventory)} high-impact nodes selected.")
log_event("[*] Phase 1: Library Evaluation & Year Extraction...")
library_inventory = await self._evaluate_and_date_resources(all_v1_links)
log_event(f"[*] Inventory Refined: {len(library_inventory)} high-quality resources kept.")
# 2. Structural Reconstruction
log_event("[*] Phase 2: Multi-dimensional Clustering & Synthesis...")
v2_data = await self._rebuild_structure(elite_inventory)
log_event("[*] Phase 2: Dimensional Clustering & Chronological Sorting...")
v2_data = await self._rebuild_structure(library_inventory)
# 3. File Generation
log_event("[*] Phase 3: Generating Premium Portal Pages...")
# exist_ok=True makes repeated runs safe when the output directory is already present.
os.makedirs(V2_DIR, exist_ok=True)
await self._write_premium_files(v2_data)
# 4. Navigation Sync
await self._sync_enterprise_navigation(v2_data)
log_event("V2 ARCHITECT'S CUT COMPLETED.", section_break=True)
log_event("V2 LIBRARY GENERATION COMPLETED.", section_break=True)
async def _gather_all_v1_content(self) -> List[Dict]:
all_links = []
@@ -121,57 +71,54 @@ class V2VisionEngine:
content = f.read()
matches = re.findall(r'^\s*-\s*\[([^\]]+)\]\(([^\)]+)\)(.*)', content, re.MULTILINE)
for title, url, desc in matches:
all_links.append({
"title": title, "url": url, "description": desc.strip(), "original_file": file
})
all_links.append({"title": title, "url": url, "description": desc.strip(), "original_file": file})
return all_links
async def _evaluate_quality(self, links: List[Dict]) -> List[Dict]:
async def _evaluate_and_date_resources(self, links: List[Dict]) -> List[Dict]:
# Sends `links` to the model in fixed-size batches, keeps the entries whose batch-local
# indices the model returns in "keep_indices", and returns copies of those entries with
# extra keys attached ("year"/"tag" in the library variant, "tag"/"is_elite" in the elite
# variant).
# NOTE(review): rendered diff. Both signatures above and the paired statements below are
# the removed and added sides of the same commit shown together without +/- markers
# (apparently: _evaluate_quality / BATCH_SIZE = 60 / elite_criteria before,
# _evaluate_and_date_resources / BATCH_SIZE = 50 / library_criteria after). Confirm
# against the repository.
refined = []
BATCH_SIZE = 60
BATCH_SIZE = 50
for i in range(0, len(links), BATCH_SIZE):
batch = links[i:i+BATCH_SIZE]
batch_num = i//BATCH_SIZE + 1
log_event(f" [>] Evaluating Excellence Batch {batch_num}...")
log_event(f" [>] Processing Batch {batch_num}...")
# Only the title and URL of each link are put into the prompt, numbered by position within
# the batch. Nothing in this method fetches the URLs, so the "broken links, 404s" rule in
# the criteria text depends on the model's own judgement.
prompt = (
f"{self.elite_criteria}\n"
"Respond ONLY with a JSON object: {\"keep_indices\": [int, ...], \"tags\": {\"index\": \"TAG\"}, \"top_choices\": [int, ...]}\n"
"TAGS: [FOUNDATIONAL], [PRODUCTION-READY], [CUTTING-EDGE].\n\n"
f"{self.library_criteria}\n"
"Respond ONLY with a JSON object: {\"keep_indices\": [int, ...], \"years\": {\"index\": \"YYYY\"}}\n\n"
"LINKS:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']})" for idx, l in enumerate(batch)])
)
try:
# presumably call_gemini_with_retry returns the parsed JSON object as a dict (it is used
# via .get below); verify against the helper's definition.
data = await call_gemini_with_retry(prompt)
indices = data.get("keep_indices", [])
tags_map = data.get("tags", {})
top_choices = set(data.get("top_choices", []))
years_map = data.get("years", {})
for idx in indices:
try:
idx_int = int(idx)
# NOTE(review): only the upper bound is checked. A negative index returned by the model
# passes this test and selects an entry counted from the end of the batch.
if idx_int < len(batch):
item = batch[idx_int].copy()
item["tag"] = tags_map.get(str(idx), "[PRODUCTION-READY]")
item["is_elite"] = idx_int in top_choices
# Per-item maps are keyed by the index as a string; a missing year falls back to "2024".
year_val = years_map.get(str(idx), "2024")
# NOTE(review): the criteria text allows the model to answer 'N/A', so "year" is not
# guaranteed to be numeric; the isdigit() guard below covers that case for tagging.
item["year"] = str(year_val)
# Tag based on year
if item["year"].isdigit() and int(item["year"]) >= 2025: item["tag"] = "[CUTTING-EDGE]"
elif "awesome" in item["title"].lower(): item["tag"] = "[FOUNDATIONAL]"
else: item["tag"] = "[PRODUCTION-READY]"
refined.append(item)
# NOTE(review): bare `except` silently skips the entry on any error and also catches
# BaseException subclasses (KeyboardInterrupt, asyncio.CancelledError).
except: continue
# NOTE(review): the count logged is len(indices), i.e. what the model returned, not the
# number of items actually appended; skipped or out-of-range indices are still counted.
log_event(f" [Batch {batch_num}] Quality Filter: Kept {len(indices)}/{len(batch)}")
log_event(f" [Batch {batch_num}] Kept {len(indices)}/{len(batch)}")
# NOTE(review): bare `except` again. Any failure of the model call or of the response
# handling lands here without being logged. Items appended before the failure stay in
# `refined`, and the fallback loop below runs over the whole batch, so they can be
# appended a second time.
except:
# Conservative fallback: keep and mark as 2024
# The two variants of the fallback are interleaved below: one keeps only titles that
# contain "awesome", the other keeps every link of the batch with year "2024".
for l in batch:
if "awesome" in l['title'].lower():
item = l.copy()
item["tag"] = "[FOUNDATIONAL]"
item["is_elite"] = False
refined.append(item)
item = l.copy()
item["year"] = "2024"
item["tag"] = "[FOUNDATIONAL]" if "awesome" in l["title"].lower() else "[PRODUCTION-READY]"
refined.append(item)
# Fixed pause between batches.
await asyncio.sleep(0.5)
return refined
async def _rebuild_structure(self, inventory: List[Dict]) -> Dict[str, Dict]:
v2_structure = {dim: {"summary": "", "categories": {}} for dim in self.dimensions.keys()}
file_to_dim = {}
for dim, files in self.dimensions.items():
for f in files: file_to_dim[f + ".md"] = dim
@@ -183,29 +130,34 @@ class V2VisionEngine:
v2_structure[dim]["categories"][cat_name] = []
v2_structure[dim]["categories"][cat_name].append(item)
# Sort within each category by year (descending)
for dim in v2_structure.keys():
if not v2_structure[dim]["categories"]: continue
log_event(f" [*] Synthesizing executive summary for {dim}...")
prompt = f"Write a 1-sentence executive summary for a technical portal section titled '{dim}'. Tone: Professional, Visionary, 2026. Respond ONLY with the sentence."
for cat in v2_structure[dim]["categories"]:
v2_structure[dim]["categories"][cat].sort(key=lambda x: x.get("year", "0"), reverse=True)
# Dimension summary
prompt = f"Write a 1-sentence executive summary for section '{dim}'. Professional 2026 tone. Respond ONLY with the sentence."
try:
v2_structure[dim]["summary"] = await call_gemini_with_retry(prompt, response_format="text")
except:
v2_structure[dim]["summary"] = f"Curated high-impact resources for {dim} ecosystem."
v2_structure[dim]["summary"] = f"Curated high-density chronological resources for {dim}."
return v2_structure
async def _write_premium_files(self, data: Dict[str, Dict]):
# index.md with Mermaid
# Home
mermaid_code = "graph TD\n"
for dim in data.keys():
if data[dim]["categories"]:
mermaid_code += f" V2[Nubenetes V2] --> {dim.replace(' ', '_')}[{dim}]\n"
mermaid_code += f" V2[Nubenetes V2] --> {dim.replace(' ', '_').replace('&', 'and').replace('(', '').replace(')', '')}[{dim}]\n"
index_md = (
"# Nubenetes V2 | The Architect's Cut (2026)\n\n"
"!!! quote \"Engineering the Future\"\n"
" This portal represents the state-of-the-art in Cloud Native engineering. It is an AI-distilled intelligence layer "
" filtered for production stability, technical depth, and architectural innovation.\n\n"
"# Nubenetes V2 | The High-Density Library (2026)\n\n"
"![Banner](https://raw.githubusercontent.com/nubenetes/awesome-kubernetes/master/docs/images/logo.png)\n\n"
"!!! quote \"Chronological Excellence\"\n"
" This portal is a time-indexed reference for Cloud Native engineering. Resources are sorted by publication year "
" to ensure you have both the latest innovations and foundational classics at your fingertips.\n\n"
"## System Architecture View\n\n"
"```mermaid\n" + mermaid_code + "```\n\n"
"## Strategic Dimensions\n\n"
@@ -214,37 +166,22 @@ class V2VisionEngine:
if not content["categories"]: continue
slug = dim.lower().replace(" ", "-").replace("&", "and").replace("(", "").replace(")", "").replace(" ", "-")
index_md += f"- **[{dim}](./{slug}.md)**: {content['summary']}\n"
with open(os.path.join(V2_DIR, "index.md"), "w") as f: f.write(index_md)
# Dimension pages
for dim, content in data.items():
if not content["categories"]: continue
slug = dim.lower().replace(" ", "-").replace("&", "and").replace("(", "").replace(")", "").replace(" ", "-")
md = f"# {dim}\n\n"
md += f"!!! info \"Executive Overview\"\n {content['summary']}\n\n"
# Sub-grouping
for cat, links in content["categories"].items():
md += f"## {cat}\n"
# Highlight Elite choices first
elite_links = [l for l in links if l.get("is_elite")]
normal_links = [l for l in links if not l.get("is_elite")]
if elite_links:
md += "!!! star \"Architect's Choice\"\n"
for l in elite_links:
md += f" - [{l['title']}]({l['url']}) - {l['description']}\n"
md += "\n"
for l in normal_links:
for l in links:
year = l.get("year", "N/A")
tag = l.get("tag", "[PRODUCTION-READY]")
tag_color = "success" if "FOUNDATIONAL" in tag else "info" if "PRODUCTION" in tag else "warning"
md += f" - [{l['title']}]({l['url']}) {l['description']} <span class='md-tag md-tag--{tag_color}'>{tag}</span>\n"
color = "success" if "FOUNDATIONAL" in tag else "info" if "PRODUCTION" in tag else "warning"
md += f" - **({year})** [{l['title']}]({l['url']}) {l['description']} <span class='md-tag md-tag--{color}'>{tag}</span>\n"
md += "\n"
with open(os.path.join(V2_DIR, f"{slug}.md"), "w") as f: f.write(md)
async def _sync_enterprise_navigation(self, data: Dict[str, Dict]):
@@ -258,9 +195,7 @@ class V2VisionEngine:
new_nav = "\n".join(nav_items)
updated_content = re.sub(r'nav:.*', new_nav, content, flags=re.DOTALL)
with open("v2-mkdocs.yml", "w") as f: f.write(updated_content)
log_event(" [OK] Enterprise Navigation fully synchronized.")
except Exception as e:
log_event(f" [!] Navigation Sync Error: {e}")
except: pass
if __name__ == "__main__":
engine = V2VisionEngine()