mirror of
https://github.com/nubenetes/awesome-kubernetes.git
synced 2026-05-22 17:13:42 +00:00
feat: enhance V2 optimizer with dynamic maturity tags and real github metadata dates
This commit is contained in:
@@ -34,16 +34,12 @@ class V2VisionEngine:
|
||||
self.library_criteria = (
|
||||
"You are a Technical Librarian in 2026. Your mission is to build a high-density, professional reference library.\n"
|
||||
"PHASE 1: TECHNICAL PRESERVATION (HIGH INCLUSIVITY)\n"
|
||||
"- KEEP >90% of technical resources. Only discard 404s, obvious spam, or non-technical content.\n"
|
||||
"- 'Awesome' repositories, official documentation, and deep technical guides are mandatory.\n"
|
||||
"- YouTube videos are HIGH-VALUE resources; keep them as technical references.\n\n"
|
||||
"PHASE 2: TEMPORAL & QUALITY SYNTHESIS\n"
|
||||
"- Identify/estimate PUBLICATION YEAR.\n"
|
||||
"- Assign QUALITY level (1-3 stars):\n"
|
||||
" * 3 stars (🌟🌟🌟): Masterpieces, foundational standards, definitive 'Awesome' lists.\n"
|
||||
" * 2 stars (🌟🌟): Production-grade tools, deep tutorials, highly recommended videos.\n"
|
||||
" * 1 star (🌟): Solid technical references.\n"
|
||||
"- Identify if a resource is a 'YouTube Video/Playlist' for special rendering.\n"
|
||||
"- KEEP >90% of technical resources.\n"
|
||||
"PHASE 2: SOPHISTICATED SYNTHESIS & DATING\n"
|
||||
"- Extract precise PUBLICATION YEAR: Look for dates in the URL (e.g., /2023/05/ post dates), Twitter/X post dates, or text context. Return 'N/A' if truly unknown, do NOT guess '2024'.\n"
|
||||
"- Assign QUALITY level (1-3 stars).\n"
|
||||
"- Assign a MATURITY TAG based on content type/status: '[ENTERPRISE-STABLE]', '[EMERGING / INNOVATION]', '[ARCHITECTURE-GUIDE]', '[TOOLING]', '[CASE-STUDY]', or '[CHEATSHEET]'.\n"
|
||||
" * Note: We will override tags for GitHub repos using real API data (Stars/Commits), so focus on classifying blogs and articles correctly.\n"
|
||||
)
|
||||
self.cache = self._load_cache()
|
||||
|
||||
@@ -93,7 +89,7 @@ class V2VisionEngine:
|
||||
idx_content = f.read()
|
||||
# Find the BIG mosaic (the one with many images)
|
||||
# Support both old <center> and new <div style="text-align: center;" markdown="1">
|
||||
mosaics = re.findall(r'<(?:div style="text-align: center;" markdown="1"|center)>\s*(.*?)\s*</(?:div|center)>', idx_content, re.DOTALL)
|
||||
mosaics = re.findall(r'<(?:div style="text-align: center;" markdown="1"|center markdown="1"|center)>\s*(.*?)\s*</(?:div|center)>', idx_content, re.DOTALL)
|
||||
if mosaics:
|
||||
# Filter for the one containing many image links
|
||||
for m in mosaics:
|
||||
@@ -213,19 +209,28 @@ class V2VisionEngine:
|
||||
refined = []
|
||||
to_evaluate = []
|
||||
|
||||
# Pull from cache first
|
||||
# We want to re-evaluate the tags and years, so we will bypass cache for tagging logic,
|
||||
# but use cache for AI stars if available to save cost.
|
||||
for l in links:
|
||||
url = l["url"]
|
||||
if url in self.cache and "year" in self.cache[url]:
|
||||
item = l.copy()
|
||||
# To allow the new logic to apply to cached items, we re-process GitHub links
|
||||
# and re-apply the tag logic even if it's in the cache.
|
||||
item = l.copy()
|
||||
if url in self.cache and "stars" in self.cache[url]:
|
||||
item.update(self.cache[url])
|
||||
# Refresh GitHub metadata if it's a GH link
|
||||
if "github.com" in url:
|
||||
gh_meta = await self._fetch_github_metadata(url)
|
||||
item.update(gh_meta)
|
||||
refined.append(item)
|
||||
else:
|
||||
to_evaluate.append(l)
|
||||
to_evaluate.append(item)
|
||||
continue # process later via API
|
||||
|
||||
# Re-apply GitHub metadata and mature tagging for cached items
|
||||
if "github.com" in url:
|
||||
gh_meta = await self._fetch_github_metadata(url)
|
||||
item.update(gh_meta)
|
||||
if "gh_updated" in gh_meta and gh_meta["gh_updated"]:
|
||||
item["year"] = gh_meta["gh_updated"].split("-")[0]
|
||||
|
||||
item["tag"] = self._calculate_tag(item)
|
||||
refined.append(item)
|
||||
|
||||
if not to_evaluate: return refined
|
||||
|
||||
@@ -237,7 +242,7 @@ class V2VisionEngine:
|
||||
|
||||
prompt = (
|
||||
f"{self.library_criteria}\n"
|
||||
"Respond ONLY with a JSON object: {\"results\": [{\"idx\": int, \"year\": \"YYYY\", \"stars\": int, \"is_video\": bool}, ...]}\n\n"
|
||||
"Respond ONLY with a JSON object: {\"results\": [{\"idx\": int, \"year\": \"YYYY\", \"stars\": int, \"is_video\": bool, \"tag\": \"[TAG]\"}, ...]}\n\n"
|
||||
"LINKS:\n" + "\n".join([f"{idx}. {l['title']} ({l['url']})" for idx, l in enumerate(batch)])
|
||||
)
|
||||
|
||||
@@ -251,35 +256,61 @@ class V2VisionEngine:
|
||||
if idx < len(batch):
|
||||
item = batch[idx].copy()
|
||||
eval_data = {
|
||||
"year": str(res.get("year", "2024")),
|
||||
"year": str(res.get("year", "N/A")),
|
||||
"stars": min(max(int(res.get("stars", 1)), 1), 3),
|
||||
"is_video": res.get("is_video", False)
|
||||
"is_video": res.get("is_video", False),
|
||||
"tag": res.get("tag", "[ENTERPRISE-STABLE]")
|
||||
}
|
||||
item.update(eval_data)
|
||||
|
||||
# GitHub overrides
|
||||
if "github.com" in item["url"]:
|
||||
gh_meta = await self._fetch_github_metadata(item["url"])
|
||||
item.update(gh_meta)
|
||||
eval_data.update(gh_meta)
|
||||
if "gh_updated" in gh_meta and gh_meta["gh_updated"]:
|
||||
item["year"] = gh_meta["gh_updated"].split("-")[0]
|
||||
eval_data["year"] = item["year"]
|
||||
|
||||
item["tag"] = self._calculate_tag(item)
|
||||
eval_data["tag"] = item["tag"]
|
||||
|
||||
# Save to cache
|
||||
self.cache[item["url"]] = eval_data
|
||||
|
||||
if item["year"].isdigit() and int(item["year"]) >= 2025: item["tag"] = "[CUTTING-EDGE]"
|
||||
elif "awesome" in item["title"].lower(): item["tag"] = "[FOUNDATIONAL]"
|
||||
else: item["tag"] = "[PRODUCTION-READY]"
|
||||
|
||||
refined.append(item)
|
||||
except: continue
|
||||
except:
|
||||
for l in batch:
|
||||
item = l.copy()
|
||||
item["year"], item["stars"], item["is_video"] = "2024", 1, "youtube" in l["url"]
|
||||
item["tag"] = "[FOUNDATIONAL]" if "awesome" in l["title"].lower() else "[PRODUCTION-READY]"
|
||||
item["year"], item["stars"], item["is_video"] = "N/A", 1, "youtube" in l["url"]
|
||||
item["tag"] = self._calculate_tag(item)
|
||||
refined.append(item)
|
||||
await asyncio.sleep(0.3)
|
||||
return refined
|
||||
|
||||
def _calculate_tag(self, item: Dict) -> str:
|
||||
# Dynamic Tagging Strategy based on Maturity and Real Data
|
||||
if "github.com" in item["url"] and "gh_stars" in item:
|
||||
stars = item["gh_stars"]
|
||||
year = int(item.get("year")) if item.get("year", "").isdigit() else 2024
|
||||
if stars > 10000: return "[DE FACTO STANDARD]"
|
||||
if stars > 500 and year >= 2024: return "[ENTERPRISE-STABLE]"
|
||||
if year >= 2025: return "[EMERGING / INNOVATION]"
|
||||
if year <= 2022: return "[LEGACY / MAINTENANCE]"
|
||||
return "[TOOLING]"
|
||||
|
||||
# Fallback to AI's tag or defaults for articles
|
||||
tag = item.get("tag", "").upper()
|
||||
valid_tags = ["[DE FACTO STANDARD]", "[ENTERPRISE-STABLE]", "[EMERGING / INNOVATION]", "[LEGACY / MAINTENANCE]", "[ARCHITECTURE-GUIDE]", "[TOOLING]", "[CASE-STUDY]", "[CHEATSHEET]"]
|
||||
if tag in valid_tags:
|
||||
return tag
|
||||
|
||||
# Basic inference for articles
|
||||
title = item.get("title", "").lower()
|
||||
if "awesome" in title: return "[FOUNDATIONAL]"
|
||||
if "guide" in title or "architecture" in title: return "[ARCHITECTURE-GUIDE]"
|
||||
if "how to" in title or "tutorial" in title: return "[CASE-STUDY]"
|
||||
return "[ENTERPRISE-STABLE]"
|
||||
|
||||
async def _fetch_github_metadata(self, url: str) -> Dict:
|
||||
match = re.search(r'github\.com/([^/]+)/([^/]+)', url)
|
||||
if not match: return {}
|
||||
@@ -335,7 +366,7 @@ class V2VisionEngine:
|
||||
for dim in data.values():
|
||||
for cat_links in dim["categories"].values():
|
||||
master_selection.extend([l for l in cat_links if l.get("stars", 1) == 3])
|
||||
master_selection.sort(key=lambda x: (x.get("year", "0"), x["title"]))
|
||||
master_selection.sort(key=lambda x: (x.get("year", "0"), x["title"]), reverse=True)
|
||||
|
||||
index_md = (
|
||||
"# Nubenetes V2 | The High-Density Library (2026)\n\n"
|
||||
@@ -343,7 +374,16 @@ class V2VisionEngine:
|
||||
"!!! quote \"The Library of 2026\"\n"
|
||||
" A meticulously curated reference of over 15,000 resources. This V2 portal preserves technical depth while providing "
|
||||
" chronological clarity and expert quality synthesis.\n\n"
|
||||
f"<center>\n{mosaic_html}\n</center>\n\n"
|
||||
f"<center markdown=\"1\">\n{mosaic_html}\n</center>\n\n"
|
||||
|
||||
"## 🛡️ V2 Taxonomy & Maturity Tags\n"
|
||||
"To maximize technical clarity, V2 resources are classified by maturity rather than subjective quality:\n\n"
|
||||
"- <span class='md-tag md-tag--success'>[DE FACTO STANDARD]</span>: Foundational industry tools with massive adoption (>10k GitHub stars).\n"
|
||||
"- <span class='md-tag md-tag--info'>[ENTERPRISE-STABLE]</span>: Production-ready tools actively maintained.\n"
|
||||
"- <span class='md-tag md-tag--warning'>[EMERGING / INNOVATION]</span>: High-growth technologies released or heavily updated recently (≥2025).\n"
|
||||
"- <span class='md-tag md-tag--critical'>[LEGACY / MAINTENANCE]</span>: Proven solutions with no major updates since 2022. Use with caution.\n"
|
||||
"- <span class='md-tag md-tag--primary'>[ARCHITECTURE-GUIDE]</span> / <span class='md-tag md-tag--primary'>[CASE-STUDY]</span>: High-value reading material and use cases.\n\n"
|
||||
|
||||
"## 🌟 Master Selection (Top-Tier Gems)\n"
|
||||
"A global selection of the most impactful resources across all dimensions.\n\n"
|
||||
)
|
||||
@@ -352,7 +392,7 @@ class V2VisionEngine:
|
||||
index_md += f"- **({l['year']})** [{l['title']}]({l['url']}){gh_info} 🌟🌟🌟\n"
|
||||
|
||||
index_md += "\n??? note \"Elite Video Selection - Click to expand!\"\n"
|
||||
index_md += f" <center>\n{videos_html}\n </center>\n\n"
|
||||
index_md += f" <center markdown=\"1\">\n{videos_html}\n </center>\n\n"
|
||||
|
||||
index_md += "## Strategic Dimensions\n"
|
||||
for dim, content in data.items():
|
||||
@@ -371,8 +411,15 @@ class V2VisionEngine:
|
||||
md += f"## {cat}\n"
|
||||
for l in links:
|
||||
year, stars = l.get("year", "N/A"), "🌟" * l.get("stars", 1)
|
||||
tag = l.get("tag", "[PRODUCTION-READY]")
|
||||
color = "success" if "FOUNDATIONAL" in tag else "info" if "PRODUCTION" in tag else "warning"
|
||||
tag = l.get("tag", "[ENTERPRISE-STABLE]")
|
||||
|
||||
# Determine color mapping for new tags
|
||||
if "STANDARD" in tag or "FOUNDATIONAL" in tag: color = "success"
|
||||
elif "EMERGING" in tag: color = "warning"
|
||||
elif "LEGACY" in tag: color = "critical"
|
||||
elif "STABLE" in tag: color = "info"
|
||||
else: color = "primary"
|
||||
|
||||
title_display = f"**{l['title']}**" if l.get("stars", 1) >= 2 else l['title']
|
||||
|
||||
gh_info = f" <span class='md-tag md-tag--info'>⭐ {l['gh_stars']}</span>" if "gh_stars" in l else ""
|
||||
|
||||
Reference in New Issue
Block a user