mirror of
https://github.com/nubenetes/awesome-kubernetes.git
synced 2026-05-23 09:33:33 +00:00
feat(ops): final mandate compliance sync - restored database-first, linguistic diversity, and repository consolidation
This commit is contained in:
@@ -57,19 +57,35 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
|
||||
evaluations = {}
|
||||
curator = AgenticCurator()
|
||||
for i, asset in enumerate(raw_assets):
|
||||
log_event(f"--- EVALUATING {i+1}/{len(raw_assets)}: {asset['url']} ---")
|
||||
norm_url = normalize_url(asset["url"])
|
||||
url = asset["url"]
|
||||
log_event(f"--- EVALUATING {i+1}/{len(raw_assets)}: {url} ---")
|
||||
norm_url = normalize_url(url)
|
||||
|
||||
# --- DATABASE-FIRST: Reuse insights ---
|
||||
if norm_url in curator.inventory:
|
||||
cached = curator.inventory[norm_url]
|
||||
if cached.get("title") and cached.get("hierarchy"):
|
||||
log_event(f" [⚡] REUSING CACHED INSIGHTS: {cached['title']}")
|
||||
from src.gemini_utils import SESSION_TRACKER
|
||||
SESSION_TRACKER.track_cache_hit(est_tokens=2200)
|
||||
evaluations[url] = {"status": "INCLUDED", **cached}
|
||||
continue
|
||||
|
||||
# 1. Fetch & Fingerprint
|
||||
web_content, rich_meta = await _deep_fetch_content(asset["url"])
|
||||
web_content, rich_meta = await _deep_fetch_content(url)
|
||||
content_hash = hashlib.sha256(web_content.encode()).hexdigest() if web_content else "N/A"
|
||||
|
||||
# 2. AI Logic
|
||||
# 2. AI Logic (O'Reilly + Linguistic Diversity)
|
||||
is_primary = "nubenetes" in asset.get("source_type", "Social").lower()
|
||||
strictness = "BE EXTREMELY SELECTIVE.\n" if not is_primary else ""
|
||||
prompt = (
|
||||
"You act as a Senior Technical Librarian in 2026.\n" + strictness +
|
||||
"Analyze the resource and respond ONLY with JSON: {\"impact_score\": int, \"pub_date\": \"YYYY-MM-DD\", \"primary_category\": \"cat\", \"related_categories\": [\"cat1\"], \"title\": \"...\", \"desc\": \"...\", \"en_summary\": \"...\", \"language\": \"...\", \"resource_type\": \"...\", \"complexity\": \"...\", \"technical_hierarchy\": [\"Area\", \"Topic\", ...], \"is_microservice\": bool}\n"
|
||||
"PHASE 1: LINGUISTIC DIVERSITY (Mandate 10)\n" +
|
||||
"- DESC (V1 Archive): Provide a professional summary in the RESOURCE'S NATIVE LANGUAGE.\n" +
|
||||
"- EN_SUMMARY (V2 Portal): Provide a professional English synthesis.\n" +
|
||||
"PHASE 2: ARCHITECTURAL CLASSIFICATION (O'REILLY STYLE)\n" +
|
||||
"- Identify TECHNICAL_HIERARCHY: List (max 10 strings) Area > Topic > Subtopics.\n" +
|
||||
"Respond ONLY with JSON: {\"impact_score\": int, \"pub_date\": \"YYYY-MM-DD\", \"primary_category\": \"cat\", \"title\": \"...\", \"desc\": \"...\", \"en_summary\": \"...\", \"language\": \"...\", \"resource_type\": \"...\", \"complexity\": \"...\", \"technical_hierarchy\": [\"Area\", ...], \"is_microservice\": bool}\n"
|
||||
f"CONTENT: {web_content[:2000]}"
|
||||
)
|
||||
|
||||
@@ -90,11 +106,11 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
|
||||
"category": primary_cat, "status": "online", "last_checked": datetime.now().timestamp()
|
||||
}
|
||||
curator.inventory[norm_url] = eval_data
|
||||
evaluations[asset["url"]] = {"status": "INCLUDED", **eval_data}
|
||||
evaluations[url] = {"status": "INCLUDED", **eval_data}
|
||||
curator._save_inventory()
|
||||
log_event(f" [+] ACCEPTED: {data['title']}")
|
||||
else:
|
||||
evaluations[asset["url"]] = {"status": "FILTERED"}
|
||||
evaluations[url] = {"status": "FILTERED"}
|
||||
except Exception as e: log_event(f" [!] AI Error: {e}")
|
||||
return evaluations
|
||||
|
||||
|
||||
@@ -109,12 +109,27 @@ class IntelligentLinkCleaner:
|
||||
text = resp.text.lower()
|
||||
if any(kw in text for kw in parked_indicators): return False, "parked", None
|
||||
return True, "OK", str(resp.url) if str(resp.url) != url else None
|
||||
|
||||
# Definitive Failures
|
||||
if resp.status_code in [404, 410]:
|
||||
# AUTO-HEAL GitHub Branches (master -> main)
|
||||
if "github.com" in url and "/master/" in url:
|
||||
heal = url.replace("/master/", "/main/")
|
||||
try:
|
||||
if (await client.get(heal)).status_code < 200: return True, "healed", heal
|
||||
if (await client.get(heal)).status_code < 400: return True, "healed", heal
|
||||
except: pass
|
||||
|
||||
# Mandate 8: Repository Consolidation
|
||||
if "github.com" in url:
|
||||
match = re.search(r'(https?://github\.com/[^/]+/[^/]+)', url)
|
||||
if match:
|
||||
root_url = match.group(1)
|
||||
if root_url != url:
|
||||
try:
|
||||
if (await client.get(root_url)).status_code < 400:
|
||||
return True, "consolidated_to_root", root_url
|
||||
except: pass
|
||||
|
||||
return False, "404", None
|
||||
return True, f"Soft Block {resp.status_code}", None
|
||||
except: return True, "Connection Error", None
|
||||
|
||||
@@ -298,9 +298,29 @@ class V2VisionEngine:
|
||||
img = f" })\n" if l.get('social_preview_url') else ""
|
||||
md += f"!!! note \"{title}\"\n{img} **[Access Resource]({l['url']})** {'🌟'*l.get('stars',4)} | Level: {l.get('complexity', 'Beginner')}\n \n {l.get('ai_summary', l.get('description', ''))}\n\n"
|
||||
else:
|
||||
date = f"**({l.get('year', 'N/A')})** "
|
||||
tags = f" <span class='md-tag md-tag--info'>⭐ {l.get('gh_stars',0)}</span>"
|
||||
md += f" - {date}[{title}]({l['url']}){tags} {'🌟'*l.get('stars',0)}\n"
|
||||
year_prefix = f"**({l.get('year', 'N/A')})** "
|
||||
gh_info = f" <span class='md-tag md-tag--info'>⭐ {l.get('gh_stars',0)}</span>" if l.get('gh_stars') else ""
|
||||
icon = " 🎥" if l.get("is_video") else ""
|
||||
|
||||
lang = l.get("language", "English")
|
||||
lang_tag = f" <span class='md-tag md-tag--warning'>[{lang.upper()} CONTENT]</span>" if lang.lower() != "english" else ""
|
||||
|
||||
comp = l.get("complexity", "Intermediate")
|
||||
comp_tag = f" <span class='md-tag md-tag--critical'>[{comp.upper()} LEVEL]</span>" if comp.lower() in ["architect", "advanced"] else ""
|
||||
|
||||
res_type = l.get("resource_type", "Reference")
|
||||
type_tag = f" <span class='md-tag md-tag--primary'>[{res_type.upper()}]</span>" if res_type.lower() in ["case study", "guide", "documentation"] else ""
|
||||
|
||||
rich = "".join([
|
||||
f" <small>by **{l['author']}**</small>" if l.get("author") else "",
|
||||
f" <span class='md-tag md-tag--info'>⏱️ {l['duration']}</span>" if l.get("duration") else "",
|
||||
f" <span class='md-tag md-tag--info'>📖 {l['reading_time']}</span>" if l.get("reading_time") else ""
|
||||
])
|
||||
|
||||
tag = l.get("tag", "[COMMUNITY-TOOL]")
|
||||
color = "success" if "STANDARD" in tag else "warning" if "EMERGING" in tag else "info"
|
||||
|
||||
md += f" - {year_prefix}[{title}]({l['url']}){icon}{gh_info}{lang_tag}{comp_tag}{type_tag}{rich} {'🌟'*l.get('stars',0)} <span class='md-tag md-tag--{color}'>{tag}</span>\n"
|
||||
if l.get('ai_summary'): md += f"\n {l['ai_summary']}\n\n"
|
||||
return md
|
||||
|
||||
|
||||
Reference in New Issue
Block a user