feat(ops): final mandate compliance sync - restored database-first, linguistic diversity, and repository consolidation

This commit is contained in:
Nubenetes Bot
2026-05-17 13:58:51 +02:00
parent 644d9cc0cc
commit d376f86e7e
3 changed files with 62 additions and 11 deletions

View File

@@ -57,19 +57,35 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
evaluations = {}
curator = AgenticCurator()
for i, asset in enumerate(raw_assets):
log_event(f"--- EVALUATING {i+1}/{len(raw_assets)}: {asset['url']} ---")
norm_url = normalize_url(asset["url"])
url = asset["url"]
log_event(f"--- EVALUATING {i+1}/{len(raw_assets)}: {url} ---")
norm_url = normalize_url(url)
# --- DATABASE-FIRST: Reuse insights ---
if norm_url in curator.inventory:
cached = curator.inventory[norm_url]
if cached.get("title") and cached.get("hierarchy"):
log_event(f" [⚡] REUSING CACHED INSIGHTS: {cached['title']}")
from src.gemini_utils import SESSION_TRACKER
SESSION_TRACKER.track_cache_hit(est_tokens=2200)
evaluations[url] = {"status": "INCLUDED", **cached}
continue
# 1. Fetch & Fingerprint
web_content, rich_meta = await _deep_fetch_content(asset["url"])
web_content, rich_meta = await _deep_fetch_content(url)
content_hash = hashlib.sha256(web_content.encode()).hexdigest() if web_content else "N/A"
# 2. AI Logic
# 2. AI Logic (O'Reilly + Linguistic Diversity)
is_primary = "nubenetes" in asset.get("source_type", "Social").lower()
strictness = "BE EXTREMELY SELECTIVE.\n" if not is_primary else ""
prompt = (
"You act as a Senior Technical Librarian in 2026.\n" + strictness +
"Analyze the resource and respond ONLY with JSON: {\"impact_score\": int, \"pub_date\": \"YYYY-MM-DD\", \"primary_category\": \"cat\", \"related_categories\": [\"cat1\"], \"title\": \"...\", \"desc\": \"...\", \"en_summary\": \"...\", \"language\": \"...\", \"resource_type\": \"...\", \"complexity\": \"...\", \"technical_hierarchy\": [\"Area\", \"Topic\", ...], \"is_microservice\": bool}\n"
"PHASE 1: LINGUISTIC DIVERSITY (Mandate 10)\n" +
"- DESC (V1 Archive): Provide a professional summary in the RESOURCE'S NATIVE LANGUAGE.\n" +
"- EN_SUMMARY (V2 Portal): Provide a professional English synthesis.\n" +
"PHASE 2: ARCHITECTURAL CLASSIFICATION (O'REILLY STYLE)\n" +
"- Identify TECHNICAL_HIERARCHY: List (max 10 strings) Area > Topic > Subtopics.\n" +
"Respond ONLY with JSON: {\"impact_score\": int, \"pub_date\": \"YYYY-MM-DD\", \"primary_category\": \"cat\", \"title\": \"...\", \"desc\": \"...\", \"en_summary\": \"...\", \"language\": \"...\", \"resource_type\": \"...\", \"complexity\": \"...\", \"technical_hierarchy\": [\"Area\", ...], \"is_microservice\": bool}\n"
f"CONTENT: {web_content[:2000]}"
)
@@ -90,11 +106,11 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
"category": primary_cat, "status": "online", "last_checked": datetime.now().timestamp()
}
curator.inventory[norm_url] = eval_data
evaluations[asset["url"]] = {"status": "INCLUDED", **eval_data}
evaluations[url] = {"status": "INCLUDED", **eval_data}
curator._save_inventory()
log_event(f" [+] ACCEPTED: {data['title']}")
else:
evaluations[asset["url"]] = {"status": "FILTERED"}
evaluations[url] = {"status": "FILTERED"}
except Exception as e: log_event(f" [!] AI Error: {e}")
return evaluations

View File

@@ -109,12 +109,27 @@ class IntelligentLinkCleaner:
text = resp.text.lower()
if any(kw in text for kw in parked_indicators): return False, "parked", None
return True, "OK", str(resp.url) if str(resp.url) != url else None
# Definitive Failures
if resp.status_code in [404, 410]:
# AUTO-HEAL GitHub Branches (master -> main)
if "github.com" in url and "/master/" in url:
heal = url.replace("/master/", "/main/")
try:
if (await client.get(heal)).status_code < 200: return True, "healed", heal
if (await client.get(heal)).status_code < 400: return True, "healed", heal
except: pass
# Mandate 8: Repository Consolidation
if "github.com" in url:
match = re.search(r'(https?://github\.com/[^/]+/[^/]+)', url)
if match:
root_url = match.group(1)
if root_url != url:
try:
if (await client.get(root_url)).status_code < 400:
return True, "consolidated_to_root", root_url
except: pass
return False, "404", None
return True, f"Soft Block {resp.status_code}", None
except: return True, "Connection Error", None

View File

@@ -298,9 +298,29 @@ class V2VisionEngine:
img = f" ![Preview]({l.get('social_preview_url')})\n" if l.get('social_preview_url') else ""
md += f"!!! note \"{title}\"\n{img} **[Access Resource]({l['url']})** {'🌟'*l.get('stars',4)} | Level: {l.get('complexity', 'Beginner')}\n \n {l.get('ai_summary', l.get('description', ''))}\n\n"
else:
date = f"**({l.get('year', 'N/A')})** "
tags = f" <span class='md-tag md-tag--info'>⭐ {l.get('gh_stars',0)}</span>"
md += f" - {date}[{title}]({l['url']}){tags} {'🌟'*l.get('stars',0)}\n"
year_prefix = f"**({l.get('year', 'N/A')})** "
gh_info = f" <span class='md-tag md-tag--info'>⭐ {l.get('gh_stars',0)}</span>" if l.get('gh_stars') else ""
icon = " 🎥" if l.get("is_video") else ""
lang = l.get("language", "English")
lang_tag = f" <span class='md-tag md-tag--warning'>[{lang.upper()} CONTENT]</span>" if lang.lower() != "english" else ""
comp = l.get("complexity", "Intermediate")
comp_tag = f" <span class='md-tag md-tag--critical'>[{comp.upper()} LEVEL]</span>" if comp.lower() in ["architect", "advanced"] else ""
res_type = l.get("resource_type", "Reference")
type_tag = f" <span class='md-tag md-tag--primary'>[{res_type.upper()}]</span>" if res_type.lower() in ["case study", "guide", "documentation"] else ""
rich = "".join([
f" <small>by **{l['author']}**</small>" if l.get("author") else "",
f" <span class='md-tag md-tag--info'>⏱️ {l['duration']}</span>" if l.get("duration") else "",
f" <span class='md-tag md-tag--info'>📖 {l['reading_time']}</span>" if l.get("reading_time") else ""
])
tag = l.get("tag", "[COMMUNITY-TOOL]")
color = "success" if "STANDARD" in tag else "warning" if "EMERGING" in tag else "info"
md += f" - {year_prefix}[{title}]({l['url']}){icon}{gh_info}{lang_tag}{comp_tag}{type_tag}{rich} {'🌟'*l.get('stars',0)} <span class='md-tag md-tag--{color}'>{tag}</span>\n"
if l.get('ai_summary'): md += f"\n {l['ai_summary']}\n\n"
return md