From 68f96e65aed9936aa4e4c4513a84e44392babe8e Mon Sep 17 00:00:00 2001 From: Nubenetes Bot Date: Sun, 17 May 2026 23:02:25 +0200 Subject: [PATCH] feat(ops): implement unified 'Review Required' metadata lifecycle and PR reporting --- src/agentic_curator.py | 7 +++++++ src/intelligent_health_checker.py | 21 +++++++++++++------- src/safety_guard.py | 33 ++++++++++++++++++++++--------- src/v2_optimizer.py | 9 ++++++++- 4 files changed, 53 insertions(+), 17 deletions(-) diff --git a/src/agentic_curator.py b/src/agentic_curator.py index 8f65b4c0..944b6a16 100644 --- a/src/agentic_curator.py +++ b/src/agentic_curator.py @@ -81,6 +81,13 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]: # --- DATABASE-FIRST: Reuse insights --- if norm_url in curator.inventory: cached = curator.inventory[norm_url] + + # Mandate 31: Absolute protection for links under review + if cached.get("status") == "review_required": + log_event(f" [🔒] PRESERVING REVIEW STATUS: {url}") + evaluations[url] = {"status": "REVIEW_PENDING", **cached} + continue + if cached.get("title") and cached.get("hierarchy"): log_event(f" [⚡] REUSING CACHED INSIGHTS: {cached['title']}") from src.gemini_utils import SESSION_TRACKER diff --git a/src/intelligent_health_checker.py b/src/intelligent_health_checker.py index 5ddd4845..7cd90422 100644 --- a/src/intelligent_health_checker.py +++ b/src/intelligent_health_checker.py @@ -149,26 +149,33 @@ class IntelligentLinkCleaner: except: pass except: pass - # 2.8. Finalize Status with Foundational Preservation + # 2.8. Finalize Status with Foundational Preservation & Metadata for url, (alive, reason, final) in check_results.items(): nu = normalize_url(url); entry = self.inventory.get(nu, {}) score = entry.get("health_score", 100) score = (score * 0.8) + (100 if alive else 0) * 0.2 entry["health_score"] = round(score, 1); entry["last_checked"] = datetime.now().timestamp() - - # --- MANDATE 31: HIGH-VALUE PROTECTION --- - # Check importance from either current mapping or historical stars + + # Identify high-value status is_important = any(occ.get("is_important") for occ in self.link_registry.get(nu, [])) if entry.get("stars", 0) >= 3: is_important = True - if not alive: + if not alive or reason == "generic_redirect_loss": if is_important: entry["status"] = "review_required" - log_event(f" [⚠️] PRESERVED (Review Needed): {url} is HIGH-VALUE.") + entry["review_metadata"] = { + "original_url": url, + "proposed_url": final if final else "NONE", + "reason": f"High-Value Preservation: {reason}", + "timestamp": datetime.now().isoformat() + } + log_event(f" [⚠️] REVIEW STORED: {url} in inventory. Metadata preserved.") elif score < 20: entry["status"] = "dead"; self.dead_links[url] = (None, reason) elif final and alive: - self.dead_links[url] = (f"CANONICAL:{final}", "Redirect") + # If it's rescued or a valid redirect, we update + self.dead_links[url] = (f"CANONICAL:{final}", "Redirect/Resurrection") + self.inventory[nu] = entry await self.apply_changes() diff --git a/src/safety_guard.py b/src/safety_guard.py index 63b75c21..3d486154 100644 --- a/src/safety_guard.py +++ b/src/safety_guard.py @@ -45,7 +45,6 @@ class SafetyGuard: def validate_semantic_interlinking(self): """Mandate 5: Verificar interconexión semántica en V1.""" - log_event("[Safety] Auditing Semantic Interlinking...") for url, meta in self.inventory.items(): related = meta.get("related_categories", []) for rel_cat in related: @@ -53,7 +52,7 @@ class SafetyGuard: if os.path.exists(path): content = open(path, "r").read() if url not in content: - self.warnings.append(f"🔗 **Interlink Missing**: `{meta['title']}` should be referenced in `{rel_cat}.md` (See also)") + self.warnings.append(f"🔗 **Interlink Missing**: `{meta['title']}` in `{rel_cat}.md` (See also)") def validate_special_assets_completeness(self): """Mandate 27: Inclusión exhaustiva de Activos Especiales en V2.""" @@ -70,7 +69,6 @@ class SafetyGuard: for link in v1_links: nu = normalize_url(link) if nu in self.inventory and self.inventory[nu].get("status") == "online": - # Check for inherited is_special flag instead of v2_locations (which are built later) if not self.inventory[nu].get("is_special"): self.errors.append(f"💎 **VIP Flag Missing**: `{link}` from `{file_name}` is not marked as Special") @@ -144,7 +142,7 @@ class SafetyGuard: keywords = re.findall(r'\w+', topic.lower()) found = any(kw in workflow_content.lower() for kw in keywords) if not found: - self.warnings.append(f"🔄 **Sync Warning**: Topic `{topic}` might not be represented in `{WORKFLOW_PATH}` inputs") + self.warnings.append(f"🔄 **Sync Warning**: Topic `{topic}` not in `{WORKFLOW_PATH}`") def validate_toc_and_anchors(self): """🛠️ Structural Evolution: TOC Consistency & Lowercase Slugs.""" @@ -155,7 +153,7 @@ class SafetyGuard: if file in exempt_files or file == "index.md": continue content = open(os.path.join(root, file), "r").read() if not self.has_valid_toc(content) and len(re.findall(r'^## ', content, re.M)) > 2: - self.warnings.append(f"📍 **V1 TOC Missing**: `{file}` has many sections but no TOC") + self.warnings.append(f"📍 **V1 TOC Missing**: `{file}` has sections but no TOC") anchors = re.findall(r'\(#([^\)]+)\)', content) for a in anchors: if any(c.isupper() for c in a): @@ -163,7 +161,15 @@ class SafetyGuard: def generate_audit_report(self, old_inv_path=None) -> str: """Generates a comprehensive Markdown report based on ALL Mandates.""" - log_event("[Safety] Executing Full Mandate Audit (GEMINI.md compliance)...") + log_event("[Safety] Executing Full Mandate Audit...") + # 1. Identify Pending Reviews + pending_reviews = [] + for url, meta in self.inventory.items(): + if meta.get("status") == "review_required": + rev = meta.get("review_metadata", {}) + pending_reviews.append(f"📍 `{meta.get('title', url)}`: Original: {rev.get('original_url')} | Proposed: {rev.get('proposed_url')}") + + # 2. Run standard validations if old_inv_path and os.path.exists(old_inv_path): try: with open(old_inv_path, "r") as f: @@ -177,21 +183,30 @@ class SafetyGuard: self.validate_v2_architecture() self.validate_navigation_sync() self.validate_toc_and_anchors() + status = "✅ PASS" if not self.errors else "❌ FAILED" if not self.errors and self.warnings: status = "⚠️ WARNING" + report = f"\n## 🛡️ Safety & Mandate Audit: {status}\n*Audit executed on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n" + + if pending_reviews: + report += "### 🔍 High-Value Pending Reviews\n" + report += "> ⚠️ The following resources have been preserved in V1 but hidden from V2 for manual audit.\n\n" + for pr in pending_reviews: report += f"- {pr}\n" + report += "\n" + if not self.errors and not self.warnings: - report += "✨ **All project mandates from GEMINI.md and technical integrity checks passed successfully.**\n" + report += "✨ **All project mandates and technical integrity checks passed successfully.**\n" else: if self.errors: - report += "### 🔴 Critical Failures (Mandate Violations)\n" + report += "### 🔴 Critical Failures\n" for err in self.errors: report += f"- {err}\n" report += "\n" if self.warnings: report += "### 🟡 Warnings & Recommendations\n" report += "
Click to view " + str(len(self.warnings)) + " recommendations\n\n" for warn in self.warnings: report += f"- {warn}\n" - report += "\n> 💡 **Note**: Warnings suggest improvements to align with Nubenetes Excellence standards.\n
\n" + report += "\n> 💡 **Note**: Align with Nubenetes Excellence standards.\n\n" return report if __name__ == "__main__": diff --git a/src/v2_optimizer.py b/src/v2_optimizer.py index fd1d10b4..2b50d422 100644 --- a/src/v2_optimizer.py +++ b/src/v2_optimizer.py @@ -168,7 +168,14 @@ class V2VisionEngine: async def _check_single_link_resilient(self, client, link: Dict): url = link["url"] norm_url = normalize_url(url) - if norm_url in self.inventory and self.inventory[norm_url].get("status") == "online": return link + entry = self.inventory.get(norm_url, {}) + + # Mandate 31: Skip links under review for V2 Elite + if entry.get("status") == "review_required": + log_event(f" [-] SKIPPING V2: {url} is under Review.") + return None + + if entry.get("status") == "online": return link try: resp = await client.get(url, timeout=10.0) if resp.status_code < 400: