From 5ffab5b5d9b4af7828da9074f65e45179c401e0c Mon Sep 17 00:00:00 2001 From: Nubenetes Bot Date: Sun, 17 May 2026 23:14:11 +0200 Subject: [PATCH] feat(ops): implement high-density multi-part reporting engine for PRs and extraction logs --- src/gitops_manager.py | 108 +++++++++++++++++------------- src/intelligent_health_checker.py | 50 ++++++++------ 2 files changed, 91 insertions(+), 67 deletions(-) diff --git a/src/gitops_manager.py b/src/gitops_manager.py index aded1e69..e9d27d84 100644 --- a/src/gitops_manager.py +++ b/src/gitops_manager.py @@ -2,6 +2,7 @@ import json import base64 from github import Github from datetime import datetime +from typing import List, Dict from src.gemini_utils import SESSION_TRACKER @@ -23,42 +24,14 @@ class RepositoryController: except: return "" - def apply_historical_chunk(self, updates: dict, next_since: str) -> None: - branch_name = "bot/historical-accumulator" - - # Check if branch exists, if not, create from develop - try: - self.repository.get_branch(branch_name) - except: - self._create_feature_branch(branch_name) - - for file_path, content in updates.items(): - try: - try: - file_meta = self.repository.get_contents(file_path, ref=branch_name) - self.repository.update_file( - path=file_path, message=f"chore(historical): chunk sync since {next_since}", - content=content, sha=file_meta.sha, branch=branch_name - ) - except Exception as e: - if "404" in str(e): - self.repository.create_file( - path=file_path, message=f"chore(historical): init {file_path}", - content=content, branch=branch_name - ) - except Exception as e: - print(f"Error in historical chunk for {file_path}: {e}") - def apply_multi_file_changes(self, updates: dict, metrics: dict, safety_report: str = "") -> str: timestamp_slug = datetime.now().strftime("%Y%m%d-%H%M") branch_name = f"bot/knowledge-update-{timestamp_slug}" - # In the last historical chunk, use the accumulator as base if it exists - accumulator_branch = "bot/historical-accumulator" try: - acc = self.repository.get_branch(accumulator_branch) - self.repository.create_git_ref(ref=f"refs/heads/{branch_name}", sha=acc.commit.sha) + self._create_feature_branch(branch_name) except: + branch_name = f"bot/knowledge-update-{timestamp_slug}-{id(updates)}" self._create_feature_branch(branch_name) if not updates: @@ -88,33 +61,48 @@ class RepositoryController: # --- REPORT CONSTRUCTION --- full_report = metrics.get('full_report', []) - sorted_report = sorted(full_report, key=lambda x: 0 if x['status'] == 'INCLUDED' else 1) + sorted_report = sorted(full_report, key=lambda x: 0 if x['status'] == 'INCLUDED' else (1 if x['status'] == 'DUPLICATE' else 2)) counts = {"INCLUDED": 0, "DUPLICATE": 0, "FILTERED": 0} source_counts = {} all_rows = [] + + header_table = "| # | Status | Score | Lang | Type | Date | Source | Reason | URL |\n| :--- | :--- | :---: | :---: | :--- | :---: | :--- | :--- | :--- |\n" + for idx, item in enumerate(sorted_report, 1): status_emoji = {"INCLUDED": "✅", "DUPLICATE": "👯", "FILTERED": "🛡️"}.get(item['status'], "❓") - date_str = item.get('post_date', 'N/A')[:10] if item.get('post_date') else 'N/A' - all_rows.append(f"| {idx} | {status_emoji} {item['status']} | {date_str} | {item.get('source', 'N/A')} | {item['reason']} | `{item['category']}` | {item['url']} |\n") + date_str = str(item.get('post_date', 'N/A'))[:10] if item.get('post_date') else 'N/A' + score = item.get('impact_score', 'N/A') + lang = item.get('language', 'N/A')[:2].upper() if item.get('language') else 'EN' + res_type = item.get('type', 'Ref') + + row = f"| {idx} | {status_emoji} {item['status']} | {score} | {lang} | {res_type} | {date_str} | {item.get('source', 'N/A')} | {item['reason']} | {item['url']} |\n" + all_rows.append(row) + counts[item['status']] = counts.get(item['status'], 0) + 1 - if item['status'] == "INCLUDED": - src = item.get('source', 'Unknown') - source_counts[src] = source_counts.get(src, 0) + 1 + src = item.get('source', 'Unknown') + source_counts[src] = source_counts.get(src, 0) + 1 - # 1. Mermaid Diagrams & Stats + # AI Intel and Mermaid ai_intel = SESSION_TRACKER.get_intelligence_report() + source_md = "#### 📊 Source Distribution\n| Source | Count |\n| :--- | :---: |\n" + for src, count in sorted(source_counts.items(), key=lambda x: x[1], reverse=True): + source_md += f"| {src} | {count} |\n" + mermaid = f"### 📊 Decision Metrics\n```mermaid\npie title Agentic Decision Distribution\n \"Accepted\" : {counts['INCLUDED']}\n \"Duplicates\" : {counts['DUPLICATE']}\n \"Filtered\" : {counts['FILTERED']}\n```\n" - pr_body = ( - f"## 💎 Knowledge Update: {datetime.now().strftime('%d %b %Y')}\n\n" - f"Processed **{metrics.get('total_extracted', 0)}** links.\n\n" - f"{safety_report}\n\n" - f"{ai_intel}\n\n" - f"{mermaid}\n" - f"---\n" - f"**Audit Matrix follows in comments due to scale.**\n" - ) + # Build PR Body (With Safety Guard Splitting) + pr_body = f"## 💎 Knowledge Update: {datetime.now().strftime('%d %b %Y')}\n\nProcessed **{metrics.get('total_extracted', 0)}** links.\n\n" + + # If safety_report is huge, move it to its own comment + safety_in_body = True + if len(safety_report) > 30000: + pr_body += "⚠️ **Detailed Safety Audit moved to comments due to scale.**\n\n" + safety_in_body = False + else: + pr_body += f"{safety_report}\n\n" + + pr_body += f"{ai_intel}\n\n{mermaid}\n{source_md}\n---\n**Audit Matrix and Logs follow in successive comments.**\n" pr = self.repository.create_pull( title=f"💎 Knowledge Update & Optimization: {datetime.now().strftime('%d %b %Y')}", @@ -123,8 +111,32 @@ class RepositoryController: base=self.default_branch_name ) - # 2. Split Audit Matrix into Comments - header_table = "| # | Status | Date | Source | Reason | Category | URL |\n| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n" + # 1. Safety Report (if huge) + if not safety_in_body: + log_header = "## 🛡️ Safety & Mandate Audit (Detailed)\n" + current_chunk = log_header + for line in safety_report.splitlines(): + if len(current_chunk) + len(line) > 60000: + pr.create_issue_comment(current_chunk) + current_chunk = log_header + line + "\n" + else: + current_chunk += line + "\n" + pr.create_issue_comment(current_chunk) + + # 2. X.com Extraction Audit Trail + x_audit = metrics.get('x_audit', []) + if x_audit: + log_header = "### 📜 Extraction Audit Trail\n*Detailed logs of social and RSS discovery attempts.*\n\n" + current_log = log_header + for entry in x_audit: + if len(current_log) + len(entry) > 60000: + pr.create_issue_comment(current_log) + current_log = log_header + entry + "\n" + else: + current_log += entry + "\n" + pr.create_issue_comment(current_log) + + # 3. Split Audit Matrix into Comments current_comment = header_table part = 1 for row in all_rows: diff --git a/src/intelligent_health_checker.py b/src/intelligent_health_checker.py index 7cd90422..2440ae60 100644 --- a/src/intelligent_health_checker.py +++ b/src/intelligent_health_checker.py @@ -29,7 +29,7 @@ class IntelligentLinkCleaner: self.dead_links: Dict[str, Tuple[str, str]] = {} self.learning_data = self._load_memory() self.inventory = self._load_inventory() - self.action_log: List[Dict] = [] + self.full_report_metrics = [] # Track what happened to every link self.detailed_stats = {"total_scanned": 0, "skipped_recent": 0, "by_file": {}, "operation_types": {"removals": 0, "consolidated": 0, "healed": 0, "enriched": 0}} self.stats = {"total_links": 0, "dead_links_removed": 0, "orphans_fixed": 0, "enriched_descriptions": 0} @@ -63,18 +63,15 @@ class IntelligentLinkCleaner: content = open(path, "r").read() lines = content.splitlines() for idx, line in enumerate(lines): - # Enhanced Regex to capture surrounding formatting matches = re.finditer(r'(\*\*|==)?\s*\[(.*?)\]\((https?://.*?)\)\s*(\*\*|==)?\s*(.*)', line) for m in matches: fmt_pre, title, url, fmt_post, desc = m.groups() nu = normalize_url(url) - - # Identify Importance Markers (Mandate 31 Expansion) is_important = False - if fmt_pre or fmt_post: is_important = True # Bold or Highlighted - if "🌟" in title or "🌟" in desc: is_important = True # Stars - if len(desc.strip()) > 100: is_important = True # Deep description - if path in CORE_FILES: is_important = True # Foundational files + if fmt_pre or fmt_post: is_important = True + if "🌟" in title or "🌟" in (desc or ""): is_important = True + if desc and len(desc.strip()) > 100: is_important = True + if path in CORE_FILES: is_important = True self.link_registry.setdefault(nu, []).append({ "file": path, "line_index": idx, "url": url, @@ -84,7 +81,6 @@ class IntelligentLinkCleaner: unique_urls = list(self.link_registry.keys()) random.shuffle(unique_urls) - # 1.5. Identify prioritized links for validation to_check = [] for u in unique_urls: nu = normalize_url(u); entry = self.inventory.get(nu, {}) @@ -155,27 +151,36 @@ class IntelligentLinkCleaner: score = entry.get("health_score", 100) score = (score * 0.8) + (100 if alive else 0) * 0.2 entry["health_score"] = round(score, 1); entry["last_checked"] = datetime.now().timestamp() - - # Identify high-value status + is_important = any(occ.get("is_important") for occ in self.link_registry.get(nu, [])) if entry.get("stars", 0) >= 3: is_important = True + status = "INCLUDED" if alive else "FILTERED" + final_reason = reason + if not alive or reason == "generic_redirect_loss": if is_important: entry["status"] = "review_required" entry["review_metadata"] = { - "original_url": url, - "proposed_url": final if final else "NONE", - "reason": f"High-Value Preservation: {reason}", - "timestamp": datetime.now().isoformat() + "original_url": url, "proposed_url": final if final else "NONE", + "reason": f"High-Value Preservation: {reason}", "timestamp": datetime.now().isoformat() } - log_event(f" [⚠️] REVIEW STORED: {url} in inventory. Metadata preserved.") + log_event(f" [⚠️] REVIEW STORED: {url}") + status = "INCLUDED" # Kept in V1 + final_reason = f"Preserved for Review ({reason})" elif score < 20: entry["status"] = "dead"; self.dead_links[url] = (None, reason) + status = "FILTERED"; final_reason = f"Dead: {reason}" elif final and alive: - # If it's rescued or a valid redirect, we update self.dead_links[url] = (f"CANONICAL:{final}", "Redirect/Resurrection") - + final_reason = "Updated (Redirect/Rescued)" + + self.full_report_metrics.append({ + "url": url, "status": status, "reason": final_reason, + "category": entry.get("category", "N/A"), "post_date": entry.get("pub_date"), + "source": "Health Checker", "impact_score": entry.get("stars", 0) * 20, + "language": entry.get("language", "EN"), "type": entry.get("resource_type", "Ref") + }) self.inventory[nu] = entry await self.apply_changes() @@ -228,7 +233,14 @@ class IntelligentLinkCleaner: from src.safety_guard import SafetyGuard report = SafetyGuard().generate_audit_report() - if final_payload: self.git_controller.apply_multi_file_changes(final_payload, {"total_extracted": len(self.link_registry)}, safety_report=report) + + metrics = { + "total_extracted": len(self.link_registry), + "full_report": self.full_report_metrics, + "end_date": datetime.now().isoformat() + } + + if final_payload: self.git_controller.apply_multi_file_changes(final_payload, metrics, safety_report=report) async def prune_orphaned_metadata(self): valid_map = {}