From 591f006a5356fdb5586d9613e67987a84b63d659 Mon Sep 17 00:00:00 2001 From: Nubenetes Bot Date: Sun, 10 May 2026 22:22:11 +0200 Subject: [PATCH] fix: ensure PR creation with technical commit and deepen extraction scroll --- src/gitops_manager.py | 14 +++++++++++++- src/ingestion_twikit.py | 8 ++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/gitops_manager.py b/src/gitops_manager.py index c0f67e0c..4514bc74 100644 --- a/src/gitops_manager.py +++ b/src/gitops_manager.py @@ -16,6 +16,14 @@ class RepositoryController: branch_name = f"bot/knowledge-update-{timestamp_slug}" self._create_feature_branch(branch_name) + # Si no hay cambios en Markdowns, añadimos un cambio técnico para permitir abrir la PR con el reporte + if not updates: + updates["src/memory/last_audit_run.json"] = json.dumps({ + "timestamp": metrics.get("end_date"), + "total_extracted": metrics.get("total_extracted"), + "status": "Audit Only (No new links injected)" + }, indent=2) + for file_path, content in updates.items(): try: commit_signature = f"chore: update {file_path} [{timestamp_slug}]" @@ -38,15 +46,19 @@ class RepositoryController: full_report = metrics.get('full_report', []) # 1. Tabla Matricial de Auditoría + # Limitamos la tabla si es muy larga para evitar errores de API de GitHub matrix_table = "### 📋 Matriz de Auditoría de Enlaces (Full Extraction)\n" matrix_table += "| Estado | Motivo | Categoría | URL |\n| :--- | :--- | :--- | :--- |\n" counts = {"INCLUDED": 0, "DUPLICATE": 0, "FILTERED": 0} - for item in full_report: + for item in full_report[:200]: # Mostrar solo los primeros 200 para no romper el límite de caracteres del PR status_emoji = {"INCLUDED": "✅", "DUPLICATE": "👯", "FILTERED": "🛡️"}.get(item['status'], "❓") matrix_table += f"| {status_emoji} {item['status']} | {item['reason']} | `{item['category']}` | {item['url']} |\n" counts[item['status']] = counts.get(item['status'], 0) + 1 + if len(full_report) > 200: + matrix_table += f"\n> 💡 *... y {len(full_report) - 200} enlaces más procesados.*" + # 2. Diagrama Mermaid mermaid_pie = "### 📊 Métricas de Decisión\n```mermaid\npie title Distribución de Decisión Agéntica\n" mermaid_pie += f" \"Aceptados (Inyectados)\" : {counts['INCLUDED']}\n" diff --git a/src/ingestion_twikit.py b/src/ingestion_twikit.py index 7dee236d..c50c2f2b 100644 --- a/src/ingestion_twikit.py +++ b/src/ingestion_twikit.py @@ -71,14 +71,14 @@ class SocialDataExtractor: await page.goto(f"https://x.com/{self.target_account}", wait_until="domcontentloaded", timeout=60000) await asyncio.sleep(8) - for _ in range(4): # Scroll moderado + for _ in range(10): # Scroll profundo para histórico html = await page.content() urls = self._extract_urls_from_text(html) for u in urls: - if all(x not in u for x in ["x.com", "twitter.com", "t.co", "abs.twimg", "archive.org"]): + if all(x not in u for x in ["x.com", "twitter.com", "t.co", "abs.twimg", "archive.org", "pbs.twimg"]): results.append({"url": u, "context": "Playwright Browser", "timestamp": datetime.now(MADRID_TZ).isoformat()}) - await page.evaluate("window.scrollBy(0, 1200)") - await asyncio.sleep(4) + await page.evaluate("window.scrollBy(0, 2000)") + await asyncio.sleep(5) await browser.close() return results