diff --git a/.github/workflows/intelligent_link_cleaner.yml b/.github/workflows/intelligent_link_cleaner.yml index d6496206..6c477483 100644 --- a/.github/workflows/intelligent_link_cleaner.yml +++ b/.github/workflows/intelligent_link_cleaner.yml @@ -2,7 +2,7 @@ name: Nubenetes Intelligent Link Cleaner & Dedup on: schedule: - - cron: '0 0 1 * *' # Mensual (el día 1 de cada mes) + - cron: '0 0 1 * *' workflow_dispatch: permissions: @@ -12,6 +12,8 @@ permissions: jobs: intelligent-clean-process: runs-on: ubuntu-latest + env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true steps: - name: Sincronización del repositorio uses: actions/checkout@v4 @@ -24,13 +26,13 @@ jobs: - name: Instalación de dependencias y Playwright run: | python -m pip install --upgrade pip - pip install --no-cache-dir pydantic PyGithub aiohttp beautifulsoup4 httpx fake-useragent pytz python-dotenv playwright + pip install --no-cache-dir pydantic PyGithub aiohttp beautifulsoup4 httpx fake-useragent pytz python-dotenv playwright PyYAML playwright install chromium --with-deps - name: Ejecución de la Limpieza Inteligente Global env: GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PYTHONPATH: . + PYTHONPATH: ${{ github.workspace }} run: | python src/intelligent_health_checker.py diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/agentic_curator.py b/src/agentic_curator.py index 056477c8..3a714768 100644 --- a/src/agentic_curator.py +++ b/src/agentic_curator.py @@ -13,7 +13,12 @@ class AgenticCurator: self.docs_dir = "docs" self.index_path = os.path.join(self.docs_dir, "index.md") self.mkdocs_path = "mkdocs.yml" - self.stats = {"orphans_found": 0, "orphans_linked": 0, "structural_improvements": 0} + self.stats = { + "orphans_found": 0, + "orphans_linked": 0, + "structural_improvements": 0, + "orphan_details": [] + } def _get_all_docs(self) -> Set[str]: return {f for f in os.listdir(self.docs_dir) if f.endswith('.md')} @@ -21,7 +26,8 @@ class AgenticCurator: def _get_nav_files(self) -> Set[str]: with open(self.mkdocs_path, 'r') as f: content = f.read() - return set(re.findall(r'[:\s]([a-zA-Z0-9_-]+\.md)', content)) + # Captura archivos .md precedidos por ":" o espacio, terminando con salto de línea o espacio + return set(re.findall(r'[:\s]([a-zA-Z0-9_\-\./]+\.md)', content)) def _get_index_links(self) -> Set[str]: with open(self.index_path, 'r') as f: @@ -56,6 +62,11 @@ class AgenticCurator: if decision: await self._apply_placement(orphan, decision) self.stats["orphans_linked"] += 1 + self.stats["orphan_details"].append({ + "file": orphan, + "title": decision.get("title"), + "category": decision.get("category") + }) async def _ask_gemini_placement(self, filename: str, content: str) -> Dict: with open(self.mkdocs_path, 'r') as f: diff --git a/src/intelligent_health_checker.py b/src/intelligent_health_checker.py index 5958eec4..6043034d 100644 --- a/src/intelligent_health_checker.py +++ b/src/intelligent_health_checker.py @@ -225,12 +225,13 @@ class IntelligentLinkCleaner: except: pass report = "## 🧠 Nubenetes Autonomous Health & Curation Engine\n\n" + # Mermaid Pie Chart report += "### 📊 Distribución de Operaciones\n" report += "```mermaid\npie title Operaciones de Mantenimiento\n" - report += f" \"Muertos (Eliminados)\" : {self.detailed_stats['operation_types']['removals']}\n" - report += f" \"Archivados (Wayback)\" : {self.detailed_stats['operation_types']['archived']}\n" - report += f" \"Consolidados (Git)\" : {self.detailed_stats['operation_types']['consolidated']}\n" - report += f" \"Nuevos (Huérfanos)\" : {self.detailed_stats['operation_types']['orphans']}\n```\n\n" + report += f" \"Eliminados\" : {self.detailed_stats['operation_types']['removals']}\n" + report += f" \"Archivados\" : {self.detailed_stats['operation_types']['archived']}\n" + report += f" \"Consolidados\" : {self.detailed_stats['operation_types']['consolidated']}\n" + report += f" \"Nuevos\" : {self.detailed_stats['operation_types']['orphans']}\n```\n\n" report += "### 📈 Resumen de Eficiencia\n" report += f"| Métrica | Cantidad | Detalle |\n| :--- | :---: | :--- |\n" @@ -257,11 +258,18 @@ class IntelligentLinkCleaner: self.git_controller.repository.create_pull(title=f"🧹 Autonomous Engine Health Report: {datetime.now().strftime('%d %b %Y')}", body=report, head=branch_name, base="master") async def main(): - cleaner = IntelligentLinkCleaner() - await cleaner.build_global_registry() - await cleaner.validate_links_tiered() - await cleaner.curator.audit_navigation() - await cleaner.curator.suggest_reorganization() - await cleaner.apply_changes() + try: + cleaner = IntelligentLinkCleaner() + await cleaner.build_global_registry() + await cleaner.validate_links_tiered() + await cleaner.curator.audit_navigation() + await cleaner.curator.suggest_reorganization() + await cleaner.apply_changes() + except Exception as e: + import traceback + print(f"[CRITICAL ERROR]: {e}") + traceback.print_exc() + exit(1) -if __name__ == "__main__": asyncio.run(main()) +if __name__ == "__main__": + asyncio.run(main())