fix: workflow timeout, exit code 1, and paywall intelligence

This commit is contained in:
Nubenetes Bot
2026-05-10 10:21:41 +02:00
parent dcec3b7758
commit 2b8ed0897e
4 changed files with 37 additions and 16 deletions

View File

@@ -2,7 +2,7 @@ name: Nubenetes Intelligent Link Cleaner & Dedup
on:
schedule:
- cron: '0 0 1 * *' # Mensual (el día 1 de cada mes)
- cron: '0 0 1 * *'
workflow_dispatch:
permissions:
@@ -12,6 +12,8 @@ permissions:
jobs:
intelligent-clean-process:
runs-on: ubuntu-latest
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
steps:
- name: Sincronización del repositorio
uses: actions/checkout@v4
@@ -24,13 +26,13 @@ jobs:
- name: Instalación de dependencias y Playwright
run: |
python -m pip install --upgrade pip
pip install --no-cache-dir pydantic PyGithub aiohttp beautifulsoup4 httpx fake-useragent pytz python-dotenv playwright
pip install --no-cache-dir pydantic PyGithub aiohttp beautifulsoup4 httpx fake-useragent pytz python-dotenv playwright PyYAML
playwright install chromium --with-deps
- name: Ejecución de la Limpieza Inteligente Global
env:
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PYTHONPATH: .
PYTHONPATH: ${{ github.workspace }}
run: |
python src/intelligent_health_checker.py

0
src/__init__.py Normal file
View File

View File

@@ -13,7 +13,12 @@ class AgenticCurator:
self.docs_dir = "docs"
self.index_path = os.path.join(self.docs_dir, "index.md")
self.mkdocs_path = "mkdocs.yml"
self.stats = {"orphans_found": 0, "orphans_linked": 0, "structural_improvements": 0}
self.stats = {
"orphans_found": 0,
"orphans_linked": 0,
"structural_improvements": 0,
"orphan_details": []
}
def _get_all_docs(self) -> Set[str]:
    """Return the names of the Markdown files found in ``self.docs_dir``.

    Only directory entries whose name ends with ``.md`` are kept. The
    listing comes from ``os.listdir``, which does not descend into
    subdirectories, so the result holds bare file names only.
    """
    return {f for f in os.listdir(self.docs_dir) if f.endswith('.md')}
@@ -21,7 +26,8 @@ class AgenticCurator:
def _get_nav_files(self) -> Set[str]:
    """Return every ``.md`` path referenced in the MkDocs configuration.

    The file at ``self.mkdocs_path`` is scanned as plain text rather than
    parsed as YAML. A reference is any run of letters, digits, ``_``, ``-``,
    ``.`` or ``/`` that ends in ``.md`` and is immediately preceded by a
    colon or a whitespace character, so entries in subdirectories
    (``guides/setup.md``) are captured with their relative path.

    Returns:
        The set of matched paths, exactly as written in the configuration.
    """
    # Explicit encoding: titles in the config may contain non-ASCII text and
    # the platform default encoding is not guaranteed to be UTF-8.
    with open(self.mkdocs_path, 'r', encoding='utf-8') as f:
        content = f.read()
    # Single return with the path-aware pattern; the older pattern rejected
    # "/" and "." and therefore dropped every nested nav entry.
    return set(re.findall(r'[:\s]([a-zA-Z0-9_\-\./]+\.md)', content))
def _get_index_links(self) -> Set[str]:
with open(self.index_path, 'r') as f:
@@ -56,6 +62,11 @@ class AgenticCurator:
if decision:
await self._apply_placement(orphan, decision)
self.stats["orphans_linked"] += 1
self.stats["orphan_details"].append({
"file": orphan,
"title": decision.get("title"),
"category": decision.get("category")
})
async def _ask_gemini_placement(self, filename: str, content: str) -> Dict:
with open(self.mkdocs_path, 'r') as f:

View File

@@ -225,12 +225,13 @@ class IntelligentLinkCleaner:
except: pass
report = "## 🧠 Nubenetes Autonomous Health & Curation Engine\n\n"
# Mermaid Pie Chart
report += "### 📊 Distribución de Operaciones\n"
report += "```mermaid\npie title Operaciones de Mantenimiento\n"
report += f" \"Muertos (Eliminados)\" : {self.detailed_stats['operation_types']['removals']}\n"
report += f" \"Archivados (Wayback)\" : {self.detailed_stats['operation_types']['archived']}\n"
report += f" \"Consolidados (Git)\" : {self.detailed_stats['operation_types']['consolidated']}\n"
report += f" \"Nuevos (Huérfanos)\" : {self.detailed_stats['operation_types']['orphans']}\n```\n\n"
report += f" \"Eliminados\" : {self.detailed_stats['operation_types']['removals']}\n"
report += f" \"Archivados\" : {self.detailed_stats['operation_types']['archived']}\n"
report += f" \"Consolidados\" : {self.detailed_stats['operation_types']['consolidated']}\n"
report += f" \"Nuevos\" : {self.detailed_stats['operation_types']['orphans']}\n```\n\n"
report += "### 📈 Resumen de Eficiencia\n"
report += f"| Métrica | Cantidad | Detalle |\n| :--- | :---: | :--- |\n"
@@ -257,11 +258,18 @@ class IntelligentLinkCleaner:
self.git_controller.repository.create_pull(title=f"🧹 Autonomous Engine Health Report: {datetime.now().strftime('%d %b %Y')}", body=report, head=branch_name, base="master")
async def main():
    """Run the full link-cleaning pipeline once, exiting non-zero on failure.

    Steps, in order: build the global link registry, validate the links,
    let the curator audit the navigation and suggest a reorganization, then
    apply the resulting changes.

    Any exception raised by a step is reported together with its traceback
    and turned into exit status 1, so the calling workflow is marked as
    failed instead of finishing silently.
    """
    # Imported locally so this block does not depend on the file's import header.
    import sys
    import traceback

    try:
        cleaner = IntelligentLinkCleaner()
        await cleaner.build_global_registry()
        await cleaner.validate_links_tiered()
        await cleaner.curator.audit_navigation()
        await cleaner.curator.suggest_reorganization()
        await cleaner.apply_changes()
    except Exception as e:
        # Same stream as the traceback, so the banner and the stack trace
        # stay together in captured CI logs.
        print(f"[CRITICAL ERROR]: {e}", file=sys.stderr)
        traceback.print_exc()
        # sys.exit rather than the builtin exit(): the latter is injected by
        # the `site` module and is not guaranteed to exist.
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())