From 8d662e118835ace4fd9e54d2acd19c3c8474154a Mon Sep 17 00:00:00 2001 From: Nubenetes Bot Date: Thu, 14 May 2026 17:23:54 +0200 Subject: [PATCH] feat: add intuitive since_date selector to GitHub workflow --- .github/workflows/agentic_cron.yml | 12 ++++++++---- src/main.py | 13 +++++++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/.github/workflows/agentic_cron.yml b/.github/workflows/agentic_cron.yml index a225fc63..6f3b6eb7 100644 --- a/.github/workflows/agentic_cron.yml +++ b/.github/workflows/agentic_cron.yml @@ -5,6 +5,10 @@ on: - cron: '0 5 * * 0' workflow_dispatch: inputs: + since_date: + description: 'Extraer posts DESDE esta fecha (Formato: YYYY-MM-DD)' + required: false + default: '' extraction_strategy: description: 'Estrategia de Extracción' required: true @@ -14,7 +18,7 @@ on: - search - scroll historical_mode: - description: 'Activar Modo Histórico' + description: 'Activar Modo Histórico (Ignora fecha de 30 días)' required: false default: 'false' type: boolean @@ -59,15 +63,15 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} EXTRACTION_STRATEGY: ${{ github.event.inputs.extraction_strategy || 'search' }} HISTORICAL_MODE: ${{ github.event.inputs.historical_mode || 'false' }} + SINCE_DATE_OVERRIDE: ${{ github.event.inputs.since_date || '' }} HISTORICAL_UNTIL_DATE: ${{ github.event.inputs.historical_until_date || '' }} HISTORICAL_CHUNK_DAYS: '180' PYTHONPATH: . run: | - python src/main.py > output.log 2>&1 - cat output.log + python src/main.py # Lógica de Re-disparo para Modo Histórico - if grep -q "NEXT_CHUNK_START:" output.log; then + if [ -f output.log ] && grep -q "NEXT_CHUNK_START:" output.log; then NEXT_DATE=$(grep "NEXT_CHUNK_START:" output.log | awk '{print $2}') echo "Disparando siguiente tramo histórico hasta: $NEXT_DATE" gh workflow run agentic_cron.yml -f historical_mode=true -f historical_until_date=$NEXT_DATE diff --git a/src/main.py b/src/main.py index aeb17f49..29aa305c 100644 --- a/src/main.py +++ b/src/main.py @@ -38,11 +38,16 @@ async def master_orchestrator(): log_event(f"[*] MODO HISTÓRICO: Tramo {since_date.date()} -> {until_date.date()}") else: - # Modo Normal (30 días) - days_back = int(os.getenv("CURATION_DAYS_BACK", "30")) - since_date = datetime.now(MADRID_TZ) - timedelta(days=days_back) + # Modo Normal (30 días o Manual Override) + since_override = os.getenv("SINCE_DATE_OVERRIDE") + if since_override: + since_date = datetime.strptime(since_override, "%Y-%m-%d").replace(tzinfo=MADRID_TZ) + log_event(f"[*] Modo Manual (Override): Desde {since_date.date()}") + else: + days_back = int(os.getenv("CURATION_DAYS_BACK", "30")) + since_date = datetime.now(MADRID_TZ) - timedelta(days=days_back) + log_event(f"[*] Modo Normal: Desde {since_date.date()}") until_date = None - log_event(f"[*] Modo Normal: Desde {since_date.date()}") # 2. Ingesta Multi-fuente backup_file = os.getenv("BACKUP_FILE")