From f8f326c69828d35b4c42d3a6709f56e007ca0e22 Mon Sep 17 00:00:00 2001 From: Nubenetes Bot Date: Thu, 14 May 2026 21:45:59 +0200 Subject: [PATCH] feat: support unified historical curation as default and optional chunking --- .github/workflows/agentic_cron.yml | 12 +++++++--- src/main.py | 35 ++++++++++++++++++------------ 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/.github/workflows/agentic_cron.yml b/.github/workflows/agentic_cron.yml index be3e145c..71e35a74 100644 --- a/.github/workflows/agentic_cron.yml +++ b/.github/workflows/agentic_cron.yml @@ -22,6 +22,11 @@ on: required: false default: 'true' type: boolean + historical_chunked: + description: 'Trocear ejecución (múltiples PRs)' + required: false + default: 'false' + type: boolean historical_until_date: description: 'Fecha límite superior (tramo)' required: false @@ -64,6 +69,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} EXTRACTION_STRATEGY: ${{ github.event.inputs.extraction_strategy || 'search' }} HISTORICAL_MODE: ${{ github.event.inputs.historical_mode || 'false' }} + HISTORICAL_CHUNKED: ${{ github.event.inputs.historical_chunked || 'false' }} HISTORICAL_UNTIL_DATE: ${{ github.event.inputs.historical_until_date || '' }} CURATION_START_DATE: ${{ github.event.inputs.start_date || '' }} HISTORICAL_CHUNK_DAYS: '180' @@ -71,9 +77,9 @@ jobs: run: | python -u src/main.py 2>&1 | tee output.log - # Lógica de Re-disparo para Modo Histórico - if grep -q "NEXT_CHUNK_START:" output.log; then + # Lógica de Re-disparo para Modo Histórico (SOLO SI SE SOLICITA TROCEADO) + if [ "${{ github.event.inputs.historical_chunked }}" == "true" ] && grep -q "NEXT_CHUNK_START:" output.log; then NEXT_DATE=$(grep "NEXT_CHUNK_START:" output.log | awk '{print $2}') echo "Disparando siguiente tramo histórico hasta: $NEXT_DATE" - gh workflow run agentic_cron.yml -f historical_mode=true -f historical_until_date=$NEXT_DATE + gh workflow run agentic_cron.yml -f historical_mode=true -f historical_chunked=true -f historical_until_date=$NEXT_DATE fi diff --git a/src/main.py b/src/main.py index 2c86b196..b0a5be9d 100644 --- a/src/main.py +++ b/src/main.py @@ -22,24 +22,31 @@ async def master_orchestrator(): # 1. Dynamic / Historical Time Horizon is_historical = os.getenv("HISTORICAL_MODE", "false").lower() == "true" + is_chunked = os.getenv("HISTORICAL_CHUNKED", "false").lower() == "true" + + until_date = datetime.now(MADRID_TZ) if is_historical: - # Historical Mode by Chunks (e.g., 180-day chunks) + # Unified mode is now DEFAULT for historical final_stop_date = datetime(2024, 10, 1, 0, 0, tzinfo=MADRID_TZ) - chunk_days = int(os.getenv("HISTORICAL_CHUNK_DAYS", "180")) - # Current chunk ends where the previous one started (or 'now' if first) - until_str = os.getenv("HISTORICAL_UNTIL_DATE") - if until_str: - until_date = datetime.fromisoformat(until_str).replace(tzinfo=MADRID_TZ) + if is_chunked: + # Chunked Mode: Use chunks (e.g., 180 days) + chunk_days = int(os.getenv("HISTORICAL_CHUNK_DAYS", "180")) + until_str = os.getenv("HISTORICAL_UNTIL_DATE") + if until_str: + until_date = datetime.fromisoformat(until_str).replace(tzinfo=MADRID_TZ) + else: + until_date = datetime.now(MADRID_TZ) + + since_date = until_date - timedelta(days=chunk_days) + if since_date < final_stop_date: + since_date = final_stop_date + log_event(f"[*] HISTORICAL MODE (CHUNKED): Chunk {since_date.date()} -> {until_date.date()}") else: - until_date = datetime.now(MADRID_TZ) - - since_date = until_date - timedelta(days=chunk_days) - if since_date < final_stop_date: + # Unified Historical Mode: process all in one go (Single PR) since_date = final_stop_date - - log_event(f"[*] HISTORICAL MODE: Chunk {since_date.date()} -> {until_date.date()}") + log_event(f"[*] HISTORICAL MODE (UNIFIED): Processing all since {since_date.date()} in a single run") else: # Normal Mode: Use CURATION_START_DATE if exists, else state.json env_start = os.getenv("CURATION_START_DATE") @@ -284,8 +291,8 @@ async def master_orchestrator(): if max_tweet_date > since_date: save_state(max_tweet_date + timedelta(seconds=1)) - # Re-trigger logic for Historical Mode in GitHub Actions - if is_historical and since_date > final_stop_date: + # Re-trigger logic for Historical Mode in GitHub Actions (ONLY IF CHUNKED) + if is_historical and is_chunked and since_date > final_stop_date: # Print for YAML to capture print(f"\nNEXT_CHUNK_START: {since_date.isoformat()}") log_event(f"[*] CHUNK FINISHED. Suggesting next chunk from: {since_date.date()}", section_break=True)