feat: support unified historical curation as default and optional chunking

This commit is contained in:
Nubenetes Bot
2026-05-14 21:45:59 +02:00
parent 9025528870
commit f8f326c698
2 changed files with 30 additions and 17 deletions

View File

@@ -22,6 +22,11 @@ on:
required: false
default: 'true'
type: boolean
historical_chunked:
description: 'Trocear ejecución (múltiples PRs)'
required: false
default: 'false'
type: boolean
historical_until_date:
description: 'Fecha límite superior (tramo)'
required: false
@@ -64,6 +69,7 @@ jobs:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
EXTRACTION_STRATEGY: ${{ github.event.inputs.extraction_strategy || 'search' }}
HISTORICAL_MODE: ${{ github.event.inputs.historical_mode || 'false' }}
HISTORICAL_CHUNKED: ${{ github.event.inputs.historical_chunked || 'false' }}
HISTORICAL_UNTIL_DATE: ${{ github.event.inputs.historical_until_date || '' }}
CURATION_START_DATE: ${{ github.event.inputs.start_date || '' }}
HISTORICAL_CHUNK_DAYS: '180'
@@ -71,9 +77,9 @@ jobs:
run: |
python -u src/main.py 2>&1 | tee output.log
# Lógica de Re-disparo para Modo Histórico
if grep -q "NEXT_CHUNK_START:" output.log; then
# Lógica de Re-disparo para Modo Histórico (SOLO SI SE SOLICITA TROCEADO)
if [ "${{ github.event.inputs.historical_chunked }}" == "true" ] && grep -q "NEXT_CHUNK_START:" output.log; then
NEXT_DATE=$(grep "NEXT_CHUNK_START:" output.log | awk '{print $2}')
echo "Disparando siguiente tramo histórico hasta: $NEXT_DATE"
gh workflow run agentic_cron.yml -f historical_mode=true -f historical_until_date=$NEXT_DATE
gh workflow run agentic_cron.yml -f historical_mode=true -f historical_chunked=true -f historical_until_date=$NEXT_DATE
fi

View File

@@ -22,24 +22,31 @@ async def master_orchestrator():
# 1. Dynamic / Historical Time Horizon
is_historical = os.getenv("HISTORICAL_MODE", "false").lower() == "true"
is_chunked = os.getenv("HISTORICAL_CHUNKED", "false").lower() == "true"
until_date = datetime.now(MADRID_TZ)
if is_historical:
# Historical Mode by Chunks (e.g., 180-day chunks)
# Unified mode is now DEFAULT for historical
final_stop_date = datetime(2024, 10, 1, 0, 0, tzinfo=MADRID_TZ)
chunk_days = int(os.getenv("HISTORICAL_CHUNK_DAYS", "180"))
# Current chunk ends where the previous one started (or 'now' if first)
until_str = os.getenv("HISTORICAL_UNTIL_DATE")
if until_str:
until_date = datetime.fromisoformat(until_str).replace(tzinfo=MADRID_TZ)
if is_chunked:
# Chunked Mode: Use chunks (e.g., 180 days)
chunk_days = int(os.getenv("HISTORICAL_CHUNK_DAYS", "180"))
until_str = os.getenv("HISTORICAL_UNTIL_DATE")
if until_str:
until_date = datetime.fromisoformat(until_str).replace(tzinfo=MADRID_TZ)
else:
until_date = datetime.now(MADRID_TZ)
since_date = until_date - timedelta(days=chunk_days)
if since_date < final_stop_date:
since_date = final_stop_date
log_event(f"[*] HISTORICAL MODE (CHUNKED): Chunk {since_date.date()} -> {until_date.date()}")
else:
until_date = datetime.now(MADRID_TZ)
since_date = until_date - timedelta(days=chunk_days)
if since_date < final_stop_date:
# Unified Historical Mode: process all in one go (Single PR)
since_date = final_stop_date
log_event(f"[*] HISTORICAL MODE: Chunk {since_date.date()} -> {until_date.date()}")
log_event(f"[*] HISTORICAL MODE (UNIFIED): Processing all since {since_date.date()} in a single run")
else:
# Normal Mode: Use CURATION_START_DATE if exists, else state.json
env_start = os.getenv("CURATION_START_DATE")
@@ -284,8 +291,8 @@ async def master_orchestrator():
if max_tweet_date > since_date:
save_state(max_tweet_date + timedelta(seconds=1))
# Re-trigger logic for Historical Mode in GitHub Actions
if is_historical and since_date > final_stop_date:
# Re-trigger logic for Historical Mode in GitHub Actions (ONLY IF CHUNKED)
if is_historical and is_chunked and since_date > final_stop_date:
# Print for YAML to capture
print(f"\nNEXT_CHUNK_START: {since_date.isoformat()}")
log_event(f"[*] CHUNK FINISHED. Suggesting next chunk from: {since_date.date()}", section_break=True)