Mirror of https://github.com/nubenetes/awesome-kubernetes.git (synced 2026-05-23 09:33:33 +00:00)
feat: support unified historical curation as default and optional chunking
This commit is contained in:
12
.github/workflows/agentic_cron.yml
vendored
12
.github/workflows/agentic_cron.yml
vendored
@@ -22,6 +22,11 @@ on:
|
||||
required: false
|
||||
default: 'true'
|
||||
type: boolean
|
||||
historical_chunked:
|
||||
description: 'Trocear ejecución (múltiples PRs)'
|
||||
required: false
|
||||
default: 'false'
|
||||
type: boolean
|
||||
historical_until_date:
|
||||
description: 'Fecha límite superior (tramo)'
|
||||
required: false
|
||||
@@ -64,6 +69,7 @@ jobs:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
EXTRACTION_STRATEGY: ${{ github.event.inputs.extraction_strategy || 'search' }}
|
||||
HISTORICAL_MODE: ${{ github.event.inputs.historical_mode || 'false' }}
|
||||
HISTORICAL_CHUNKED: ${{ github.event.inputs.historical_chunked || 'false' }}
|
||||
HISTORICAL_UNTIL_DATE: ${{ github.event.inputs.historical_until_date || '' }}
|
||||
CURATION_START_DATE: ${{ github.event.inputs.start_date || '' }}
|
||||
HISTORICAL_CHUNK_DAYS: '180'
|
||||
@@ -71,9 +77,9 @@ jobs:
|
||||
run: |
|
||||
python -u src/main.py 2>&1 | tee output.log
|
||||
|
||||
# Lógica de Re-disparo para Modo Histórico
|
||||
if grep -q "NEXT_CHUNK_START:" output.log; then
|
||||
# Lógica de Re-disparo para Modo Histórico (SOLO SI SE SOLICITA TROCEADO)
|
||||
if [ "${{ github.event.inputs.historical_chunked }}" == "true" ] && grep -q "NEXT_CHUNK_START:" output.log; then
|
||||
NEXT_DATE=$(grep "NEXT_CHUNK_START:" output.log | awk '{print $2}')
|
||||
echo "Disparando siguiente tramo histórico hasta: $NEXT_DATE"
|
||||
gh workflow run agentic_cron.yml -f historical_mode=true -f historical_until_date=$NEXT_DATE
|
||||
gh workflow run agentic_cron.yml -f historical_mode=true -f historical_chunked=true -f historical_until_date=$NEXT_DATE
|
||||
fi
|
||||
|
||||
35
src/main.py
35
src/main.py
@@ -22,24 +22,31 @@ async def master_orchestrator():
|
||||
|
||||
# 1. Dynamic / Historical Time Horizon
|
||||
is_historical = os.getenv("HISTORICAL_MODE", "false").lower() == "true"
|
||||
is_chunked = os.getenv("HISTORICAL_CHUNKED", "false").lower() == "true"
|
||||
|
||||
until_date = datetime.now(MADRID_TZ)
|
||||
|
||||
if is_historical:
|
||||
# Historical Mode by Chunks (e.g., 180-day chunks)
|
||||
# Unified mode is now DEFAULT for historical
|
||||
final_stop_date = datetime(2024, 10, 1, 0, 0, tzinfo=MADRID_TZ)
|
||||
chunk_days = int(os.getenv("HISTORICAL_CHUNK_DAYS", "180"))
|
||||
|
||||
# Current chunk ends where the previous one started (or 'now' if first)
|
||||
until_str = os.getenv("HISTORICAL_UNTIL_DATE")
|
||||
if until_str:
|
||||
until_date = datetime.fromisoformat(until_str).replace(tzinfo=MADRID_TZ)
|
||||
if is_chunked:
|
||||
# Chunked Mode: Use chunks (e.g., 180 days)
|
||||
chunk_days = int(os.getenv("HISTORICAL_CHUNK_DAYS", "180"))
|
||||
until_str = os.getenv("HISTORICAL_UNTIL_DATE")
|
||||
if until_str:
|
||||
until_date = datetime.fromisoformat(until_str).replace(tzinfo=MADRID_TZ)
|
||||
else:
|
||||
until_date = datetime.now(MADRID_TZ)
|
||||
|
||||
since_date = until_date - timedelta(days=chunk_days)
|
||||
if since_date < final_stop_date:
|
||||
since_date = final_stop_date
|
||||
log_event(f"[*] HISTORICAL MODE (CHUNKED): Chunk {since_date.date()} -> {until_date.date()}")
|
||||
else:
|
||||
until_date = datetime.now(MADRID_TZ)
|
||||
|
||||
since_date = until_date - timedelta(days=chunk_days)
|
||||
if since_date < final_stop_date:
|
||||
# Unified Historical Mode: process all in one go (Single PR)
|
||||
since_date = final_stop_date
|
||||
|
||||
log_event(f"[*] HISTORICAL MODE: Chunk {since_date.date()} -> {until_date.date()}")
|
||||
log_event(f"[*] HISTORICAL MODE (UNIFIED): Processing all since {since_date.date()} in a single run")
|
||||
else:
|
||||
# Normal Mode: Use CURATION_START_DATE if exists, else state.json
|
||||
env_start = os.getenv("CURATION_START_DATE")
|
||||
@@ -284,8 +291,8 @@ async def master_orchestrator():
|
||||
if max_tweet_date > since_date:
|
||||
save_state(max_tweet_date + timedelta(seconds=1))
|
||||
|
||||
# Re-trigger logic for Historical Mode in GitHub Actions
|
||||
if is_historical and since_date > final_stop_date:
|
||||
# Re-trigger logic for Historical Mode in GitHub Actions (ONLY IF CHUNKED)
|
||||
if is_historical and is_chunked and since_date > final_stop_date:
|
||||
# Print for YAML to capture
|
||||
print(f"\nNEXT_CHUNK_START: {since_date.isoformat()}")
|
||||
log_event(f"[*] CHUNK FINISHED. Suggesting next chunk from: {since_date.date()}", section_break=True)
|
||||
|
||||
Reference in New Issue
Block a user