chore: localize PR reports, logs, and core utilities to English

This commit is contained in:
Nubenetes Bot
2026-05-14 21:09:13 +02:00
parent 56c5b48ed5
commit bfebd939f1
4 changed files with 46 additions and 50 deletions

View File

@@ -94,16 +94,16 @@ def is_fuzzy_duplicate(url_a: str, url_b: str) -> bool:
async def call_gemini_with_retry(prompt: str, response_format: str = "json", max_retries: int = 3):
"""
Llama a Gemini optimizando el uso de cuota (pay-per-use).
Rota llaves inmediatamente en 429 y usa backoff exponencial inteligente.
Calls Gemini API optimizing for quota usage (pay-per-use).
Rotates keys immediately on 429 and uses smart exponential backoff.
"""
global CURRENT_KEY_INDEX
if not GEMINI_API_KEYS:
raise ValueError("No hay GEMINI_API_KEYS configuradas.")
raise ValueError("No GEMINI_API_KEYS configured.")
diagnostics = GeminiDiagnostics()
# Intentamos rotar entre todas las llaves disponibles antes de fallar
# Try rotating through all available keys before failing
for key_attempt in range(len(GEMINI_API_KEYS)):
api_key = GEMINI_API_KEYS[CURRENT_KEY_INDEX]
@@ -126,17 +126,17 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max
if match:
data = json.loads(match.group(0))
return data[0] if isinstance(data, list) and len(data) > 0 else data
diagnostics.add_attempt(model, 200, "JSON no encontrado", text_resp)
diagnostics.add_attempt(model, 200, "JSON not found", text_resp)
break
return text_resp
diagnostics.add_attempt(model, 200, "Sin candidates")
diagnostics.add_attempt(model, 200, "No candidates")
break
elif response.status_code == 429:
# 429: Rotamos llave inmediatamente para no desperdiciar tiempo
log_event(f" [!] API 429 en llave {CURRENT_KEY_INDEX+1}. Rotando...")
# 429: Rotate key immediately to save time
log_event(f" [!] API 429 on key {CURRENT_KEY_INDEX+1}. Rotating...")
CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS)
# Rompemos el bucle de intentos para este modelo/llave y pasamos a la siguiente llave
# Break current model loop and move to next key
break
elif response.status_code in [500, 503, 504]:
@@ -148,16 +148,16 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max
break
except Exception as e:
diagnostics.add_attempt(model, 0, f"Excepción: {str(e)}")
diagnostics.add_attempt(model, 0, f"Exception: {str(e)}")
break
# Si terminamos los modelos de una llave con 429, saltamos a la siguiente
# If we finished all models for a key with 429, skip to next key
if response.status_code == 429:
continue
# Si llegamos aquí y no tuvimos éxito, probamos la siguiente llave tras un breve respiro
# If we are here and didn't succeed, try next key after a brief pause
CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS)
await asyncio.sleep(1)
raise Exception(f"Fallo crítico Gemini tras rotación de llaves.\n{diagnostics.get_report()}")
raise Exception(f"Critical Gemini failure after key rotation.\n{diagnostics.get_report()}")

View File

@@ -23,7 +23,7 @@ class RepositoryController:
def apply_historical_chunk(self, updates: dict, next_since: str) -> None:
branch_name = "bot/historical-accumulator"
# Verificar si la rama existe, si no, crearla desde master
# Check if branch exists, if not, create from master
try:
self.repository.get_branch(branch_name)
except:
@@ -44,21 +44,18 @@ class RepositoryController:
content=content, branch=branch_name
)
except Exception as e:
print(f"Error en tramo histórico para {file_path}: {e}")
print(f"Error in historical chunk for {file_path}: {e}")
def apply_multi_file_changes(self, updates: dict, metrics: dict) -> None:
timestamp_slug = datetime.now().strftime("%Y%m%d-%H%M")
is_historical = "historical" in metrics.get("start_date", "").lower() or metrics.get("total_extracted", 0) > 500
branch_name = f"bot/knowledge-update-{timestamp_slug}"
# En el último tramo histórico, usamos el accumulator como base si existe
# In the last historical chunk, use the accumulator as base if it exists
accumulator_branch = "bot/historical-accumulator"
base_sha = None
try:
acc = self.repository.get_branch(accumulator_branch)
base_sha = acc.commit.sha
# When coming from a historical run, the target branch must be created from the accumulator
self.repository.create_git_ref(ref=f"refs/heads/{branch_name}", sha=base_sha)
except:
self._create_feature_branch(branch_name)
@@ -86,15 +83,15 @@ class RepositoryController:
content=content, branch=branch_name
)
except Exception as e:
print(f"Error procesando {file_path}: {e}")
print(f"Error processing {file_path}: {e}")
# --- CONSTRUCCIÓN DEL REPORTE ---
# --- REPORT CONSTRUCTION ---
full_report = metrics.get('full_report', [])
# 1. Tabla Matricial con Índice Numérico y Fecha (Priorizando INCLUDED)
# 1. Matrix Table with Numeric Index and Date (Prioritizing INCLUDED)
sorted_report = sorted(full_report, key=lambda x: 0 if x['status'] == 'INCLUDED' else 1)
header_table = "| # | Estado | Fecha Post | Origen | Motivo | Categoría | URL |\n| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n"
header_table = "| # | Status | Post Date | Source | Reason | Category | URL |\n| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n"
counts = {"INCLUDED": 0, "DUPLICATE": 0, "FILTERED": 0}
source_counts = {}
@@ -110,8 +107,7 @@ class RepositoryController:
src = item.get('source', 'Unknown')
source_counts[src] = source_counts.get(src, 0) + 1
# Dividir filas en fragmentos para PR Body y Comentarios
# Límite GH: 65,536. Usamos fragmentos de ~50k chars para seguridad.
# Split rows into chunks for PR Body and Comments.
# GitHub limit: 65,536 chars. We use chunks of ~50k chars to stay safely below it.
chunks = []
current_chunk = header_table
for row in all_rows:
@@ -124,10 +120,10 @@ class RepositoryController:
matrix_table_body = chunks[0]
if len(chunks) > 1:
matrix_table_body += f"\n> **Nota:** La tabla continúa en los comentarios del PR ({len(chunks)-1} partes adicionales).\n"
matrix_table_body += f"\n> **Note:** The table continues in PR comments ({len(chunks)-1} additional parts).\n"
# 2. Diagnóstico de Extracción Histórica
extraction_audit = "### 🕵️ Diagnóstico de Horizonte Temporal\n"
# 2. Historical Extraction Diagnosis
extraction_audit = "### 🕵️ Time Horizon Diagnosis\n"
start_date_str = metrics.get('start_date')[:10] if metrics.get('start_date') else 'N/A'
actual_oldest = "N/A"
@@ -135,37 +131,37 @@ class RepositoryController:
if dates: actual_oldest = min(dates)[:10]
if actual_oldest != "N/A" and actual_oldest > start_date_str:
extraction_audit += f"⚠️ **Límite Alcanzado:** Se solicitó desde `{start_date_str}`, pero se detuvo en `{actual_oldest}`.\n"
extraction_audit += f"⚠️ **Limit Reached:** Requested from `{start_date_str}`, but stopped at `{actual_oldest}`.\n"
else:
extraction_audit += f"✅ **Horizonte Alcanzado:** La extracción cubrió exitosamente desde `{start_date_str}`.\n"
extraction_audit += f"✅ **Horizon Reached:** Extraction successfully covered since `{start_date_str}`.\n"
# 3. Diagramas Mermaid
mermaid_pie = "### 📊 Métricas de Decisión\n```mermaid\npie title Distribución de Decisión Agéntica\n"
mermaid_pie += f" \"Aceptados (Inyectados)\" : {counts['INCLUDED']}\n"
mermaid_pie += f" \"Duplicados (Ignorados)\" : {counts['DUPLICATE']}\n"
mermaid_pie += f" \"Filtrados (Calidad/Impacto)\" : {counts['FILTERED']}\n```\n"
# 3. Mermaid Diagrams
mermaid_pie = "### 📊 Decision Metrics\n```mermaid\npie title Agentic Decision Distribution\n"
mermaid_pie += f" \"Accepted (Injected)\" : {counts['INCLUDED']}\n"
mermaid_pie += f" \"Duplicates (Ignored)\" : {counts['DUPLICATE']}\n"
mermaid_pie += f" \"Filtered (Quality/Impact)\" : {counts['FILTERED']}\n```\n"
mermaid_origin = ""
if source_counts:
mermaid_origin = "### 🌍 Origen de las Novedades Inyectadas\n```mermaid\npie title Fuentes de Referencias Añadidas\n"
mermaid_origin = "### 🌍 Source of Injected Updates\n```mermaid\npie title Added References Sources\n"
for src, val in source_counts.items():
mermaid_origin += f" \"{src}\" : {val}\n"
mermaid_origin += "```\n"
# 4. Log de Ingesta
x_log = "### ⚡ Audit Trail de Ingesta (X.com)\n"
# 4. Ingestion Log
x_log = "### ⚡ Ingestion Audit Trail (X.com)\n"
for entry in metrics.get('x_audit', []): x_log += f"- {entry}\n"
pr_narrative = (
f"## 💎 Knowledge Update War Room: Kubernetes & Cloud Native\n\n"
f"Este reporte detalla el procesamiento de **{metrics.get('total_extracted', 0)}** enlaces detectados.\n\n"
f"This report details the processing of **{metrics.get('total_extracted', 0)}** detected links.\n\n"
f"{extraction_audit}\n"
f"{mermaid_pie}\n"
f"{mermaid_origin}\n"
f"{x_log}\n"
f"### 📋 Matriz de Auditoría (Parte 1)\n{matrix_table_body}\n"
f"### 📋 Audit Matrix (Part 1)\n{matrix_table_body}\n"
f"---\n"
f"**Nota de Evaluación:** Se ha analizado exitosamente el histórico completo."
f"**Evaluation Note:** Full history analyzed successfully."
)
pr = self.repository.create_pull(
@@ -175,8 +171,8 @@ class RepositoryController:
base=self.default_branch_name
)
# 5. Publicar comentarios con el resto de la tabla
# 5. Post comments with the rest of the table
if len(chunks) > 1:
for i, chunk in enumerate(chunks[1:], 2):
pr.create_issue_comment(f"### 📋 Matriz de Auditoría (Parte {i})\n{chunk}")
pr.create_issue_comment(f"### 📋 Audit Matrix (Part {i})\n{chunk}")

View File

@@ -6,8 +6,8 @@ DEFAULT_LOG_PATH = "/home/inafev/.gemini/tmp/awesome-kubernetes/curation_progres
def log_event(message: str, section_break: bool = False):
"""
Registra un evento tanto en la consola (STDOUT) como en el archivo de log local si existe.
En GitHub Actions, esto aparecerá en los logs del workflow.
Logs an event to both console (STDOUT) and local log file if it exists.
In GitHub Actions, this will appear in the workflow logs.
"""
timestamp = datetime.now().strftime('%H:%M:%S')
formatted_msg = f"[{timestamp}] {message}"
@@ -21,10 +21,10 @@ def log_event(message: str, section_break: bool = False):
_write_to_file(formatted_msg)
def _write_to_file(message: str):
# Solo intentamos escribir en archivo si no estamos en GitHub Actions
# Only try to write to file if not in GitHub Actions
if not os.getenv("GITHUB_ACTIONS"):
try:
# Aseguramos que el directorio existe
# Ensure directory exists
os.makedirs(os.path.dirname(DEFAULT_LOG_PATH), exist_ok=True)
with open(DEFAULT_LOG_PATH, "a") as f:
f.write(message + "\n")

View File

@@ -15,7 +15,7 @@ def load_state() -> dict:
with open(STATE_FILE, 'r') as f:
return json.load(f)
except Exception as e:
log_event(f"[!] Error cargando state.json: {e}")
log_event(f"[!] Error loading state.json: {e}")
return default_state
def save_state(last_date: datetime):
@@ -26,9 +26,9 @@ def save_state(last_date: datetime):
try:
with open(STATE_FILE, 'w') as f:
json.dump(state, f, indent=2)
log_event(f"[*] Estado guardado: última fecha procesada {last_date.date()}")
log_event(f"[*] State saved: last processed date {last_date.date()}")
except Exception as e:
log_event(f"[!] Error guardando state.json: {e}")
log_event(f"[!] Error saving state.json: {e}")
def get_last_date() -> datetime:
state = load_state()