From bfebd939f1f082fe2600fb288c82d7e305e7f2c0 Mon Sep 17 00:00:00 2001 From: Nubenetes Bot Date: Thu, 14 May 2026 21:09:13 +0200 Subject: [PATCH] chore: localize PR reports, logs, and core utilities to English --- src/gemini_utils.py | 26 ++++++++++---------- src/gitops_manager.py | 56 ++++++++++++++++++++----------------------- src/logger.py | 8 +++---- src/state_manager.py | 6 ++--- 4 files changed, 46 insertions(+), 50 deletions(-) diff --git a/src/gemini_utils.py b/src/gemini_utils.py index dd8b8523..6e15e23d 100644 --- a/src/gemini_utils.py +++ b/src/gemini_utils.py @@ -94,16 +94,16 @@ def is_fuzzy_duplicate(url_a: str, url_b: str) -> bool: async def call_gemini_with_retry(prompt: str, response_format: str = "json", max_retries: int = 3): """ - Llama a Gemini optimizando el uso de cuota (pay-per-use). - Rota llaves inmediatamente en 429 y usa backoff exponencial inteligente. + Calls Gemini API optimizing for quota usage (pay-per-use). + Rotates keys immediately on 429 and uses smart exponential backoff. """ global CURRENT_KEY_INDEX if not GEMINI_API_KEYS: - raise ValueError("No hay GEMINI_API_KEYS configuradas.") + raise ValueError("No GEMINI_API_KEYS configured.") diagnostics = GeminiDiagnostics() - # Intentamos rotar entre todas las llaves disponibles antes de fallar + # Try rotating through all available keys before failing for key_attempt in range(len(GEMINI_API_KEYS)): api_key = GEMINI_API_KEYS[CURRENT_KEY_INDEX] @@ -126,17 +126,17 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max if match: data = json.loads(match.group(0)) return data[0] if isinstance(data, list) and len(data) > 0 else data - diagnostics.add_attempt(model, 200, "JSON no encontrado", text_resp) + diagnostics.add_attempt(model, 200, "JSON not found", text_resp) break return text_resp - diagnostics.add_attempt(model, 200, "Sin candidates") + diagnostics.add_attempt(model, 200, "No candidates") break elif response.status_code == 429: - # 429: Rotamos llave inmediatamente para no desperdiciar tiempo - log_event(f" [!] API 429 en llave {CURRENT_KEY_INDEX+1}. Rotando...") + # 429: Rotate key immediately to save time + log_event(f" [!] API 429 on key {CURRENT_KEY_INDEX+1}. Rotating...") CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS) - # Rompemos el bucle de intentos para este modelo/llave y pasamos a la siguiente llave + # Break current model loop and move to next key break elif response.status_code in [500, 503, 504]: @@ -148,16 +148,16 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max break except Exception as e: - diagnostics.add_attempt(model, 0, f"Excepción: {str(e)}") + diagnostics.add_attempt(model, 0, f"Exception: {str(e)}") break - # Si terminamos los modelos de una llave con 429, saltamos a la siguiente + # If we finished all models for a key with 429, skip to next key if response.status_code == 429: continue - # Si llegamos aquí y no tuvimos éxito, probamos la siguiente llave tras un breve respiro + # If we are here and didn't succeed, try next key after a brief pause CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS) await asyncio.sleep(1) - raise Exception(f"Fallo crítico Gemini tras rotación de llaves.\n{diagnostics.get_report()}") + raise Exception(f"Critical Gemini failure after key rotation.\n{diagnostics.get_report()}") diff --git a/src/gitops_manager.py b/src/gitops_manager.py index 35a37b0b..f656026e 100644 --- a/src/gitops_manager.py +++ b/src/gitops_manager.py @@ -23,7 +23,7 @@ class RepositoryController: def apply_historical_chunk(self, updates: dict, next_since: str) -> None: branch_name = "bot/historical-accumulator" - # Verificar si la rama existe, si no, crearla desde master + # Check if branch exists, if not, create from master try: self.repository.get_branch(branch_name) except: @@ -44,21 +44,18 @@ class RepositoryController: content=content, branch=branch_name ) except Exception as e: - print(f"Error en tramo histórico para {file_path}: {e}") + print(f"Error in historical chunk for {file_path}: {e}") def apply_multi_file_changes(self, updates: dict, metrics: dict) -> None: timestamp_slug = datetime.now().strftime("%Y%m%d-%H%M") - is_historical = "historical" in metrics.get("start_date", "").lower() or metrics.get("total_extracted", 0) > 500 - branch_name = f"bot/knowledge-update-{timestamp_slug}" - # En el último tramo histórico, usamos el accumulator como base si existe + # In the last historical chunk, use the accumulator as base if it exists accumulator_branch = "bot/historical-accumulator" base_sha = None try: acc = self.repository.get_branch(accumulator_branch) base_sha = acc.commit.sha - # Si venimos de histórico, la rama destino debe ser creada desde el accumulator self.repository.create_git_ref(ref=f"refs/heads/{branch_name}", sha=base_sha) except: self._create_feature_branch(branch_name) @@ -86,15 +83,15 @@ class RepositoryController: content=content, branch=branch_name ) except Exception as e: - print(f"Error procesando {file_path}: {e}") + print(f"Error processing {file_path}: {e}") - # --- CONSTRUCCIÓN DEL REPORTE --- + # --- REPORT CONSTRUCTION --- full_report = metrics.get('full_report', []) - # 1. Tabla Matricial con Índice Numérico y Fecha (Priorizando INCLUDED) + # 1. Matrix Table with Numeric Index and Date (Prioritizing INCLUDED) sorted_report = sorted(full_report, key=lambda x: 0 if x['status'] == 'INCLUDED' else 1) - header_table = "| # | Estado | Fecha Post | Origen | Motivo | Categoría | URL |\n| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n" + header_table = "| # | Status | Post Date | Source | Reason | Category | URL |\n| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n" counts = {"INCLUDED": 0, "DUPLICATE": 0, "FILTERED": 0} source_counts = {} @@ -110,8 +107,7 @@ class RepositoryController: src = item.get('source', 'Unknown') source_counts[src] = source_counts.get(src, 0) + 1 - # Dividir filas en fragmentos para PR Body y Comentarios - # Límite GH: 65,536. Usamos fragmentos de ~50k chars para seguridad. + # Split rows into chunks for PR Body and Comments chunks = [] current_chunk = header_table for row in all_rows: @@ -124,10 +120,10 @@ class RepositoryController: matrix_table_body = chunks[0] if len(chunks) > 1: - matrix_table_body += f"\n> ℹ️ **Nota:** La tabla continúa en los comentarios del PR ({len(chunks)-1} partes adicionales).\n" + matrix_table_body += f"\n> ℹ️ **Note:** The table continues in PR comments ({len(chunks)-1} additional parts).\n" - # 2. Diagnóstico de Extracción Histórica - extraction_audit = "### 🕵️ Diagnóstico de Horizonte Temporal\n" + # 2. Historical Extraction Diagnosis + extraction_audit = "### 🕵️ Time Horizon Diagnosis\n" start_date_str = metrics.get('start_date')[:10] if metrics.get('start_date') else 'N/A' actual_oldest = "N/A" @@ -135,37 +131,37 @@ class RepositoryController: if dates: actual_oldest = min(dates)[:10] if actual_oldest != "N/A" and actual_oldest > start_date_str: - extraction_audit += f"⚠️ **Límite Alcanzado:** Se solicitó desde `{start_date_str}`, pero se detuvo en `{actual_oldest}`.\n" + extraction_audit += f"⚠️ **Limit Reached:** Requested from `{start_date_str}`, but stopped at `{actual_oldest}`.\n" else: - extraction_audit += f"✅ **Horizonte Alcanzado:** La extracción cubrió exitosamente desde `{start_date_str}`.\n" + extraction_audit += f"✅ **Horizon Reached:** Extraction successfully covered since `{start_date_str}`.\n" - # 3. Diagramas Mermaid - mermaid_pie = "### 📊 Métricas de Decisión\n```mermaid\npie title Distribución de Decisión Agéntica\n" - mermaid_pie += f" \"Aceptados (Inyectados)\" : {counts['INCLUDED']}\n" - mermaid_pie += f" \"Duplicados (Ignorados)\" : {counts['DUPLICATE']}\n" - mermaid_pie += f" \"Filtrados (Calidad/Impacto)\" : {counts['FILTERED']}\n```\n" + # 3. Mermaid Diagrams + mermaid_pie = "### 📊 Decision Metrics\n```mermaid\npie title Agentic Decision Distribution\n" + mermaid_pie += f" \"Accepted (Injected)\" : {counts['INCLUDED']}\n" + mermaid_pie += f" \"Duplicates (Ignored)\" : {counts['DUPLICATE']}\n" + mermaid_pie += f" \"Filtered (Quality/Impact)\" : {counts['FILTERED']}\n```\n" mermaid_origin = "" if source_counts: - mermaid_origin = "### 🌍 Origen de las Novedades Inyectadas\n```mermaid\npie title Fuentes de Referencias Añadidas\n" + mermaid_origin = "### 🌍 Source of Injected Updates\n```mermaid\npie title Added References Sources\n" for src, val in source_counts.items(): mermaid_origin += f" \"{src}\" : {val}\n" mermaid_origin += "```\n" - # 4. Log de Ingesta - x_log = "### ⚡ Audit Trail de Ingesta (X.com)\n" + # 4. Ingestion Log + x_log = "### ⚡ Ingestion Audit Trail (X.com)\n" for entry in metrics.get('x_audit', []): x_log += f"- {entry}\n" pr_narrative = ( f"## 💎 Knowledge Update War Room: Kubernetes & Cloud Native\n\n" - f"Este reporte detalla el procesamiento de **{metrics.get('total_extracted', 0)}** enlaces detectados.\n\n" + f"This report details the processing of **{metrics.get('total_extracted', 0)}** detected links.\n\n" f"{extraction_audit}\n" f"{mermaid_pie}\n" f"{mermaid_origin}\n" f"{x_log}\n" - f"### 📋 Matriz de Auditoría (Parte 1)\n{matrix_table_body}\n" + f"### 📋 Audit Matrix (Part 1)\n{matrix_table_body}\n" f"---\n" - f"**Nota de Evaluación:** Se ha analizado exitosamente el histórico completo." + f"**Evaluation Note:** Full history analyzed successfully." ) pr = self.repository.create_pull( @@ -175,8 +171,8 @@ class RepositoryController: base=self.default_branch_name ) - # 5. Publicar comentarios con el resto de la tabla + # 5. Post comments with the rest of the table if len(chunks) > 1: for i, chunk in enumerate(chunks[1:], 2): - pr.create_issue_comment(f"### 📋 Matriz de Auditoría (Parte {i})\n{chunk}") + pr.create_issue_comment(f"### 📋 Audit Matrix (Part {i})\n{chunk}") diff --git a/src/logger.py b/src/logger.py index 08eef44b..74a99ba5 100644 --- a/src/logger.py +++ b/src/logger.py @@ -6,8 +6,8 @@ DEFAULT_LOG_PATH = "/home/inafev/.gemini/tmp/awesome-kubernetes/curation_progres def log_event(message: str, section_break: bool = False): """ - Registra un evento tanto en la consola (STDOUT) como en el archivo de log local si existe. - En GitHub Actions, esto aparecerá en los logs del workflow. + Logs an event to both console (STDOUT) and local log file if it exists. + In GitHub Actions, this will appear in the workflow logs. """ timestamp = datetime.now().strftime('%H:%M:%S') formatted_msg = f"[{timestamp}] {message}" @@ -21,10 +21,10 @@ def log_event(message: str, section_break: bool = False): _write_to_file(formatted_msg) def _write_to_file(message: str): - # Solo intentamos escribir en archivo si no estamos en GitHub Actions + # Only try to write to file if not in GitHub Actions if not os.getenv("GITHUB_ACTIONS"): try: - # Aseguramos que el directorio existe + # Ensure directory exists os.makedirs(os.path.dirname(DEFAULT_LOG_PATH), exist_ok=True) with open(DEFAULT_LOG_PATH, "a") as f: f.write(message + "\n") diff --git a/src/state_manager.py b/src/state_manager.py index aee4dd0e..a0a0d803 100644 --- a/src/state_manager.py +++ b/src/state_manager.py @@ -15,7 +15,7 @@ def load_state() -> dict: with open(STATE_FILE, 'r') as f: return json.load(f) except Exception as e: - log_event(f"[!] Error cargando state.json: {e}") + log_event(f"[!] Error loading state.json: {e}") return default_state def save_state(last_date: datetime): @@ -26,9 +26,9 @@ def save_state(last_date: datetime): try: with open(STATE_FILE, 'w') as f: json.dump(state, f, indent=2) - log_event(f"[*] Estado guardado: última fecha procesada {last_date.date()}") + log_event(f"[*] State saved: last processed date {last_date.date()}") except Exception as e: - log_event(f"[!] Error guardando state.json: {e}") + log_event(f"[!] Error saving state.json: {e}") def get_last_date() -> datetime: state = load_state()