From bfebd939f1f082fe2600fb288c82d7e305e7f2c0 Mon Sep 17 00:00:00 2001
From: Nubenetes Bot <bot@nubenetes.com>
Date: Thu, 14 May 2026 21:09:13 +0200
Subject: [PATCH] chore: localize PR reports, logs, and core utilities to
 English

---
 src/gemini_utils.py   | 26 ++++++++++----------
 src/gitops_manager.py | 56 ++++++++++++++++++++-----------------------
 src/logger.py         |  8 +++----
 src/state_manager.py  |  6 ++---
 4 files changed, 46 insertions(+), 50 deletions(-)

diff --git a/src/gemini_utils.py b/src/gemini_utils.py
index dd8b8523..6e15e23d 100644
--- a/src/gemini_utils.py
+++ b/src/gemini_utils.py
@@ -94,16 +94,16 @@ def is_fuzzy_duplicate(url_a: str, url_b: str) -> bool:
 
 async def call_gemini_with_retry(prompt: str, response_format: str = "json", max_retries: int = 3):
     """
-    Llama a Gemini optimizando el uso de cuota (pay-per-use).
-    Rota llaves inmediatamente en 429 y usa backoff exponencial inteligente.
+    Calls Gemini API optimizing for quota usage (pay-per-use).
+    Rotates keys immediately on 429 and uses smart exponential backoff.
     """
     global CURRENT_KEY_INDEX
     if not GEMINI_API_KEYS:
-        raise ValueError("No hay GEMINI_API_KEYS configuradas.")
+        raise ValueError("No GEMINI_API_KEYS configured.")
 
     diagnostics = GeminiDiagnostics()
     
-    # Intentamos rotar entre todas las llaves disponibles antes de fallar
+    # Try rotating through all available keys before failing
     for key_attempt in range(len(GEMINI_API_KEYS)):
         api_key = GEMINI_API_KEYS[CURRENT_KEY_INDEX]
         
@@ -126,17 +126,17 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max
                                     if match:
                                         data = json.loads(match.group(0))
                                         return data[0] if isinstance(data, list) and len(data) > 0 else data
-                                    diagnostics.add_attempt(model, 200, "JSON no encontrado", text_resp)
+                                    diagnostics.add_attempt(model, 200, "JSON not found", text_resp)
                                     break 
                                 return text_resp
-                            diagnostics.add_attempt(model, 200, "Sin candidates")
+                            diagnostics.add_attempt(model, 200, "No candidates")
                             break
                         
                         elif response.status_code == 429:
-                            # 429: Rotamos llave inmediatamente para no desperdiciar tiempo
-                            log_event(f"  [!] API 429 en llave {CURRENT_KEY_INDEX+1}. Rotando...")
+                            # 429: Rotate key immediately to save time
+                            log_event(f"  [!] API 429 on key {CURRENT_KEY_INDEX+1}. Rotating...")
                             CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS)
-                            # Rompemos el bucle de intentos para este modelo/llave y pasamos a la siguiente llave
+                            # Break current model loop and move to next key
                             break 
                         
                         elif response.status_code in [500, 503, 504]:
@@ -148,16 +148,16 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max
                             break
                             
                     except Exception as e:
-                        diagnostics.add_attempt(model, 0, f"Excepción: {str(e)}")
+                        diagnostics.add_attempt(model, 0, f"Exception: {str(e)}")
                         break
             
-            # Si terminamos los modelos de una llave con 429, saltamos a la siguiente
+            # If we finished all models for a key with 429, skip to next key
             if response.status_code == 429:
                 continue
             
-            # Si llegamos aquí y no tuvimos éxito, probamos la siguiente llave tras un breve respiro
+            # If we are here and didn't succeed, try next key after a brief pause
             CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS)
             await asyncio.sleep(1)
 
-    raise Exception(f"Fallo crítico Gemini tras rotación de llaves.\n{diagnostics.get_report()}")
+    raise Exception(f"Critical Gemini failure after key rotation.\n{diagnostics.get_report()}")
 
diff --git a/src/gitops_manager.py b/src/gitops_manager.py
index 35a37b0b..f656026e 100644
--- a/src/gitops_manager.py
+++ b/src/gitops_manager.py
@@ -23,7 +23,7 @@ class RepositoryController:
     def apply_historical_chunk(self, updates: dict, next_since: str) -> None:
         branch_name = "bot/historical-accumulator"
         
-        # Verificar si la rama existe, si no, crearla desde master
+        # Check if branch exists, if not, create from master
         try:
             self.repository.get_branch(branch_name)
         except:
@@ -44,21 +44,18 @@ class RepositoryController:
                             content=content, branch=branch_name
                         )
             except Exception as e:
-                print(f"Error en tramo histórico para {file_path}: {e}")
+                print(f"Error in historical chunk for {file_path}: {e}")
 
     def apply_multi_file_changes(self, updates: dict, metrics: dict) -> None:
         timestamp_slug = datetime.now().strftime("%Y%m%d-%H%M")
-        is_historical = "historical" in metrics.get("start_date", "").lower() or metrics.get("total_extracted", 0) > 500
-        
         branch_name = f"bot/knowledge-update-{timestamp_slug}"
         
-        # En el último tramo histórico, usamos el accumulator como base si existe
+        # In the last historical chunk, use the accumulator as base if it exists
         accumulator_branch = "bot/historical-accumulator"
         base_sha = None
         try:
             acc = self.repository.get_branch(accumulator_branch)
             base_sha = acc.commit.sha
-            # Si venimos de histórico, la rama destino debe ser creada desde el accumulator
             self.repository.create_git_ref(ref=f"refs/heads/{branch_name}", sha=base_sha)
         except:
             self._create_feature_branch(branch_name)
@@ -86,15 +83,15 @@ class RepositoryController:
                             content=content, branch=branch_name
                         )
             except Exception as e:
-                print(f"Error procesando {file_path}: {e}")
+                print(f"Error processing {file_path}: {e}")
 
-        # --- CONSTRUCCIÓN DEL REPORTE ---
+        # --- REPORT CONSTRUCTION ---
         full_report = metrics.get('full_report', [])
         
-        # 1. Tabla Matricial con Índice Numérico y Fecha (Priorizando INCLUDED)
+        # 1. Matrix Table with Numeric Index and Date (Prioritizing INCLUDED)
         sorted_report = sorted(full_report, key=lambda x: 0 if x['status'] == 'INCLUDED' else 1)
         
-        header_table = "| # | Estado | Fecha Post | Origen | Motivo | Categoría | URL |\n| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n"
+        header_table = "| # | Status | Post Date | Source | Reason | Category | URL |\n| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n"
         
         counts = {"INCLUDED": 0, "DUPLICATE": 0, "FILTERED": 0}
         source_counts = {}
@@ -110,8 +107,7 @@ class RepositoryController:
                 src = item.get('source', 'Unknown')
                 source_counts[src] = source_counts.get(src, 0) + 1
 
-        # Dividir filas en fragmentos para PR Body y Comentarios
-        # Límite GH: 65,536. Usamos fragmentos de ~50k chars para seguridad.
+        # Split rows into chunks for PR body and comments (GitHub limit: 65,536 chars; ~50k-char chunks for safety)
         chunks = []
         current_chunk = header_table
         for row in all_rows:
@@ -124,10 +120,10 @@ class RepositoryController:
 
         matrix_table_body = chunks[0]
         if len(chunks) > 1:
-            matrix_table_body += f"\n> ℹ️ **Nota:** La tabla continúa en los comentarios del PR ({len(chunks)-1} partes adicionales).\n"
+            matrix_table_body += f"\n> ℹ️ **Note:** The table continues in PR comments ({len(chunks)-1} additional parts).\n"
 
-        # 2. Diagnóstico de Extracción Histórica
-        extraction_audit = "### 🕵️ Diagnóstico de Horizonte Temporal\n"
+        # 2. Historical Extraction Diagnosis
+        extraction_audit = "### 🕵️ Time Horizon Diagnosis\n"
         start_date_str = metrics.get('start_date')[:10] if metrics.get('start_date') else 'N/A'
         
         actual_oldest = "N/A"
@@ -135,37 +131,37 @@ class RepositoryController:
         if dates: actual_oldest = min(dates)[:10]
 
         if actual_oldest != "N/A" and actual_oldest > start_date_str:
-            extraction_audit += f"⚠️ **Límite Alcanzado:** Se solicitó desde `{start_date_str}`, pero se detuvo en `{actual_oldest}`.\n"
+            extraction_audit += f"⚠️ **Limit Reached:** Requested from `{start_date_str}`, but stopped at `{actual_oldest}`.\n"
         else:
-            extraction_audit += f"✅ **Horizonte Alcanzado:** La extracción cubrió exitosamente desde `{start_date_str}`.\n"
+            extraction_audit += f"✅ **Horizon Reached:** Extraction successfully covered since `{start_date_str}`.\n"
 
-        # 3. Diagramas Mermaid
-        mermaid_pie = "### 📊 Métricas de Decisión\n```mermaid\npie title Distribución de Decisión Agéntica\n"
-        mermaid_pie += f"    \"Aceptados (Inyectados)\" : {counts['INCLUDED']}\n"
-        mermaid_pie += f"    \"Duplicados (Ignorados)\" : {counts['DUPLICATE']}\n"
-        mermaid_pie += f"    \"Filtrados (Calidad/Impacto)\" : {counts['FILTERED']}\n```\n"
+        # 3. Mermaid Diagrams
+        mermaid_pie = "### 📊 Decision Metrics\n```mermaid\npie title Agentic Decision Distribution\n"
+        mermaid_pie += f"    \"Accepted (Injected)\" : {counts['INCLUDED']}\n"
+        mermaid_pie += f"    \"Duplicates (Ignored)\" : {counts['DUPLICATE']}\n"
+        mermaid_pie += f"    \"Filtered (Quality/Impact)\" : {counts['FILTERED']}\n```\n"
 
         mermaid_origin = ""
         if source_counts:
-            mermaid_origin = "### 🌍 Origen de las Novedades Inyectadas\n```mermaid\npie title Fuentes de Referencias Añadidas\n"
+            mermaid_origin = "### 🌍 Source of Injected Updates\n```mermaid\npie title Added References Sources\n"
             for src, val in source_counts.items():
                 mermaid_origin += f"    \"{src}\" : {val}\n"
             mermaid_origin += "```\n"
 
-        # 4. Log de Ingesta
-        x_log = "### ⚡ Audit Trail de Ingesta (X.com)\n"
+        # 4. Ingestion Log
+        x_log = "### ⚡ Ingestion Audit Trail (X.com)\n"
         for entry in metrics.get('x_audit', []): x_log += f"- {entry}\n"
 
         pr_narrative = (
             f"## 💎 Knowledge Update War Room: Kubernetes & Cloud Native\n\n"
-            f"Este reporte detalla el procesamiento de **{metrics.get('total_extracted', 0)}** enlaces detectados.\n\n"
+            f"This report details the processing of **{metrics.get('total_extracted', 0)}** detected links.\n\n"
             f"{extraction_audit}\n"
             f"{mermaid_pie}\n"
             f"{mermaid_origin}\n"
             f"{x_log}\n"
-            f"### 📋 Matriz de Auditoría (Parte 1)\n{matrix_table_body}\n"
+            f"### 📋 Audit Matrix (Part 1)\n{matrix_table_body}\n"
             f"---\n"
-            f"**Nota de Evaluación:** Se ha analizado exitosamente el histórico completo."
+            f"**Evaluation Note:** Full history analyzed successfully."
         )
 
         pr = self.repository.create_pull(
@@ -175,8 +171,8 @@ class RepositoryController:
             base=self.default_branch_name
         )
 
-        # 5. Publicar comentarios con el resto de la tabla
+        # 5. Post comments with the rest of the table
         if len(chunks) > 1:
             for i, chunk in enumerate(chunks[1:], 2):
-                pr.create_issue_comment(f"### 📋 Matriz de Auditoría (Parte {i})\n{chunk}")
+                pr.create_issue_comment(f"### 📋 Audit Matrix (Part {i})\n{chunk}")
 
diff --git a/src/logger.py b/src/logger.py
index 08eef44b..74a99ba5 100644
--- a/src/logger.py
+++ b/src/logger.py
@@ -6,8 +6,8 @@ DEFAULT_LOG_PATH = "/home/inafev/.gemini/tmp/awesome-kubernetes/curation_progres
 
 def log_event(message: str, section_break: bool = False):
     """
-    Registra un evento tanto en la consola (STDOUT) como en el archivo de log local si existe.
-    En GitHub Actions, esto aparecerá en los logs del workflow.
+    Logs an event to both console (STDOUT) and local log file if it exists.
+    In GitHub Actions, this will appear in the workflow logs.
     """
     timestamp = datetime.now().strftime('%H:%M:%S')
     formatted_msg = f"[{timestamp}] {message}"
@@ -21,10 +21,10 @@ def log_event(message: str, section_break: bool = False):
         _write_to_file(formatted_msg)
 
 def _write_to_file(message: str):
-    # Solo intentamos escribir en archivo si no estamos en GitHub Actions
+    # Only try to write to file if not in GitHub Actions
     if not os.getenv("GITHUB_ACTIONS"):
         try:
-            # Aseguramos que el directorio existe
+            # Ensure directory exists
             os.makedirs(os.path.dirname(DEFAULT_LOG_PATH), exist_ok=True)
             with open(DEFAULT_LOG_PATH, "a") as f:
                 f.write(message + "\n")
diff --git a/src/state_manager.py b/src/state_manager.py
index aee4dd0e..a0a0d803 100644
--- a/src/state_manager.py
+++ b/src/state_manager.py
@@ -15,7 +15,7 @@ def load_state() -> dict:
             with open(STATE_FILE, 'r') as f:
                 return json.load(f)
         except Exception as e:
-            log_event(f"[!] Error cargando state.json: {e}")
+            log_event(f"[!] Error loading state.json: {e}")
     return default_state
 
 def save_state(last_date: datetime):
@@ -26,9 +26,9 @@ def save_state(last_date: datetime):
     try:
         with open(STATE_FILE, 'w') as f:
             json.dump(state, f, indent=2)
-        log_event(f"[*] Estado guardado: última fecha procesada {last_date.date()}")
+        log_event(f"[*] State saved: last processed date {last_date.date()}")
     except Exception as e:
-        log_event(f"[!] Error guardando state.json: {e}")
+        log_event(f"[!] Error saving state.json: {e}")
 
 def get_last_date() -> datetime:
     state = load_state()