chore: localize PR reports, logs, and core utilities to English

2026-07-12 09:51:00 +00:00 · 2026-05-14 21:09:13 +02:00
parent 56c5b48ed5
commit bfebd939f1
4 changed files with 46 additions and 50 deletions
--- a/src/gemini_utils.py
+++ b/src/gemini_utils.py
@@ -94,16 +94,16 @@ def is_fuzzy_duplicate(url_a: str, url_b: str) -> bool:

 async def call_gemini_with_retry(prompt: str, response_format: str = "json", max_retries: int = 3):
    """
-    Llama a Gemini optimizando el uso de cuota (pay-per-use).
-    Rota llaves inmediatamente en 429 y usa backoff exponencial inteligente.
+    Calls Gemini API optimizing for quota usage (pay-per-use).
+    Rotates keys immediately on 429 and uses smart exponential backoff.
    """
    global CURRENT_KEY_INDEX
    if not GEMINI_API_KEYS:
-        raise ValueError("No hay GEMINI_API_KEYS configuradas.")
+        raise ValueError("No GEMINI_API_KEYS configured.")

    diagnostics = GeminiDiagnostics()
    
-    # Intentamos rotar entre todas las llaves disponibles antes de fallar
+    # Try rotating through all available keys before failing
    for key_attempt in range(len(GEMINI_API_KEYS)):
        api_key = GEMINI_API_KEYS[CURRENT_KEY_INDEX]
        
@@ -126,17 +126,17 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max
                                    if match:
                                        data = json.loads(match.group(0))
                                        return data[0] if isinstance(data, list) and len(data) > 0 else data
-                                    diagnostics.add_attempt(model, 200, "JSON no encontrado", text_resp)
+                                    diagnostics.add_attempt(model, 200, "JSON not found", text_resp)
                                    break 
                                return text_resp
-                            diagnostics.add_attempt(model, 200, "Sin candidates")
+                            diagnostics.add_attempt(model, 200, "No candidates")
                            break
                        
                        elif response.status_code == 429:
-                            # 429: Rotamos llave inmediatamente para no desperdiciar tiempo
-                            log_event(f"  [!] API 429 en llave {CURRENT_KEY_INDEX+1}. Rotando...")
+                            # 429: Rotate key immediately to save time
+                            log_event(f"  [!] API 429 on key {CURRENT_KEY_INDEX+1}. Rotating...")
                            CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS)
-                            # Rompemos el bucle de intentos para este modelo/llave y pasamos a la siguiente llave
+                            # Break current model loop and move to next key
                            break 
                        
                        elif response.status_code in [500, 503, 504]:
@@ -148,16 +148,16 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max
                            break
                            
                    except Exception as e:
-                        diagnostics.add_attempt(model, 0, f"Excepción: {str(e)}")
+                        diagnostics.add_attempt(model, 0, f"Exception: {str(e)}")
                        break
            
-            # Si terminamos los modelos de una llave con 429, saltamos a la siguiente
+            # If we finished all models for a key with 429, skip to next key
            if response.status_code == 429:
                continue
            
-            # Si llegamos aquí y no tuvimos éxito, probamos la siguiente llave tras un breve respiro
+            # If we are here and didn't succeed, try next key after a brief pause
            CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS)
            await asyncio.sleep(1)

-    raise Exception(f"Fallo crítico Gemini tras rotación de llaves.\n{diagnostics.get_report()}")
+    raise Exception(f"Critical Gemini failure after key rotation.\n{diagnostics.get_report()}")

--- a/src/gitops_manager.py
+++ b/src/gitops_manager.py
@@ -23,7 +23,7 @@ class RepositoryController:
    def apply_historical_chunk(self, updates: dict, next_since: str) -> None:
        branch_name = "bot/historical-accumulator"
        
-        # Verificar si la rama existe, si no, crearla desde master
+        # Check whether the branch exists; if not, create it from master
        try:
            self.repository.get_branch(branch_name)
        except:
@@ -44,21 +44,18 @@ class RepositoryController:
                            content=content, branch=branch_name
                        )
            except Exception as e:
-                print(f"Error en tramo histórico para {file_path}: {e}")
+                print(f"Error in historical chunk for {file_path}: {e}")

    def apply_multi_file_changes(self, updates: dict, metrics: dict) -> None:
        timestamp_slug = datetime.now().strftime("%Y%m%d-%H%M")
-        is_historical = "historical" in metrics.get("start_date", "").lower() or metrics.get("total_extracted", 0) > 500
-        
        branch_name = f"bot/knowledge-update-{timestamp_slug}"
        
-        # En el último tramo histórico, usamos el accumulator como base si existe
+        # In the last historical chunk, use the accumulator as base if it exists
        accumulator_branch = "bot/historical-accumulator"
        base_sha = None
        try:
            acc = self.repository.get_branch(accumulator_branch)
            base_sha = acc.commit.sha
-            # Si venimos de histórico, la rama destino debe ser creada desde el accumulator
            self.repository.create_git_ref(ref=f"refs/heads/{branch_name}", sha=base_sha)
        except:
            self._create_feature_branch(branch_name)
@@ -86,15 +83,15 @@ class RepositoryController:
                            content=content, branch=branch_name
                        )
            except Exception as e:
-                print(f"Error procesando {file_path}: {e}")
+                print(f"Error processing {file_path}: {e}")

-        # --- CONSTRUCCIÓN DEL REPORTE ---
+        # --- REPORT CONSTRUCTION ---
        full_report = metrics.get('full_report', [])
        
-        # 1. Tabla Matricial con Índice Numérico y Fecha (Priorizando INCLUDED)
+        # 1. Matrix Table with Numeric Index and Date (Prioritizing INCLUDED)
        sorted_report = sorted(full_report, key=lambda x: 0 if x['status'] == 'INCLUDED' else 1)
        
-        header_table = "| # | Estado | Fecha Post | Origen | Motivo | Categoría | URL |\n| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n"
+        header_table = "| # | Status | Post Date | Source | Reason | Category | URL |\n| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n"
        
        counts = {"INCLUDED": 0, "DUPLICATE": 0, "FILTERED": 0}
        source_counts = {}
@@ -110,8 +107,7 @@ class RepositoryController:
                src = item.get('source', 'Unknown')
                source_counts[src] = source_counts.get(src, 0) + 1

-        # Dividir filas en fragmentos para PR Body y Comentarios
-        # Límite GH: 65,536. Usamos fragmentos de ~50k chars para seguridad.
+        # Split rows into chunks for the PR body and comments (GitHub limit: 65,536 chars; chunks of ~50k chars for safety)
        chunks = []
        current_chunk = header_table
        for row in all_rows:
@@ -124,10 +120,10 @@ class RepositoryController:

        matrix_table_body = chunks[0]
        if len(chunks) > 1:
-            matrix_table_body += f"\n> ℹ️ **Nota:** La tabla continúa en los comentarios del PR ({len(chunks)-1} partes adicionales).\n"
+            matrix_table_body += f"\n> ℹ️ **Note:** The table continues in PR comments ({len(chunks)-1} additional parts).\n"

-        # 2. Diagnóstico de Extracción Histórica
-        extraction_audit = "### 🕵️ Diagnóstico de Horizonte Temporal\n"
+        # 2. Historical Extraction Diagnosis
+        extraction_audit = "### 🕵️ Time Horizon Diagnosis\n"
        start_date_str = metrics.get('start_date')[:10] if metrics.get('start_date') else 'N/A'
        
        actual_oldest = "N/A"
@@ -135,37 +131,37 @@ class RepositoryController:
        if dates: actual_oldest = min(dates)[:10]

        if actual_oldest != "N/A" and actual_oldest > start_date_str:
-            extraction_audit += f"⚠️ **Límite Alcanzado:** Se solicitó desde `{start_date_str}`, pero se detuvo en `{actual_oldest}`.\n"
+            extraction_audit += f"⚠️ **Limit Reached:** Requested from `{start_date_str}`, but stopped at `{actual_oldest}`.\n"
        else:
-            extraction_audit += f"✅ **Horizonte Alcanzado:** La extracción cubrió exitosamente desde `{start_date_str}`.\n"
+            extraction_audit += f"✅ **Horizon Reached:** Extraction successfully covered since `{start_date_str}`.\n"

-        # 3. Diagramas Mermaid
-        mermaid_pie = "### 📊 Métricas de Decisión\n```mermaid\npie title Distribución de Decisión Agéntica\n"
-        mermaid_pie += f"    \"Aceptados (Inyectados)\" : {counts['INCLUDED']}\n"
-        mermaid_pie += f"    \"Duplicados (Ignorados)\" : {counts['DUPLICATE']}\n"
-        mermaid_pie += f"    \"Filtrados (Calidad/Impacto)\" : {counts['FILTERED']}\n```\n"
+        # 3. Mermaid Diagrams
+        mermaid_pie = "### 📊 Decision Metrics\n```mermaid\npie title Agentic Decision Distribution\n"
+        mermaid_pie += f"    \"Accepted (Injected)\" : {counts['INCLUDED']}\n"
+        mermaid_pie += f"    \"Duplicates (Ignored)\" : {counts['DUPLICATE']}\n"
+        mermaid_pie += f"    \"Filtered (Quality/Impact)\" : {counts['FILTERED']}\n```\n"

        mermaid_origin = ""
        if source_counts:
-            mermaid_origin = "### 🌍 Origen de las Novedades Inyectadas\n```mermaid\npie title Fuentes de Referencias Añadidas\n"
+            mermaid_origin = "### 🌍 Source of Injected Updates\n```mermaid\npie title Added References Sources\n"
            for src, val in source_counts.items():
                mermaid_origin += f"    \"{src}\" : {val}\n"
            mermaid_origin += "```\n"

-        # 4. Log de Ingesta
-        x_log = "### ⚡ Audit Trail de Ingesta (X.com)\n"
+        # 4. Ingestion Log
+        x_log = "### ⚡ Ingestion Audit Trail (X.com)\n"
        for entry in metrics.get('x_audit', []): x_log += f"- {entry}\n"

        pr_narrative = (
            f"## 💎 Knowledge Update War Room: Kubernetes & Cloud Native\n\n"
-            f"Este reporte detalla el procesamiento de **{metrics.get('total_extracted', 0)}** enlaces detectados.\n\n"
+            f"This report details the processing of **{metrics.get('total_extracted', 0)}** detected links.\n\n"
            f"{extraction_audit}\n"
            f"{mermaid_pie}\n"
            f"{mermaid_origin}\n"
            f"{x_log}\n"
-            f"### 📋 Matriz de Auditoría (Parte 1)\n{matrix_table_body}\n"
+            f"### 📋 Audit Matrix (Part 1)\n{matrix_table_body}\n"
            f"---\n"
-            f"**Nota de Evaluación:** Se ha analizado exitosamente el histórico completo."
+            f"**Evaluation Note:** Full history analyzed successfully."
        )

        pr = self.repository.create_pull(
@@ -175,8 +171,8 @@ class RepositoryController:
            base=self.default_branch_name
        )

-        # 5. Publicar comentarios con el resto de la tabla
+        # 5. Post comments with the rest of the table
        if len(chunks) > 1:
            for i, chunk in enumerate(chunks[1:], 2):
-                pr.create_issue_comment(f"### 📋 Matriz de Auditoría (Parte {i})\n{chunk}")
+                pr.create_issue_comment(f"### 📋 Audit Matrix (Part {i})\n{chunk}")

--- a/src/logger.py
+++ b/src/logger.py
@@ -6,8 +6,8 @@ DEFAULT_LOG_PATH = "/home/inafev/.gemini/tmp/awesome-kubernetes/curation_progres

 def log_event(message: str, section_break: bool = False):
    """
-    Registra un evento tanto en la consola (STDOUT) como en el archivo de log local si existe.
-    En GitHub Actions, esto aparecerá en los logs del workflow.
+    Logs an event to both the console (STDOUT) and the local log file, if it exists.
+    In GitHub Actions, this will appear in the workflow logs.
    """
    timestamp = datetime.now().strftime('%H:%M:%S')
    formatted_msg = f"[{timestamp}] {message}"
@@ -21,10 +21,10 @@ def log_event(message: str, section_break: bool = False):
        _write_to_file(formatted_msg)

 def _write_to_file(message: str):
-    # Solo intentamos escribir en archivo si no estamos en GitHub Actions
+    # Only try to write to file if not in GitHub Actions
    if not os.getenv("GITHUB_ACTIONS"):
        try:
-            # Aseguramos que el directorio existe
+            # Ensure directory exists
            os.makedirs(os.path.dirname(DEFAULT_LOG_PATH), exist_ok=True)
            with open(DEFAULT_LOG_PATH, "a") as f:
                f.write(message + "\n")
--- a/src/state_manager.py
+++ b/src/state_manager.py
@@ -15,7 +15,7 @@ def load_state() -> dict:
            with open(STATE_FILE, 'r') as f:
                return json.load(f)
        except Exception as e:
-            log_event(f"[!] Error cargando state.json: {e}")
+            log_event(f"[!] Error loading state.json: {e}")
    return default_state

 def save_state(last_date: datetime):
@@ -26,9 +26,9 @@ def save_state(last_date: datetime):
    try:
        with open(STATE_FILE, 'w') as f:
            json.dump(state, f, indent=2)
-        log_event(f"[*] Estado guardado: última fecha procesada {last_date.date()}")
+        log_event(f"[*] State saved: last processed date {last_date.date()}")
    except Exception as e:
-        log_event(f"[!] Error guardando state.json: {e}")
+        log_event(f"[!] Error saving state.json: {e}")

 def get_last_date() -> datetime:
    state = load_state()