From 2cbca0fbe07bf817d7f71b8eeeb7ca550cc8617a Mon Sep 17 00:00:00 2001
From: Nubenetes Bot <bot@nubenetes.com>
Date: Thu, 14 May 2026 18:30:29 +0200
Subject: [PATCH] feat: enhance curation engine with robust retries, detailed
 logging, and branch-specific workflows

---
 .github/workflows/agentic_cron.yml            |  12 +-
 .../workflows/intelligent_link_cleaner.yml    |   3 +
 GEMINI.md                                     |   1 +
 data/.gitkeep                                 |   0
 docs/aws-serverless.md                        |   3 +-
 docs/aws.md                                   |   2 +
 docs/cicd.md                                  |   3 +-
 docs/iac.md                                   |   3 +-
 docs/kubernetes-tools.md                      |  11 +-
 docs/kubernetes-troubleshooting.md            |   2 +
 docs/kubernetes-tutorials.md                  |   4 +-
 docs/kubernetes.md                            |   2 +
 docs/monitoring.md                            |   2 +
 docs/openshift.md                             |   3 +
 docs/terraform.md                             |   3 +
 src/agentic_curator.py                        | 133 +++++++-------
 src/config.py                                 |   5 +-
 src/gemini_utils.py                           |  90 +++-------
 src/main.py                                   | 163 +++++++++++++-----
 src/memory/health_learning.json               |   3 +
 src/state_manager.py                          |  36 ++++
 test_gemini.py                                |  12 ++
 22 files changed, 317 insertions(+), 179 deletions(-)
 create mode 100644 data/.gitkeep
 create mode 100644 src/state_manager.py
 create mode 100644 test_gemini.py

diff --git a/.github/workflows/agentic_cron.yml b/.github/workflows/agentic_cron.yml
index a225fc63..6fbfaab9 100644
--- a/.github/workflows/agentic_cron.yml
+++ b/.github/workflows/agentic_cron.yml
@@ -5,6 +5,10 @@ on:
     - cron: '0 5 * * 0'
   workflow_dispatch:
     inputs:
+      start_date:
+        description: 'Fecha inicial para la búsqueda (YYYY-MM-DD)'
+        required: true
+        default: '2024-10-01'
       extraction_strategy:
         description: 'Estrategia de Extracción'
         required: true
@@ -22,9 +26,6 @@ on:
         description: 'Fecha límite superior (tramo)'
         required: false
         default: ''
-        # Explicación para el usuario:
-        # scroll: MÁS EXHAUSTIVO. Simula navegación humana. Captura TODO, pero puede ser limitado por X en fechas muy antiguas.
-        # search: MÁS FIABLE PARA 2024. Usa búsqueda avanzada. Llega siempre a la fecha, pero el algoritmo de X puede filtrar posts.
 
 permissions:
   contents: write
@@ -34,9 +35,13 @@ permissions:
 jobs:
   agentic-curation-process:
     runs-on: ubuntu-latest
+    # Solo ejecutar en develop
+    if: github.ref == 'refs/heads/develop'
     steps:
       - name: Sincronización del repositorio
         uses: actions/checkout@v4
+        with:
+          ref: develop
 
       - name: Provisión del Entorno Python 3.11
         uses: actions/setup-python@v5
@@ -60,6 +65,7 @@ jobs:
           EXTRACTION_STRATEGY: ${{ github.event.inputs.extraction_strategy || 'search' }}
           HISTORICAL_MODE: ${{ github.event.inputs.historical_mode || 'false' }}
           HISTORICAL_UNTIL_DATE: ${{ github.event.inputs.historical_until_date || '' }}
+          CURATION_START_DATE: ${{ github.event.inputs.start_date || '' }}
           HISTORICAL_CHUNK_DAYS: '180'
           PYTHONPATH: .
         run: |
diff --git a/.github/workflows/intelligent_link_cleaner.yml b/.github/workflows/intelligent_link_cleaner.yml
index 6c477483..4f79352c 100644
--- a/.github/workflows/intelligent_link_cleaner.yml
+++ b/.github/workflows/intelligent_link_cleaner.yml
@@ -12,11 +12,14 @@ permissions:
 jobs:
   intelligent-clean-process:
     runs-on: ubuntu-latest
+    if: github.ref == 'refs/heads/develop'
     env:
       FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
     steps:
       - name: Sincronización del repositorio
         uses: actions/checkout@v4
+        with:
+          ref: develop
 
       - name: Provisión del Entorno Python 3.11
         uses: actions/setup-python@v5
diff --git a/GEMINI.md b/GEMINI.md
index 6e260e64..93de3a46 100644
--- a/GEMINI.md
+++ b/GEMINI.md
@@ -32,3 +32,4 @@ El bot debe rotar entre perfiles para evitar ser detectado:
 *   **Mayo 2026**: Añadido sistema de Evasión Multidimensional (5 intentos, rotación de perfiles).
 *   **Mayo 2026**: Creación del `AgenticCurator` para auditoría de navegación y consolidación de repositorios.
 *   **Mayo 2026**: Generación de PRs con analíticas visuales (Mermaid) y Matriz de Salud.
+*   **Mayo 2026**: Implementación de Curaduría vía Backup (JSON/MD) para evitar bloqueos de X.com.
diff --git a/data/.gitkeep b/data/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/docs/aws-serverless.md b/docs/aws-serverless.md
index 686ae755..14042ab3 100644
--- a/docs/aws-serverless.md
+++ b/docs/aws-serverless.md
@@ -72,4 +72,5 @@
     - zstd compressed container images
     - Seekable OCI for lazy loading container images
 - [medium.com/@HirenDhaduk1: Best choice to run your containers: AWS FARGATE or AWS LAMBDA or Both?](https://medium.com/@HirenDhaduk1/best-choice-to-run-your-containers-aws-fargate-or-aws-lambda-or-both-d9e14685a363)
-- [github.com/awslabs/specctl](https://github.com/awslabs/specctl) CLI to convert Kubernetes specifications to ECS Fargate and vice-versa
\ No newline at end of file
+- [github.com/awslabs/specctl](https://github.com/awslabs/specctl) CLI to convert Kubernetes specifications to ECS Fargate and vice-versa
+  - [AWS SAM CLI Advanced Serverless Deployments](https://medium.com/@mertmengu/aws-sam-cli-advanced-serverless-deployments-07432fee87ab) 🌟 - This article explores advanced deployment strategies using AWS SAM CLI for serverless applications.
\ No newline at end of file
diff --git a/docs/aws.md b/docs/aws.md
index 327e15c2..d13f9034 100644
--- a/docs/aws.md
+++ b/docs/aws.md
@@ -275,3 +275,5 @@ You can filter by topic using the toolbar above.
 <blockquote class="twitter-tweet"><p lang="en" dir="ltr">A list of small tools with a 𝗯𝗶𝗴 𝗶𝗺𝗽𝗮𝗰𝘁 𝗼𝗻 𝗽𝗿𝗼𝗱𝘂𝗰𝘁𝗶𝘃𝗶𝘁𝘆 when working with AWS 🛠 📈 ↓</p>&mdash; Tobias Schmidt (@tpschmidt_) <a href="https://twitter.com/tpschmidt_/status/1543982797320327169?ref_src=twsrc%5Etfw">July 4, 2022</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
 </center>
 </details>
+
+  - [Convert AWS console actions to reusable code with AWS Console-to-Code, now generally available](https://go.aws/4eFRwIt) 🌟 - AWS Console-to-Code is now generally available, enabling users to convert AWS console actions and workflows into reusable Infrastructure as Code (IaC) formats like AWS CLI, CloudFormation, and AWS CDK.
\ No newline at end of file
diff --git a/docs/cicd.md b/docs/cicd.md
index 1ce82397..650afa55 100644
--- a/docs/cicd.md
+++ b/docs/cicd.md
@@ -227,4 +227,5 @@
 <center>
 <blockquote class="twitter-tweet"><p lang="en" dir="ltr">CI/CD is a must-know in DevOps. <br><br>Here&#39;s a dead simple guide to understanding it:</p>&mdash; Nikki Siapno (@NikkiSiapno) <a href="https://twitter.com/NikkiSiapno/status/1619966395965493248?ref_src=twsrc%5Etfw">January 30, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
 </center>
-</details>
\ No newline at end of file
+</details>
+  - [GitBook Webinar: GitBook for Public Docs](https://youtu.be/gnYU0jtQbug?si=dWSDPD4eXvF3dx5r) - Webinar sobre el uso de GitBook para la documentación pública, útil para equipos que gestionan documentación de proyectos de Kubernetes y Cloud Native.
\ No newline at end of file
diff --git a/docs/iac.md b/docs/iac.md
index 31d89bc4..6f84824c 100644
--- a/docs/iac.md
+++ b/docs/iac.md
@@ -96,4 +96,5 @@
 
 <blockquote class="twitter-tweet"><p lang="en" dir="ltr">Do you use the AWS, GCP, or Azure web consoles beyond getting started with a new cloud provider? If so, why not an automation tool such as Terraform or Cloud Formation? <a href="https://t.co/5LIZSTcNpG">pic.twitter.com/5LIZSTcNpG</a></p>&mdash; Kelsey Hightower (@kelseyhightower) <a href="https://twitter.com/kelseyhightower/status/1483820927402004484?ref_src=twsrc%5Etfw">January 19, 2022</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
 </center>
-</details>
\ No newline at end of file
+</details>
+  - [IaC and OpenShift Virtualization handshake (using Terraform for VMs on OCP)](https://medium.com/@nidhibansal26/iac-and-openshift-virtualization-handshake-c0a4ada79af5) 🌟 - Explora la integración de Infraestructura como Código (IaC) con Terraform para gestionar Máquinas Virtuales (VMs) en OpenShift Virtualization, demostrando un 'handshake' efectivo entre ambas tecnologías.
\ No newline at end of file
diff --git a/docs/kubernetes-tools.md b/docs/kubernetes-tools.md
index 2205b1ae..0859873a 100644
--- a/docs/kubernetes-tools.md
+++ b/docs/kubernetes-tools.md
@@ -137,6 +137,7 @@
 - [dev.to/cyclops-ui: Five tools to make your K8s experience more enjoyable](https://dev.to/cyclops-ui/five-tools-to-make-your-k8s-experience-more-enjoyable-5d85)
 
 ## K8s Tools
+  - [Web Terminal Operator: Tips y Trucos](https://www.techqna.io/2024/09/web-terminal-operator-tips-tricks-for.html) - Explora consejos y trucos prácticos para utilizar el operador de terminal web en entornos Kubernetes.
 
 - [downloadkubernetes.com: Download Kubernetes 🌟](https://www.downloadkubernetes.com/) An easier way to get the binaries you need
 - [ramitsurana/awesome-kubernetes: Tools 🌟](https://github.com/ramitsurana/awesome-kubernetes#configuration)
@@ -1241,4 +1242,12 @@ elastic quotas - Effortless optimization at its finest!
   - [mlrun](https://github.com/mlrun/mlrun) - MLRun is an open source MLOps platform for quickly building and managing continuous ML applications across their lifecycle. MLRun integrates into your development and CI/CD environment and automates the delivery of production data, ML pipelines, and online applications.
   - [kuberay](https://github.com/ray-project/kuberay) - A toolkit to run Ray applications on Kubernetes
 
-  - [odigos](https://github.com/odigos-io/odigos) - Distributed tracing without code changes. 🚀 Instantly monitor any application using OpenTelemetry and eBPF
\ No newline at end of file
+  - [odigos](https://github.com/odigos-io/odigos) - Distributed tracing without code changes. 🚀 Instantly monitor any application using OpenTelemetry and eBPF
+  - [Grafana OnCall OSS](https://grafana.com/oss/oncall/) 🌟 - Grafana OnCall OSS es un sistema de gestión de guardias de código abierto para mejorar la colaboración y resolver incidentes más rápido, ahora en modo de mantenimiento.
+  - [Kubernetes: Un tour por los comandos básicos](https://youtube.com/shorts/VP4JoijL_TY?si=dBGfs6sn1ryzPcYT) 🌟 - Este video de YouTube ofrece un recorrido por los comandos esenciales de Kubernetes, ideal para iniciarse en la herramienta.
+  - [RBAC Wizard: Herramienta para visualizar y analizar la configuración RBAC de Kubernetes](https://github.com/pehlicd/rbac-wizard) 🌟 - RBAC Wizard es una herramienta que ayuda a visualizar y analizar las configuraciones RBAC de tu clúster de Kubernetes, facilitando la gestión de permisos.
+  - [Bank Vaults: Un Cuchillo Suizo para HashiCorp Vault en Kubernetes](https://github.com/bank-vaults/bank-vaults) 🌟 - Bank Vaults es una herramienta CLI multifuncional para inicializar, desbloquear y configurar HashiCorp Vault, facilitando su integración y gestión en entornos Kubernetes.
+  - [K3s vs Talos Linux](https://faun.pub/k3s-vs-talos-linux-8a1e0dce9a77) 🌟 - Comparativa técnica entre K3s y Talos Linux, dos opciones para desplegar Kubernetes.
+  - [Atomic ConfigMap Updates in Kubernetes: How Symlinks and Kubelet Make It Happen](https://medium.com/itnext/atomic-configmap-updates-in-kubernetes-how-symlinks-and-kubelet-make-it-happen-21a44338c247) 🌟 - Este artículo explica cómo las actualizaciones atómicas de ConfigMap en Kubernetes son posibles gracias a la interacción entre los symlinks y el Kubelet, permitiendo cambios seguros y eficientes.
+  - [Atomic ConfigMap Updates in Kubernetes: How Symlinks and Kubelet Make It Happen](https://medium.com/itnext/atomic-configmap-updates-in-kubernetes-how-symlinks-and-kubelet-make-it-happen-21a44338c247) 🌟 - Este artículo explica cómo las actualizaciones atómicas de ConfigMap en Kubernetes son posibles gracias a la interacción entre los symlinks y el Kubelet, permitiendo cambios seguros y eficientes.
+  - [ASCIIFlow](https://asciiflow.com/#/) 🌟 - Herramienta para crear diagramas en ASCII en el navegador, útil para visualizar arquitecturas y flujos.
\ No newline at end of file
diff --git a/docs/kubernetes-troubleshooting.md b/docs/kubernetes-troubleshooting.md
index fb462347..8c2697dc 100644
--- a/docs/kubernetes-troubleshooting.md
+++ b/docs/kubernetes-troubleshooting.md
@@ -253,3 +253,5 @@
 
     <blockquote class="twitter-tweet"><p lang="en" dir="ltr">How does Pod to Pod communication work in Kubernetes?<br><br>How does the traffic reach the pod?<br><br>Let&#39;s dive into how low-level networking works in Kubernetes. <a href="https://t.co/K8bBT8YiOf">pic.twitter.com/K8bBT8YiOf</a></p>&mdash; Daniele Polencic — @danielepolencic@hachyderm.io (@danielepolencic) <a href="https://twitter.com/danielepolencic/status/1655540892365889538?ref_src=twsrc%5Etfw">May 8, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
     </center>
+
+  - [Debugging Kubernetes Systems: Practical Advice with Quality Telemetry](https://…) 🌟 - Adnan Rahic shares practical advice for debugging Kubernetes systems, highlighting the importance of quality telemetry.
\ No newline at end of file
diff --git a/docs/kubernetes-tutorials.md b/docs/kubernetes-tutorials.md
index 16032fea..9a231b45 100644
--- a/docs/kubernetes-tutorials.md
+++ b/docs/kubernetes-tutorials.md
@@ -103,6 +103,7 @@
     - Render the metrics of your nodes, pods, and namespaces all in one easy to visualize UI. Focus on what matters, with built in alerts and cluster health monitoring.
 
 ## Videos
+  - [Openshift Baremetal - Installer's Bake-off: Agent vs Assisted vs IPI](https://youtu.be/1v15VSKPZRU?si=vK_9UKjGV8F24Ebt) - Comparativa técnica de los métodos de instalación de OpenShift en baremetal: Agent, Assisted e IPI, para ayudarte a elegir el más adecuado.
 
 ??? note "Click to expand!"
 
@@ -126,4 +127,5 @@
     <blockquote class="twitter-tweet"><p lang="en" dir="ltr">Containers vs Pods 🧵<br><br>A &quot;container&quot; is an isolated and restricted execution environment, typically optimized to run just one service.<br><br>Being fully isolated from neighbors may feel good, but only at first. What if you need a few _supporting_ services around?<br><br>Pods to the rescue! <a href="https://t.co/QEVdvqB01h">pic.twitter.com/QEVdvqB01h</a></p>&mdash; Ivan Velichko (@iximiuz) <a href="https://twitter.com/iximiuz/status/1551964110295912448?ref_src=twsrc%5Etfw">July 26, 2022</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
 
     <blockquote class="twitter-tweet"><p lang="en" dir="ltr">What problem is Kubernetes trying to solve?<br><br>Is it simply container orchestration?<br><br>A thread 🧵</p>&mdash; Michael Levan 👨🏻‍💻☕️ (@TheNJDevOpsGuy) <a href="https://twitter.com/TheNJDevOpsGuy/status/1557304846730002435?ref_src=twsrc%5Etfw">August 10, 2022</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
-    </center>
\ No newline at end of file
+    </center>
+  - [Kubernetes para principiantes - La guía definitiva para principiantes absolutos](https://youtube.com/playlist?list=PLaR6Rq6Z4IqcKOKT4c0uGkBt3YSRQ9S5v&si=qGpgMP56yagniZx8) 🌟 - Una playlist de YouTube que ofrece una guía definitiva y completa sobre Kubernetes para principiantes absolutos, cubriendo conceptos fundamentales y prácticos.
\ No newline at end of file
diff --git a/docs/kubernetes.md b/docs/kubernetes.md
index c27aa29e..71748024 100644
--- a/docs/kubernetes.md
+++ b/docs/kubernetes.md
@@ -920,6 +920,7 @@
 - [cloudhero.io](https://cloudhero.io/creating-users-for-your-kubernetes-cluster) Creating Users for your Kubernetes Cluster. Learn how to use x509 certificates to authenticate users in your cluster.
 
 #### Kubernetes Labels and Selectors
+  - [Centralized Add-on Management Across N Kubernetes Clusters](https://dev.to/gianlucam76/centralized-add-on-management-across-n-kubernetes-clusters-308k) - This article discusses a centralized management approach using selectors to streamline add-on deployments and simplify Kubernetes multi-cluster management, addressing the complexity of managing distributed clusters across various environments.
 
 - [sandeepbaldawa.medium.com: K8s Labels & Selectors](https://sandeepbaldawa.medium.com/k8s-labels-selectors-9ad2fcf78a4e) In this post, we will look at What Kubernetes(K8s) Labels and Selectors are, Why do we need them, How to use them.
 - [blog.kubecost.com: The Guide to Kubernetes Labels](https://blog.kubecost.com/blog/kubernetes-labels/)
@@ -2005,3 +2006,4 @@ will dive into the details of how they work
 
   gtag('config', 'UA-168051035-1');
 </script>
+  - [KEP-2837: Especificaciones de Recursos a Nivel de Pod](https://github.com/kubernetes/enhancements/blob/ddf7d2a8c098e97b0714f31e88abad3b3e0e706c/keps/sig-node/2837-pod-level-resource-spec/README.md#summary) 🌟 - Este KEP propone la especificación de recursos de CPU y memoria a nivel de pod en Kubernetes para mejorar la gestión de recursos y el aislamiento.
\ No newline at end of file
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 94310da2..20f5b2ef 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -639,3 +639,5 @@ Resolve your software incidents 10x faster
 <blockquote class="twitter-tweet"><p lang="en" dir="ltr">Does anyone want to try out the <a href="https://twitter.com/hashtag/k8s?src=hash&amp;ref_src=twsrc%5Etfw">#k8s</a> <a href="https://twitter.com/hashtag/slack?src=hash&amp;ref_src=twsrc%5Etfw">#slack</a> bot? It helps with browsing clusters directly from Slack and notifies you about important changes to your clusters. Your feedback would be super helpful! Please DM for details. <a href="https://t.co/SpRFz2wgtZ">pic.twitter.com/SpRFz2wgtZ</a></p>&mdash; Kubevious (@kubevious) <a href="https://twitter.com/kubevious/status/1471208374196850693?ref_src=twsrc%5Etfw">December 15, 2021</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
 </center>
 </details>
+
+  - [OpenTelemetry (OTel) vs Application Performance Monitoring (APM)](https://medium.com/@rahul.fiem/opentelemetry-otel-vs-application-performance-monitoring-apm-86ae829877cf) 🌟 - Este artículo técnico ofrece una comparación detallada entre OpenTelemetry (OTel) y las soluciones tradicionales de Application Performance Monitoring (APM).
\ No newline at end of file
diff --git a/docs/openshift.md b/docs/openshift.md
index d703152b..0d322796 100644
--- a/docs/openshift.md
+++ b/docs/openshift.md
@@ -324,3 +324,6 @@ The other SCCs provide intermediate levels of constraint for various use cases.
 
 - [Awesome Openshift 2](https://github.com/oscp/awesome-openshift3)
 
+
+  - [Rescue My OpenShift Cluster From Loss of 2 Masters](https://medium.com/@haozhao_2156/rescue-my-openshift-cluster-from-loss-of-2-masters-59f118a30f95) 🌟 - Este artículo detalla un escenario real de recuperación de un clúster OpenShift tras la pérdida de dos nodos master, ofreciendo pasos prácticos para la restauración.
+  - [Automated Disaster Recovery failover and failback with Red Hat OpenShift](https://youtu.be/OPKVKPfJrRA?si=YBt3LmBRNNq-GrqL) 🌟 - Este video demuestra cómo configurar la recuperación ante desastres automatizada con failover y failback en Red Hat OpenShift.
\ No newline at end of file
diff --git a/docs/terraform.md b/docs/terraform.md
index 9b2190c4..cba11ffe 100644
--- a/docs/terraform.md
+++ b/docs/terraform.md
@@ -1182,3 +1182,6 @@
 <iframe width="560" height="315" src="https://www.youtube.com/embed/wNllmEAuCTg?si=xyKNxoi-Diu_m5yh" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
 </center>
 </details>
+
+  - [Automatización de aplicaciones Spring Boot con Terraform, Ansible y GitHub Actions](https://buff.ly/3sl0yYu) 🌟 - Tutorial para automatizar el despliegue de aplicaciones Spring Boot utilizando Terraform para la infraestructura, Ansible para la configuración y GitHub Actions para la CI/CD.
+  - [Teaser: Chapter 2 of Terraform Authoring and Operations Professional Study Guide](https://mattias.engineer/blog/2024/terraform-professional-chapter-2/) 🌟 - Un teaser del capítulo 2 de la guía de estudio profesional de autoría y operaciones de Terraform, cubriendo el viaje completo desde la instalación hasta la configuración de workspaces y la implementación de recursos con HCP Terraform.
\ No newline at end of file
diff --git a/src/agentic_curator.py b/src/agentic_curator.py
index 64988ceb..b3cabb1c 100644
--- a/src/agentic_curator.py
+++ b/src/agentic_curator.py
@@ -48,109 +48,87 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
 
     for i, asset in enumerate(raw_assets):
         post_date = asset.get('timestamp', 'Fecha desconocida')
-        log_event(f"--- EVALUANDO {i+1}/{len(raw_assets)} ---")
-        log_event(f"  - URL: {asset['url']}\n  - Post Date: {post_date}")
+        context = asset.get('context', asset.get('description', 'Sin contexto adicional'))
+        
+        log_event(f"--- EVALUANDO {i+1}/{len(raw_assets)} ---", section_break=False)
+        log_event(f"  - URL: {asset['url']}")
+        log_event(f"  - Post Date: {post_date}")
+        log_event(f"  - Contexto del Post: \"{context[:300]}...\"")
 
         domain = asset['url'].split("//")[-1].split("/")[0]
         if domain in domain_blacklist:
-            eval_res = {"status": "FILTERED", "reason": "Dominio en lista negra de reputación"}
-            evaluations[asset["url"]] = eval_res
-            log_event(f"  [-] RECHAZADO: {eval_res['reason']}")
+            log_event(f"  [-] RECHAZADO: Dominio en lista negra ({domain})")
+            evaluations[asset["url"]] = {"status": "FILTERED", "reason": "Dominio en lista negra"}
             continue
-
-        web_content = await _deep_fetch_content(asset['url'])
-        context = asset.get('context', asset.get('description', 'Sin contexto adicional'))
-        
-        prompt = (
-            "Actúas como Ingeniero Curador Senior de 'nubenetes/awesome-kubernetes'.\n"
-            "Tu misión es catalogar contenido TÉCNICO sobre Kubernetes y Cloud Native compartido por el usuario.\n"
-            "REGLA DE ORO: Si el enlace está en el feed, es porque el usuario lo considera útil. NO lo descartes a menos que sea ruido total.\n\n"
-            f"Categorías válidas: {', '.join(NUBENETES_CATEGORIES)}.\n\n"
-            "INSTRUCCIONES:\n"
-            "1. YOUTUBE: Acepta videos técnicos o tutoriales. Categorízalos.\n"
-            "2. RESUMEN: Crea un resumen conciso (1 frase). Usa prioritariamente el 'Contexto' (que es el post de X).\n"
-            "3. ASIGNACIÓN: Si es sobre Model Context Protocol (MCP), asígnalo a 'ai-agents-mcp'. Si es técnico pero no sabes dónde, usa 'kubernetes-tools'.\n\n"
-            f"URL: {asset['url']}\nContexto de X: {context}\nContenido Web Extraído: {web_content[:1500]}\n\n"
-            "Evalúa (1-100):\n"
-            "- >80: Recurso excepcional (🌟).\n"
-            "- >1: Aceptar (si es técnico o útil).\n\n"
-            "Responde SOLAMENTE un JSON: {\"impact_score\": int, \"categories\": [\"cat1\"], \"title\": \"...\", \"desc\": \"...\", \"reasoning\": \"Breve explicación de por qué esta categoría y score\", \"rejection_reason\": \"... (si aplica)\"}"
-        )
-
+...
         try:
             data = await call_gemini_with_retry(prompt)
             score = data.get("impact_score", 50)
             valid_cats = [c for c in data.get("categories", []) if c in NUBENETES_CATEGORIES]
+            reasoning = data.get("reasoning", "Sin motivo especificado")
             
-            if score < 1:
+            if score < 20:
                 reason = data.get("rejection_reason", "Bajo impacto técnico")
                 evaluations[asset["url"]] = {"status": "FILTERED", "reason": reason}
-                log_event(f"  [-] RECHAZADO: {reason} (Score: {score})\n      Motivo IA: {data.get('reasoning')}")
+                log_event(f"  [-] RECHAZADO: {reason} (Score: {score})")
+                log_event(f"      Motivo IA: {reasoning}")
+                
+                if score < 10 and domain not in domain_blacklist:
+                    domain_blacklist.add(domain)
+                    log_event(f"  [!] Dominio {domain} añadido a lista negra.")
             elif not valid_cats:
-                evaluations[asset["url"]] = {"status": "FILTERED", "reason": "No se encontró categoría técnica válida"}
-                log_event(f"  [-] RECHAZADO: Sin categoría válida (Cats sugeridas: {data.get('categories')})\n      Motivo IA: {data.get('reasoning')}")
+                evaluations[asset["url"]] = {"status": "FILTERED", "reason": "Sin categoría técnica válida"}
+                log_event(f"  [-] RECHAZADO: No se encontró categoría válida (Sugeridas: {data.get('categories')})")
+                log_event(f"      Motivo IA: {reasoning}")
             else:
                 evaluations[asset["url"]] = {
                     "status": "INCLUDED", "title": data["title"], "description": data["desc"],
                     "category": valid_cats[0], "impact_score": score, "is_exceptional": score > 80,
-                    "reasoning": data.get("reasoning")
+                    "reasoning": reasoning
                 }
-                log_event(f"  [+] ACEPTADO: {data['title']} -> {valid_cats[0]} (Score: {score})\n      Desc: {data['desc']}\n      Motivo IA: {data.get('reasoning')}")
+                log_event(f"  [+] ACEPTADO: \"{data['title']}\" -> {valid_cats[0]} (Score: {score})")
+                log_event(f"      Descripción: {data['desc']}")
+                log_event(f"      Motivo IA: {reasoning}")
 
         except Exception as e:
-            err_msg = str(e)
-            if "Rate Limit" in err_msg or "429" in err_msg:
-                log_event(f"  [!] RATE LIMIT DETECTADO. Entrando en modo COOL DOWN (2 min)...")
-                await asyncio.sleep(120) 
-            
-            err_log = f"  [!] ERROR GEMINI: {err_msg[:200]}"
-            evaluations[asset["url"]] = {"status": "FILTERED", "reason": err_log}
-            log_event(err_log)
+            log_event(f"  [!] ERROR CRÍTICO EVALUANDO {asset['url']}: {e}")
+            evaluations[asset["url"]] = {"status": "FILTERED", "reason": f"Fallo Evaluación: {str(e)[:100]}"}
         
-        await asyncio.sleep(5.0) 
+        await asyncio.sleep(2.0) # Ritmo estable
             
-    if domain_blacklist:
-        try:
-            os.makedirs(os.path.dirname(memory_file), exist_ok=True)
-            with open(memory_file, 'w') as f:
-                json.dump({"blacklisted_domains": list(domain_blacklist)}, f)
-        except: pass
+    # Guardar blacklist actualizada
+    try:
+        os.makedirs(os.path.dirname(memory_file), exist_ok=True)
+        with open(memory_file, 'w') as f:
+            json.dump({"blacklisted_domains": list(domain_blacklist)}, f, indent=2)
+    except: pass
     return evaluations
 
 class AgenticCurator:
     def __init__(self):
         self.git_controller = RepositoryController(GH_TOKEN, TARGET_REPO)
         self.docs_dir = "docs"
-        self.index_path = os.path.join(self.docs_dir, "index.md")
         self.mkdocs_path = "mkdocs.yml"
-        self.stats = {"orphans_found": 0, "orphans_linked": 0, "structural_improvements": 0, "orphan_details": []}
 
     async def decide_smart_injection(self, markdown_content: str, asset: Dict) -> str:
-        """Usa Gemini para decidir dónde y cómo inyectar el enlace dentro del markdown."""
         lines = markdown_content.splitlines()
         structure = "\n".join([l for l in lines if l.startswith("#")])
         
         prompt = (
-            "Actúas como Arquitecto de Contenidos para Nubenetes.com.\n"
+            "Actúas como Arquitecto de Contenidos.\n"
             f"Enlace: [{asset['title']}]({asset['url']}) - {asset['description']}\n"
             f"Impacto: {asset['impact_score']}/100.\n\n"
-            "Estructura del archivo:\n"
-            f"{structure[:2000]}\n\n"
-            "1. Encuentra el ## o ### más semántico.\n"
-            "2. Decide formato: si es excelente, añade estrellas (🌟, 🌟🌟 o 🌟🌟🌟).\n"
-            "3. Decide si usar negritas (==enlace== o **texto**).\n"
-            "Responde JSON: {\"header\": \"Nombre exacto del ## o ###\", \"formatted_line\": \"  - [==Título==](url) 🌟 - Descripción\", \"reasoning\": \"Breve por qué de esta ubicación/formato\"}"
+            "Estructura:\n"
+            f"{structure[:1500]}\n\n"
+            "Responde JSON: {\"header\": \"## ...\", \"formatted_line\": \"  - [Título](url) - Desc\", \"reasoning\": \"...\"}"
         )
 
         try:
             data = await call_gemini_with_retry(prompt)
             header = data.get("header")
             new_line = data.get("formatted_line")
-            reasoning = data.get("reasoning", "Sin motivo especificado")
             
             if header and new_line:
-                log_event(f"  [>>>] UBICACIÓN: Header '{header}'\n      Formato: {new_line}\n      Motivo IA: {reasoning}")
-
                 new_lines = []
                 inserted = False
                 for line in lines:
@@ -159,9 +137,7 @@ class AgenticCurator:
                         new_lines.append(new_line)
                         inserted = True
                 if inserted: return "\n".join(new_lines)
-        except Exception as e:
-            log_event(f"[!] Error en decide_smart_injection: {e}")
-            pass
+        except: pass
         return self._manual_fallback_injection(markdown_content, asset)
 
     def _manual_fallback_injection(self, content: str, asset: Dict) -> str:
@@ -169,11 +145,36 @@ class AgenticCurator:
         line = f"  - [{asset['title']}]({asset['url']}){stars} - {asset['description']}"
         return content + f"\n{line}"
 
-    async def audit_navigation(self):
-        pass
-
     async def suggest_reorganization(self):
-        pass
+        """Detecta categorías con >15 links y propone/realiza el split."""
+        log_event("[*] Iniciando Auditoría de Reorganización Estructural...", section_break=True)
+        
+        bloated_files = []
+        for file in os.listdir(self.docs_dir):
+            if file.endswith(".md") and file != "index.md":
+                path = os.path.join(self.docs_dir, file)
+                with open(path, 'r') as f:
+                    content = f.read()
+                    links = re.findall(r'^\s*-\s*\[', content, re.MULTILINE)
+                    if len(links) > 15:
+                        bloated_files.append((file, len(links), content))
+
+        for file, count, content in bloated_files:
+            log_event(f"  [!] CATEGORÍA SATURADA: {file} tiene {count} enlaces. Proponiendo subdivisión...")
+            
+            prompt = (
+                f"El archivo '{file}' tiene demasiados enlaces ({count}).\n"
+                "Propón una subdivisión semántica en 2 o 3 subcategorías nuevas.\n"
+                "Responde JSON: {\"subcategories\": [{\"name\": \"nombre-slug\", \"title\": \"Título Legible\", \"links_indices\": [int]}]}"
+                "Nota: Para simplificar, solo propón los nombres de las subcategorías por ahora."
+            )
+            # Por ahora, solo logueamos la intención para no romper el flujo principal
+            # En una fase futura, implementaremos el split físico de archivos.
+            log_event(f"  [>>>] SUGERENCIA: Subdividir {file} para mejorar legibilidad.")
+
+    def validate_changes(self) -> bool:
+        return True
+
 
     def validate_changes(self) -> bool:
         return True
diff --git a/src/config.py b/src/config.py
index 0b9e5e8d..2b61b3a4 100644
--- a/src/config.py
+++ b/src/config.py
@@ -26,9 +26,10 @@ GH_TOKEN = os.getenv("GH_TOKEN")
 # Gemini Configuration (May 2026)
 GEMINI_API_VERSION = "v1beta"
 GEMINI_MODELS = [
-    "gemini-2.5-flash",
     "gemini-2.5-flash-lite",
-    "gemini-2.0-flash"
+    "gemini-2.0-flash",
+    "gemini-1.5-flash",
+    "gemini-2.5-flash"
 ]
 
 TARGET_REPO = "nubenetes/awesome-kubernetes"
diff --git a/src/gemini_utils.py b/src/gemini_utils.py
index 90e2c798..6125d7a4 100644
--- a/src/gemini_utils.py
+++ b/src/gemini_utils.py
@@ -5,6 +5,7 @@ import json
 import re
 from typing import Dict, Any, List, Optional
 from src.config import GEMINI_API_KEYS, GEMINI_API_VERSION, GEMINI_MODELS
+from src.logger import log_event
 
 # Global para mantener el índice de la API Key actual
 CURRENT_KEY_INDEX = 0
@@ -30,9 +31,9 @@ class GeminiDiagnostics:
             report += "\n"
         return report
 
-async def call_gemini_with_retry(prompt: str, response_format: str = "json", max_retries: int = 5):
+async def call_gemini_with_retry(prompt: str, response_format: str = "json", max_retries: int = 3):
     """
-    Llama a la API de Gemini con rotación de modelos Y rotación de API Keys.
+    Llama a la API de Gemini con rotación exhaustiva y REINTENTO REAL en 429.
     """
     global CURRENT_KEY_INDEX
     if not GEMINI_API_KEYS:
@@ -41,91 +42,54 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max
     diagnostics = GeminiDiagnostics()
     
     async with httpx.AsyncClient() as client:
-        # Intentamos con las llaves disponibles si una falla por cuota
-        for _ in range(len(GEMINI_API_KEYS)):
+        for key_attempt in range(len(GEMINI_API_KEYS)):
             api_key = GEMINI_API_KEYS[CURRENT_KEY_INDEX]
             
             for model in GEMINI_MODELS:
-                # Usamos el nombre completo del modelo como requiere la v1beta
                 full_model_name = f"models/{model}"
                 api_url = f"https://generativelanguage.googleapis.com/{GEMINI_API_VERSION}/{full_model_name}:generateContent?key={api_key}"
                 
-                for attempt in range(max_retries):
+                # Reintentos por modelo (incluyendo 429)
+                for attempt in range(max_retries + 2):
                     try:
                         payload = {"contents": [{"parts": [{"text": prompt}]}]}
-                        response = await client.post(api_url, json=payload, timeout=35)
+                        response = await client.post(api_url, json=payload, timeout=45)
                         
                         if response.status_code == 200:
-                            try:
-                                resp_json = response.json()
-                                if 'candidates' not in resp_json or not resp_json['candidates']:
-                                    diagnostics.add_attempt(model, 200, "Respuesta vacía (no candidates)", response.text)
-                                    break
-
+                            resp_json = response.json()
+                            if 'candidates' in resp_json and resp_json['candidates']:
                                 text_resp = resp_json['candidates'][0]['content']['parts'][0]['text']
                                 if response_format == "json":
                                     match = re.search(r'\{.*\}|\[.*\]', text_resp, re.DOTALL)
                                     if match:
                                         data = json.loads(match.group(0))
-                                        if isinstance(data, list):
-                                            return data[0] if len(data) > 0 else {}
-                                        return data
-                                    diagnostics.add_attempt(model, 200, "JSON no encontrado en texto", text_resp)
+                                        return data[0] if isinstance(data, list) and len(data) > 0 else data
+                                    diagnostics.add_attempt(model, 200, "JSON no encontrado", text_resp)
                                     break 
                                 return text_resp
-                            except Exception as e:
-                                diagnostics.add_attempt(model, 200, f"Error parseo: {str(e)}", response.text)
-                                break
+                            diagnostics.add_attempt(model, 200, "Sin candidates")
+                            break
                         
-                        elif response.status_code == 404:
-                            diagnostics.add_attempt(model, 404, f"Modelo {full_model_name} no encontrado")
-                            break # Probar siguiente modelo
+                        elif response.status_code == 429:
+                            wait_time = (10 * (attempt + 1)) + random.random() * 5
+                            log_event(f"  [!] API 429 (Límite): Reintentando {model} en {wait_time:.1f}s... (Intento {attempt+1})")
+                            await asyncio.sleep(wait_time)
+                            continue # Reintentar el MISMO modelo
                         
-                        elif response.status_code in [429, 503]:
-                            # Si es un error de cuota (429), probamos a rotar la API Key inmediatamente
-                            if response.status_code == 429:
-                                reason = "Rate Limit / Quota Exceeded"
-                                diagnostics.add_attempt(model, 429, reason)
-                                # Loguear rotación en el log central
-                                with open("/home/inafev/.gemini/tmp/awesome-kubernetes/curation_progress.log", "a") as log_f:
-                                    log_f.write(f"  [!] Llave {CURRENT_KEY_INDEX + 1} agotada. Probando rotación...\n")
-                                break # Rompe el bucle de reintentos para cambiar de llave o modelo
-                            
-                            reason = "Service Unavailable"
-                            diagnostics.add_attempt(model, response.status_code, reason)
-                            wait = (5 * (2 ** attempt)) + random.random() * 5
-                            await asyncio.sleep(wait)
+                        elif response.status_code in [500, 503, 504]:
+                            diagnostics.add_attempt(model, response.status_code, "Server Error")
+                            await asyncio.sleep(5)
                             continue
                         
                         else:
-                            diagnostics.add_attempt(model, response.status_code, "Error API", response.text)
+                            diagnostics.add_attempt(model, response.status_code, "API Error", response.text)
                             break
                             
                     except Exception as e:
                         diagnostics.add_attempt(model, 0, f"Excepción: {str(e)}")
-                        if attempt == max_retries - 1:
-                            break
-                        await asyncio.sleep(1)
-                
-                # Si llegamos aquí por un 429, el bucle 'attempt' se rompió. 
-                # Salimos también del bucle 'model' para rotar la llave.
-                if diagnostics.attempts and diagnostics.attempts[-1]['status'] == 429:
-                    break
+                        break
+            
+            CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS)
+            await asyncio.sleep(2)
 
-            # Si la última falla fue un 429, rotamos la llave y probamos el siguiente ciclo
-            if diagnostics.attempts and diagnostics.attempts[-1]['status'] == 429:
-                CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS)
-                # Opcional: una pequeña espera antes de usar la nueva llave
-                await asyncio.sleep(2)
-                continue
-            else:
-                # Si no fue un 429 o éxito, salimos del bucle de llaves
-                if diagnostics.attempts and diagnostics.attempts[-1]['status'] == 200:
-                    # En caso de éxito (que devuelve directamente arriba), no llegamos aquí.
-                    # Este break es para otros errores terminales.
-                    pass
-                break
-        
-    # Si logramos el éxito, la función ya habría retornado dentro del bucle de éxito (200)
-    # Si llegamos aquí, es porque todas las llaves y modelos fallaron.
-    raise Exception(f"Fallo crítico Gemini tras rotación.\n{diagnostics.get_report()}")
+    raise Exception(f"Fallo crítico Gemini tras rotación exhaustiva.\n{diagnostics.get_report()}")
diff --git a/src/main.py b/src/main.py
index aeb17f49..ea7263d5 100644
--- a/src/main.py
+++ b/src/main.py
@@ -12,8 +12,11 @@ from src.autonomous_discovery import discover_trending_assets
 from src.gitops_manager import RepositoryController
 from src.logger import log_event
 
+from src.state_manager import get_last_date, save_state
+
 async def master_orchestrator():
     git_controller = RepositoryController(GH_TOKEN, TARGET_REPO)
+    start_time = datetime.now(MADRID_TZ)
     
     log_event("INICIANDO CURADURÍA AGÉNTICA (CRONOLOGÍA Y TRANSPARENCIA)", section_break=True)
     
@@ -38,14 +41,22 @@ async def master_orchestrator():
             
         log_event(f"[*] MODO HISTÓRICO: Tramo {since_date.date()} -> {until_date.date()}")
     else:
-        # Modo Normal (30 días)
-        days_back = int(os.getenv("CURATION_DAYS_BACK", "30"))
-        since_date = datetime.now(MADRID_TZ) - timedelta(days=days_back)
-        until_date = None
-        log_event(f"[*] Modo Normal: Desde {since_date.date()}")
+        # Modo Normal: Usar CURATION_START_DATE si existe, si no state.json
+        env_start = os.getenv("CURATION_START_DATE")
+        if env_start:
+            try:
+                since_date = datetime.fromisoformat(env_start).replace(tzinfo=MADRID_TZ)
+                log_event(f"[*] Modo Normal: Desde fecha manual del workflow {since_date.date()}")
+            except:
+                since_date = get_last_date()
+                log_event(f"[*] Modo Normal: Error parseando fecha manual, usando state.json {since_date.date()}")
+        else:
+            since_date = get_last_date()
+            log_event(f"[*] Modo Normal: Desde la última fecha guardada {since_date.date()}")
 
     # 2. Ingesta Multi-fuente
     backup_file = os.getenv("BACKUP_FILE")
+    x_audit_trail = []
     if backup_file and os.path.exists(backup_file):
         from src.ingestion_backup import BackupDataExtractor
         extractor = BackupDataExtractor(backup_file)
@@ -67,77 +78,149 @@ async def master_orchestrator():
             t["timestamp"] = datetime.now(MADRID_TZ).isoformat()
     
     all_raw_assets = raw_social + trending
-    
-    # 3. Evaluación y Registro (Ignorar duplicados locales)
+    if not all_raw_assets:
+        log_event("[!] No se encontraron nuevos enlaces para procesar.")
+        return
+
+    # 3. Evaluación y Registro (Deduplicación Global Robusta)
     existing_urls = set()
-    for doc in os.listdir("docs"):
-        if doc.endswith(".md"):
-            try:
-                with open(os.path.join("docs", doc), 'r') as f:
-                    existing_urls.update(re.findall(r'\]\((https?://[^\)]+)\)', f.read()))
-            except: pass
+    for root, dirs, files in os.walk("docs"):
+        for file in files:
+            if file.endswith(".md"):
+                try:
+                    with open(os.path.join(root, file), 'r') as f:
+                        content = f.read()
+                        found = re.findall(r'\]\((https?://[^\)]+)\)', content)
+                        for url in found:
+                            existing_urls.add(url.split('#')[0].rstrip('/').lower())
+                except: pass
+    
+    log_event(f"[*] Deduplicación Global: {len(existing_urls)} URLs existentes cargadas.")
 
     # --- INICIO PROCESAMIENTO POR LOTES ---
-    BATCH_SIZE = 50
+    BATCH_SIZE = 40
     all_raw_assets_batches = [all_raw_assets[i:i + BATCH_SIZE] for i in range(0, len(all_raw_assets), BATCH_SIZE)]
     
     curator_agent = AgenticCurator()
     total_processed = 0
+    max_tweet_date = since_date
+    full_report_metrics = []
+    modified_files_content = {}
 
     for batch_index, batch_assets in enumerate(all_raw_assets_batches):
         log_event(f">>> INICIANDO LOTE {batch_index + 1}/{len(all_raw_assets_batches)} ({len(batch_assets)} enlaces)", section_break=True)
         
-        full_extraction_report = []
-        unique_new_assets = []
-        
-        evaluations = await evaluate_extracted_assets(batch_assets)
-        
+        assets_to_evaluate = []
         for asset in batch_assets:
             url = asset["url"]
-            clean_url = url.split('#')[0].rstrip('/')
+            clean_url = url.split('#')[0].rstrip('/').lower()
             
+            # Trackear fecha máxima
+            try:
+                ts = asset.get('timestamp')
+                asset_date = None
+                if ts:
+                    if isinstance(ts, str):
+                        try:
+                            # Twitter format: 'Tue Oct 01 19:56:51 +0000 2024'
+                            asset_date = datetime.strptime(ts, '%a %b %d %H:%M:%S +0000 %Y').replace(tzinfo=MADRID_TZ)
+                        except:
+                            try: asset_date = datetime.fromisoformat(ts.replace('Z', '+00:00'))
+                            except: pass
+                    
+                if asset_date and asset_date > max_tweet_date:
+                    max_tweet_date = asset_date
+            except: pass
+
+            if clean_url in existing_urls:
+                log_event(f"  [=] SALTADO: {url[:60]}... (Ya existe)")
+                full_report_metrics.append({
+                    "url": url, "status": "DUPLICATE", "reason": "Ya existe en repositorio",
+                    "category": "N/A", "post_date": ts, "source": asset.get("source_type", "Social")
+                })
+                continue
+            assets_to_evaluate.append(asset)
+
+        if not assets_to_evaluate:
+            log_event("  [*] El lote completo consiste en duplicados. Siguiente lote.")
+            continue
+
+        evaluations = await evaluate_extracted_assets(assets_to_evaluate)
+        unique_new_assets = []
+        
+        for asset in assets_to_evaluate:
+            url = asset["url"]
             evaluation = evaluations.get(url, {"status": "FILTERED", "reason": "No evaluado por IA"})
-            status = evaluation["status"]
-            reason = evaluation.get("reason", "Aceptado")
-            category = evaluation.get("category", "N/A")
             
-            if clean_url in [u.split('#')[0].rstrip('/') for u in existing_urls]:
-                status = "DUPLICATE"
-                reason = "Ya existe en Nubenetes.com"
-                log_event(f"  [=] DUPLICADO: El enlace ya está en el repositorio.")
-            
-            if status == "INCLUDED":
+            full_report_metrics.append({
+                "url": url, "status": evaluation["status"], "reason": evaluation.get("reason", "Aceptado"),
+                "category": evaluation.get("category", "N/A"), "post_date": asset.get("timestamp"),
+                "source": asset.get("source_type", "Social")
+            })
+
+            if evaluation["status"] == "INCLUDED":
                 unique_new_assets.append({
                     "url": url, "title": evaluation["title"],
-                    "description": evaluation["description"], "category": category,
+                    "description": evaluation["description"], "category": evaluation.get("category", "kubernetes-tools"),
                     "impact_score": evaluation["impact_score"],
                     "reasoning": evaluation.get("reasoning")
                 })
+                existing_urls.add(url.split('#')[0].rstrip('/').lower())
 
-        # Inyección inmediata de este lote
+        # Inyección inmediata
         if unique_new_assets:
-            log_event(">>> APLICANDO INYECCIONES EN MARKDOWN...", section_break=True)
-
+            log_event(f">>> APLICANDO {len(unique_new_assets)} INYECCIONES EN MARKDOWN...", section_break=True)
             for asset in unique_new_assets:
                 category = asset["category"]
                 file_path = f"docs/{category}.md"
                 try:
-                    with open(file_path, 'r') as f: content = f.read()
+                    if file_path in modified_files_content:
+                        content = modified_files_content[file_path]
+                    else:
+                        if not os.path.exists(file_path):
+                            content = f"# {category.capitalize()}\n\n"
+                        else:
+                            with open(file_path, 'r') as f: content = f.read()
                     
                     new_content = await curator_agent.decide_smart_injection(content, asset)
+                    
                     if len(new_content) > len(content):
-                        # Actualizar archivo físico inmediatamente
+                        modified_files_content[file_path] = new_content
                         with open(file_path, 'w') as f: f.write(new_content)
+                        log_event(f"  [>>>] INYECTADO: {asset['url']}")
                 except Exception as e:
                     log_event(f"  [!] Error inyectando {asset['url']}: {e}")
 
         total_processed += len(batch_assets)
-        log_event(f"Fin del Lote {batch_index + 1}. Total procesado: {total_processed}/{len(all_raw_assets)}", section_break=True)
-        
-        # Pausa entre lotes para dejar respirar a la API
         if batch_index < len(all_raw_assets_batches) - 1:
-            log_event("[*] Esperando 30 segundos para el siguiente lote...")
-            await asyncio.sleep(30)
+            log_event(f"[*] Pausa de seguridad: 5s para el siguiente lote...")
+            await asyncio.sleep(5)
+
+    # 4. Finalización y PR
+    if modified_files_content:
+        log_event(">>> GENERANDO PULL REQUEST...", section_break=True)
+        metrics = {
+            "total_extracted": len(all_raw_assets),
+            "start_date": since_date.isoformat(),
+            "end_date": datetime.now(MADRID_TZ).isoformat(),
+            "full_report": full_report_metrics,
+            "x_audit": x_audit_trail
+        }
+        try:
+            git_controller.apply_multi_file_changes(modified_files_content, metrics)
+        except Exception as e:
+            log_event(f"[!] Error creando PR: {e}")
+
+    # Auditoría de reorganización
+    await curator_agent.suggest_reorganization()
+
+    # Actualizar estado
+    if max_tweet_date > since_date:
+        save_state(max_tweet_date + timedelta(seconds=1))
+
+    log_event("PROCESO FINALIZADO CON ÉXITO.", section_break=True)
+
+
 
     log_event("PROCESO FINALIZADO CON ÉXITO.", section_break=True)
 
diff --git a/src/memory/health_learning.json b/src/memory/health_learning.json
index e69de29b..dd26d09a 100644
--- a/src/memory/health_learning.json
+++ b/src/memory/health_learning.json
@@ -0,0 +1,3 @@
+{
+  "blacklisted_domains": []
+}
\ No newline at end of file
diff --git a/src/state_manager.py b/src/state_manager.py
new file mode 100644
index 00000000..aee4dd0e
--- /dev/null
+++ b/src/state_manager.py
@@ -0,0 +1,59 @@
+import os
+import json
+from datetime import datetime
+from src.config import MADRID_TZ
+from src.logger import log_event
+
+STATE_FILE = "src/memory/state.json"
+
+def load_state() -> dict:
+    """Return the persisted curation state merged over the defaults.
+
+    Falls back to the defaults when the state file is missing, unreadable
+    or does not hold a JSON object, so the ``last_processed_tweet_date``
+    key is always present in the result.
+    """
+    default_state = {
+        "last_processed_tweet_date": "2024-10-01T00:00:00"
+    }
+    if os.path.exists(STATE_FILE):
+        try:
+            with open(STATE_FILE, 'r', encoding='utf-8') as f:
+                # Merging keeps the default for any key the file lacks; a
+                # non-object payload raises TypeError and is handled below.
+                return {**default_state, **json.load(f)}
+        except Exception as e:
+            log_event(f"[!] Error cargando state.json: {e}")
+    return default_state
+
+def save_state(last_date: datetime):
+    """Persist ``last_date`` as the new ``last_processed_tweet_date``.
+
+    Keys already stored in the state file are preserved. Failures while
+    creating the directory or writing the file are logged, not raised.
+    """
+    state = load_state()
+    state["last_processed_tweet_date"] = last_date.isoformat()
+
+    try:
+        os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
+        with open(STATE_FILE, 'w', encoding='utf-8') as f:
+            json.dump(state, f, indent=2)
+        log_event(f"[*] Estado guardado: última fecha procesada {last_date.date()}")
+    except Exception as e:
+        log_event(f"[!] Error guardando state.json: {e}")
+
+def get_last_date() -> datetime:
+    """Return the last processed tweet date as an aware Madrid datetime.
+
+    Raises:
+        ValueError / TypeError: if the stored value is not an ISO-8601 string.
+    """
+    parsed = datetime.fromisoformat(load_state().get("last_processed_tweet_date"))
+    if parsed.tzinfo is None:
+        # Naive values (such as the built-in default) are taken as Madrid time.
+        # NOTE(review): replace() assumes MADRID_TZ is not a pytz zone — confirm.
+        return parsed.replace(tzinfo=MADRID_TZ)
+    # save_state() stores the offset; convert rather than overwrite it so the
+    # instant is preserved.
+    return parsed.astimezone(MADRID_TZ)
diff --git a/test_gemini.py b/test_gemini.py
new file mode 100644
index 00000000..3322a59a
--- /dev/null
+++ b/test_gemini.py
@@ -0,0 +1,21 @@
+import asyncio
+from src.gemini_utils import call_gemini_with_retry
+
+async def test():
+    """Manual smoke test: send one prompt to Gemini and print the outcome.
+
+    Best-effort by design: any failure is printed instead of raised.
+    """
+    try:
+        # The prompt asks for the plain word 'OK', so request a text reply;
+        # with the default "json" format a reply without JSON is treated as
+        # a failed attempt.
+        res = await call_gemini_with_retry(
+            "Hola, responde con la palabra 'OK' si recibes esto.",
+            response_format="text",
+        )
+        print(f"Resultado: {res}")
+    except Exception as e:
+        print(f"Error: {e}")
+
+if __name__ == "__main__":
+    asyncio.run(test())