feat(gemini): implement Dynamic AI Model Discovery and Scoring Engine

2026-07-28 09:32:20 +00:00 · 2026-05-16 13:04:12 +02:00
parent c0cf71e62d
commit d2e9c0d7bf
3 changed files with 112 additions and 74 deletions
--- a/src/config.py
+++ b/src/config.py
@@ -24,14 +24,8 @@ if GEMINI_API_KEY and not os.getenv("GOOGLE_API_KEY"):

 GH_TOKEN = os.getenv("GH_TOKEN")

-# Gemini Configuration (May 2026)
-GEMINI_API_VERSION = "v1"
-GEMINI_MODELS = [
-    "gemini-3.1-flash",
-    "gemini-3.1-pro",
-    "gemini-2.5-pro",
-    "gemini-2.5-flash"
-]
+# Gemini Configuration (Dynamic Discovery Enabled)
+GEMINI_API_VERSION = "v1beta"

 TARGET_REPO = "nubenetes/awesome-kubernetes"

--- a/src/gemini_utils.py
+++ b/src/gemini_utils.py
@@ -3,12 +3,77 @@ import asyncio
 import random
 import json
 import re
+import os
 from typing import Dict, Any, List, Optional
-from src.config import GEMINI_API_KEYS, GEMINI_API_VERSION, GEMINI_MODELS
+from src.config import GEMINI_API_KEYS, GEMINI_API_VERSION
 from src.logger import log_event

-# Global para mantener el índice de la API Key actual
+# Global para mantener el índice de la API Key actual y los modelos descubiertos
 CURRENT_KEY_INDEX = 0
+DISCOVERED_MODELS = []
+
+async def discover_optimal_models():
+    """
+    Queries the Google Model Service to find available models for the current keys.
+    Ranks them by capability (Pro > Flash) and version (3.1 > 2.5 > 1.5).
+    """
+    global DISCOVERED_MODELS
+    if DISCOVERED_MODELS: return DISCOVERED_MODELS
+    
+    log_event("[*] Starting AI Model Auto-Discovery...", section_break=True)
+    all_supported = []
+    
+    # Try all keys to get a union of available models
+    for key in GEMINI_API_KEYS:
+        try:
+            async with httpx.AsyncClient() as client:
+                # Use v1beta for discovery as it lists more models
+                url = f"https://generativelanguage.googleapis.com/v1beta/models?key={key}"
+                resp = await client.get(url, timeout=10)
+                if resp.status_code == 200:
+                    models_data = resp.json().get("models", [])
+                    for m in models_data:
+                        name = m.get("name", "").replace("models/", "")
+                        # Filter only models that support content generation
+                        if "generateContent" in m.get("supportedGenerationMethods", []):
+                            if name not in all_supported:
+                                all_supported.append(name)
+                elif resp.status_code == 429:
+                    log_event(f"  [!] Discovery Key is rate-limited (429). Skipping this key for discovery.")
+        except: pass
+
+    if not all_supported:
+        log_event("  [!] Discovery failed or keys limited. Falling back to safe defaults.")
+        # Fallback to absolute stables
+        DISCOVERED_MODELS = ["gemini-1.5-flash-latest", "gemini-1.5-flash", "gemini-1.5-pro"]
+        return DISCOVERED_MODELS
+
+    # SCORING ALGORITHM
+    def score_model(name: str) -> float:
+        score = 0.0
+        # 1. Version Score (Prioritize 3.x series in 2026)
+        if "3.1" in name: score += 100
+        elif "3.0" in name: score += 80
+        elif "2.5" in name: score += 60
+        elif "2.0" in name: score += 40
+        elif "1.5" in name: score += 20
+        
+        # 2. Tier Score
+        if "-pro" in name: score += 50
+        elif "-flash" in name: score += 25
+        elif "-lite" in name: score += 10
+        
+        # 3. Latest/Stable Bonus
+        if "-latest" in name: score += 5
+        if "experimental" in name or "exp" in name: score -= 15
+        
+        return score
+
+    # Sort by score descending
+    DISCOVERED_MODELS = sorted(all_supported, key=score_model, reverse=True)
+    log_event(f"  [+] Discovered {len(DISCOVERED_MODELS)} suitable models.")
+    log_event(f"  [+] Top Tier AI: {', '.join(DISCOVERED_MODELS[:3])}")
+    return DISCOVERED_MODELS

 class GeminiDiagnostics:
    def __init__(self):
@@ -32,87 +97,54 @@ class GeminiDiagnostics:
        return report

 async def resolve_url(url: str) -> str:
-    """Sigue las redirecciones para obtener la URL larga final, consolidando repositorios y evitando bucles."""
+    """Sigue las redirecciones para obtener la URL larga final."""
    shorteners = ['t.co', 'bit.ly', 'buff.ly', 'goo.gl', 'tinyurl.com', 't.ly', 'rb.gy', 'is.gd', 'drp.li', 't.me', 'lnkd.in']
-    try:
-        domain = url.split("//")[-1].split("/")[0].lower()
-    except:
-        return url
-    
-    # 1. Expansión Multi-salto (evita intermediarios de tracking)
-    final_url = url
-    max_hops = 5
-    current_hop = 0
+    try: domain = url.split("//")[-1].split("/")[0].lower()
+    except: return url
    
+    final_url, max_hops, current_hop = url, 5, 0
    async with httpx.AsyncClient(follow_redirects=True, timeout=8) as client:
        while current_hop < max_hops:
            try:
-                # Si no es un acortador conocido y ya tenemos una URL larga, paramos
                current_domain = final_url.split("//")[-1].split("/")[0].lower()
-                if current_hop > 0 and current_domain not in shorteners:
-                    break
-                    
+                if current_hop > 0 and current_domain not in shorteners: break
                resp = await client.head(final_url, timeout=5)
                new_url = str(resp.url)
                if new_url == final_url: break
-                
-                final_url = new_url
-                current_hop += 1
-            except:
-                break
-
-    # 2. Consolidación de Repositorios (GitHub/GitLab) con chequeo de MVQ (vía REST si es necesario)
-    repo_domains = ['github.com', 'gitlab.com']
-    current_domain = final_url.split("//")[-1].split("/")[0].lower()
-    
-    if any(d in current_domain for d in repo_domains):
-        try:
-            async with httpx.AsyncClient(follow_redirects=True) as client:
-                resp = await client.head(final_url, timeout=5)
-                if resp.status_code != 200:
-                    parts = final_url.split('/')
-                    if len(parts) > 4: 
-                        root_repo = "/".join(parts[:5])
-                        resp_root = await client.head(root_repo, timeout=5)
-                        if resp_root.status_code == 200:
-                            log_event(f"  [📦] Consolidación: {final_url} -> {root_repo}")
-                            final_url = root_repo
-        except:
-            pass
-            
+                final_url, current_hop = new_url, current_hop + 1
+            except: break
    return final_url

 def is_fuzzy_duplicate(url_a: str, url_b: str) -> bool:
    """Detecta si dos URLs son iguales ignorando parámetros de tracking comunes."""
    def clean(u):
        u = u.split('#')[0].rstrip('/').lower()
-        # Eliminar parámetros utm_* y otros comunes
        u = re.sub(r'(\?|&)(utm_[^&]+|s=[^&]+|t=[^&]+|ref=[^&]+)', '', u)
-        if u.endswith('?'): u = u[:-1]
-        return u
+        return u[:-1] if u.endswith('?') else u
    return clean(url_a) == clean(url_b)

 async def call_gemini_with_retry(prompt: str, response_format: str = "json", max_retries: int = 3):
    """
-    Calls Gemini API optimizing for quota usage (pay-per-use).
-    Rotates keys immediately on 429 and uses smart exponential backoff.
+    Calls Gemini API using dynamic discovery and intelligent rotation.
+    Optimizes for cost (Pay-as-you-go) and quality (Pro vs Flash).
    """
    global CURRENT_KEY_INDEX
    if not GEMINI_API_KEYS:
        raise ValueError("No GEMINI_API_KEYS configured.")

+    # Ensure models are discovered
+    models = await discover_optimal_models()
    diagnostics = GeminiDiagnostics()
    
    # Try rotating through all available keys
-    for _ in range(len(GEMINI_API_KEYS)):
+    for key_attempt in range(len(GEMINI_API_KEYS)):
        api_key = GEMINI_API_KEYS[CURRENT_KEY_INDEX]
        
        async with httpx.AsyncClient() as client:
-            for model in GEMINI_MODELS:
+            for model in models:
                full_model_name = f"models/{model}"
                api_url = f"https://generativelanguage.googleapis.com/{GEMINI_API_VERSION}/{full_model_name}:generateContent?key={api_key}"
                
-                key_blocked = False
                for attempt in range(max_retries):
                    try:
                        payload = {"contents": [{"parts": [{"text": prompt}]}]}
@@ -134,11 +166,16 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max
                            break # Try next model
                        
                        elif response.status_code == 429:
-                            log_event(f"  [!] API 429 on key {CURRENT_KEY_INDEX+1}. Rotating...")
+                            log_event(f"  [!] API 429 (Rate Limit) on Key {CURRENT_KEY_INDEX+1}. Rotating Key...")
                            CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS)
-                            key_blocked = True
-                            break # Break attempt loop
+                            # On 429, we recursive call to start over with next key and top model
+                            return await call_gemini_with_retry(prompt, response_format, max_retries)
                        
+                        elif response.status_code == 404:
+                            # Model not found for this key or deprecated
+                            diagnostics.add_attempt(model, 404, "Not Found/Unsupported")
+                            break # Try next model (Pro -> Flash fallback)
+                            
                        elif response.status_code in [500, 503, 504]:
                            await asyncio.sleep(2 * (attempt + 1))
                            continue
@@ -148,21 +185,10 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max
                            break
                            
                    except Exception as e:
-                        import traceback
-                        error_msg = f"{type(e).__name__}: {str(e)}"
-                        diagnostics.add_attempt(model, 0, error_msg)
-                        log_event(f"  [!] Model {model} failed with exception: {error_msg}")
+                        diagnostics.add_attempt(model, 0, str(e))
                        break
-                
-                if key_blocked:
-                    break # Break model loop to try next key
            
-            if key_blocked:
-                continue # Try next key_attempt
-            
-            # If we are here and didn't succeed with any model, rotate for the next global call
+            # If no model worked for this key, rotate for the next global call
            CURRENT_KEY_INDEX = (CURRENT_KEY_INDEX + 1) % len(GEMINI_API_KEYS)
-            await asyncio.sleep(0.5)
-
-    raise Exception(f"Critical Gemini failure after key rotation.\n{diagnostics.get_report()}")

+    raise Exception(f"Critical Gemini failure after key and model rotation.\n{diagnostics.get_report()}")