"""Gemini API client helpers.

Contains model auto-discovery, per-session usage tracking, a retrying
``generateContent`` caller with key rotation and per-model throttling, and a
few URL/title normalization utilities.
"""

import asyncio
import json
import os
import random
import re
import time
from datetime import datetime
from typing import Dict, Any, List, Optional

import httpx

from src.config import GEMINI_API_KEYS, GEMINI_API_VERSION, GEMINI_API_KEYS_DATA
from src.logger import log_event

# Global state for rate limiting and discovery
CURRENT_KEY_INDEX = 0
DISCOVERED_MODELS = []
GLOBAL_COOLDOWN_UNTIL = 0
THROTTLED_MODELS = {}  # {model_name: timestamp}
GLOBAL_AI_SEMAPHORE = asyncio.Semaphore(5)  # Max 5 concurrent calls globally

# Domains treated as URL shorteners by resolve_url().
_SHORTENER_DOMAINS = frozenset([
    't.co', 'bit.ly', 'buff.ly', 'goo.gl', 'tinyurl.com', 't.ly',
    'rb.gy', 'is.gd', 'drp.li', 't.me', 'lnkd.in',
])

# A single "name=value" query parameter that normalize_url() drops.
_TRACKING_PARAM_RE = re.compile(r'utm_[^&]+|(?:s|t|ref|fbclid)=[^&]+')

# Substring -> score bonus, checked in order; only the first match applies.
_TIER_BONUSES = (("-ultra", 100), ("-pro", 50), ("-flash", 25), ("-lite", 10))

# Sentinel returned by _parse_json_payload() when no JSON could be decoded
# (None is a legitimate decoded value, e.g. from "[null]").
_NO_JSON = object()


class GeminiSessionTracker:
    """Accumulates per-session usage counters and renders them as Markdown."""

    def __init__(self):
        self.model_usage = {}  # {model_name: count of status-200 calls}
        # One stats record per configured key index.
        # NOTE(review): indexes GEMINI_API_KEYS_DATA by position, so it
        # presumably parallels GEMINI_API_KEYS -- confirm in src.config.
        self.key_stats = {
            i: {
                "calls": 0,
                "429s": 0,
                "404s": 0,
                "type": GEMINI_API_KEYS_DATA[i]["type"],
                "label": GEMINI_API_KEYS_DATA[i]["label"],
            }
            for i in range(len(GEMINI_API_KEYS))
        }
        self.discovery_log = []
        self.start_time = datetime.now()
        self.total_throttles = 0
        self.total_tokens_prompt = 0
        self.total_tokens_completion = 0
        self.cache_hits = 0
        self.est_tokens_saved = 0

    def track_cache_hit(self, est_tokens: int = 1500):
        """Record one cache hit and add ``est_tokens`` to the saved-token estimate."""
        self.cache_hits += 1
        self.est_tokens_saved += est_tokens

    def track_call(self, key_idx: int, model: str, status: int, usage: Optional[Dict] = None):
        """Record the outcome of one API call.

        Args:
            key_idx: Index of the API key used (must be a key of ``key_stats``).
            model: Model name the call was sent to.
            status: HTTP status code (callers in this file pass 0 on exceptions).
            usage: Optional usage metadata dict; ``promptTokenCount`` and
                ``candidatesTokenCount`` are read from it.
        """
        if status == 200:
            self.model_usage[model] = self.model_usage.get(model, 0) + 1
            # NOTE(review): token counting is nested under the 200 branch;
            # the original layout was ambiguous -- confirm.
            if usage:
                self.total_tokens_prompt += usage.get("promptTokenCount", 0)
                self.total_tokens_completion += usage.get("candidatesTokenCount", 0)

        key_record = self.key_stats[key_idx]
        key_record["calls"] += 1
        if status == 429:
            key_record["429s"] += 1
            self.total_throttles += 1
        if status == 404:
            key_record["404s"] += 1

    def get_intelligence_report(self) -> str:
        """Return a Markdown report of model usage, key stats and token metrics."""
        parts = [
            "\n### 🧠 AI Intelligence & Observability Report\n\n",
            "#### 🤖 Model Selection Logic (Dynamic)\n",
            "Execution started with discovery of top models based on May 2026 hierarchy.\n\n",
            "| Model Used | Successful Calls | Hierarchy Logic |\n| :--- | :---: | :--- |\n",
        ]

        # Most-used models first.
        usage_items = sorted(self.model_usage.items(), key=lambda x: x[1], reverse=True)
        for model, count in usage_items:
            logic = "Elite/Pro (Complex Reasoning)" if "pro" in model else "Flash/Lite (High Speed)"
            parts.append(f"| `{model}` | **{count}** | {logic} |\n")
        if not self.model_usage:
            parts.append("| No AI calls | 0 | N/A |\n")

        parts.append("\n#### 🔑 API Infrastructure & Quota Management\n")
        parts.append(
            "| Key Index | Type | Provider Label | Usage | Errors (429/404) |\n"
            "| :--- | :--- | :--- | :---: | :---: |\n"
        )
        for idx, stats in self.key_stats.items():
            # One bar segment per 5 calls, capped at 10; placeholder when empty.
            usage_bar = "█" * min(stats["calls"] // 5, 10) or "░"
            parts.append(
                f"| Key {idx+1} | `{stats['type']}` | {stats['label']} | "
                f"{usage_bar} ({stats['calls']}) | {stats['429s']} / {stats['404s']} |\n"
            )

        parts.append("\n#### 📊 Consumption and Efficiency Metrics (2026 Units)\n")
        parts.append(f"- **Total Prompt Tokens**: {self.total_tokens_prompt:,}\n")
        parts.append(f"- **Total Completion Tokens**: {self.total_tokens_completion:,}\n")

        # Cache-First Metrics
        total_lookups = self.cache_hits + sum(self.model_usage.values())
        hit_ratio = (self.cache_hits / total_lookups * 100) if total_lookups > 0 else 0
        parts.append(
            f"- **Database-First Cache Hits**: **{self.cache_hits}** ({hit_ratio:.1f}% hit ratio)\n"
        )
        parts.append(f"- **Estimated Tokens Saved**: ~{self.est_tokens_saved:,} (Zero-API cost)\n")

        if self.total_tokens_prompt > 0:
            efficiency = self.total_tokens_completion / self.total_tokens_prompt * 100
        else:
            efficiency = 0
        parts.append(f"- **Execution Efficiency**: {efficiency:.1f}% (Completion/Prompt)\n")

        status_msg = f"{len(DISCOVERED_MODELS)} models verified."
        if self.total_throttles > 0:
            status_msg += f" **Adaptive Tiering active ({self.total_throttles} throttles managed).**"
        parts.append(f"\n*Status: {status_msg} System auto-adopted newest versions found.*")
        return "".join(parts)


SESSION_TRACKER = GeminiSessionTracker()


def _score_model(name: str) -> float:
    """Rank a model name: higher version and higher tier score higher."""
    score = 0.0
    version_match = re.search(r'(\d+\.\d+)', name)
    if version_match:
        # The pattern only matches "<digits>.<digits>", so float() cannot fail.
        score += float(version_match.group(1)) * 50
    for marker, bonus in _TIER_BONUSES:
        if marker in name:
            score += bonus
            break
    if "-latest" in name:
        score += 5
    # "exp" also covers "experimental".
    if "exp" in name:
        score -= 15
    return score


async def discover_optimal_models():
    """Return the list of usable model names, best first, caching the result.

    Queries the models endpoint once per configured key, keeps models that
    list ``generateContent`` among their supported methods, and sorts them
    with ``_score_model``. If nothing is discovered, a hard-coded default
    list is stored and returned. The result is cached in ``DISCOVERED_MODELS``.
    """
    global DISCOVERED_MODELS
    if DISCOVERED_MODELS:
        return DISCOVERED_MODELS

    log_event("[*] Starting AI Model Auto-Discovery...", section_break=True)
    all_supported = []
    for key in GEMINI_API_KEYS:
        try:
            async with httpx.AsyncClient() as client:
                url = f"https://generativelanguage.googleapis.com/v1beta/models?key={key}"
                resp = await client.get(url, timeout=10)
                if resp.status_code == 200:
                    for m in resp.json().get("models", []):
                        name = m.get("name", "").replace("models/", "")
                        supports_generate = "generateContent" in m.get("supportedGenerationMethods", [])
                        if supports_generate and name not in all_supported:
                            all_supported.append(name)
                elif resp.status_code == 429:
                    log_event(" [!] Discovery Key is rate-limited (429). Skipping.")
        except Exception as exc:
            # Best-effort per key: move on to the next one. `Exception` (not a
            # bare except) lets task cancellation / KeyboardInterrupt propagate.
            # Only the exception type is logged so the key in the URL cannot leak.
            log_event(f" [!] Discovery request failed ({type(exc).__name__}). Skipping key.")

    if not all_supported:
        log_event(" [!] Discovery failed. Falling back to safe defaults.")
        DISCOVERED_MODELS = ["gemini-1.5-flash-latest", "gemini-1.5-flash", "gemini-1.5-pro"]
        return DISCOVERED_MODELS

    DISCOVERED_MODELS = sorted(all_supported, key=_score_model, reverse=True)
    log_event(f" [+] Discovered {len(DISCOVERED_MODELS)} suitable models.")
    log_event(f" [+] Top Tier AI: {', '.join(DISCOVERED_MODELS[:3])}")
    return DISCOVERED_MODELS


class GeminiDiagnostics:
    """Collects per-attempt outcomes so a failure can be reported in detail."""

    def __init__(self):
        self.attempts = []

    def add_attempt(self, model: str, status: int, error: Optional[str] = None,
                    response_text: Optional[str] = None):
        """Append one attempt; the response text is truncated to 200 characters."""
        self.attempts.append({
            "model": model,
            "status": status,
            "error": error,
            "response_preview": response_text[:200] if response_text else None,
        })

    def get_report(self) -> str:
        """Return a numbered, one-line-per-attempt text report."""
        lines = ["DIAGNÓSTICO GEMINI:\n"]
        for i, a in enumerate(self.attempts):
            entry = f" {i+1}. [{a['model']}] Status: {a['status']}"
            if a['error']:
                entry += f" | Error: {a['error']}"
            if a['response_preview']:
                entry += f" | Resp: {a['response_preview']}"
            lines.append(entry + "\n")
        return "".join(lines)


async def resolve_url(url: str) -> str:
    """Follow redirects for ``url`` and return the final URL without trailing slashes.

    The first request is always made (with redirect following enabled); further
    hops are only attempted while the current domain is a known shortener, up
    to 5 hops. Any request error ends resolution and the last known URL is used.
    Non-string input is returned unchanged.
    """
    if not isinstance(url, str):
        return url

    final_url, max_hops, current_hop = url, 5, 0
    async with httpx.AsyncClient(follow_redirects=True, timeout=8) as client:
        while current_hop < max_hops:
            try:
                current_domain = final_url.split("//")[-1].split("/")[0].lower()
                if current_hop > 0 and current_domain not in _SHORTENER_DOMAINS:
                    break
                resp = await client.head(final_url, timeout=5)
                new_url = str(resp.url)
                if new_url == final_url:
                    break
                final_url, current_hop = new_url, current_hop + 1
            except Exception:
                # Best-effort: keep the last URL we reached. Not a bare except,
                # so cancellation of the surrounding task still propagates.
                break
    # Mandate 34: Prevent multiple trailing slashes using centralized utility
    return sanitize_trailing_slashes(final_url)


def clean_toc_text(text: str) -> str:
    """
    Ensures technical titles and TOC entries are robust.
    Strips emojis, replaces ampersands, and removes special chars.
    """
    if not text:
        return ""
    # 1. Replace ampersands
    text = text.replace("&", "and")
    # 2. Strip Emojis (all code points outside the Basic Multilingual Plane)
    text = re.sub(r'[\U00010000-\U0010ffff]', '', text)
    # 3. Strip everything except word chars, whitespace, hyphens and dots
    text = re.sub(r'[^\w\s\-.]', '', text)
    return text.strip()


def sanitize_trailing_slashes(url: str) -> str:
    """
    Mandate 34: Enforces a ZERO trailing slash policy.
    Removes ALL trailing slashes and question marks from the end of the URL.
    Does NOT collapse slashes in the middle of the URL (to avoid breaking
    protocol or deep links). Values without '://' are returned unchanged.
    """
    if not url or '://' not in url:
        return url
    # Strip both characters as one set: stripping '/' and then '?' separately
    # left a trailing slash behind on inputs such as "https://a.com/?".
    return url.rstrip('/?')


def normalize_url(url: str) -> str:
    """
    High-precision URL normalization for Nubenetes.
    Preserves line anchors (#L...) and does not lowercase deep paths.
    Returns "" for empty input.
    """
    if not url:
        return ""

    # 0. Mandate 34: Cleanup redundant slashes first
    url = sanitize_trailing_slashes(url)

    # 1. Split off the fragment; only line anchors such as #L123 are kept.
    fragment = ""
    if "#" in url:
        url, fragment = url.split("#", 1)
        if not re.match(r'^L\d+', fragment):
            fragment = ""

    # 2. Drop social tracking parameters (UTM, etc.). The query is filtered
    #    parameter by parameter so that removing the FIRST parameter does not
    #    also remove the '?' and leave a dangling "&next=..." behind.
    base, has_query, query = url.partition("?")
    if has_query:
        kept = [p for p in query.split("&") if not _TRACKING_PARAM_RE.fullmatch(p)]
        url = f"{base}?{'&'.join(kept)}" if kept else base

    # Mandate 34: Remove all trailing slashes and question marks for internal
    # canonical comparison
    url = url.rstrip("/?")

    # 3. Normalize protocol and domain (case-insensitive)
    match = re.match(r'^(https?://)([^/]+)(.*)', url, re.IGNORECASE)
    if match:
        _proto, domain, path = match.groups()
        # The domain is case-insensitive; the path may be case-sensitive.
        url = f"https://{domain.lower()}{path}"

    return f"{url}#{fragment}" if fragment else url


def is_fuzzy_duplicate(url_a: str, url_b: str) -> bool:
    """Return True when both URLs normalize to the same canonical form."""
    return normalize_url(url_a) == normalize_url(url_b)


def _parse_json_payload(text: str) -> Any:
    """Decode the first {...} or [...] span in ``text``; ``_NO_JSON`` on failure.

    A non-empty decoded list is reduced to its first element.
    """
    match = re.search(r'\{.*\}|\[.*\]', text, re.DOTALL)
    if not match:
        return _NO_JSON
    try:
        data = json.loads(match.group(0))
    except (ValueError, RecursionError):
        return _NO_JSON
    return data[0] if isinstance(data, list) and len(data) > 0 else data


async def call_gemini_with_retry(prompt: str, response_format: str = "json", max_retries: int = 3,
                                 prefer_flash: bool = False, use_grounding: bool = False):
    """Send ``prompt`` to Gemini, rotating keys and models until one succeeds.

    Args:
        prompt: Text sent as the single content part.
        response_format: ``"json"`` to extract and decode JSON from the reply;
            any other value returns the raw reply text.
        max_retries: Number of extra rounds over all keys after the first.
        prefer_flash: Try models whose name contains "flash"/"lite" first.
        use_grounding: Include the ``google_search_retrieval`` tool.

    Returns:
        The decoded JSON value (first element if it is a non-empty list) or the
        raw text, depending on ``response_format``.

    Raises:
        ValueError: No API keys are configured.
        Exception: Every round failed; the message contains the attempt report.
    """
    global CURRENT_KEY_INDEX, GLOBAL_COOLDOWN_UNTIL
    if not GEMINI_API_KEYS:
        raise ValueError("No GEMINI_API_KEYS configured.")

    models_pool = await discover_optimal_models()

    # 1. Smart Re-ordering (sorted() is stable, so the discovery order is kept
    #    within each group)
    if prefer_flash:
        models = sorted(models_pool, key=lambda m: 0 if "flash" in m or "lite" in m else 1)
    else:
        models = models_pool
    has_pro_model = any("pro" in m for m in models)

    total_keys = len(GEMINI_API_KEYS)

    # Per-call retry state. These names were used but never initialized, which
    # raised UnboundLocalError/NameError on the first 429 or on final failure.
    diagnostics = GeminiDiagnostics()
    consecutive_429s = 0
    base_wait_time = 2.0  # seconds. NOTE(review): value chosen here -- confirm.
    max_backoff = 60.0    # seconds; keeps the exponential 429 backoff bounded.

    # --- TOOL ENABLING (MCP-LIKE GROUNDING) ---
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "tools": [{"google_search_retrieval": {}}] if use_grounding else [],
    }

    async with GLOBAL_AI_SEMAPHORE:
        for attempt_round in range(max_retries + 1):
            now = time.time()
            if now < GLOBAL_COOLDOWN_UNTIL:
                await asyncio.sleep(GLOBAL_COOLDOWN_UNTIL - now)

            # Snapshot the rotation start: CURRENT_KEY_INDEX is reassigned on
            # every 200 below, and using the live value as the loop base made
            # the rotation skip some keys and revisit others.
            start_idx = CURRENT_KEY_INDEX
            for key_offset in range(total_keys):
                current_idx = (start_idx + key_offset) % total_keys
                api_key = GEMINI_API_KEYS[current_idx]

                async with httpx.AsyncClient() as client:
                    for model in models:
                        if THROTTLED_MODELS.get(model, 0) > time.time():
                            continue

                        api_url = (
                            f"https://generativelanguage.googleapis.com/{GEMINI_API_VERSION}/"
                            f"models/{model}:generateContent?key={api_key}"
                        )
                        try:
                            response = await client.post(api_url, json=payload, timeout=50)
                            try:
                                resp_json = response.json()
                            except ValueError:
                                resp_json = {}
                            if not isinstance(resp_json, dict):
                                resp_json = {}

                            status = response.status_code
                            usage = resp_json.get("usageMetadata", {})
                            SESSION_TRACKER.track_call(current_idx, model, status, usage)

                            if status == 200:
                                CURRENT_KEY_INDEX = current_idx
                                candidates = resp_json.get('candidates')
                                if not candidates:
                                    diagnostics.add_attempt(model, 200, "No candidates")
                                    break  # next key

                                text_resp = candidates[0]['content']['parts'][0]['text']
                                if response_format != "json":
                                    return text_resp

                                parsed = _parse_json_payload(text_resp)
                                if parsed is not _NO_JSON:
                                    return parsed

                                # QUALITY UPGRADE: If flash failed parsing, don't give
                                # up on the key, try a Pro model
                                if ("flash" in model or "lite" in model) and has_pro_model:
                                    diagnostics.add_attempt(
                                        model, 200, "Flash JSON error - Upgrading to Pro...")
                                    continue
                                diagnostics.add_attempt(model, 200, "JSON not found")
                                break  # next key

                            elif status == 429:
                                consecutive_429s += 1
                                # 2. ADAPTIVE TIERING: Mark this specific model as throttled
                                throttle_duration = 30 if "pro" in model else 15
                                THROTTLED_MODELS[model] = time.time() + throttle_duration
                                # 3. GLOBAL THROTTLING: Slow down entire engine
                                GLOBAL_COOLDOWN_UNTIL = time.time() + 3.0

                                backoff = base_wait_time * (1.8 ** (consecutive_429s - 1))
                                wait = min(backoff, max_backoff) + random.uniform(1.0, 2.0)
                                log_event(f" [!] API 429 on `{model}` (Key {current_idx+1}). Tiering down & backing off {wait:.1f}s...")
                                await asyncio.sleep(wait)
                                # Continue to next model in current key (likely Flash)
                                continue

                            elif status == 404:
                                diagnostics.add_attempt(model, 404, "Not Found")
                                break
                            elif status in [500, 503, 504]:
                                diagnostics.add_attempt(model, status, "Server Error")
                                continue
                            else:
                                diagnostics.add_attempt(model, status, "API Error", response.text)
                                break

                        except Exception as e:
                            SESSION_TRACKER.track_call(current_idx, model, 0, {})
                            diagnostics.add_attempt(model, 0, str(e))
                            break

            if attempt_round < max_retries:
                wait_round = base_wait_time * (2 ** attempt_round)
                log_event(f" [!] Exhausted tier options in round {attempt_round+1}. Cooling down {wait_round}s...")
                await asyncio.sleep(wait_round)

    raise Exception(f"Critical Gemini failure after adaptive tiering.\n{diagnostics.get_report()}")