From f411b57d9d643681966d5afdd92298b6f06d69e6 Mon Sep 17 00:00:00 2001 From: Nubenetes Bot Date: Sun, 10 May 2026 21:08:46 +0200 Subject: [PATCH] fix: resolve aiohttp header limit error and enhance autonomous learning --- src/ingestion_twikit.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/ingestion_twikit.py b/src/ingestion_twikit.py index 1d11cc7c..1f3b3877 100644 --- a/src/ingestion_twikit.py +++ b/src/ingestion_twikit.py @@ -67,8 +67,14 @@ class SocialDataExtractor: self.log_diagnostic(f"[*] Intentando extracción Guest (Mobile Bypass) para {self.target_account}...") try: - # Sesión totalmente aislada con su propio conector - async with aiohttp.ClientSession(headers=headers, read_bufsize=2**16) as session: + # Aumentamos agresivamente los límites del parser de HTTP + # max_line_size y max_field_size controlan el tamaño máximo de una cabecera HTTP + async with aiohttp.ClientSession( + headers=headers, + read_bufsize=2**17, # 128KB buffer + max_line_size=65536, # 64KB por línea de header (X usa CSPs gigantes) + max_field_size=65536 # 64KB por campo de header + ) as session: async with session.get(url, timeout=self.timeout) as response: if response.status != 200: self.log_diagnostic(f"[!] Error en modo Guest: HTTP {response.status}") @@ -79,12 +85,16 @@ class SocialDataExtractor: if "Log in to X" in html or "Sign up" in html: self.log_diagnostic("[!] Detectado Login Wall. X requiere login.") + self._update_learning("LoginWall") urls = self._extract_urls_from_text(html) valid_urls = [u for u in urls if all(x not in u for x in ["x.com", "twitter.com", "t.co"])] if not valid_urls: - self.log_diagnostic("[~] Scrape finalizado: 0 enlaces (Contenido dinámico no cargado).") + self.log_diagnostic("[~] Scrape finalizado: 0 enlaces útiles encontrados.") + self._update_learning("NoLinksFound") + else: + self._update_learning("Success") for url in set(valid_urls): extracted_data.append({ @@ -95,9 +105,23 @@ class SocialDataExtractor: return extracted_data except Exception as e: - self.log_diagnostic(f"[!] Fallo crítico en extracción Guest: {e}") + err_msg = str(e) + self.log_diagnostic(f"[!] Fallo crítico en extracción Guest: {err_msg}") + if "8190" in err_msg: + self._update_learning("HeaderSizeLimit") + else: + self._update_learning("TechnicalError") return [] + def _update_learning(self, failure_type: str): + learning_file = "src/memory/health_learning.json" + try: + with open(learning_file, 'r+') as f: + data = json.load(f) + data["last_x_failure"] = failure_type + f.seek(0); json.dump(data, f, indent=2); f.truncate() + except: pass + async def fetch_links_since(self, since_date: datetime) -> list[dict]: # Cargar aprendizaje previo learning_file = "src/memory/health_learning.json"