From 21a892ca403feeb2a0eb8af958be782495e1487f Mon Sep 17 00:00:00 2001
From: Nubenetes Bot <bot@nubenetes.com>
Date: Mon, 11 May 2026 00:33:21 +0200
Subject: [PATCH] feat: implement advanced search strategy for robust
 historical extraction

---
 src/ingestion_twikit.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/ingestion_twikit.py b/src/ingestion_twikit.py
index bb7c8dac..22f1b8a2 100644
--- a/src/ingestion_twikit.py
+++ b/src/ingestion_twikit.py
@@ -75,7 +75,13 @@ class SocialDataExtractor:
                         await context.add_cookies(formatted)
                     except: pass
 
-                await page.goto(f"https://x.com/{self.target_account}", wait_until="domcontentloaded", timeout=90000)
+                import urllib.parse
+                search_query = f"from:{self.target_account} since:{since_date.date().isoformat()}"
+                encoded_query = urllib.parse.quote(search_query)
+                search_url = f"https://x.com/search?q={encoded_query}&f=live"
+                
+                self.log_audit("Advanced Search", None, f"Query: {search_query}")
+                await page.goto(search_url, wait_until="domcontentloaded", timeout=90000)
                 await asyncio.sleep(15)
                 
                 stop_scrolling = False
@@ -87,6 +93,10 @@ class SocialDataExtractor:
                 while not stop_scrolling and scroll_count < max_scrolls:
                     articles = await page.query_selector_all('article[data-testid="tweet"]')
                     
+                    if not articles and scroll_count > 5:
+                        self.log_audit("Extraction", False, "No se detectan más tweets en la búsqueda.")
+                        break
+
                     for article in articles:
                         # 1. Ignorar Pinned Posts (Post Fijo)
                         social_context = await article.query_selector('[data-testid="socialContext"]')
@@ -138,7 +148,7 @@ class SocialDataExtractor:
                     scroll_count += 1
                 
                 if not stop_scrolling and scroll_count >= max_scrolls:
-                    self.log_audit("Scrolling", False, f"Alcanzado límite de scrolls ({max_scrolls}) sin llegar a la fecha objetivo.")
+                    self.log_audit("Scrolling", False, f"Alcanzado límite de scrolls ({max_scrolls}) en búsqueda avanzada.")
                 
                 await browser.close()