feat(ops): final mandate compliance sync - restore database-first reuse of cached insights, linguistic diversity in summaries, and GitHub repository consolidation

2026-07-28 01:21:41 +00:00 · 2026-05-17 13:58:51 +02:00
parent 644d9cc0cc
commit d376f86e7e
3 changed files with 62 additions and 11 deletions
--- a/src/agentic_curator.py
+++ b/src/agentic_curator.py
@@ -57,19 +57,35 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
    evaluations = {}
    curator = AgenticCurator()
    for i, asset in enumerate(raw_assets):
-        log_event(f"--- EVALUATING {i+1}/{len(raw_assets)}: {asset['url']} ---")
-        norm_url = normalize_url(asset["url"])
+        url = asset["url"]
+        log_event(f"--- EVALUATING {i+1}/{len(raw_assets)}: {url} ---")
+        norm_url = normalize_url(url)
        
+        # --- DATABASE-FIRST: Reuse insights ---
+        if norm_url in curator.inventory:
+            cached = curator.inventory[norm_url]
+            if cached.get("title") and cached.get("hierarchy"):
+                log_event(f"  [⚡] REUSING CACHED INSIGHTS: {cached['title']}")
+                from src.gemini_utils import SESSION_TRACKER
+                SESSION_TRACKER.track_cache_hit(est_tokens=2200)
+                evaluations[url] = {"status": "INCLUDED", **cached}
+                continue
+
        # 1. Fetch & Fingerprint
-        web_content, rich_meta = await _deep_fetch_content(asset["url"])
+        web_content, rich_meta = await _deep_fetch_content(url)
        content_hash = hashlib.sha256(web_content.encode()).hexdigest() if web_content else "N/A"
        
-        # 2. AI Logic
+        # 2. AI Logic (O'Reilly + Linguistic Diversity)
        is_primary = "nubenetes" in asset.get("source_type", "Social").lower()
        strictness = "BE EXTREMELY SELECTIVE.\n" if not is_primary else ""
        prompt = (
            "You act as a Senior Technical Librarian in 2026.\n" + strictness +
-            "Analyze the resource and respond ONLY with JSON: {\"impact_score\": int, \"pub_date\": \"YYYY-MM-DD\", \"primary_category\": \"cat\", \"related_categories\": [\"cat1\"], \"title\": \"...\", \"desc\": \"...\", \"en_summary\": \"...\", \"language\": \"...\", \"resource_type\": \"...\", \"complexity\": \"...\", \"technical_hierarchy\": [\"Area\", \"Topic\", ...], \"is_microservice\": bool}\n"
+            "PHASE 1: LINGUISTIC DIVERSITY (Mandate 10)\n" +
+            "- DESC (V1 Archive): Provide a professional summary in the RESOURCE'S NATIVE LANGUAGE.\n" +
+            "- EN_SUMMARY (V2 Portal): Provide a professional English synthesis.\n" +
+            "PHASE 2: ARCHITECTURAL CLASSIFICATION (O'REILLY STYLE)\n" +
+            "- Identify TECHNICAL_HIERARCHY: List (max 10 strings) Area > Topic > Subtopics.\n" +
+            "Respond ONLY with JSON: {\"impact_score\": int, \"pub_date\": \"YYYY-MM-DD\", \"primary_category\": \"cat\", \"title\": \"...\", \"desc\": \"...\", \"en_summary\": \"...\", \"language\": \"...\", \"resource_type\": \"...\", \"complexity\": \"...\", \"technical_hierarchy\": [\"Area\", ...], \"is_microservice\": bool}\n"
            f"CONTENT: {web_content[:2000]}"
        )
        
@@ -90,11 +106,11 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
                    "category": primary_cat, "status": "online", "last_checked": datetime.now().timestamp()
                }
                curator.inventory[norm_url] = eval_data
-                evaluations[asset["url"]] = {"status": "INCLUDED", **eval_data}
+                evaluations[url] = {"status": "INCLUDED", **eval_data}
                curator._save_inventory()
                log_event(f"  [+] ACCEPTED: {data['title']}")
            else:
-                evaluations[asset["url"]] = {"status": "FILTERED"}
+                evaluations[url] = {"status": "FILTERED"}
        except Exception as e: log_event(f"  [!] AI Error: {e}")
    return evaluations

--- a/src/intelligent_health_checker.py
+++ b/src/intelligent_health_checker.py
@@ -109,12 +109,27 @@ class IntelligentLinkCleaner:
                    text = resp.text.lower()
                    if any(kw in text for kw in parked_indicators): return False, "parked", None
                    return True, "OK", str(resp.url) if str(resp.url) != url else None
+                
+                # Definitive Failures
                if resp.status_code in [404, 410]:
+                    # AUTO-HEAL GitHub Branches (master -> main)
                    if "github.com" in url and "/master/" in url:
                        heal = url.replace("/master/", "/main/")
                        try:
-                            if (await client.get(heal)).status_code < 200: return True, "healed", heal
+                            if (await client.get(heal)).status_code < 400: return True, "healed", heal
                        except: pass
+                    
+                    # Mandate 8: Repository Consolidation
+                    if "github.com" in url:
+                        match = re.search(r'(https?://github\.com/[^/]+/[^/]+)', url)
+                        if match:
+                            root_url = match.group(1)
+                            if root_url != url:
+                                try:
+                                    if (await client.get(root_url)).status_code < 400:
+                                        return True, "consolidated_to_root", root_url
+                                except: pass
+
                    return False, "404", None
                return True, f"Soft Block {resp.status_code}", None
        except: return True, "Connection Error", None
--- a/src/v2_optimizer.py
+++ b/src/v2_optimizer.py
@@ -298,9 +298,29 @@ class V2VisionEngine:
                        img = f"    ![Preview]({l.get('social_preview_url')})\n" if l.get('social_preview_url') else ""
                        md += f"!!! note \"{title}\"\n{img}    **[Access Resource]({l['url']})** {'🌟'*l.get('stars',4)} | Level: {l.get('complexity', 'Beginner')}\n    \n    {l.get('ai_summary', l.get('description', ''))}\n\n"
                    else:
-                        date = f"**({l.get('year', 'N/A')})** "
-                        tags = f" <span class='md-tag md-tag--info'>⭐ {l.get('gh_stars',0)}</span>"
-                        md += f"  - {date}[{title}]({l['url']}){tags} {'🌟'*l.get('stars',0)}\n"
+                        year_prefix = f"**({l.get('year', 'N/A')})** "
+                        gh_info = f" <span class='md-tag md-tag--info'>⭐ {l.get('gh_stars',0)}</span>" if l.get('gh_stars') else ""
+                        icon = " 🎥" if l.get("is_video") else ""
+                        
+                        lang = l.get("language", "English")
+                        lang_tag = f" <span class='md-tag md-tag--warning'>[{lang.upper()} CONTENT]</span>" if lang.lower() != "english" else ""
+                        
+                        comp = l.get("complexity", "Intermediate")
+                        comp_tag = f" <span class='md-tag md-tag--critical'>[{comp.upper()} LEVEL]</span>" if comp.lower() in ["architect", "advanced"] else ""
+                        
+                        res_type = l.get("resource_type", "Reference")
+                        type_tag = f" <span class='md-tag md-tag--primary'>[{res_type.upper()}]</span>" if res_type.lower() in ["case study", "guide", "documentation"] else ""
+                        
+                        rich = "".join([
+                            f" <small>by **{l['author']}**</small>" if l.get("author") else "",
+                            f" <span class='md-tag md-tag--info'>⏱️ {l['duration']}</span>" if l.get("duration") else "",
+                            f" <span class='md-tag md-tag--info'>📖 {l['reading_time']}</span>" if l.get("reading_time") else ""
+                        ])
+                        
+                        tag = l.get("tag", "[COMMUNITY-TOOL]")
+                        color = "success" if "STANDARD" in tag else "warning" if "EMERGING" in tag else "info"
+                        
+                        md += f"  - {year_prefix}[{title}]({l['url']}){icon}{gh_info}{lang_tag}{comp_tag}{type_tag}{rich} {'🌟'*l.get('stars',0)} <span class='md-tag md-tag--{color}'>{tag}</span>\n"
                        if l.get('ai_summary'): md += f"\n      {l['ai_summary']}\n\n"
            return md