From dffc032f141b47a43be1359f2eb02d4de28cb9f1 Mon Sep 17 00:00:00 2001 From: Nubenetes Bot Date: Fri, 15 May 2026 17:59:17 +0200 Subject: [PATCH] feat: implement Incremental Elite Engine with automatic V1-to-V2 sync and sophisticated maturity tagging --- .github/workflows/agentic_v2_builder.yml | 13 ++++++++++++- GEMINI.md | 7 ++++++- src/v2_optimizer.py | 3 ++- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/.github/workflows/agentic_v2_builder.yml b/.github/workflows/agentic_v2_builder.yml index 62e09107..1e409c5e 100644 --- a/.github/workflows/agentic_v2_builder.yml +++ b/.github/workflows/agentic_v2_builder.yml @@ -2,7 +2,17 @@ name: Nubenetes V2 Agentic Builder on: workflow_dispatch: - # Automatically run after a successful curation run to sync V2 + inputs: + force_reevaluate: + description: 'Force AI re-evaluation (ignores cache for tags/years)' + type: boolean + default: false + # Automatic detection: Sync V2 whenever V1 archive is updated (manual or automated) + push: + branches: [ develop ] + paths: + - 'docs/**' + # Support automated sync after curation run workflow_run: workflows: ["Nubenetes Automated Agentic Curation"] types: @@ -37,6 +47,7 @@ jobs: GEMINI_API_KEY_1: ${{ secrets.GEMINI_API_KEY_1 }} GEMINI_API_KEY_2: ${{ secrets.GEMINI_API_KEY_2 }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + FORCE_EVAL: ${{ github.event.inputs.force_reevaluate }} PYTHONPATH: . run: | python -u src/v2_optimizer.py diff --git a/GEMINI.md b/GEMINI.md index a7624e76..5d73008f 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -15,7 +15,7 @@ This file contains the accumulated instructions and long-term vision for the aut 9. **URL Expansion**: All shortened links (t.co, bit.ly, buff.ly, etc.) MUST be expanded to their original long version before being evaluated or injected. This ensures inventory homogeneity and improves global deduplication precision. 10. **Official Language (English Only)**: All injected content (titles, descriptions, headers), execution logs, and automated communications (PRs) MUST be exclusively in ENGLISH. Nubenetes is a global resource and linguistic consistency is critical. 11. **Workflow-Config Synchronization**: The GitHub Actions curation workflow form (`agentic_cron.yml`) MUST remain perfectly synchronized with the curation sources configuration file (`data/curation_sources.yaml`). Any addition, removal, or renaming of topics/categories in the configuration file requires a corresponding update to the workflow's input fields (checkboxes) to ensure users can toggle those sources manually. This maintains consistency between data-driven sources and the UI trigger. -12. **V2 Elite Maintenance**: The Nubenetes V2 (Agentic Elite) edition is a derived view of the V1 archive. It is managed via the `src/v2_optimizer.py` script and stored in the `v2-docs/` directory. AI agents MUST NOT modify `v2-docs/` directly via standard curation workflows; they must only use the `agentic_v2_builder.yml` workflow to perform the periodic "Elite Selection" process. Standard curation and cleaning workflows must always target the `docs/` directory as the primary source of truth. +12. **V2 Elite Maintenance**: The Nubenetes V2 (Agentic Elite) edition is a derived view of the V1 archive. It is managed via the `src/v2_optimizer.py` script and stored in the `v2-docs/` directory. The `agentic_v2_builder.yml` workflow synchronizes V2 automatically whenever V1 (`docs/`) is updated (manually or via bot). Standard curation and cleaning workflows must always target the `docs/` directory as the primary source of truth. 13. **Detailed Logging for V2**: When running the V2 Optimizer, agents MUST use unbuffered logging and detailed output messages. If the optimizer returns '0 links kept', the agent MUST investigate the logs to determine if it was due to AI selection or a parsing/API error. 14. **Persistent V2 Caching**: The V2 Optimizer MUST use a persistent cache file (`data/v2_cache.json`) to store AI evaluations (year, quality, category). This is mandatory to minimize API costs and ensure execution speed across 15k+ links. 15. **GitHub Metadata Enrichment**: For all `github.com` resources, the bot MUST attempt to fetch real-time metadata (stars, last commit) using the GitHub API. This data must be included in the V2 rendering to provide current context. @@ -122,3 +122,8 @@ The bot must rotate between profiles to avoid detection: - **Relative Asset Routing**: Updated all V2 image and configuration paths to point relatively to `../docs/` to avoid asset duplication. - **Rendering & Path Resolution**: Implemented `
` and `use_directory_urls: false` across V1 and V2 to resolve persistent image path breakage and ensure proper Markdown rendering within HTML tags. - **Optimizer Alignment**: Hardened `src/v2_optimizer.py` to enforce these architectural rules (flat navigation, relative paths, and resilient V1 content extraction). + - **Incremental Elite Engine**: Implemented a sophisticated V2 sync strategy using `data/v2_cache.json`. + - **Automatic Detection**: The `agentic_v2_builder.yml` workflow now triggers automatically whenever `docs/` changes or after a curation run. + - **Cost Efficiency**: Only NEW links from V1 are sent to Gemini. Existing links use cached AI evaluations but are locally "upgraded" with real-time GitHub metadata (stars/dates) and dynamic maturity tagging. + - **Maturity Taxonomy**: Replaced generic labels with a professional 5-tier system (`[DE FACTO STANDARD]`, `[ENTERPRISE-STABLE]`, `[EMERGING]`, `[LEGACY]`, `[GUIDE]`) explained in the V2 Index. + - **Manual Control**: The workflow supports a `force_reevaluate` flag for full architectural refreshes. diff --git a/src/v2_optimizer.py b/src/v2_optimizer.py index 0aa148f1..a003a32e 100644 --- a/src/v2_optimizer.py +++ b/src/v2_optimizer.py @@ -208,6 +208,7 @@ class V2VisionEngine: async def _evaluate_and_score_resources(self, links: List[Dict]) -> List[Dict]: refined = [] to_evaluate = [] + force_eval = os.getenv("FORCE_EVAL", "false").lower() == "true" # We want to re-evaluate the tags and years, so we will bypass cache for tagging logic, # but use cache for AI stars if available to save cost. @@ -216,7 +217,7 @@ class V2VisionEngine: # To allow the new logic to apply to cached items, we re-process GitHub links # and re-apply the tag logic even if it's in the cache. item = l.copy() - if url in self.cache and "stars" in self.cache[url]: + if not force_eval and url in self.cache and "stars" in self.cache[url]: item.update(self.cache[url]) else: to_evaluate.append(item)