fix: resolve V2 optimizer issues (models 404, parsing errors, empty exceptions)

This commit is contained in:
Nubenetes Bot
2026-05-15 01:08:45 +02:00
parent e48d56a1d5
commit 603ef5d5a5
4 changed files with 71 additions and 39 deletions

View File

@@ -16,9 +16,10 @@ This file contains the accumulated instructions and long-term vision for the aut
10. **Official Language (English Only)**: All injected content (titles, descriptions, headers), execution logs, and automated communications (PRs) MUST be exclusively in ENGLISH. Nubenetes is a global resource and linguistic consistency is critical.
11. **Workflow-Config Synchronization**: The GitHub Actions curation workflow form (`agentic_cron.yml`) MUST remain perfectly synchronized with the curation sources configuration file (`data/curation_sources.yaml`). Any addition, removal, or renaming of topics/categories in the configuration file requires a corresponding update to the workflow's input fields (checkboxes) to ensure users can toggle those sources manually. This maintains consistency between data-driven sources and the UI trigger.
12. **V2 Elite Maintenance**: The Nubenetes V2 (Agentic Elite) edition is a derived view of the V1 archive. It is managed via the `src/v2_optimizer.py` script and stored in the `v2-docs/` directory. AI agents MUST NOT modify `v2-docs/` directly via standard curation workflows; they must only use the `agentic_v2_builder.yml` workflow to perform the periodic "Elite Selection" process. Standard curation and cleaning workflows must always target the `docs/` directory as the primary source of truth.
13. **Detailed Logging for V2**: When running the V2 Optimizer, agents MUST use unbuffered logging and detailed output messages. If the optimizer reports '0 links kept', the agent MUST investigate the logs to determine whether the empty result was caused by the AI selection itself or by a parsing/API error.
## 🛠️ Structural Evolution & Navigation
...
* **No Link Limits**: There are NO hard limits on the number of links per page or per section (##/###). Nubenetes is built to host thousands of references.
* **TOC Consistency**: Every `.md` page (including the main index `docs/index.md`) MUST maintain an internal Table of Contents (TOC) at the beginning. This TOC must include all sections (##) and subsections (###) nested correctly using a numbered list format with working anchors.
* **Relative References & Anchors**:

View File

@@ -27,10 +27,10 @@ GH_TOKEN = os.getenv("GH_TOKEN")
# Gemini Configuration (May 2026)
GEMINI_API_VERSION = "v1beta"
GEMINI_MODELS = [
"gemini-2.5-flash-lite",
"gemini-2.0-flash",
"gemini-1.5-flash",
"gemini-2.5-flash"
"gemini-1.5-pro",
"gemini-2.0-flash-exp",
"gemini-1.5-flash-latest"
]
TARGET_REPO = "nubenetes/awesome-kubernetes"

View File

@@ -148,7 +148,10 @@ async def call_gemini_with_retry(prompt: str, response_format: str = "json", max
break
except Exception as e:
diagnostics.add_attempt(model, 0, f"Exception: {str(e)}")
import traceback
error_msg = f"{type(e).__name__}: {str(e)}"
diagnostics.add_attempt(model, 0, error_msg)
log_event(f" [!] Model {model} failed with exception: {error_msg}")
break
if key_blocked:

View File

@@ -40,47 +40,75 @@ class V2Optimizer:
# If no links, just copy the structure/headers
headers = [l for l in content.splitlines() if l.startswith("#")]
with open(v2_path, "w") as f:
f.write("\n".join(headers) + "\n\n*Content coming soon as part of the 2026 Agentic Elite curation.*")
v2_header = f"# {filename.replace('.md', '').capitalize()} (Elite Selection)\n\n"
v2_header += "!!! info \"Note\"\n This category is currently under review by our Agentic AI.\n\n"
f.write(v2_header + "\n".join(headers))
return
formatted_links = []
for title, url, desc in links:
formatted_links.append(f"- [{title}]({url}) {desc.strip()}")
log_event(f"[*] V2 Optimizer: Analyzing {len(formatted_links)} links in {filename}")
prompt = (
f"{self.elite_criteria}\n"
f"FILE: {filename}\n"
f"LINKS TO EVALUATE:\n" + "\n".join(formatted_links[:100]) + "\n\n"
"Respond ONLY with a JSON list of indices to KEEP. "
"Example: [0, 5, 22]. Remember to ALWAYS keep 'Awesome' repos."
)
try:
indices = await call_gemini_with_retry(prompt)
if not isinstance(indices, list): indices = []
pre_selected_indices = []
for i, (title, url, desc) in enumerate(links):
link_text = f"[{title}]({url}) {desc.strip()}"
formatted_links.append(f"{i}. {link_text}")
selected_links = [formatted_links[i] for i in indices if i < len(formatted_links)]
# Reconstruct V2 file
v2_content = f"# {filename.replace('.md', '').capitalize()} (Elite Selection)\n\n"
v2_content += "!!! abstract \"2026 Agentic Vision\"\n"
v2_content += " This page contains a curated selection of top-tier resources, strictly filtered by our Agentic AI for high impact and modern relevance.\n\n"
if selected_links:
v2_content += "## Selected Resources\n"
v2_content += "\n".join(selected_links)
else:
v2_content += "\n*No resources met the elite criteria for this specific category yet.*"
# MANDATE: Always keep Awesome lists
if "awesome" in title.lower() or "awesome" in url.lower():
pre_selected_indices.append(i)
with open(v2_path, "w") as f:
f.write(v2_content)
log_event(f"[*] V2 Optimizer: Analyzing {len(formatted_links)} links in {filename} (Pre-selected Awesome: {len(pre_selected_indices)})")
# Split into manageable chunks if too many links
MAX_LINKS_PER_PROMPT = 150
all_selected_indices = set(pre_selected_indices)
for chunk_start in range(0, len(formatted_links), MAX_LINKS_PER_PROMPT):
chunk = formatted_links[chunk_start:chunk_start + MAX_LINKS_PER_PROMPT]
prompt = (
f"{self.elite_criteria}\n"
f"FILE: {filename}\n"
f"LINKS TO EVALUATE (Indices {chunk_start} to {chunk_start + len(chunk) - 1}):\n" + "\n".join(chunk) + "\n\n"
"Respond ONLY with a JSON object: {\"keep_indices\": [int, int, ...]}\n"
"Example: {\"keep_indices\": [0, 5, 22]}"
)
try:
log_event(f" [>] Requesting AI selection for chunk {chunk_start}...")
response_data = await call_gemini_with_retry(prompt)
log_event(f" [OK] V2 file generated: {v2_path} ({len(selected_links)} links kept)")
# Robust parsing of keep_indices
indices = []
if isinstance(response_data, dict):
indices = response_data.get("keep_indices", [])
elif isinstance(response_data, list):
indices = response_data
for idx in indices:
try:
all_selected_indices.add(int(idx))
except: continue
except Exception as e:
log_event(f" [!] AI error on chunk {chunk_start}: {e}")
except Exception as e:
log_event(f" [!] Error optimizing {filename} for V2: {e}")
# Final reconstruction
selected_links = [links[i] for i in sorted(list(all_selected_indices)) if i < len(links)]
v2_content = f"# {filename.replace('.md', '').capitalize()} (Elite Selection)\n\n"
v2_content += "!!! abstract \"2026 Agentic Vision\"\n"
v2_content += " This page contains a curated selection of top-tier resources, strictly filtered by our Agentic AI for high impact and modern relevance.\n\n"
if selected_links:
v2_content += "## Selected Resources\n"
for title, url, desc in selected_links:
v2_content += f" - [{title}]({url}){desc}\n"
else:
v2_content += "\n*No resources met the elite criteria for this specific category yet.*"
with open(v2_path, "w") as f:
f.write(v2_content)
log_event(f" [OK] V2 file generated: {v2_path} (Total kept: {len(selected_links)})")
async def run_full_optimization(self):
log_event("STARTING V2 AGENTIC OPTIMIZATION (THE ARCHITECT'S CUT)", section_break=True)