feat(data): implement Unified Metadata Architecture (inventory & structure map) and document Knowledge Graph

This commit is contained in:
Nubenetes Bot
2026-05-16 11:34:11 +02:00
parent e64262b0f4
commit dfb2804e22
7 changed files with 221811 additions and 12 deletions

View File

@@ -36,6 +36,11 @@ This file contains the accumulated instructions and long-term vision for the aut
- **Flat Asset Routing**: To avoid depth-related path breakage, both V1 (`mkdocs.yml`) and V2 (`v2-mkdocs.yml`) MUST have `use_directory_urls: false`. This ensures relative paths (e.g., `images/img.png`) resolve correctly regardless of the page depth.
20. **V2 Navigation Design**: The V2 top navigation bar MUST maintain a flat structure. All dimensions and categories must be top-level tabs in `v2-mkdocs.yml` to ensure direct discoverability and avoid nested groupings like "Categories".
21. **V2 Impact-Driven Sorting**: The V2 portal MUST prioritize **relevance (Impact) over dates** within sections to provide high-density technical value. Sorting MUST follow: 1. Stars/Relevance (DESC), 2. Year (DESC). The mission statement and descriptions MUST reflect this impact-driven synthesis.
22. **Unified Metadata Database (Local Storage)**: All link metadata MUST be managed via the local YAML database in `data/`.
- **`inventory.yaml`**: The primary source of truth for years, stars (0-5), and descriptions.
- **`structure_map.yaml`**: Tracks link locations and visual formatting (bold/highlight) across V1 and V2.
- **Persistence**: Every agent MUST load these files at startup and save any modifications immediately to ensure state continuity across workflows.
- **Manual Priority**: AI agents MUST NOT overwrite existing manual descriptions in the V1 archive files. Enrichment is strictly for `inventory.yaml` and the V2 portal.
## 🛠️ Structural Evolution & Navigation
...

View File

@@ -204,15 +204,38 @@ To maintain the high-density quality of V2 without redundant AI costs, the `V2Vi
3. **UI Polish**: Implements strategic highlighting (`==text==`) for top-tier resources and a clean chronological view that hides unknown dates.
4. **Flat Routing**: Both versions use `use_directory_urls: false` to ensure relative asset paths (`images/`) remain stable across all sub-pages.
### Comparison Matrix
| Feature | V1 (Exhaustive) | V2 (Elite) |
| :--- | :--- | :--- |
| **Philosophy** | "Leave no resource behind" | "Only the best for 2026" |
| **Volume** | High (17k+ Links) | Optimized (~2k Links) |
| **Depth** | Historical & Wide | Cutting-edge & Deep |
| **Chronology** | **Unified Engine** (YYYY) | **Unified Engine** (YYYY) |
| **Filtering** | Basic (Health only) | AI-Scored (🌟🌟🌟) |
| **MVQ Check** | No (Exhaustive Preservation) | Yes (Stale repos deprioritized) |
## 📊 The Unified Agentic Database (Knowledge Graph)
Nubenetes now utilizes a **Unified Metadata Architecture** to maintain consistency across V1 and V2 while optimizing AI performance. All links are indexed in a local YAML database that serves as the "Memory" for our autonomous agents.
### Database Components
1. **Central Inventory (`data/inventory.yaml`)**: Stores global technical metadata.
* `title`, `year`, `stars` (0-5), `description` (V1), and `ai_summary` (V2 Elite).
2. **Structure Map (`data/structure_map.yaml`)**: Tracks the physical presence and formatting of links.
* Tracks which `.md` pages contain the link in V1 and V2.
* Stores visual state: `is_bold`, `is_highlighted` (`==`).
### Agentic Data Flow
```mermaid
graph TD
AC[Agentic Curator] -->|New Resource| DB[(Unified DB)]
LC[Link Cleaner] -->|Health & Metadata| DB
V2[V2 Vision Engine] -->|Elite Selection| DB
DB -->|Metadata Sync| V1[V1 Archive: docs/]
DB -->|Advanced UI| V2P[V2 Portal: v2-docs/]
subgraph Local Storage
DB1[inventory.yaml]
DB2[structure_map.yaml]
end
```
### Strategic Benefits
- **Zero Redundancy**: Links already analyzed by Gemini are never re-evaluated unless forced.
- **Visual Consistency**: Highlighting (`==`) and Bold formatting are managed via the database to ensure high-signal discovery.
- **Cross-Edition Sync**: A metadata update in the YAML propagates to both V1 and V2 on the next build cycle.
- **Manual Priority**: Existing V1 descriptions are protected; AI only intervenes for new additions or V2-specific enrichment.
---

File diff suppressed because it is too large Load Diff

142744
data/structure_map.yaml Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -112,7 +112,7 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
"stars": min(max(score // 20, 0), 5),
"last_checked": datetime.now().timestamp()
}
self._save_inventory()
self._save_inventory(); self._save_structure_map()
except: pass
log_event(f" [+] ACCEPTED: \"{data['title']}\" (Score: {score})")
log_event(f" Primary: {primary_cat} | Related: {', '.join(related_cats)}")
@@ -133,6 +133,7 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
INVENTORY_PATH = "data/inventory.yaml"
STRUCTURE_MAP_PATH = "data/structure_map.yaml"
class AgenticCurator:
def __init__(self):
@@ -142,6 +143,7 @@ class AgenticCurator:
self.index_path = "docs/index.md"
self.stats = {"orphans_linked": 0}
self.inventory = self._load_inventory()
self.structure_map = self._load_structure_map()
def _load_inventory(self) -> dict:
if os.path.exists(INVENTORY_PATH):
@@ -157,7 +159,23 @@ class AgenticCurator:
with open(INVENTORY_PATH, "w") as f:
import yaml
yaml.dump(self.inventory, f, sort_keys=False, allow_unicode=True)
def _load_structure_map(self) -> dict:
    """Load the structure map from ``STRUCTURE_MAP_PATH``.

    Returns:
        The parsed YAML mapping, or an empty dict when the file is missing,
        empty, unreadable, or does not hold a mapping at its top level.
    """
    if not os.path.exists(STRUCTURE_MAP_PATH):
        return {}
    try:
        import yaml
        # Explicit UTF-8: the file is dumped with allow_unicode=True, so it
        # can contain non-ASCII text a locale-default codec may not decode.
        with open(STRUCTURE_MAP_PATH, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception as exc:
        # Loading stays best-effort, but KeyboardInterrupt/SystemExit are no
        # longer swallowed and the failure is reported: an empty map returned
        # here can later be written back over the real file.
        log_event(f"  [!] Could not load {STRUCTURE_MAP_PATH}: {exc}")
        return {}
    # The annotation promises a dict; a scalar or list document is unusable.
    return data if isinstance(data, dict) else {}

def _save_structure_map(self):
    """Write ``self.structure_map`` to ``STRUCTURE_MAP_PATH`` as YAML.

    The data is dumped to a sibling temporary file which then replaces the
    target, so a failure during the dump leaves the previous file intact.

    Raises:
        OSError: If the directory or file cannot be written.
        yaml.YAMLError: If the structure map cannot be serialized.
    """
    import yaml
    target_dir = os.path.dirname(STRUCTURE_MAP_PATH)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    tmp_path = f"{STRUCTURE_MAP_PATH}.tmp"
    try:
        # Explicit UTF-8 keeps allow_unicode=True output writable regardless
        # of the platform's default encoding.
        with open(tmp_path, "w", encoding="utf-8") as f:
            yaml.dump(self.structure_map, f, sort_keys=False, allow_unicode=True)
        os.replace(tmp_path, STRUCTURE_MAP_PATH)
    finally:
        # After a successful replace the temp file is gone; this only
        # cleans up the leftover from a failed dump.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
self.inventory = self._load_inventory()
self.structure_map = self._load_structure_map()
async def _rebuild_toc(self, content: str) -> str:
"""

View File

@@ -16,6 +16,7 @@ from src.logger import log_event
CORE_FILES = ["docs/index.md", "README.md"]
MEMORY_FILE = "src/memory/health_learning.json"
INVENTORY_PATH = "data/inventory.yaml"
STRUCTURE_MAP_PATH = "data/structure_map.yaml"
class IntelligentLinkCleaner:
def __init__(self):
@@ -27,6 +28,7 @@ class IntelligentLinkCleaner:
self.description_updates: Dict[str, str] = {}
self.learning_data = self._load_memory()
self.inventory = self._load_inventory()
self.structure_map = self._load_structure_map()
self.action_log: List[Dict] = []
self.detailed_stats = {
"total_scanned": 0,
@@ -63,6 +65,21 @@ class IntelligentLinkCleaner:
import yaml
yaml.dump(self.inventory, f, sort_keys=False, allow_unicode=True)
def _load_structure_map(self) -> dict:
    """Load the structure map from ``STRUCTURE_MAP_PATH``.

    Returns:
        The parsed YAML mapping, or an empty dict when the file is missing,
        empty, unreadable, or does not hold a mapping at its top level.
    """
    if not os.path.exists(STRUCTURE_MAP_PATH):
        return {}
    try:
        import yaml
        # Explicit UTF-8: the file is dumped with allow_unicode=True, so it
        # can contain non-ASCII text a locale-default codec may not decode.
        with open(STRUCTURE_MAP_PATH, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception as exc:
        # Loading stays best-effort, but KeyboardInterrupt/SystemExit are no
        # longer swallowed and the failure is reported: an empty map returned
        # here can later be written back over the real file.
        log_event(f"  [!] Could not load {STRUCTURE_MAP_PATH}: {exc}")
        return {}
    # The annotation promises a dict; a scalar or list document is unusable.
    return data if isinstance(data, dict) else {}

def _save_structure_map(self):
    """Write ``self.structure_map`` to ``STRUCTURE_MAP_PATH`` as YAML.

    The data is dumped to a sibling temporary file which then replaces the
    target, so a failure during the dump leaves the previous file intact.

    Raises:
        OSError: If the directory or file cannot be written.
        yaml.YAMLError: If the structure map cannot be serialized.
    """
    import yaml
    target_dir = os.path.dirname(STRUCTURE_MAP_PATH)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    tmp_path = f"{STRUCTURE_MAP_PATH}.tmp"
    try:
        # Explicit UTF-8 keeps allow_unicode=True output writable regardless
        # of the platform's default encoding.
        with open(tmp_path, "w", encoding="utf-8") as f:
            yaml.dump(self.structure_map, f, sort_keys=False, allow_unicode=True)
        os.replace(tmp_path, STRUCTURE_MAP_PATH)
    finally:
        # After a successful replace the temp file is gone; this only
        # cleans up the leftover from a failed dump.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
async def _fetch_github_metadata(self, url: str) -> Dict:
match = re.search(r'github\.com/([^/]+)/([^/]+)', url)
if not match: return {}
@@ -300,7 +317,7 @@ class IntelligentLinkCleaner:
if not is_alive:
self.dead_links[url] = (fallback if fallback else "DEAD", reason)
log_event(f" [!] DEAD: {url} -> {reason} {'(Fallback: ' + fallback + ')' if fallback else ''}")
self._save_memory(); self._save_inventory()
self._save_memory(); self._save_inventory(); self._save_structure_map()
async def apply_changes(self):
log_event("APPLYING INTELLIGENT CLEANING & PR GENERATION...", section_break=True)

View File

@@ -13,6 +13,7 @@ from src.logger import log_event
V1_DIR = "docs"
V2_DIR = "v2-docs"
INVENTORY_PATH = "data/inventory.yaml"
STRUCTURE_MAP_PATH = "data/structure_map.yaml"
class V2VisionEngine:
def __init__(self):
@@ -51,6 +52,7 @@ class V2VisionEngine:
"- Style: Technical, neutral, and informative. Language: English only.\n"
)
self.inventory = self._load_inventory()
self.structure_map = self._load_structure_map()
def _load_inventory(self) -> Dict:
if os.path.exists(INVENTORY_PATH):
@@ -65,6 +67,21 @@ class V2VisionEngine:
with open(INVENTORY_PATH, "w") as f:
yaml.dump(self.inventory, f, sort_keys=False, allow_unicode=True)
def _load_structure_map(self) -> dict:
    """Load the structure map from ``STRUCTURE_MAP_PATH``.

    Returns:
        The parsed YAML mapping, or an empty dict when the file is missing,
        empty, unreadable, or does not hold a mapping at its top level.
    """
    if not os.path.exists(STRUCTURE_MAP_PATH):
        return {}
    try:
        import yaml
        # Explicit UTF-8: the file is dumped with allow_unicode=True, so it
        # can contain non-ASCII text a locale-default codec may not decode.
        with open(STRUCTURE_MAP_PATH, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception as exc:
        # Loading stays best-effort, but KeyboardInterrupt/SystemExit are no
        # longer swallowed and the failure is reported: an empty map returned
        # here can later be written back over the real file.
        log_event(f"  [!] Could not load {STRUCTURE_MAP_PATH}: {exc}")
        return {}
    # The annotation promises a dict; a scalar or list document is unusable.
    return data if isinstance(data, dict) else {}

def _save_structure_map(self):
    """Write ``self.structure_map`` to ``STRUCTURE_MAP_PATH`` as YAML.

    The data is dumped to a sibling temporary file which then replaces the
    target, so a failure during the dump leaves the previous file intact.

    Raises:
        OSError: If the directory or file cannot be written.
        yaml.YAMLError: If the structure map cannot be serialized.
    """
    import yaml
    target_dir = os.path.dirname(STRUCTURE_MAP_PATH)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    tmp_path = f"{STRUCTURE_MAP_PATH}.tmp"
    try:
        # Explicit UTF-8 keeps allow_unicode=True output writable regardless
        # of the platform's default encoding.
        with open(tmp_path, "w", encoding="utf-8") as f:
            yaml.dump(self.structure_map, f, sort_keys=False, allow_unicode=True)
        os.replace(tmp_path, STRUCTURE_MAP_PATH)
    finally:
        # After a successful replace the temp file is gone; this only
        # cleans up the leftover from a failed dump.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
async def analyze_and_cluster(self):
log_event("STARTING V2 HIGH-DENSITY CHRONOLOGICAL LIBRARY GENERATION", section_break=True)
all_v1_links, mosaic_html, videos_html = await self._gather_all_v1_content()
@@ -87,7 +104,7 @@ class V2VisionEngine:
await self._write_premium_files(v2_data, mosaic_html, videos_html)
await self._sync_enterprise_navigation(v2_data)
self._save_inventory()
self._save_inventory(); self._save_structure_map()
log_event("V2 LIBRARY GENERATION COMPLETED.", section_break=True)
async def _gather_all_v1_content(self) -> (List[Dict], str, str):