mirror of
https://github.com/nubenetes/awesome-kubernetes.git
synced 2026-05-22 00:53:37 +00:00
feat(data): implement Unified Metadata Architecture (inventory & structure map) and document Knowledge Graph
This commit is contained in:
@@ -36,6 +36,11 @@ This file contains the accumulated instructions and long-term vision for the aut
|
||||
- **Flat Asset Routing**: To avoid depth-related path breakage, both V1 (`mkdocs.yml`) and V2 (`v2-mkdocs.yml`) MUST have `use_directory_urls: false`. This ensures relative paths (e.g., `images/img.png`) resolve correctly regardless of the page depth.
|
||||
20. **V2 Navigation Design**: The V2 top navigation bar MUST maintain a flat structure. All dimensions and categories must be top-level tabs in `v2-mkdocs.yml` to ensure direct discoverability and avoid nested groupings like "Categories".
|
||||
21. **V2 Impact-Driven Sorting**: The V2 portal MUST prioritize **relevance (Impact) over dates** within sections to provide high-density technical value. Sorting MUST follow: 1. Stars/Relevance (DESC), 2. Year (DESC). The mission statement and descriptions MUST reflect this impact-driven synthesis.
|
||||
22. **Unified Metadata Database (Local Storage)**: All link metadata MUST be managed via the local YAML database in `data/`.
|
||||
- **`inventory.yaml`**: The primary source of truth for years, stars (0-5), and descriptions.
|
||||
- **`structure_map.yaml`**: Tracks link locations and visual formatting (bold/highlight) across V1 and V2.
|
||||
- **Persistence**: Every agent MUST load these files at startup and save any modifications immediately to ensure state continuity across workflows.
|
||||
- **Manual Priority**: AI agents MUST NOT overwrite existing manual descriptions in the V1 archive files. Enrichment is strictly for `inventory.yaml` and the V2 portal.
|
||||
|
||||
## 🛠️ Structural Evolution & Navigation
|
||||
...
|
||||
|
||||
41
README.md
41
README.md
@@ -204,15 +204,38 @@ To maintain the high-density quality of V2 without redundant AI costs, the `V2Vi
|
||||
3. **UI Polish**: Implements strategic highlighting (`==text==`) for top-tier resources and a clean chronological view that hides unknown dates.
|
||||
4. **Flat Routing**: Both versions use `use_directory_urls: false` to ensure relative asset paths (`images/`) remain stable across all sub-pages.
|
||||
|
||||
### Comparison Matrix
|
||||
| Feature | V1 (Exhaustive) | V2 (Elite) |
|
||||
| :--- | :--- | :--- |
|
||||
| **Philosophy** | "Leave no resource behind" | "Only the best for 2026" |
|
||||
| **Volume** | High (17k+ Links) | Optimized (~2k Links) |
|
||||
| **Depth** | Historical & Wide | Cutting-edge & Deep |
|
||||
| **Chronology** | **Unified Engine** (YYYY) | **Unified Engine** (YYYY) |
|
||||
| **Filtering** | Basic (Health only) | AI-Scored (🌟🌟🌟) |
|
||||
| **MVQ Check** | No (Exhaustive Preservation) | Yes (Stale repos deprioritized) |
|
||||
## 📊 The Unified Agentic Database (Knowledge Graph)
|
||||
|
||||
Nubenetes now utilizes a **Unified Metadata Architecture** to maintain consistency across V1 and V2 while optimizing AI performance. All links are indexed in a local YAML database that serves as the "Memory" for our autonomous agents.
|
||||
|
||||
### Database Components
|
||||
1. **Central Inventory (`data/inventory.yaml`)**: Stores global technical metadata.
|
||||
* `title`, `year`, `stars` (0-5), `description` (V1), and `ai_summary` (V2 Elite).
|
||||
2. **Structure Map (`data/structure_map.yaml`)**: Tracks the physical presence and formatting of links.
|
||||
* Tracks which `.md` pages contain the link in V1 and V2.
|
||||
* Stores visual state: `is_bold`, `is_highlighted` (`==`).
|
||||
|
||||
### Agentic Data Flow
|
||||
```mermaid
|
||||
graph TD
|
||||
AC[Agentic Curator] -->|New Resource| DB[(Unified DB)]
|
||||
LC[Link Cleaner] -->|Health & Metadata| DB
|
||||
V2[V2 Vision Engine] -->|Elite Selection| DB
|
||||
|
||||
DB -->|Metadata Sync| V1[V1 Archive: docs/]
|
||||
DB -->|Advanced UI| V2P[V2 Portal: v2-docs/]
|
||||
|
||||
subgraph Local Storage
|
||||
DB1[inventory.yaml]
|
||||
DB2[structure_map.yaml]
|
||||
end
|
||||
```
|
||||
|
||||
### Strategic Benefits
|
||||
- **Zero Redundancy**: Links already analyzed by Gemini are never re-evaluated unless forced.
|
||||
- **Visual Consistency**: Highlighting (`==`) and Bold formatting are managed via the database to ensure high-signal discovery.
|
||||
- **Cross-Edition Sync**: A metadata update in the YAML database propagates to both V1 and V2 on the next build cycle.
|
||||
- **Manual Priority**: Existing V1 descriptions are protected; AI only intervenes for new additions or V2-specific enrichment.
|
||||
|
||||
---
|
||||
|
||||
|
||||
78975
data/inventory.yaml
78975
data/inventory.yaml
File diff suppressed because it is too large
Load Diff
142744
data/structure_map.yaml
Normal file
142744
data/structure_map.yaml
Normal file
File diff suppressed because it is too large
Load Diff
@@ -112,7 +112,7 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
|
||||
"stars": min(max(score // 20, 0), 5),
|
||||
"last_checked": datetime.now().timestamp()
|
||||
}
|
||||
self._save_inventory()
|
||||
self._save_inventory(); self._save_structure_map()
|
||||
except: pass
|
||||
log_event(f" [+] ACCEPTED: \"{data['title']}\" (Score: {score})")
|
||||
log_event(f" Primary: {primary_cat} | Related: {', '.join(related_cats)}")
|
||||
@@ -133,6 +133,7 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
|
||||
|
||||
|
||||
INVENTORY_PATH = "data/inventory.yaml"
|
||||
STRUCTURE_MAP_PATH = "data/structure_map.yaml"
|
||||
|
||||
class AgenticCurator:
|
||||
def __init__(self):
|
||||
@@ -142,6 +143,7 @@ class AgenticCurator:
|
||||
self.index_path = "docs/index.md"
|
||||
self.stats = {"orphans_linked": 0}
|
||||
self.inventory = self._load_inventory()
|
||||
self.structure_map = self._load_structure_map()
|
||||
|
||||
def _load_inventory(self) -> dict:
|
||||
if os.path.exists(INVENTORY_PATH):
|
||||
@@ -157,7 +159,23 @@ class AgenticCurator:
|
||||
with open(INVENTORY_PATH, "w") as f:
|
||||
import yaml
|
||||
yaml.dump(self.inventory, f, sort_keys=False, allow_unicode=True)
|
||||
|
||||
def _load_structure_map(self) -> dict:
    """Read the structure map from ``STRUCTURE_MAP_PATH``.

    Returns:
        The parsed top-level mapping, or an empty dict when the file is
        missing, empty, unreadable, malformed, or does not hold a mapping.
    """
    import yaml

    try:
        # Read as UTF-8 explicitly: the writer dumps with allow_unicode=True,
        # so relying on the platform default encoding can fail on load.
        with open(STRUCTURE_MAP_PATH, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
    except FileNotFoundError:
        # Nothing has been persisted yet.
        return {}
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
        # Remain best-effort, but report it: an empty map returned here
        # is what the next _save_structure_map() call writes back.
        log_event(f" [!] Could not load {STRUCTURE_MAP_PATH}: {exc}")
        return {}

    if loaded is None:
        # Empty file.
        return {}
    if not isinstance(loaded, dict):
        log_event(f" [!] Ignoring {STRUCTURE_MAP_PATH}: top-level YAML value is not a mapping")
        return {}
    return loaded
|
||||
|
||||
def _save_structure_map(self):
    """Write ``self.structure_map`` to ``STRUCTURE_MAP_PATH`` as YAML.

    The data is dumped to a sibling temporary file and then moved over the
    target, so an interrupted run does not leave a half-written map behind.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    import yaml

    parent_dir = os.path.dirname(STRUCTURE_MAP_PATH)
    if parent_dir:
        # os.makedirs("") raises, so only create a real directory component.
        os.makedirs(parent_dir, exist_ok=True)

    tmp_path = f"{STRUCTURE_MAP_PATH}.tmp"
    # allow_unicode=True emits raw non-ASCII characters; pin UTF-8 so the
    # write does not depend on the platform default encoding.
    with open(tmp_path, "w", encoding="utf-8") as f:
        yaml.dump(self.structure_map, f, sort_keys=False, allow_unicode=True)
    os.replace(tmp_path, STRUCTURE_MAP_PATH)
|
||||
self.inventory = self._load_inventory()
|
||||
self.structure_map = self._load_structure_map()
|
||||
|
||||
async def _rebuild_toc(self, content: str) -> str:
|
||||
"""
|
||||
|
||||
@@ -16,6 +16,7 @@ from src.logger import log_event
|
||||
CORE_FILES = ["docs/index.md", "README.md"]
|
||||
MEMORY_FILE = "src/memory/health_learning.json"
|
||||
INVENTORY_PATH = "data/inventory.yaml"
|
||||
STRUCTURE_MAP_PATH = "data/structure_map.yaml"
|
||||
|
||||
class IntelligentLinkCleaner:
|
||||
def __init__(self):
|
||||
@@ -27,6 +28,7 @@ class IntelligentLinkCleaner:
|
||||
self.description_updates: Dict[str, str] = {}
|
||||
self.learning_data = self._load_memory()
|
||||
self.inventory = self._load_inventory()
|
||||
self.structure_map = self._load_structure_map()
|
||||
self.action_log: List[Dict] = []
|
||||
self.detailed_stats = {
|
||||
"total_scanned": 0,
|
||||
@@ -63,6 +65,21 @@ class IntelligentLinkCleaner:
|
||||
import yaml
|
||||
yaml.dump(self.inventory, f, sort_keys=False, allow_unicode=True)
|
||||
|
||||
def _load_structure_map(self) -> dict:
    """Load the YAML structure map stored at ``STRUCTURE_MAP_PATH``.

    Returns:
        The top-level mapping from the file. An empty dict is returned when
        the file does not exist, is empty, cannot be read or parsed, or its
        top-level value is not a mapping.
    """
    import yaml

    try:
        # Explicit UTF-8 keeps loading independent of the platform locale;
        # the matching save uses allow_unicode=True.
        with open(STRUCTURE_MAP_PATH, "r", encoding="utf-8") as f:
            payload = yaml.safe_load(f)
    except FileNotFoundError:
        # No map has been written yet.
        return {}
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
        # Keep the best-effort fallback but make the failure visible, since
        # the empty result would otherwise be saved over the existing file.
        log_event(f" [!] Could not load {STRUCTURE_MAP_PATH}: {exc}")
        return {}

    if payload is None:
        return {}
    if not isinstance(payload, dict):
        log_event(f" [!] Ignoring {STRUCTURE_MAP_PATH}: top-level YAML value is not a mapping")
        return {}
    return payload
|
||||
|
||||
def _save_structure_map(self):
    """Persist ``self.structure_map`` to ``STRUCTURE_MAP_PATH`` as YAML.

    Writes to a temporary sibling file first and then replaces the target,
    so a crash mid-dump cannot truncate the existing map.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    import yaml

    directory = os.path.dirname(STRUCTURE_MAP_PATH)
    if directory:
        # Guard against a bare filename: os.makedirs("") would raise.
        os.makedirs(directory, exist_ok=True)

    staging_path = f"{STRUCTURE_MAP_PATH}.tmp"
    # UTF-8 is required for allow_unicode=True output to be written
    # reliably regardless of the platform default encoding.
    with open(staging_path, "w", encoding="utf-8") as f:
        yaml.dump(self.structure_map, f, sort_keys=False, allow_unicode=True)
    os.replace(staging_path, STRUCTURE_MAP_PATH)
|
||||
|
||||
async def _fetch_github_metadata(self, url: str) -> Dict:
|
||||
match = re.search(r'github\.com/([^/]+)/([^/]+)', url)
|
||||
if not match: return {}
|
||||
@@ -300,7 +317,7 @@ class IntelligentLinkCleaner:
|
||||
if not is_alive:
|
||||
self.dead_links[url] = (fallback if fallback else "DEAD", reason)
|
||||
log_event(f" [!] DEAD: {url} -> {reason} {'(Fallback: ' + fallback + ')' if fallback else ''}")
|
||||
self._save_memory(); self._save_inventory()
|
||||
self._save_memory(); self._save_inventory(); self._save_structure_map()
|
||||
|
||||
async def apply_changes(self):
|
||||
log_event("APPLYING INTELLIGENT CLEANING & PR GENERATION...", section_break=True)
|
||||
|
||||
@@ -13,6 +13,7 @@ from src.logger import log_event
|
||||
V1_DIR = "docs"
|
||||
V2_DIR = "v2-docs"
|
||||
INVENTORY_PATH = "data/inventory.yaml"
|
||||
STRUCTURE_MAP_PATH = "data/structure_map.yaml"
|
||||
|
||||
class V2VisionEngine:
|
||||
def __init__(self):
|
||||
@@ -51,6 +52,7 @@ class V2VisionEngine:
|
||||
"- Style: Technical, neutral, and informative. Language: English only.\n"
|
||||
)
|
||||
self.inventory = self._load_inventory()
|
||||
self.structure_map = self._load_structure_map()
|
||||
|
||||
def _load_inventory(self) -> Dict:
|
||||
if os.path.exists(INVENTORY_PATH):
|
||||
@@ -65,6 +67,21 @@ class V2VisionEngine:
|
||||
with open(INVENTORY_PATH, "w") as f:
|
||||
yaml.dump(self.inventory, f, sort_keys=False, allow_unicode=True)
|
||||
|
||||
def _load_structure_map(self) -> dict:
    """Return the structure map parsed from ``STRUCTURE_MAP_PATH``.

    Returns:
        The file's top-level mapping, or an empty dict if the file is
        absent, empty, unreadable, invalid YAML, or not a mapping.
    """
    import yaml

    try:
        # The save side dumps with allow_unicode=True, so decode as UTF-8
        # rather than the platform default.
        with open(STRUCTURE_MAP_PATH, "r", encoding="utf-8") as f:
            parsed = yaml.safe_load(f)
    except FileNotFoundError:
        # Map not created yet.
        return {}
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
        # Still fall back to an empty map, but log it so a later save that
        # replaces the file with this empty state is explainable.
        log_event(f" [!] Could not load {STRUCTURE_MAP_PATH}: {exc}")
        return {}

    if parsed is None:
        return {}
    if not isinstance(parsed, dict):
        log_event(f" [!] Ignoring {STRUCTURE_MAP_PATH}: top-level YAML value is not a mapping")
        return {}
    return parsed
|
||||
|
||||
def _save_structure_map(self):
    """Dump ``self.structure_map`` as YAML to ``STRUCTURE_MAP_PATH``.

    The output goes to a temporary file that is then swapped into place,
    which avoids leaving a partially written map if the dump is interrupted.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    import yaml

    out_dir = os.path.dirname(STRUCTURE_MAP_PATH)
    if out_dir:
        # An empty dirname (bare filename) must not reach os.makedirs.
        os.makedirs(out_dir, exist_ok=True)

    pending_path = f"{STRUCTURE_MAP_PATH}.tmp"
    # Pin UTF-8: allow_unicode=True writes non-ASCII characters verbatim.
    with open(pending_path, "w", encoding="utf-8") as f:
        yaml.dump(self.structure_map, f, sort_keys=False, allow_unicode=True)
    os.replace(pending_path, STRUCTURE_MAP_PATH)
|
||||
|
||||
async def analyze_and_cluster(self):
|
||||
log_event("STARTING V2 HIGH-DENSITY CHRONOLOGICAL LIBRARY GENERATION", section_break=True)
|
||||
all_v1_links, mosaic_html, videos_html = await self._gather_all_v1_content()
|
||||
@@ -87,7 +104,7 @@ class V2VisionEngine:
|
||||
await self._write_premium_files(v2_data, mosaic_html, videos_html)
|
||||
await self._sync_enterprise_navigation(v2_data)
|
||||
|
||||
self._save_inventory()
|
||||
self._save_inventory(); self._save_structure_map()
|
||||
log_event("V2 LIBRARY GENERATION COMPLETED.", section_break=True)
|
||||
|
||||
async def _gather_all_v1_content(self) -> (List[Dict], str, str):
|
||||
|
||||
Reference in New Issue
Block a user