feat(data): implement Unified Metadata Architecture (inventory & structure map) and document Knowledge Graph

This commit is contained in:
Nubenetes Bot
2026-05-16 11:34:11 +02:00
parent e64262b0f4
commit dfb2804e22
7 changed files with 221811 additions and 12 deletions

View File

@@ -112,7 +112,7 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
"stars": min(max(score // 20, 0), 5),
"last_checked": datetime.now().timestamp()
}
self._save_inventory()
self._save_inventory(); self._save_structure_map()
except: pass
log_event(f" [+] ACCEPTED: \"{data['title']}\" (Score: {score})")
log_event(f" Primary: {primary_cat} | Related: {', '.join(related_cats)}")
@@ -133,6 +133,7 @@ async def evaluate_extracted_assets(raw_assets: List[Dict]) -> Dict[str, Dict]:
INVENTORY_PATH = "data/inventory.yaml"
STRUCTURE_MAP_PATH = "data/structure_map.yaml"
class AgenticCurator:
def __init__(self):
@@ -142,6 +143,7 @@ class AgenticCurator:
self.index_path = "docs/index.md"
self.stats = {"orphans_linked": 0}
self.inventory = self._load_inventory()
self.structure_map = self._load_structure_map()
def _load_inventory(self) -> dict:
if os.path.exists(INVENTORY_PATH):
@@ -157,7 +159,23 @@ class AgenticCurator:
with open(INVENTORY_PATH, "w") as f:
import yaml
yaml.dump(self.inventory, f, sort_keys=False, allow_unicode=True)
def _load_structure_map(self) -> dict:
    """Load the structure map from STRUCTURE_MAP_PATH.

    Returns:
        The parsed YAML content, or an empty dict when the file does not
        exist, parses to a falsy value (e.g. an empty file), or cannot be
        read or parsed.
    """
    if not os.path.exists(STRUCTURE_MAP_PATH):
        return {}

    import yaml

    try:
        # Read as UTF-8 explicitly: the file is dumped with
        # allow_unicode=True, so the platform default encoding is not safe.
        with open(STRUCTURE_MAP_PATH, "r", encoding="utf-8") as f:
            return yaml.safe_load(f) or {}
    except (OSError, ValueError, yaml.YAMLError):
        # Best-effort load: an unreadable, mis-encoded, or malformed file
        # yields an empty map. Unlike a bare `except:`, this does not
        # swallow KeyboardInterrupt/SystemExit or programming errors.
        return {}
def _save_structure_map(self):
    """Write self.structure_map to STRUCTURE_MAP_PATH as YAML.

    Creates the parent directory when it is missing. Mapping keys are
    written unsorted (sort_keys=False) and non-ASCII text is written
    unescaped (allow_unicode=True).
    """
    # Import before touching the file so a missing dependency cannot leave
    # a truncated, empty structure map behind.
    import yaml

    os.makedirs(os.path.dirname(STRUCTURE_MAP_PATH), exist_ok=True)
    # allow_unicode=True emits raw Unicode, so the file must be opened as
    # UTF-8; the locale default encoding can raise UnicodeEncodeError.
    with open(STRUCTURE_MAP_PATH, "w", encoding="utf-8") as f:
        yaml.dump(self.structure_map, f, sort_keys=False, allow_unicode=True)
self.inventory = self._load_inventory()
self.structure_map = self._load_structure_map()
async def _rebuild_toc(self, content: str) -> str:
"""

View File

@@ -16,6 +16,7 @@ from src.logger import log_event
CORE_FILES = ["docs/index.md", "README.md"]
MEMORY_FILE = "src/memory/health_learning.json"
INVENTORY_PATH = "data/inventory.yaml"
STRUCTURE_MAP_PATH = "data/structure_map.yaml"
class IntelligentLinkCleaner:
def __init__(self):
@@ -27,6 +28,7 @@ class IntelligentLinkCleaner:
self.description_updates: Dict[str, str] = {}
self.learning_data = self._load_memory()
self.inventory = self._load_inventory()
self.structure_map = self._load_structure_map()
self.action_log: List[Dict] = []
self.detailed_stats = {
"total_scanned": 0,
@@ -63,6 +65,21 @@ class IntelligentLinkCleaner:
import yaml
yaml.dump(self.inventory, f, sort_keys=False, allow_unicode=True)
def _load_structure_map(self) -> dict:
    """Load the structure map from STRUCTURE_MAP_PATH.

    Returns:
        The parsed YAML content, or an empty dict when the file does not
        exist, parses to a falsy value (e.g. an empty file), or cannot be
        read or parsed.
    """
    if not os.path.exists(STRUCTURE_MAP_PATH):
        return {}

    import yaml

    try:
        # Read as UTF-8 explicitly: the file is dumped with
        # allow_unicode=True, so the platform default encoding is not safe.
        with open(STRUCTURE_MAP_PATH, "r", encoding="utf-8") as f:
            return yaml.safe_load(f) or {}
    except (OSError, ValueError, yaml.YAMLError):
        # Best-effort load: an unreadable, mis-encoded, or malformed file
        # yields an empty map. Unlike a bare `except:`, this does not
        # swallow KeyboardInterrupt/SystemExit or programming errors.
        return {}
def _save_structure_map(self):
    """Write self.structure_map to STRUCTURE_MAP_PATH as YAML.

    Creates the parent directory when it is missing. Mapping keys are
    written unsorted (sort_keys=False) and non-ASCII text is written
    unescaped (allow_unicode=True).
    """
    # Import before touching the file so a missing dependency cannot leave
    # a truncated, empty structure map behind.
    import yaml

    os.makedirs(os.path.dirname(STRUCTURE_MAP_PATH), exist_ok=True)
    # allow_unicode=True emits raw Unicode, so the file must be opened as
    # UTF-8; the locale default encoding can raise UnicodeEncodeError.
    with open(STRUCTURE_MAP_PATH, "w", encoding="utf-8") as f:
        yaml.dump(self.structure_map, f, sort_keys=False, allow_unicode=True)
async def _fetch_github_metadata(self, url: str) -> Dict:
match = re.search(r'github\.com/([^/]+)/([^/]+)', url)
if not match: return {}
@@ -300,7 +317,7 @@ class IntelligentLinkCleaner:
if not is_alive:
self.dead_links[url] = (fallback if fallback else "DEAD", reason)
log_event(f" [!] DEAD: {url} -> {reason} {'(Fallback: ' + fallback + ')' if fallback else ''}")
self._save_memory(); self._save_inventory()
self._save_memory(); self._save_inventory(); self._save_structure_map()
async def apply_changes(self):
log_event("APPLYING INTELLIGENT CLEANING & PR GENERATION...", section_break=True)

View File

@@ -13,6 +13,7 @@ from src.logger import log_event
V1_DIR = "docs"
V2_DIR = "v2-docs"
INVENTORY_PATH = "data/inventory.yaml"
STRUCTURE_MAP_PATH = "data/structure_map.yaml"
class V2VisionEngine:
def __init__(self):
@@ -51,6 +52,7 @@ class V2VisionEngine:
"- Style: Technical, neutral, and informative. Language: English only.\n"
)
self.inventory = self._load_inventory()
self.structure_map = self._load_structure_map()
def _load_inventory(self) -> Dict:
if os.path.exists(INVENTORY_PATH):
@@ -65,6 +67,21 @@ class V2VisionEngine:
with open(INVENTORY_PATH, "w") as f:
yaml.dump(self.inventory, f, sort_keys=False, allow_unicode=True)
def _load_structure_map(self) -> dict:
    """Load the structure map from STRUCTURE_MAP_PATH.

    Returns:
        The parsed YAML content, or an empty dict when the file does not
        exist, parses to a falsy value (e.g. an empty file), or cannot be
        read or parsed.
    """
    if not os.path.exists(STRUCTURE_MAP_PATH):
        return {}

    import yaml

    try:
        # Read as UTF-8 explicitly: the file is dumped with
        # allow_unicode=True, so the platform default encoding is not safe.
        with open(STRUCTURE_MAP_PATH, "r", encoding="utf-8") as f:
            return yaml.safe_load(f) or {}
    except (OSError, ValueError, yaml.YAMLError):
        # Best-effort load: an unreadable, mis-encoded, or malformed file
        # yields an empty map. Unlike a bare `except:`, this does not
        # swallow KeyboardInterrupt/SystemExit or programming errors.
        return {}
def _save_structure_map(self):
    """Write self.structure_map to STRUCTURE_MAP_PATH as YAML.

    Creates the parent directory when it is missing. Mapping keys are
    written unsorted (sort_keys=False) and non-ASCII text is written
    unescaped (allow_unicode=True).
    """
    # Import before touching the file so a missing dependency cannot leave
    # a truncated, empty structure map behind.
    import yaml

    os.makedirs(os.path.dirname(STRUCTURE_MAP_PATH), exist_ok=True)
    # allow_unicode=True emits raw Unicode, so the file must be opened as
    # UTF-8; the locale default encoding can raise UnicodeEncodeError.
    with open(STRUCTURE_MAP_PATH, "w", encoding="utf-8") as f:
        yaml.dump(self.structure_map, f, sort_keys=False, allow_unicode=True)
async def analyze_and_cluster(self):
log_event("STARTING V2 HIGH-DENSITY CHRONOLOGICAL LIBRARY GENERATION", section_break=True)
all_v1_links, mosaic_html, videos_html = await self._gather_all_v1_content()
@@ -87,7 +104,7 @@ class V2VisionEngine:
await self._write_premium_files(v2_data, mosaic_html, videos_html)
await self._sync_enterprise_navigation(v2_data)
self._save_inventory()
self._save_inventory(); self._save_structure_map()
log_event("V2 LIBRARY GENERATION COMPLETED.", section_break=True)
async def _gather_all_v1_content(self) -> (List[Dict], str, str):