diff --git a/skill_categorization/tools/scripts/generate_index.py b/skill_categorization/tools/scripts/generate_index.py index 94724b77..f5286268 100644 --- a/skill_categorization/tools/scripts/generate_index.py +++ b/skill_categorization/tools/scripts/generate_index.py @@ -2,6 +2,8 @@ import os import json import re import sys +from collections.abc import Mapping +from datetime import date, datetime import yaml from _project_paths import find_repo_root @@ -161,6 +163,27 @@ def infer_category(skill_info, metadata, body_text): return infer_dynamic_category(str(skill_info.get("id", ""))) + +def normalize_yaml_value(value): + if isinstance(value, Mapping): + return {key: normalize_yaml_value(val) for key, val in value.items()} + if isinstance(value, list): + return [normalize_yaml_value(item) for item in value] + if isinstance(value, (date, datetime)): + return value.isoformat() + if isinstance(value, (bytes, bytearray)): + return bytes(value).decode("utf-8", errors="replace") + return value + + +def coerce_metadata_text(value): + if value is None or isinstance(value, (Mapping, list, tuple, set)): + return None + if isinstance(value, str): + return value + return str(value) + + def parse_frontmatter(content): """ Parses YAML frontmatter, sanitizing unquoted values containing @. @@ -190,7 +213,12 @@ def parse_frontmatter(content): sanitized_yaml = '\n'.join(sanitized_lines) try: - return yaml.safe_load(sanitized_yaml) or {} + parsed = yaml.safe_load(sanitized_yaml) or {} + parsed = normalize_yaml_value(parsed) + if not isinstance(parsed, Mapping): + print("⚠️ YAML frontmatter must be a mapping/object") + return {} + return dict(parsed) except yaml.YAMLError as e: print(f"⚠️ YAML parsing error: {e}") return {} @@ -240,11 +268,25 @@ def generate_index(skills_dir, output_file): body = content[fm_match.end():].strip() # Merge Metadata (frontmatter takes priority) - if "name" in metadata: skill_info["name"] = metadata["name"] - if "description" in metadata: skill_info["description"] = metadata["description"] - if "risk" in metadata: skill_info["risk"] = metadata["risk"] - if "source" in metadata: skill_info["source"] = metadata["source"] - if "date_added" in metadata: skill_info["date_added"] = metadata["date_added"] + name = coerce_metadata_text(metadata.get("name")) + description = coerce_metadata_text(metadata.get("description")) + risk = coerce_metadata_text(metadata.get("risk")) + source = coerce_metadata_text(metadata.get("source")) + date_added = coerce_metadata_text(metadata.get("date_added")) + category = coerce_metadata_text(metadata.get("category")) + + if name is not None: + skill_info["name"] = name + if description is not None: + skill_info["description"] = description + if risk is not None: + skill_info["risk"] = risk + if source is not None: + skill_info["source"] = source + if date_added is not None: + skill_info["date_added"] = date_added + if category is not None: + skill_info["category"] = category # Category: prefer frontmatter, then folder structure, then default inferred_category, confidence, reason = infer_category(skill_info, metadata, body) diff --git a/tools/scripts/generate_index.py b/tools/scripts/generate_index.py index eea0e5e2..3c7451e6 100644 --- a/tools/scripts/generate_index.py +++ b/tools/scripts/generate_index.py @@ -808,8 +808,18 @@ def normalize_yaml_value(value): return [normalize_yaml_value(item) for item in value] if isinstance(value, (date, datetime)): return value.isoformat() + if isinstance(value, (bytes, bytearray)): + return bytes(value).decode("utf-8", errors="replace") return value + +def coerce_metadata_text(value): + if value is None or isinstance(value, (Mapping, list, tuple, set)): + return None + if isinstance(value, str): + return value + return str(value) + def parse_frontmatter(content): """ Parses YAML frontmatter, sanitizing unquoted values containing @. @@ -905,15 +915,27 @@ def generate_index(skills_dir, output_file, compatibility_report=None): metadata = parse_frontmatter(content) # Merge Metadata (frontmatter takes priority) - if "name" in metadata: skill_info["name"] = metadata["name"] - if "description" in metadata: skill_info["description"] = metadata["description"] - if "risk" in metadata: skill_info["risk"] = metadata["risk"] - if "source" in metadata: skill_info["source"] = metadata["source"] - if "date_added" in metadata: skill_info["date_added"] = metadata["date_added"] + name = coerce_metadata_text(metadata.get("name")) + description = coerce_metadata_text(metadata.get("description")) + risk = coerce_metadata_text(metadata.get("risk")) + source = coerce_metadata_text(metadata.get("source")) + date_added = coerce_metadata_text(metadata.get("date_added")) + category = coerce_metadata_text(metadata.get("category")) + + if name is not None: + skill_info["name"] = name + if description is not None: + skill_info["description"] = description + if risk is not None: + skill_info["risk"] = risk + if source is not None: + skill_info["source"] = source + if date_added is not None: + skill_info["date_added"] = date_added # Category: prefer frontmatter, then folder structure, then conservative inference - if "category" in metadata: - skill_info["category"] = metadata["category"] + if category is not None: + skill_info["category"] = category elif skill_info["category"] is None: inferred_category = infer_category( skill_info["id"], diff --git a/tools/scripts/plugin_compatibility.py b/tools/scripts/plugin_compatibility.py index 63a4758f..718e0103 100644 --- a/tools/scripts/plugin_compatibility.py +++ b/tools/scripts/plugin_compatibility.py @@ -65,6 +65,8 @@ def _normalize_yaml_value(value: Any) -> Any: return [_normalize_yaml_value(item) for item in value] if isinstance(value, (date, datetime)): return value.isoformat() + if isinstance(value, (bytes, bytearray)): + return bytes(value).decode("utf-8", errors="replace") return value @@ -114,6 +116,7 @@ def _runtime_dependency_files(skill_dir: Path) -> list[str]: def _local_link_reasons(content: str, skill_dir: Path) -> set[str]: reasons: set[str] = set() + resolved_skill_dir = skill_dir.resolve() for link in LOCAL_LINK_RE.findall(content): link_clean = link.split("#", 1)[0].strip() @@ -125,6 +128,11 @@ def _local_link_reasons(content: str, skill_dir: Path) -> set[str]: continue target_path = (skill_dir / link_clean).resolve(strict=False) + try: + target_path.relative_to(resolved_skill_dir) + except ValueError: + reasons.add("escaped_local_reference") + continue if not target_path.exists(): reasons.add("broken_local_reference") diff --git a/tools/scripts/sync_editorial_bundles.py b/tools/scripts/sync_editorial_bundles.py index af02400b..62e0fc0b 100644 --- a/tools/scripts/sync_editorial_bundles.py +++ b/tools/scripts/sync_editorial_bundles.py @@ -22,6 +22,7 @@ from update_readme import configure_utf8_output, load_metadata SAFE_SKILL_ID_RE = re.compile( r"^(?!.*(?:^|/)\.{1,2}(?:/|$))[A-Za-z0-9._-]+(?:/[A-Za-z0-9._-]+)*$" ) +SAFE_BUNDLE_ID_RE = re.compile(r"^[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?$") REPO_URL = "https://github.com/sickn33/antigravity-awesome-skills" AUTHOR = { "name": "sickn33 and contributors", @@ -182,6 +183,11 @@ def _validate_bundle_skill_id(skill_id: str) -> None: raise ValueError(f"Invalid skill id in editorial bundles manifest: {skill_id!r}") +def _validate_bundle_id(bundle_id: str) -> None: + if not SAFE_BUNDLE_ID_RE.fullmatch(bundle_id): + raise ValueError(f"Invalid editorial bundle id: {bundle_id!r}") + + def _validate_editorial_bundles(root: Path, payload: dict[str, Any]) -> list[dict[str, Any]]: bundles = payload.get("bundles") if not isinstance(bundles, list) or not bundles: @@ -199,6 +205,7 @@ def _validate_editorial_bundles(root: Path, payload: dict[str, Any]) -> list[dic bundle_name = str(bundle.get("name", "")).strip() if not bundle_id or not bundle_name: raise ValueError("Each editorial bundle requires non-empty 'id' and 'name'.") + _validate_bundle_id(bundle_id) if bundle_id in seen_bundle_ids: raise ValueError(f"Duplicate editorial bundle id: {bundle_id}") if bundle_name in seen_bundle_names: diff --git a/tools/scripts/tests/test_editorial_bundles.py b/tools/scripts/tests/test_editorial_bundles.py index e16ae535..437df0bd 100644 --- a/tools/scripts/tests/test_editorial_bundles.py +++ b/tools/scripts/tests/test_editorial_bundles.py @@ -106,6 +106,30 @@ class EditorialBundlesTests(unittest.TestCase): ) self.assertEqual(generated_plugins, expected_plugins) + def test_manifest_rejects_bundle_ids_with_path_traversal(self): + with tempfile.TemporaryDirectory() as temp_dir: + temp_root = pathlib.Path(temp_dir) + skill_dir = temp_root / "skills" / "safe-skill" + skill_dir.mkdir(parents=True, exist_ok=True) + + payload = { + "bundles": [ + { + "id": "../../outside", + "name": "Safe Bundle", + "group": "Security", + "emoji": "🛡️", + "tagline": "Test bundle", + "audience": "Testers", + "description": "Testers", + "skills": [{"id": "safe-skill", "summary": "ok"}], + } + ] + } + + with self.assertRaisesRegex(ValueError, "Invalid editorial bundle id"): + editorial_bundles._validate_editorial_bundles(temp_root, payload) + def test_sample_bundle_copy_matches_source_file_inventory(self): sample_bundle = next(bundle for bundle in self.manifest_bundles if bundle["id"] == "documents-presentations") plugin_skills_root = REPO_ROOT / "plugins" / "antigravity-bundle-documents-presentations" / "skills" diff --git a/tools/scripts/tests/test_frontmatter_parsing_security.py b/tools/scripts/tests/test_frontmatter_parsing_security.py index 2f1a6a83..db57a39a 100644 --- a/tools/scripts/tests/test_frontmatter_parsing_security.py +++ b/tools/scripts/tests/test_frontmatter_parsing_security.py @@ -21,6 +21,10 @@ def load_module(relative_path: str, module_name: str): generate_index = load_module("tools/scripts/generate_index.py", "generate_index") +legacy_generate_index = load_module( + "skill_categorization/tools/scripts/generate_index.py", + "legacy_generate_index", +) validate_skills = load_module("tools/scripts/validate_skills.py", "validate_skills") @@ -80,6 +84,34 @@ class FrontmatterParsingSecurityTests(unittest.TestCase): self.assertEqual(skills[0]["date_added"], "2026-03-15") self.assertIn('"date_added": "2026-03-15"', output_file.read_text(encoding="utf-8")) + def test_generate_index_normalizes_binary_frontmatter_scalars(self): + with tempfile.TemporaryDirectory() as temp_dir: + root = Path(temp_dir) + skills_dir = root / "skills" + skill_dir = skills_dir / "demo" + output_file = root / "skills_index.json" + + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text( + "---\nname: !!binary aGVsbG8=\ndescription: !!binary d29ybGQ=\n---\nBody\n", + encoding="utf-8", + ) + + skills = generate_index.generate_index(str(skills_dir), str(output_file)) + + self.assertEqual(skills[0]["name"], "hello") + self.assertEqual(skills[0]["description"], "world") + self.assertIn('"name": "hello"', output_file.read_text(encoding="utf-8")) + + def test_legacy_generate_index_rejects_non_mapping_and_normalizes_binary_scalars(self): + self.assertEqual(legacy_generate_index.parse_frontmatter("---\njust-a-string\n---\nbody\n"), {}) + metadata = legacy_generate_index.parse_frontmatter( + "---\nname: !!binary aGVsbG8=\ndescription: !!binary d29ybGQ=\n---\nbody\n" + ) + + self.assertEqual(metadata["name"], "hello") + self.assertEqual(metadata["description"], "world") + def test_generate_index_ignores_symlinked_skill_markdown(self): with tempfile.TemporaryDirectory() as temp_dir: root = Path(temp_dir) diff --git a/tools/scripts/tests/test_plugin_compatibility.py b/tools/scripts/tests/test_plugin_compatibility.py index ee7db1e9..b4e87749 100644 --- a/tools/scripts/tests/test_plugin_compatibility.py +++ b/tools/scripts/tests/test_plugin_compatibility.py @@ -78,6 +78,30 @@ class PluginCompatibilityTests(unittest.TestCase): self.assertEqual(entry["targets"]["claude"], "blocked") self.assertIn("undeclared_runtime_dependency", entry["reasons"]) + def test_relative_links_cannot_escape_skill_directory(self): + with tempfile.TemporaryDirectory() as temp_dir: + skills_dir = pathlib.Path(temp_dir) / "skills" + self._write_skill( + skills_dir, + "escaping-link-skill", + ( + "---\n" + "name: escaping-link-skill\n" + "description: Example\n" + "---\n" + "Read [secret](../../outside/secret.txt)\n" + ), + ) + outside_dir = pathlib.Path(temp_dir) / "outside" + outside_dir.mkdir(parents=True, exist_ok=True) + (outside_dir / "secret.txt").write_text("secret", encoding="utf-8") + + report = plugin_compatibility.build_report(skills_dir) + entry = report["skills"][0] + self.assertEqual(entry["targets"]["codex"], "blocked") + self.assertEqual(entry["targets"]["claude"], "blocked") + self.assertIn("escaped_local_reference", entry["reasons"]) + def test_manual_setup_metadata_can_make_runtime_skill_supported(self): with tempfile.TemporaryDirectory() as temp_dir: skills_dir = pathlib.Path(temp_dir) / "skills"