fix(security): harden markdown rendering and sync safety

This commit is contained in:
sck_0
2026-03-15 09:21:51 +01:00
parent 078847f681
commit c0c118e223
8 changed files with 246 additions and 3 deletions

View File

@@ -3,6 +3,7 @@ import json
import re
import sys
from collections.abc import Mapping
from datetime import date, datetime
import yaml
from _project_paths import find_repo_root
@@ -13,6 +14,15 @@ if sys.platform == 'win32':
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
def normalize_yaml_value(value):
    """
    Recursively replace YAML date/datetime objects with ISO-8601 strings.

    PyYAML parses unquoted scalars such as ``2026-03-15`` into ``date`` or
    ``datetime`` objects. ``json.dumps`` cannot serialize those, neither as
    values nor as mapping keys, so both positions are converted here.

    Args:
        value: Any object, typically the result of ``yaml.safe_load``.

    Returns:
        A structure of the same shape where every mapping becomes a plain
        ``dict``, every list becomes a new list, and every ``date`` /
        ``datetime`` (value or mapping key) becomes its ``isoformat()``
        string. Any other value is returned unchanged.
    """
    if isinstance(value, Mapping):
        normalized = {}
        for key, val in value.items():
            # Keys are hashable scalars, so only the date case needs handling;
            # a date key left as-is would still break JSON serialization.
            if isinstance(key, (date, datetime)):
                key = key.isoformat()
            normalized[key] = normalize_yaml_value(val)
        return normalized
    if isinstance(value, list):
        return [normalize_yaml_value(item) for item in value]
    if isinstance(value, (date, datetime)):
        return value.isoformat()
    return value
def parse_frontmatter(content):
"""
Parses YAML frontmatter, sanitizing unquoted values containing @.
@@ -43,6 +53,7 @@ def parse_frontmatter(content):
try:
parsed = yaml.safe_load(sanitized_yaml) or {}
parsed = normalize_yaml_value(parsed)
if not isinstance(parsed, Mapping):
print("⚠️ YAML frontmatter must be a mapping/object")
return {}

View File

@@ -38,6 +38,31 @@ class FrontmatterParsingSecurityTests(unittest.TestCase):
self.assertIsNone(metadata)
self.assertTrue(any("mapping" in error.lower() for error in errors))
def test_validate_skills_normalizes_unquoted_yaml_dates(self):
    """An unquoted YAML date in frontmatter is returned as an ISO string."""
    document = "---\nname: demo\ndescription: ok\ndate_added: 2026-03-15\n---\nbody\n"

    parsed, problems = validate_skills.parse_frontmatter(document)

    # Parsing must succeed cleanly before the value itself is inspected.
    self.assertEqual(problems, [])
    self.assertEqual(parsed["date_added"], "2026-03-15")
def test_generate_index_serializes_unquoted_yaml_dates(self):
    """An unquoted YAML date ends up as a string in the result and on disk."""
    with tempfile.TemporaryDirectory() as temp_dir:
        workspace = Path(temp_dir)
        skills_root = workspace / "skills"
        index_path = workspace / "skills_index.json"

        # Lay out a single skill whose frontmatter carries a bare date.
        demo_dir = skills_root / "demo"
        demo_dir.mkdir(parents=True)
        skill_file = demo_dir / "SKILL.md"
        skill_file.write_text(
            "---\nname: demo\ndescription: ok\ndate_added: 2026-03-15\n---\nBody\n",
            encoding="utf-8",
        )

        entries = generate_index.generate_index(str(skills_root), str(index_path))

        self.assertEqual(entries[0]["date_added"], "2026-03-15")
        # The file must be read while the temporary directory still exists.
        written = index_path.read_text(encoding="utf-8")
        self.assertIn('"date_added": "2026-03-15"', written)
def test_generate_index_ignores_symlinked_skill_markdown(self):
with tempfile.TemporaryDirectory() as temp_dir:
root = Path(temp_dir)

View File

@@ -5,6 +5,7 @@ import sys
import io
import yaml
from collections.abc import Mapping
from datetime import date, datetime
from _project_paths import find_repo_root
@@ -38,6 +39,15 @@ WHEN_TO_USE_PATTERNS = [
def has_when_to_use_section(content):
    """Return True if any pattern in WHEN_TO_USE_PATTERNS matches *content*."""
    for candidate in WHEN_TO_USE_PATTERNS:
        if candidate.search(content):
            return True
    return False
def normalize_yaml_value(value):
    """
    Recursively replace YAML date/datetime objects with ISO-8601 strings.

    PyYAML parses unquoted scalars such as ``2026-03-15`` into ``date`` or
    ``datetime`` objects. Converting them, both as values and as mapping
    keys, gives later code plain strings to work with.

    Args:
        value: Any object, typically the result of ``yaml.safe_load``.

    Returns:
        A structure of the same shape where every mapping becomes a plain
        ``dict``, every list becomes a new list, and every ``date`` /
        ``datetime`` (value or mapping key) becomes its ``isoformat()``
        string. Any other value is returned unchanged.
    """
    if isinstance(value, Mapping):
        normalized = {}
        for key, val in value.items():
            # Keys are hashable scalars, so only the date case needs handling;
            # without it an unquoted date used as a key would stay a date.
            if isinstance(key, (date, datetime)):
                key = key.isoformat()
            normalized[key] = normalize_yaml_value(val)
        return normalized
    if isinstance(value, list):
        return [normalize_yaml_value(item) for item in value]
    if isinstance(value, (date, datetime)):
        return value.isoformat()
    return value
def parse_frontmatter(content, rel_path=None):
"""
Parse frontmatter using PyYAML for robustness.
@@ -51,6 +61,7 @@ def parse_frontmatter(content, rel_path=None):
fm_errors = []
try:
metadata = yaml.safe_load(fm_text) or {}
metadata = normalize_yaml_value(metadata)
if not isinstance(metadata, Mapping):
return None, ["Frontmatter must be a YAML mapping/object."]