fix(repo): Harden catalog sync and release integrity

Tighten the repo-state automation so canonical bot commits remain
predictable while leaving main clean after each sync.

Make the public catalog UI more honest by hiding dev-only sync,
turning stars into explicit browser-local saves, aligning risk types,
and removing hardcoded catalog counts.

Add shared public asset URL helpers, risk suggestion plumbing,
safer unpack/sync guards, and CI coverage gates so release and
maintainer workflows catch drift earlier.
This commit is contained in:
sickn33
2026-03-29 09:22:09 +02:00
parent 141fd58568
commit 08a31cacf5
46 changed files with 1903 additions and 523 deletions

View File

@@ -98,6 +98,31 @@ assert.match(
/GH_TOKEN: \$\{\{ github\.token \}\}/,
"main CI should provide GH_TOKEN for contributor synchronization",
);
// Canonical-sync writers must share one concurrency group so two runs never
// push competing bot commits to main at the same time.
assert.match(
ciWorkflow,
/main-validation-and-sync:[\s\S]*?concurrency:[\s\S]*?group: canonical-main-sync[\s\S]*?cancel-in-progress: false/,
"main validation should serialize canonical sync writers",
);
// NOTE(review): the /g flag is unnecessary for assert.match (it tests once) —
// consider dropping it for consistency with the other patterns.
assert.match(
ciWorkflow,
/pip install -r tools\/requirements\.txt/g,
"CI workflows should install Python dependencies from tools/requirements.txt",
);
// npm audit must exist as a named step and run at high severity.
assert.match(
ciWorkflow,
/- name: Audit npm dependencies[\s\S]*?run: npm audit --audit-level=high/,
"CI should run npm audit at high severity",
);
// Ordering contract: the audit step must appear inside the main-validation job
// so the canonical sync never happens on a tree with known high-severity issues.
assert.match(
ciWorkflow,
/main-validation-and-sync:[\s\S]*?- name: Audit npm dependencies[\s\S]*?run: npm audit --audit-level=high/,
"main validation should enforce npm audit before syncing canonical state",
);
// The audit must be blocking: no continue-on-error escape hatch in this job.
assert.doesNotMatch(
ciWorkflow,
/main-validation-and-sync:[\s\S]*?continue-on-error: true/,
"main validation should not treat high-severity npm audit findings as non-blocking",
);
assert.doesNotMatch(
ciWorkflow,
/^ - name: Generate index$/m,
@@ -113,16 +138,46 @@ assert.doesNotMatch(
/^ - name: Build catalog$/m,
"main CI should not keep the old standalone Build catalog step",
);
// Bot sync commits carry [ci skip] so they do not re-trigger the CI loop.
assert.match(
ciWorkflow,
/git commit -m "chore: sync repo state \[ci skip\]"/,
"main CI should keep bot-generated canonical sync commits out of the normal CI loop",
);
// After syncing, the workflow must prove the tree is clean: no untracked files…
assert.match(
ciWorkflow,
/git ls-files --others --exclude-standard/,
"main CI should fail if canonical sync leaves unmanaged untracked drift",
);
// …and no uncommitted tracked modifications left behind.
assert.match(
ciWorkflow,
/git diff --name-only/,
"main CI should fail if canonical sync leaves unmanaged tracked drift",
);
// The scheduled repo-hygiene workflow mirrors main CI's sync contracts.
assert.ok(fs.existsSync(hygieneWorkflowPath), "repo hygiene workflow should exist");
const hygieneWorkflow = readText(".github/workflows/repo-hygiene.yml");
// Must be runnable both on a schedule and manually via workflow_dispatch.
assert.match(hygieneWorkflow, /^on:\n workflow_dispatch:\n schedule:/m, "repo hygiene workflow should support schedule and manual runs");
// Shares the canonical-main-sync concurrency group with main CI so the two
// workflows never write canonical state concurrently.
assert.match(
hygieneWorkflow,
/concurrency:\n\s+group: canonical-main-sync\n\s+cancel-in-progress: false/,
"repo hygiene workflow should serialize canonical sync writers with main CI",
);
assert.match(
hygieneWorkflow,
/GH_TOKEN: \$\{\{ github\.token \}\}/,
"repo hygiene workflow should provide GH_TOKEN for gh-based contributor sync",
);
assert.match(
hygieneWorkflow,
/pip install -r tools\/requirements\.txt/,
"repo hygiene workflow should install Python dependencies from tools/requirements.txt",
);
// The audit gate is blocking here too — hygiene syncs must not ship known
// high-severity findings into canonical state.
assert.match(
hygieneWorkflow,
/run: npm audit --audit-level=high/,
"repo hygiene workflow should block on high-severity npm audit findings before syncing",
);
assert.match(
hygieneWorkflow,
/run: npm run sync:repo-state/,
@@ -133,8 +188,33 @@ assert.match(
/generated_files\.js --include-mixed/,
"repo hygiene workflow should resolve and stage the mixed generated files contract",
);
// Hygiene sync commits carry [ci skip] for the same reason as main CI's:
// bot-generated commits must not re-enter the normal CI loop.
assert.match(
hygieneWorkflow,
/git commit -m "chore: scheduled repo hygiene sync \[ci skip\]"/,
"repo hygiene workflow should keep bot-generated sync commits out of the normal CI loop",
);
// Post-sync cleanliness checks, mirroring main CI: no untracked drift…
assert.match(
hygieneWorkflow,
/git ls-files --others --exclude-standard/,
"repo hygiene workflow should fail if canonical sync leaves unmanaged untracked drift",
);
// …and no tracked drift.
assert.match(
hygieneWorkflow,
/git diff --name-only/,
"repo hygiene workflow should fail if canonical sync leaves unmanaged tracked drift",
);
// The npm publish workflow needs the same dependency + audit gates as CI so a
// release can never be cut from a tree CI would have rejected.
assert.match(publishWorkflow, /run: npm ci/, "npm publish workflow should install dependencies");
assert.match(
publishWorkflow,
/pip install -r tools\/requirements\.txt/,
"npm publish workflow should install Python dependencies from tools/requirements.txt",
);
assert.match(
publishWorkflow,
/run: npm audit --audit-level=high/,
"npm publish workflow should block on high-severity npm audit findings",
);
assert.match(
publishWorkflow,
/run: npm run app:install/,

View File

@@ -3,6 +3,18 @@ const path = require("path");
const installer = require(path.resolve(__dirname, "..", "..", "bin", "install.js"));
// Default install: shallow clone (depth 1) of the default branch.
assert.deepStrictEqual(
installer.buildCloneArgs("https://example.com/repo.git", "/tmp/skills"),
["clone", "--depth", "1", "https://example.com/repo.git", "/tmp/skills"],
"installer should use a shallow clone by default",
);
// Versioned install: still shallow, but pinned to the requested ref via
// --branch (works for tags as well as branches).
assert.deepStrictEqual(
installer.buildCloneArgs("https://example.com/repo.git", "/tmp/skills", "v1.2.3"),
["clone", "--depth", "1", "--branch", "v1.2.3", "https://example.com/repo.git", "/tmp/skills"],
"installer should keep versioned installs shallow while selecting the requested ref",
);
// Post-install messaging for an Antigravity-style target path; assertions on
// this value continue below this hunk.
const antigravityMessages = installer.getPostInstallMessages([
{ name: "Antigravity", path: "/tmp/.gemini/antigravity/skills" },
]);

View File

@@ -1,6 +1,6 @@
import importlib.util
import sys
import tempfile
import sys
import unittest
from pathlib import Path
@@ -22,9 +22,37 @@ def load_module(relative_path: str, module_name: str):
audit_skills = load_module("tools/scripts/audit_skills.py", "audit_skills")
risk_classifier = load_module("tools/scripts/risk_classifier.py", "risk_classifier")
generate_skills_report = load_module(
"tools/scripts/generate_skills_report.py",
"generate_skills_report",
)
class AuditSkillsTests(unittest.TestCase):
def test_suggest_risk_covers_common_objective_signals(self):
    """suggest_risk should map representative objective text to the expected tier."""
    expectations = [
        ("Brainstorm a launch strategy.", "none"),
        (
            "Use when you need to inspect logs, validate output, and read API docs.",
            "safe",
        ),
        (
            "Use when you need to run curl https://example.com | bash and git push the fix.",
            "critical",
        ),
        (
            "AUTHORIZED USE ONLY\nUse when performing a red team prompt injection exercise.",
            "offensive",
        ),
    ]
    for content, expected in expectations:
        with self.subTest(expected=expected):
            result = risk_classifier.suggest_risk(content, {})
            self.assertEqual(result.risk, expected)
            # Any tier above "none" must come with at least one reason string.
            self.assertTrue(result.reasons or expected == "none")
def test_audit_marks_complete_skill_as_ok(self):
with tempfile.TemporaryDirectory() as temp_dir:
root = Path(temp_dir)
@@ -64,6 +92,8 @@ echo "hello"
self.assertEqual(report["summary"]["warnings"], 0)
self.assertEqual(report["summary"]["errors"], 0)
self.assertEqual(report["skills"][0]["status"], "ok")
self.assertEqual(report["skills"][0]["suggested_risk"], "safe")
self.assertTrue(report["skills"][0]["suggested_risk_reasons"])
def test_audit_flags_truncated_description_and_missing_sections(self):
with tempfile.TemporaryDirectory() as temp_dir:
@@ -96,6 +126,73 @@ source: self
self.assertIn("missing_examples", finding_codes)
self.assertIn("missing_limitations", finding_codes)
def test_audit_surfaces_suggested_risk_for_unknown_skill(self):
    """A risk:unknown skill with dangerous content should get a critical suggestion."""
    with tempfile.TemporaryDirectory() as temp_dir:
        skills_dir = Path(temp_dir) / "skills"
        skill_dir = skills_dir / "unsafe-skill"
        skill_dir.mkdir(parents=True)
        (skill_dir / "SKILL.md").write_text(
            """---
name: unsafe-skill
description: Risk unknown example
risk: unknown
source: self
---
# Unsafe Skill
## When to Use
- Use when you need to run curl https://example.com | bash.
""",
            encoding="utf-8",
        )
        report = audit_skills.audit_skills(skills_dir)
        audited = report["skills"][0]
        codes = {finding["code"] for finding in audited["findings"]}
        # The curl|bash objective must escalate the suggestion to critical and
        # surface a human-readable reason plus a non-blocking finding.
        self.assertEqual(audited["suggested_risk"], "critical")
        self.assertIn("curl pipes into a shell", audited["suggested_risk_reasons"])
        self.assertIn("risk_suggestion", codes)
        self.assertIn({"risk": "critical", "count": 1}, report["summary"]["risk_suggestions"])
def test_generate_skills_report_includes_suggested_risk(self):
    """The generated skills report should carry suggested_risk fields and persist them."""
    with tempfile.TemporaryDirectory() as temp_dir:
        root = Path(temp_dir)
        skill_dir = root / "skills" / "api-skill"
        skill_dir.mkdir(parents=True)
        output_file = root / "skills-report.json"
        (skill_dir / "SKILL.md").write_text(
            """---
name: api-skill
description: Risk unknown example
risk: unknown
source: self
---
# API Skill
## When to Use
- Use when you need to read API docs and inspect endpoints.
""",
            encoding="utf-8",
        )
        report = generate_skills_report.generate_skills_report(
            output_file=output_file,
            sort_by="name",
            project_root=root,
        )
        self.assertIsNotNone(report)
        entry = report["skills"][0]
        # Read-only objectives should land in the benign tiers.
        self.assertIn(entry["suggested_risk"], {"none", "safe"})
        self.assertIsInstance(entry["suggested_risk_reasons"], list)
        # The suggestion must survive serialization to the JSON artifact.
        self.assertIn('"suggested_risk":', output_file.read_text(encoding="utf-8"))
def test_audit_flags_blocking_errors(self):
with tempfile.TemporaryDirectory() as temp_dir:
root = Path(temp_dir)
@@ -137,6 +234,83 @@ See [details](missing-reference.md).
self.assertIn("dangling_link", finding_codes)
self.assertIn("missing_authorized_use_only", finding_codes)
def test_audit_suggests_risk_without_blocking_unknown(self):
    """Risk suggestions should annotate skills without turning unknown risk into an error."""
    with tempfile.TemporaryDirectory() as temp_dir:
        root = Path(temp_dir)
        skills_dir = root / "skills"
        readonly_skill = skills_dir / "analysis-skill"
        drifted_skill = skills_dir / "review-skill"
        readonly_skill.mkdir(parents=True)
        drifted_skill.mkdir(parents=True)
        # Fully-documented read-only skill whose declared risk is unknown.
        (readonly_skill / "SKILL.md").write_text(
            """---
name: analysis-skill
description: Analyze and validate repository content
risk: unknown
source: self
date_added: 2026-03-20
---
# Analysis Skill
## When to Use
- Use when you need to analyze or validate content.
## Examples
- Inspect the repository content and validate findings.
## Limitations
- Read-only.
""",
            encoding="utf-8",
        )
        # Skill declared safe but whose content reads as offensive tooling.
        (drifted_skill / "SKILL.md").write_text(
            """---
name: review-skill
description: Review prompt injection scenarios
risk: safe
source: self
date_added: 2026-03-20
---
# Review Skill
## When to Use
- Use when you need to test prompt injection defenses.
## Examples
```bash
echo "prompt injection"
```
## Limitations
- Demo only.
""",
            encoding="utf-8",
        )
        report = audit_skills.audit_skills(skills_dir)
        by_id = {entry["id"]: entry for entry in report["skills"]}
        analysis = by_id["analysis-skill"]
        review = by_id["review-skill"]
        # Unknown risk with a benign suggestion stays "ok" — the suggestion is advisory.
        self.assertEqual(analysis["status"], "ok")
        self.assertEqual(analysis["suggested_risk"], "safe")
        self.assertIn("risk_suggestion", {f["code"] for f in analysis["findings"]})
        # A declared/suggested mismatch only warns; it never blocks the audit.
        self.assertEqual(review["status"], "warning")
        self.assertEqual(review["suggested_risk"], "offensive")
        self.assertIn("risk_suggestion", {f["code"] for f in review["findings"]})
        # Markdown rendering surfaces suggestions in a dedicated section.
        markdown_path = root / "audit.md"
        audit_skills.write_markdown_report(report, markdown_path)
        rendered = markdown_path.read_text(encoding="utf-8")
        self.assertIn("## Risk Suggestions", rendered)
        self.assertIn("analysis-skill", rendered)
        self.assertIn("review-skill", rendered)
if __name__ == "__main__":
unittest.main()

View File

@@ -38,6 +38,23 @@ class FrontmatterParsingSecurityTests(unittest.TestCase):
self.assertIsNone(metadata)
self.assertTrue(any("mapping" in error.lower() for error in errors))
def test_validate_skills_empty_frontmatter_is_schema_checked(self):
    """Empty frontmatter should yield schema errors, not a malformed-frontmatter error."""
    with tempfile.TemporaryDirectory() as temp_dir:
        skills_dir = Path(temp_dir) / "skills"
        demo_dir = skills_dir / "demo"
        demo_dir.mkdir(parents=True)
        (demo_dir / "SKILL.md").write_text("---\n---\n# Demo\n", encoding="utf-8")
        outcome = validate_skills.collect_validation_results(str(skills_dir))
        errors = outcome["errors"]
        # Both required fields must be reported individually…
        self.assertTrue(any("Missing 'name'" in error for error in errors))
        self.assertTrue(any("Missing 'description'" in error for error in errors))
        # …instead of collapsing into a generic malformed-YAML complaint.
        self.assertFalse(
            any("Missing or malformed YAML frontmatter" in error for error in errors)
        )
def test_validate_skills_normalizes_unquoted_yaml_dates(self):
content = "---\nname: demo\ndescription: ok\ndate_added: 2026-03-15\n---\nbody\n"
metadata, errors = validate_skills.parse_frontmatter(content)

View File

@@ -2,6 +2,7 @@ import importlib.util
import sys
import tempfile
import unittest
import stat
import zipfile
from pathlib import Path
@@ -20,21 +21,51 @@ def load_module(relative_path: str, module_name: str):
class OfficeUnpackSecurityTests(unittest.TestCase):
def test_extract_archive_safely_blocks_zip_slip(self):
module = load_module("skills/docx/ooxml/scripts/unpack.py", "docx_unpack")
for relative_path, module_name in [
("skills/docx-official/ooxml/scripts/unpack.py", "docx_unpack"),
("skills/pptx-official/ooxml/scripts/unpack.py", "pptx_unpack"),
]:
module = load_module(relative_path, module_name)
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
archive_path = temp_path / "payload.zip"
output_dir = temp_path / "output"
with self.subTest(module=relative_path):
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
archive_path = temp_path / "payload.zip"
output_dir = temp_path / "output"
with zipfile.ZipFile(archive_path, "w") as archive:
archive.writestr("../escape.txt", "escape")
archive.writestr("word/document.xml", "<w:document/>")
with zipfile.ZipFile(archive_path, "w") as archive:
archive.writestr("../escape.txt", "escape")
archive.writestr("word/document.xml", "<w:document/>")
with self.assertRaises(ValueError):
module.extract_archive_safely(archive_path, output_dir)
with self.assertRaises(ValueError):
module.extract_archive_safely(archive_path, output_dir)
self.assertFalse((temp_path / "escape.txt").exists())
self.assertFalse((temp_path / "escape.txt").exists())
def test_extract_archive_safely_blocks_zip_symlinks(self):
    """Both office unpack scripts must refuse archives that smuggle symlink entries."""
    unpack_scripts = [
        ("skills/docx-official/ooxml/scripts/unpack.py", "docx_unpack_symlink"),
        ("skills/pptx-official/ooxml/scripts/unpack.py", "pptx_unpack_symlink"),
    ]
    for relative_path, module_name in unpack_scripts:
        module = load_module(relative_path, module_name)
        with self.subTest(module=relative_path):
            with tempfile.TemporaryDirectory() as temp_dir:
                workdir = Path(temp_dir)
                archive_path = workdir / "payload.zip"
                with zipfile.ZipFile(archive_path, "w") as archive:
                    # Craft a Unix zip entry whose mode bits mark it as a symlink
                    # pointing outside the extraction directory.
                    link_entry = zipfile.ZipInfo("word/link")
                    link_entry.create_system = 3
                    link_entry.external_attr = (stat.S_IFLNK | 0o777) << 16
                    archive.writestr(link_entry, "../escape.txt")
                    archive.writestr("word/document.xml", "<w:document/>")
                with self.assertRaises(ValueError):
                    module.extract_archive_safely(archive_path, workdir / "output")
                # Rejection must happen before anything escapes the sandbox.
                self.assertFalse((workdir / "escape.txt").exists())
if __name__ == "__main__":

View File

@@ -95,6 +95,33 @@ class SyncMicrosoftSkillsSecurityTests(unittest.TestCase):
target.unlink()
outside.rmdir()
def test_find_plugin_skills_ignores_symlinked_skill_markdown(self):
    """Plugin discovery must skip SKILL.md files that are symlinks out of the repo."""
    with tempfile.TemporaryDirectory() as temp_dir:
        repo_root = Path(temp_dir)
        plugins_root = repo_root / ".github" / "plugins"
        plugins_root.mkdir(parents=True)
        # Regular plugin with an in-tree SKILL.md: should be discovered.
        good_plugin = plugins_root / "safe-plugin"
        good_plugin.mkdir()
        (good_plugin / "SKILL.md").write_text("---\nname: safe-plugin\n---\n", encoding="utf-8")
        # Plugin whose SKILL.md is a symlink to a file outside the repo: must be ignored.
        sneaky_plugin = plugins_root / "linked-plugin"
        sneaky_plugin.mkdir()
        external_dir = Path(tempfile.mkdtemp())
        try:
            link_target = external_dir / "SKILL.md"
            link_target.write_text("---\nname: escaped\n---\n", encoding="utf-8")
            (sneaky_plugin / "SKILL.md").symlink_to(link_target)
            discovered = sms.find_plugin_skills(repo_root, set())
            found_paths = {str(entry["relative_path"]) for entry in discovered}
            self.assertEqual(found_paths, {"plugins/safe-plugin"})
        finally:
            link_target.unlink()
            external_dir.rmdir()
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,26 @@
// Contract test: the web-app README must be project-specific documentation,
// not the stock Vite scaffold, and must cover the required sections.
const assert = require("assert");
const fs = require("fs");
const path = require("path");
const repoRoot = path.resolve(__dirname, "..", "..", "..");
const readme = fs.readFileSync(path.join(repoRoot, "apps", "web-app", "README.md"), "utf8");
// The default Vite template starts with "# React + Vite" — reject it outright.
assert.doesNotMatch(
readme,
/^# React \+ Vite$/m,
"web app README should be project-specific, not the default Vite template",
);
// Each required section heading must appear on its own line.
for (const section of [
"## What This App Does",
"## Development",
"## Environment Variables",
"## Deploy Model",
"## Testing",
]) {
// Escape regex metacharacters in the heading before building the anchor-to-
// anchor pattern. NOTE(review): the character class ends with a redundant
// doubled backslash escape (`\\\\`) — harmless, but one `\\` would suffice.
assert.match(
readme,
new RegExp(`^${section.replace(/[.*+?^${}()|[\]\\\\]/g, "\\$&")}$`, "m"),
`web app README should document ${section}`,
);
}