Files
claude-skills-reference/engineering/spec-driven-workflow/scripts/spec_validator.py
Reza Rezvani 268061b0fd fix: move browser-automation and spec-driven-workflow scripts to scripts/ directory
Validator expects scripts in scripts/ subdirectory, not at skill root.
Moved 6 scripts to match repo convention.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-25 14:53:14 +01:00

462 lines
17 KiB
Python

#!/usr/bin/env python3
"""
Spec Validator - Validates a feature specification for completeness and quality.
Checks that a spec document contains all required sections, uses RFC 2119 keywords
correctly, has acceptance criteria in Given/When/Then format, and scores overall
completeness from 0-100.
Sections checked:
- Context, Functional Requirements, Non-Functional Requirements
- Acceptance Criteria, Edge Cases, API Contracts, Data Models, Out of Scope
- Metadata (Author/Date/Status)
Exit codes: 0 = pass, 1 = warnings, 2 = critical (or --strict with score < 80)
No external dependencies - uses only Python standard library.
"""
import argparse
import json
import re
import sys
from pathlib import Path
from typing import Dict, List, Any, Tuple
# Section definitions, one tuple per scored section:
#   (key, display_name, required_header_patterns, weight)
# The weights are the maximum points each section contributes and add up to 100.
SECTIONS = [
    (
        "context",
        "Context",
        [r"^##\s+Context"],
        10,
    ),
    (
        "functional_requirements",
        "Functional Requirements",
        [r"^##\s+Functional\s+Requirements"],
        15,
    ),
    (
        "non_functional_requirements",
        "Non-Functional Requirements",
        [r"^##\s+Non-Functional\s+Requirements"],
        10,
    ),
    (
        "acceptance_criteria",
        "Acceptance Criteria",
        [r"^##\s+Acceptance\s+Criteria"],
        20,
    ),
    (
        "edge_cases",
        "Edge Cases",
        [r"^##\s+Edge\s+Cases"],
        10,
    ),
    (
        "api_contracts",
        "API Contracts",
        [r"^##\s+API\s+Contracts"],
        10,
    ),
    (
        "data_models",
        "Data Models",
        [r"^##\s+Data\s+Models"],
        10,
    ),
    (
        "out_of_scope",
        "Out of Scope",
        [r"^##\s+Out\s+of\s+Scope"],
        10,
    ),
    (
        # Metadata has no "##" header of its own; it is recognised by bold
        # field labels instead.
        "metadata",
        "Metadata (Author/Date/Status)",
        [r"\*\*Author:\*\*", r"\*\*Date:\*\*", r"\*\*Status:\*\*"],
        5,
    ),
]

# Uppercase requirement-level keywords looked for in the requirements text.
RFC_KEYWORDS = ["MUST", "MUST NOT", "SHOULD", "SHOULD NOT", "MAY"]

# Regexes for bracketed template text that indicates placeholder/unfilled content.
PLACEHOLDER_PATTERNS = [
    # Document header and free-text prompts
    r"\[your\s+name\]",
    r"\[list\s+reviewers\]",
    r"\[describe\s+",
    # Edge-case and acceptance-criteria slots
    r"\[input/condition\]",
    r"\[precondition\]",
    r"\[action\]",
    r"\[expected\s+result\]",
    # Requirement slots
    r"\[feature/capability\]",
    r"\[operation\]",
    r"\[threshold\]",
    r"\[UI\s+component\]",
    r"\[service\]",
    r"\[percentage\]",
    r"\[number\]",
    # API contract slots
    r"\[METHOD\]",
    r"\[endpoint\]",
    r"\[Name\]",
    # Data model slots
    r"\[Entity\s+Name\]",
    r"\[type\]",
    r"\[constraints\]",
    r"\[field\]",
    # Out-of-scope slots
    r"\[reason\]",
]
class SpecValidator:
    """Validates a spec document for completeness and quality.

    Each ``_check_*`` method inspects the markdown text, records findings via
    ``_add_finding`` and may lower the per-section scores held in
    ``section_scores``. ``validate`` runs every check and folds the outcome
    into a single 0-100 score.

    Attributes:
        content: Full text of the spec document.
        file_path: Path echoed back in the result (may be empty).
        lines: ``content`` split on ``"\\n"``.
        findings: Dicts with ``severity``, ``section`` and ``message`` keys.
        section_scores: Maps a section key to a dict with ``name``,
            ``present``, ``score`` and ``max`` keys.
    """

    def __init__(self, content: str, file_path: str = ""):
        self.content = content
        self.file_path = file_path
        self.lines = content.split("\n")
        self.findings: List[Dict[str, Any]] = []
        self.section_scores: Dict[str, Dict[str, Any]] = {}

    def validate(self) -> Dict[str, Any]:
        """Run all validation checks and return results.

        Returns:
            A dict with the keys ``file``, ``score``, ``grade``, ``sections``,
            ``findings`` and ``summary``.
        """
        # Start from a clean slate so that calling validate() more than once
        # does not accumulate duplicate findings and deflate the score.
        self.findings = []
        self.section_scores = {}

        # The order of the checks determines the order of the findings.
        self._check_sections_present()
        self._check_functional_requirements()
        self._check_acceptance_criteria()
        self._check_edge_cases()
        self._check_rfc_keywords()
        self._check_api_contracts()
        self._check_data_models()
        self._check_out_of_scope()
        self._check_placeholders()
        self._check_traceability()

        total_score = self._calculate_score()
        return {
            "file": self.file_path,
            "score": total_score,
            "grade": self._score_to_grade(total_score),
            "sections": self.section_scores,
            "findings": self.findings,
            "summary": self._build_summary(total_score),
        }

    def _add_finding(self, severity: str, section: str, message: str):
        """Record a validation finding.

        Args:
            severity: One of "error", "warning" or "info".
            section: Key of the section (or check) the finding belongs to.
            message: Human-readable description of the problem.
        """
        self.findings.append({
            "severity": severity,
            "section": section,
            "message": message,
        })

    def _find_section_content(self, header_pattern: str) -> str:
        """Extract content between a section header and the next ## header.

        Args:
            header_pattern: Regex matched (case-insensitively) at the start of
                each line to locate the section header.

        Returns:
            The lines after the matching header up to, but excluding, the next
            line starting with ``## ``, joined with newlines. Deeper headers
            such as ``###`` stay part of the section. Returns an empty string
            when no line matches ``header_pattern``.
        """
        in_section = False
        section_lines = []
        for line in self.lines:
            if re.match(header_pattern, line, re.IGNORECASE):
                in_section = True
                continue
            if not in_section:
                continue
            if re.match(r"^##\s+", line):
                break
            section_lines.append(line)
        return "\n".join(section_lines)

    def _check_sections_present(self):
        """Check that all required sections exist.

        A section counts as present when at least one of its patterns matches
        at least one line. Present sections start with their full weight as
        score; missing ones score 0 and produce an error finding.
        """
        for key, name, patterns, weight in SECTIONS:
            found = any(
                re.search(pattern, line, re.IGNORECASE)
                for pattern in patterns
                for line in self.lines
            )
            self.section_scores[key] = {
                "name": name,
                "present": found,
                "score": weight if found else 0,
                "max": weight,
            }
            if not found:
                self._add_finding("error", key, f"Missing section: {name}")

    def _check_functional_requirements(self):
        """Validate functional requirements format and content.

        Expects list items of the form ``- FR-N: ...``. Having none is an
        error that costs the section 10 points; fewer than three, or no
        uppercase RFC 2119 keyword, is a warning.
        """
        content = self._find_section_content(r"^##\s+Functional\s+Requirements")
        if not content.strip():
            return

        fr_count = len(re.findall(r"-\s+FR-(\d+):", content))
        if fr_count == 0:
            self._add_finding("error", "functional_requirements", "No numbered requirements found (expected FR-N: format)")
            entry = self.section_scores.get("functional_requirements")
            if entry is not None:
                entry["score"] = max(0, entry["score"] - 10)
            return

        if fr_count < 3:
            self._add_finding("warning", "functional_requirements", f"Only {fr_count} requirements found. Most features need 3+.")

        # Match whole words only: a plain substring test would accept
        # "MAYBE" or "DISMAY" as the keyword "MAY".
        has_keyword = any(
            re.search(rf"\b{re.escape(kw)}\b", content) for kw in RFC_KEYWORDS
        )
        if not has_keyword:
            self._add_finding("warning", "functional_requirements", "No RFC 2119 keywords (MUST/SHOULD/MAY) found.")

    def _check_acceptance_criteria(self):
        """Validate acceptance criteria use Given/When/Then format.

        Expects ``### AC-N:`` headings. Having none is an error that costs the
        section 15 points. Otherwise the number of Given/When/Then words in
        the section is compared with the number of criteria, and at least one
        ``(FR-N`` reference is expected.
        """
        content = self._find_section_content(r"^##\s+Acceptance\s+Criteria")
        if not content.strip():
            return

        ac_count = len(re.findall(r"###\s+AC-(\d+):", content))
        if ac_count == 0:
            self._add_finding("error", "acceptance_criteria", "No numbered acceptance criteria found (expected ### AC-N: format)")
            entry = self.section_scores.get("acceptance_criteria")
            if entry is not None:
                entry["score"] = max(0, entry["score"] - 15)
            return

        # These are word counts over the whole section, not per criterion, so
        # this is a heuristic lower bound rather than a structural check.
        given_count = len(re.findall(r"(?i)\bgiven\b", content))
        when_count = len(re.findall(r"(?i)\bwhen\b", content))
        then_count = len(re.findall(r"(?i)\bthen\b", content))
        if given_count < ac_count:
            self._add_finding("warning", "acceptance_criteria",
                              f"Found {ac_count} criteria but only {given_count} 'Given' clauses. Each AC needs Given/When/Then.")
        if when_count < ac_count:
            self._add_finding("warning", "acceptance_criteria",
                              f"Found {ac_count} criteria but only {when_count} 'When' clauses.")
        if then_count < ac_count:
            self._add_finding("warning", "acceptance_criteria",
                              f"Found {ac_count} criteria but only {then_count} 'Then' clauses.")

        if not re.search(r"\(FR-\d+", content):
            self._add_finding("warning", "acceptance_criteria",
                              "No acceptance criteria reference functional requirements (expected (FR-N) in title).")

    def _check_edge_cases(self):
        """Validate edge cases section (expects ``- EC-N:`` items, ideally 3+)."""
        content = self._find_section_content(r"^##\s+Edge\s+Cases")
        if not content.strip():
            return

        matches = re.findall(r"-\s+EC-(\d+):", content)
        if not matches:
            self._add_finding("warning", "edge_cases", "No numbered edge cases found (expected EC-N: format)")
        elif len(matches) < 3:
            self._add_finding("warning", "edge_cases", f"Only {len(matches)} edge cases. Consider failure modes for each external dependency.")

    def _check_rfc_keywords(self):
        """Check RFC 2119 keywords are used consistently (capitalized).

        Looks for lowercase must/should/may directly after one of the words
        system, service, API or endpoint in the functional and non-functional
        requirements sections, and emits one warning per keyword found.
        """
        requirement_text = self._find_section_content(r"^##\s+Functional\s+Requirements")
        requirement_text += self._find_section_content(r"^##\s+Non-Functional\s+Requirements")
        for kw in ("must", "should", "may"):
            # Case-sensitive on purpose: only the lowercase form is a problem.
            pattern = rf"(?:system|service|API|endpoint)\s+{kw}\s+"
            if re.search(pattern, requirement_text):
                self._add_finding("warning", "rfc_keywords",
                                  f"Found lowercase '{kw}' in requirements. RFC 2119 keywords should be UPPERCASE: {kw.upper()}")

    def _check_api_contracts(self):
        """Validate API contracts section.

        Warns when no ``METHOD /path`` endpoint is present and adds an info
        finding when no ``interface Name`` definition is present.
        """
        content = self._find_section_content(r"^##\s+API\s+Contracts")
        if not content.strip():
            return

        if not re.search(r"(GET|POST|PUT|PATCH|DELETE)\s+/", content):
            self._add_finding("warning", "api_contracts", "No HTTP method + path found (expected e.g., POST /api/endpoint)")
        if not re.search(r"interface\s+\w+", content):
            self._add_finding("info", "api_contracts", "No TypeScript interfaces found. Consider defining request/response shapes.")

    def _check_data_models(self):
        """Validate data models section (expects at least one pipe-delimited table row)."""
        content = self._find_section_content(r"^##\s+Data\s+Models")
        if not content.strip():
            return

        if not re.search(r"\|.*\|.*\|", content):
            self._add_finding("warning", "data_models", "No table-formatted data models found. Use | Field | Type | Constraints | format.")

    def _check_out_of_scope(self):
        """Validate out of scope section (expects ``- OS-N:`` items, ideally 2+)."""
        content = self._find_section_content(r"^##\s+Out\s+of\s+Scope")
        if not content.strip():
            return

        matches = re.findall(r"-\s+OS-(\d+):", content)
        if not matches:
            self._add_finding("warning", "out_of_scope", "No numbered exclusions found (expected OS-N: format)")
        elif len(matches) < 2:
            self._add_finding("info", "out_of_scope", "Only 1 exclusion listed. Consider what was deliberately left out.")

    def _check_placeholders(self):
        """Check for unfilled placeholder text.

        Counts case-insensitive matches of every placeholder pattern over the
        whole document. If any are found, a single warning is recorded and
        every present section loses up to 3 points.
        """
        placeholder_count = sum(
            len(re.findall(pattern, self.content, re.IGNORECASE))
            for pattern in PLACEHOLDER_PATTERNS
        )
        if placeholder_count == 0:
            return

        self._add_finding("warning", "placeholders",
                          f"Found {placeholder_count} placeholder(s) that need to be filled in (e.g., [your name], [describe ...]).")
        # Flat penalty: the deduction does not depend on how many placeholders
        # were found or on which section contains them.
        for entry in self.section_scores.values():
            if entry["present"]:
                entry["score"] = max(0, entry["score"] - 3)

    def _check_traceability(self):
        """Check that acceptance criteria reference functional requirements.

        Every ``FR-N`` id mentioned in the functional requirements section
        should also appear somewhere in the acceptance criteria section. Ids
        that do not are reported in one warning, in ascending numeric order.
        """
        ac_content = self._find_section_content(r"^##\s+Acceptance\s+Criteria")
        fr_content = self._find_section_content(r"^##\s+Functional\s+Requirements")
        if not ac_content.strip() or not fr_content.strip():
            return

        fr_ids = set(re.findall(r"FR-(\d+)", fr_content))
        ac_fr_refs = set(re.findall(r"FR-(\d+)", ac_content))
        unreferenced = fr_ids - ac_fr_refs
        if unreferenced:
            # Sort numerically (FR-2 before FR-10); the ids are digit strings,
            # so a plain sort would order them lexicographically. The string
            # itself breaks ties such as "1" vs "01" deterministically.
            ordered = sorted(unreferenced, key=lambda fr_id: (int(fr_id), fr_id))
            unreferenced_list = ", ".join(f"FR-{i}" for i in ordered)
            self._add_finding("warning", "traceability",
                              f"Functional requirements without acceptance criteria: {unreferenced_list}")

    def _calculate_score(self) -> int:
        """Calculate the total completeness score.

        The base score is the earned share of the section weights scaled to
        0-100. Each error then costs 5 points and each warning 2 points, and
        the result is clamped to the range 0-100.
        """
        total = sum(s["score"] for s in self.section_scores.values())
        maximum = sum(s["max"] for s in self.section_scores.values())
        if maximum == 0:
            return 0

        error_count = sum(1 for f in self.findings if f["severity"] == "error")
        warning_count = sum(1 for f in self.findings if f["severity"] == "warning")
        base_score = round((total / maximum) * 100)
        deduction = (error_count * 5) + (warning_count * 2)
        return max(0, min(100, base_score - deduction))

    @staticmethod
    def _score_to_grade(score: int) -> str:
        """Convert score to letter grade (A >= 90, B >= 80, C >= 70, D >= 60, else F)."""
        for threshold, grade in ((90, "A"), (80, "B"), (70, "C"), (60, "D")):
            if score >= threshold:
                return grade
        return "F"

    def _build_summary(self, score: int) -> str:
        """Build human-readable summary.

        Args:
            score: Total score as returned by ``_calculate_score``.

        Returns:
            A multi-line string with the score line, finding counts, the
            findings grouped by severity and a per-section breakdown.
        """
        errors = [f for f in self.findings if f["severity"] == "error"]
        warnings = [f for f in self.findings if f["severity"] == "warning"]
        infos = [f for f in self.findings if f["severity"] == "info"]

        lines = [
            f"Spec Completeness Score: {score}/100 (Grade: {self._score_to_grade(score)})",
            f"Errors: {len(errors)}, Warnings: {len(warnings)}, Info: {len(infos)}",
            "",
        ]

        groups = (
            ("ERRORS (must fix):", errors),
            ("WARNINGS (should fix):", warnings),
            ("INFO:", infos),
        )
        for heading, items in groups:
            if not items:
                continue
            lines.append(heading)
            lines.extend(f" [{item['section']}] {item['message']}" for item in items)
            lines.append("")

        lines.append("Section Breakdown:")
        for data in self.section_scores.values():
            status = "PRESENT" if data["present"] else "MISSING"
            lines.append(f" {data['name']}: {data['score']}/{data['max']} ({status})")
        return "\n".join(lines)
def format_human(result: Dict[str, Any]) -> str:
    """Render a validation result as a plain-text report.

    Args:
        result: Result dict; its ``file`` and ``summary`` entries are read.

    Returns:
        A banner, the file name (only when ``file`` is non-empty) and the
        summary text, joined with newlines.
    """
    rule = "=" * 60
    report = [rule, "SPEC VALIDATION REPORT", rule, ""]

    if result["file"]:
        report.extend([f"File: {result['file']}", ""])

    report.append(result["summary"])
    return "\n".join(report)
def main():
    """Command-line entry point: validate one spec file and exit with a status.

    Prints the report (plain text, or JSON with ``--json``) to stdout and
    terminates the process via ``sys.exit``.

    Exit codes:
        0 - no errors and no warnings
        1 - warnings only
        2 - errors found, ``--strict`` given with a score below 80, or the
            input is missing, not a regular file, unreadable or empty
    """
    parser = argparse.ArgumentParser(
        description="Validate a feature specification for completeness and quality.",
        epilog="Example: python spec_validator.py --file spec.md --strict",
    )
    parser.add_argument(
        "--file",
        "-f",
        required=True,
        help="Path to the spec markdown file",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Exit with code 2 if score is below 80",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        dest="json_flag",
        help="Output results as JSON",
    )
    args = parser.parse_args()

    file_path = Path(args.file)
    if not file_path.exists():
        print(f"Error: File not found: {file_path}", file=sys.stderr)
        sys.exit(2)
    # exists() is also true for directories, which read_text() cannot open.
    if not file_path.is_file():
        print(f"Error: Not a file: {file_path}", file=sys.stderr)
        sys.exit(2)

    try:
        content = file_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # Report permission problems and non-UTF-8 input as a clean error
        # instead of a traceback.
        print(f"Error: Could not read {file_path}: {exc}", file=sys.stderr)
        sys.exit(2)

    if not content.strip():
        print(f"Error: File is empty: {file_path}", file=sys.stderr)
        sys.exit(2)

    validator = SpecValidator(content, str(file_path))
    result = validator.validate()

    if args.json_flag:
        print(json.dumps(result, indent=2))
    else:
        print(format_human(result))

    # Determine exit code
    severities = {f["severity"] for f in result["findings"]}
    if "error" in severities or (args.strict and result["score"] < 80):
        sys.exit(2)
    if "warning" in severities:
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()