Files
claude-skills-reference/engineering/helm-chart-builder/scripts/values_validator.py
2026-03-15 23:28:54 +01:00

443 lines
14 KiB
Python

#!/usr/bin/env python3
"""
helm-chart-builder: Values Validator
Validate values.yaml files against Helm best practices — documentation coverage,
type consistency, naming conventions, default quality, and security.
Usage:
    python scripts/values_validator.py values.yaml
    python scripts/values_validator.py values.yaml --output json
    python scripts/values_validator.py values.yaml --strict
"""
import argparse
import json
import re
import sys
from pathlib import Path
# --- Demo values.yaml ---
# Deliberately flawed sample that is validated when no file is given on the
# command line. Nesting is expressed with two-space indentation; the parser
# derives key paths (e.g. image.tag) and depth from that indentation, so the
# layout inside this string is significant.
DEMO_VALUES = """# Default values for demo-app
replicaCount: 1
image:
  repository: nginx
  tag: latest
  pullPolicy: Always
service:
  type: ClusterIP
  port: 80
ingress:
  enabled: false
resources: {}
PASSWORD: supersecret123
db_password: changeme
api-key: sk-12345
deeply:
  nested:
    structure:
      that:
        goes:
          too:
            deep: true
undocumented_value: something
AnotherValue: 42
snake_case_key: bad
"""
# --- Validation Rules ---
# Key-name shapes used to classify individual values.yaml keys.
# NOTE(review): NAMING_PATTERN is not referenced by any validator in the
# visible code — confirm whether it is still needed.
NAMING_PATTERN = re.compile(r"^[a-z][a-zA-Z0-9]*$") # camelCase
# All-lowercase words joined by at least one underscore.
SNAKE_CASE_PATTERN = re.compile(r"^[a-z][a-z0-9]*(_[a-z0-9]+)+$") # snake_case
UPPER_CASE_PATTERN = re.compile(r"^[A-Z]") # Starts with uppercase
# Case-insensitive substrings that mark a key path as secret-like; the secrets
# check searches these against the full dotted path of each entry.
SECRET_KEY_PATTERNS = [
    re.compile(r"(?:password|secret|token|apiKey|api_key|api-key|private_key|credentials)", re.IGNORECASE),
]
# Section names mapped to lists of child key names.
# NOTE(review): not referenced by any validator in the visible code — confirm
# intended use before relying on it.
KNOWN_STRUCTURES = {
    "image": ["repository", "tag", "pullPolicy"],
    "service": ["type", "port"],
    "ingress": ["enabled"],
    "resources": [],
    "serviceAccount": ["create", "name"],
    "autoscaling": ["enabled", "minReplicas", "maxReplicas"],
}
# Matches "key: value" on an already-stripped line; the value may be empty.
_KEY_VALUE_RE = re.compile(r"^(\S+)\s*:\s*(.*)")
_INTEGER_RE = re.compile(r"^-?\d+$")
_FLOAT_RE = re.compile(r"^-?\d+\.\d+$")


def _split_inline_comment(raw_value):
    """Split a raw scalar into ``(value, comment)``; comment is None if absent.

    A ``#`` only starts a comment when it is outside a leading quoted scalar
    and is either the first character or preceded by whitespace. This keeps
    values such as ``"#fff"`` or ``http://host/#anchor`` intact.
    """
    scan_from = 0
    if raw_value[:1] in ('"', "'"):
        quote = raw_value[0]
        pos = 1
        while pos < len(raw_value):
            char = raw_value[pos]
            if quote == '"' and char == "\\":
                pos += 2  # skip the escaped character inside double quotes
                continue
            if char == quote:
                if quote == "'" and raw_value[pos + 1:pos + 2] == "'":
                    pos += 2  # '' is an escaped quote inside single quotes
                    continue
                break
            pos += 1
        # Unterminated quote: treat the whole remainder as the value.
        scan_from = min(pos + 1, len(raw_value))
    for idx in range(scan_from, len(raw_value)):
        if raw_value[idx] == "#" and (idx == 0 or raw_value[idx - 1].isspace()):
            return raw_value[:idx].strip(), raw_value[idx + 1:].strip()
    return raw_value, None


def _classify_value(raw_value):
    """Return the type label for a raw scalar; an empty value means a map."""
    if not raw_value:
        return "map"
    if raw_value in ("true", "false"):
        return "boolean"
    if raw_value in ("null", "~"):
        return "null"
    if raw_value == "{}":
        return "empty_map"
    if raw_value == "[]":
        return "empty_list"
    if _INTEGER_RE.match(raw_value):
        return "integer"
    if _FLOAT_RE.match(raw_value):
        return "float"
    return "string"


def parse_values(content):
    """Parse values.yaml text into a flat list of key entries with metadata.

    This is a line-oriented scan, not a full YAML parser: nesting is inferred
    from leading whitespace, and lines starting with ``-`` (list items) are
    skipped.

    Args:
        content: Full text of a values.yaml file.

    Returns:
        A list of dicts, one per ``key: value`` line, with the keys ``key``,
        ``full_path`` (dot-joined parent keys plus key), ``value`` (raw text
        with any inline comment removed), ``value_type``, ``depth`` (1 for
        top level), ``line`` (1-based), ``has_documentation`` and ``comment``.
        A key counts as documented when the line directly above it is a
        comment or it carries an inline comment.
    """
    entries = []
    key_stack = []
    indent_stack = [0]  # sentinel; one extra level is pushed per open map key
    prev_comment = None
    for line_num, line in enumerate(content.splitlines(), 1):
        stripped = line.strip()
        # Track comments for documentation coverage
        if stripped.startswith("#"):
            prev_comment = stripped
            continue
        if not stripped:
            # A blank line detaches a preceding comment from the next key.
            prev_comment = None
            continue
        indent = len(line) - len(line.lstrip())
        # Close every map whose key sits at the same or a deeper indent.
        while len(indent_stack) > 1 and indent <= indent_stack[-1]:
            indent_stack.pop()
            if key_stack:
                key_stack.pop()
        match = None if stripped.startswith("-") else _KEY_VALUE_RE.match(stripped)
        if match is None:
            prev_comment = None
            continue
        key = match.group(1)
        raw_value, inline_comment = _split_inline_comment(match.group(2).strip())
        # Path and depth are computed before this key is pushed as a parent.
        full_path = ".".join(key_stack + [key])
        depth = len(key_stack) + 1
        value_type = _classify_value(raw_value)
        if value_type == "map":
            key_stack.append(key)
            indent_stack.append(indent)
        entries.append({
            "key": key,
            "full_path": full_path,
            "value": raw_value,
            "value_type": value_type,
            "depth": depth,
            "line": line_num,
            "has_documentation": prev_comment is not None or inline_comment is not None,
            "comment": prev_comment or inline_comment,
        })
        prev_comment = None
    return entries
def validate_naming(entries):
    """Check key naming conventions (Helm values should be camelCase).

    Every entry is checked, including parent (map) keys. A key is reported
    when it is lowercase snake_case, starts with an uppercase letter without
    being entirely uppercase, or contains a hyphen.

    Args:
        entries: Entry dicts as produced by ``parse_values``.

    Returns:
        A list of finding dicts (``severity``, ``category``, ``message``,
        ``fix``, ``line``), at most one per entry.
    """
    findings = []
    for entry in entries:
        key = entry["key"]
        path = entry["full_path"]
        if SNAKE_CASE_PATTERN.match(key):
            message = f"Key '{path}' uses snake_case — Helm convention is camelCase"
            fix = f"Rename to camelCase: {to_camel_case(key)}"
        elif UPPER_CASE_PATTERN.match(key) and not key.isupper():
            # Lowercasing only the first letter would leave separators in
            # place (e.g. "My-Key" -> "my-Key"), so convert those fully.
            if "-" in key or "_" in key:
                suggestion = to_camel_case(key)
            else:
                suggestion = key[0].lower() + key[1:]
            message = f"Key '{path}' starts with uppercase — use camelCase"
            fix = f"Rename: {suggestion}"
        elif "-" in key:
            message = f"Key '{path}' uses kebab-case — Helm convention is camelCase"
            fix = f"Rename to camelCase: {to_camel_case(key)}"
        else:
            continue
        findings.append({
            "severity": "medium",
            "category": "naming",
            "message": message,
            "fix": fix,
            "line": entry["line"],
        })
    return findings
def validate_documentation(entries):
    """Report on how well the parsed values are commented.

    Emits one overall coverage finding (high below 50%, medium below 80%,
    none otherwise or when there are no entries), followed by a low-severity
    finding for each depth-1 key that has no comment.
    """
    report = []
    entry_count = len(entries)
    commented = len([item for item in entries if item["has_documentation"]])
    if entry_count:
        pct = (commented / entry_count) * 100
        if pct < 50:
            report.append({
                "severity": "high",
                "category": "documentation",
                "message": f"Only {pct:.0f}% of values have comments ({commented}/{entry_count})",
                "fix": "Add inline YAML comments explaining purpose, type, and valid options for each value",
                "line": 0,
            })
        elif pct < 80:
            report.append({
                "severity": "medium",
                "category": "documentation",
                "message": f"{pct:.0f}% documentation coverage ({commented}/{entry_count}) — aim for 80%+",
                "fix": "Add comments for undocumented values",
                "line": 0,
            })
    # One finding per uncommented top-level key, in file order.
    report.extend(
        {
            "severity": "low",
            "category": "documentation",
            "message": f"Top-level key '{item['key']}' has no comment",
            "fix": f"Add a comment above '{item['key']}' explaining its purpose",
            "line": item["line"],
        }
        for item in entries
        if item["depth"] == 1 and not item["has_documentation"]
    )
    return report
def validate_defaults(entries):
    """Check default value quality.

    Flags three defaults: a ``tag`` key set to ``latest``, a ``pullPolicy``
    key set to ``Always``, and a ``resources`` key left as an empty map.
    Matching is by key name only, regardless of where the key is nested.

    Args:
        entries: Entry dicts carrying at least ``key``, ``value``,
            ``value_type`` and ``line``.

    Returns:
        A list of finding dicts (``severity``, ``category``, ``message``,
        ``fix``, ``line``) in entry order.
    """
    findings = []
    for entry in entries:
        key = entry["key"]
        value = entry["value"]
        # Values are raw text, so quoted and unquoted spellings are both listed.
        if key == "tag" and value in ("latest", '"latest"', "'latest'"):
            findings.append({
                "severity": "high",
                "category": "defaults",
                "message": "image.tag defaults to 'latest' — not reproducible",
                "fix": "Use a specific version tag or reference .Chart.AppVersion in template",
                "line": entry["line"],
            })
        if key == "pullPolicy" and value in ("Always", '"Always"', "'Always'"):
            findings.append({
                "severity": "low",
                "category": "defaults",
                "message": "imagePullPolicy defaults to 'Always' — 'IfNotPresent' is better for production",
                "fix": "Change default to IfNotPresent (Always is appropriate for :latest only)",
                "line": entry["line"],
            })
        if key == "resources" and entry["value_type"] == "empty_map":
            findings.append({
                "severity": "medium",
                "category": "defaults",
                "message": "resources defaults to {} — no requests or limits set",
                "fix": "Provide default resource requests (e.g., cpu: 100m, memory: 128Mi)",
                "line": entry["line"],
            })
    return findings
def validate_secrets(entries):
    """Check for secrets committed as default values.

    An entry is reported when its full dotted path matches one of
    ``SECRET_KEY_PATTERNS`` and its value, with surrounding quotes removed,
    is non-empty and not a recognised placeholder. Boolean values are
    skipped: they are toggles living under a secret-like path (for example
    ``secret.enabled: true``), not credentials.

    Args:
        entries: Entry dicts as produced by ``parse_values``.

    Returns:
        A list of critical-severity finding dicts, at most one per entry.
        The message includes at most the first 30 characters of the value.
    """
    # Values that signal "fill me in" rather than a real credential.
    placeholders = {"null", "~", "{}", "[]", "changeme", "CHANGEME", "TODO"}
    findings = []
    for entry in entries:
        path = entry["full_path"]
        if not any(pattern.search(path) for pattern in SECRET_KEY_PATTERNS):
            continue
        if entry.get("value_type") == "boolean":
            continue
        val = entry["value"].strip("'\"")
        if not val or val in placeholders:
            continue
        findings.append({
            "severity": "critical",
            "category": "security",
            "message": f"Potential secret with default value: {path} = {val[:30]}...",
            "fix": "Remove default. Use empty string, null, or 'changeme' placeholder with comment",
            "line": entry["line"],
        })
    return findings
def validate_depth(entries):
    """Flag keys nested more than four levels deep.

    Only the first three offending entries (in input order) are reported;
    each becomes a medium-severity "structure" finding.
    """
    too_deep = [item for item in entries if item["depth"] > 4]
    return [
        {
            "severity": "medium",
            "category": "structure",
            "message": f"Deeply nested key ({item['depth']} levels): {item['full_path']}",
            "fix": "Flatten structure — max 3-4 levels deep for usability",
            "line": item["line"],
        }
        for item in too_deep[:3]  # cap the report at three findings
    ]
def to_camel_case(name):
    """Convert a snake_case or kebab-case name to camelCase.

    The name is split on ``-`` and ``_``. Empty segments (from leading,
    trailing or doubled separators) are dropped, fully uppercase segments
    are lowercased, and capitals already inside a segment are kept, so
    ``myApp-name`` becomes ``myAppName``.

    Args:
        name: The key name to convert.

    Returns:
        The camelCase form, or ``name`` unchanged when it contains no
        characters other than separators.
    """
    words = [word for word in re.split(r"[-_]", name) if word]
    if not words:
        return name
    # "API_KEY" should become "apiKey", not "aPIKEY".
    words = [word.lower() if word.isupper() else word for word in words]
    head, *tail = words
    return head[0].lower() + head[1:] + "".join(word[0].upper() + word[1:] for word in tail)
def _print_text_report(result):
    """Print the human-readable form of a report dict to stdout."""
    counts = result["finding_counts"]
    findings = result["findings"]
    icons = {"critical": "!!!", "high": "!!", "medium": "!", "low": "~"}
    print(f"\n{'=' * 60}")
    print(" Values.yaml Validation Report")
    print(f"{'=' * 60}")
    print(f" Score: {result['score']}/100")
    print(f" Keys: {result['total_keys']} | Documented: {result['documented_keys']} ({result['documentation_coverage']})")
    print(f" Max Depth: {result['max_depth']}")
    print()
    print(f" Findings: {counts['critical']} critical | {counts['high']} high | {counts['medium']} medium | {counts['low']} low")
    print("-" * 60)
    for finding in findings:
        icon = icons.get(finding["severity"], "?")
        print(f"\n {icon} {finding['severity'].upper()} [{finding['category']}]")
        print(f" {finding['message']}")
        # Line 0 marks a file-wide finding with no single location.
        if finding.get("line", 0) > 0:
            print(f" Line: {finding['line']}")
        print(f" Fix: {finding['fix']}")
    if not findings:
        print("\n No issues found. Values file looks good.")
    print(f"\n{'=' * 60}\n")


def generate_report(content, output_format="text", strict=False):
    """Run every validator over ``content``, print a report and return it.

    Args:
        content: Full text of a values.yaml file.
        output_format: ``"json"`` prints the result as indented JSON; any
            other value prints the text report.
        strict: When true, medium findings are raised to high and low
            findings to medium before sorting and scoring.

    Returns:
        A dict with ``score`` (100 minus 25/15/5/2 per critical/high/medium/
        low finding, floored at 0), ``total_keys``, ``documented_keys``,
        ``documentation_coverage`` (a percentage string, or ``"N/A"`` when
        there are no keys), ``max_depth``, ``findings`` (sorted most severe
        first) and ``finding_counts``.
    """
    entries = parse_values(content)
    findings = []
    findings.extend(validate_naming(entries))
    findings.extend(validate_documentation(entries))
    findings.extend(validate_defaults(entries))
    findings.extend(validate_secrets(entries))
    findings.extend(validate_depth(entries))
    if strict:
        # A single lookup per finding, so a low finding moves up exactly one
        # step and is not promoted twice.
        elevated = {"medium": "high", "low": "medium"}
        for finding in findings:
            finding["severity"] = elevated.get(finding["severity"], finding["severity"])
    # Sort by severity (stable, so validator order is kept within a level)
    severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    findings.sort(key=lambda finding: severity_order.get(finding["severity"], 4))
    # Score
    deductions = {"critical": 25, "high": 15, "medium": 5, "low": 2}
    score = max(0, 100 - sum(deductions.get(finding["severity"], 0) for finding in findings))
    counts = {
        level: sum(1 for finding in findings if finding["severity"] == level)
        for level in ("critical", "high", "medium", "low")
    }
    # Stats
    total_keys = len(entries)
    documented = sum(1 for entry in entries if entry["has_documentation"])
    max_depth = max((entry["depth"] for entry in entries), default=0)
    result = {
        "score": score,
        "total_keys": total_keys,
        "documented_keys": documented,
        "documentation_coverage": f"{(documented / total_keys * 100):.0f}%" if total_keys > 0 else "N/A",
        "max_depth": max_depth,
        "findings": findings,
        "finding_counts": counts,
    }
    if output_format == "json":
        print(json.dumps(result, indent=2))
    else:
        _print_text_report(result)
    return result
def main():
    """Command-line entry point: validate a values file or the built-in demo.

    Exits with status 1 and a message on stderr when the given path does not
    exist, is not a regular file, or cannot be read as UTF-8 text.
    """
    parser = argparse.ArgumentParser(
        description="helm-chart-builder: values.yaml best-practice validator"
    )
    parser.add_argument("valuesfile", nargs="?", help="Path to values.yaml (omit for demo)")
    parser.add_argument(
        "--output", "-o",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Strict mode — elevate warnings to higher severity",
    )
    args = parser.parse_args()
    if args.valuesfile:
        path = Path(args.valuesfile)
        if not path.exists():
            print(f"Error: File not found: {args.valuesfile}", file=sys.stderr)
            sys.exit(1)
        if not path.is_file():
            # e.g. a directory: exists() is true but it cannot be read as text
            print(f"Error: Not a file: {args.valuesfile}", file=sys.stderr)
            sys.exit(1)
        try:
            content = path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            print(f"Error: Cannot read {args.valuesfile}: {exc}", file=sys.stderr)
            sys.exit(1)
    else:
        # In JSON mode stdout must contain only the JSON document, so the
        # notice goes to stderr there; text mode keeps it on stdout.
        notice_stream = sys.stderr if args.output == "json" else sys.stdout
        print("No values file provided. Running demo validation...\n", file=notice_stream)
        content = DEMO_VALUES
    generate_report(content, args.output, args.strict)


if __name__ == "__main__":
    main()