443 lines
14 KiB
Python
443 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
helm-chart-builder: Values Validator

Validate values.yaml files against Helm best practices — documentation coverage,
type consistency, naming conventions, default quality, and security.

Usage:
    python scripts/values_validator.py values.yaml
    python scripts/values_validator.py values.yaml --output json
    python scripts/values_validator.py values.yaml --strict
    python scripts/values_validator.py    # no file: validates the built-in demo
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
# --- Demo values.yaml ---
|
|
|
|
# Built-in sample used when no values file is given on the command line.
# It is intentionally "bad": it contains a `latest` tag, an `Always` pull
# policy, empty resources, secret-looking keys with defaults, non-camelCase
# keys and a seven-level nested map, so the validators have something to report.
# Nesting is expressed with two-space indentation, which the parser relies on
# to build dotted key paths.
DEMO_VALUES = """# Default values for demo-app
replicaCount: 1

image:
  repository: nginx
  tag: latest
  pullPolicy: Always

service:
  type: ClusterIP
  port: 80

ingress:
  enabled: false

resources: {}

PASSWORD: supersecret123
db_password: changeme
api-key: sk-12345

deeply:
  nested:
    structure:
      that:
        goes:
          too:
            deep: true

undocumented_value: something
AnotherValue: 42
snake_case_key: bad
"""
|
|
|
|
|
|
# --- Validation Rules ---
|
|
|
|
# Key-name shapes recognised by the naming checks.
NAMING_PATTERN = re.compile(r"^[a-z][a-zA-Z0-9]*$")  # camelCase
SNAKE_CASE_PATTERN = re.compile(r"^[a-z][a-z0-9]*(_[a-z0-9]+)+$")  # snake_case
UPPER_CASE_PATTERN = re.compile(r"^[A-Z]")  # Starts with uppercase

# Substrings that mark a key path as secret-like (matched case-insensitively).
_SECRET_KEYWORDS = (
    "password",
    "secret",
    "token",
    "apiKey",
    "api_key",
    "api-key",
    "private_key",
    "credentials",
)
SECRET_KEY_PATTERNS = [
    re.compile("(?:" + "|".join(_SECRET_KEYWORDS) + ")", re.IGNORECASE),
]

# Common top-level chart sections mapped to the child keys listed for each.
KNOWN_STRUCTURES = {
    "image": ["repository", "tag", "pullPolicy"],
    "service": ["type", "port"],
    "ingress": ["enabled"],
    "resources": [],
    "serviceAccount": ["create", "name"],
    "autoscaling": ["enabled", "minReplicas", "maxReplicas"],
}
|
|
|
|
|
|
# Pre-compiled patterns used for every line of the values file.
_KEY_VALUE_RE = re.compile(r"^(\S+)\s*:\s*(.*)")
_INTEGER_RE = re.compile(r"^-?\d+$")
_FLOAT_RE = re.compile(r"^-?\d+\.\d+$")

# Scalars whose type is determined by their exact spelling.
_LITERAL_TYPES = {
    "true": "boolean",
    "false": "boolean",
    "null": "null",
    "~": "null",
    "{}": "empty_map",
    "[]": "empty_list",
}


def _split_inline_comment(text):
    """Split ``value  # comment`` into ``(value, comment)``.

    A ``#`` only starts a comment when it sits outside a leading quoted
    scalar and is either the first character or preceded by whitespace, so
    values such as ``"#fff"`` or ``http://host/#frag`` stay intact.
    ``comment`` is ``None`` when the text carries no inline comment.
    """
    search_from = 0
    if text[:1] in ("'", '"'):
        quote = text[0]
        pos = 1
        while pos < len(text):
            char = text[pos]
            if quote == '"' and char == "\\":
                pos += 2  # skip the escaped character inside double quotes
                continue
            if char == quote:
                if quote == "'" and text[pos + 1:pos + 2] == "'":
                    pos += 2  # '' is an escaped quote inside single quotes
                    continue
                search_from = pos + 1
                break
            pos += 1
        else:
            # Unterminated quote: treat the whole text as the value.
            return text, None

    for idx in range(search_from, len(text)):
        if text[idx] == "#" and (idx == 0 or text[idx - 1].isspace()):
            return text[:idx].strip(), text[idx + 1:].strip()
    return text, None


def _classify_value(raw_value):
    """Return the type label for a raw scalar (empty text means a parent map)."""
    if not raw_value:
        return "map"
    if raw_value in _LITERAL_TYPES:
        return _LITERAL_TYPES[raw_value]
    if _INTEGER_RE.match(raw_value):
        return "integer"
    if _FLOAT_RE.match(raw_value):
        return "float"
    return "string"


def parse_values(content):
    """Parse values.yaml text into a flat list of key entries with metadata.

    This is a lightweight, indentation-based line scanner, not a full YAML
    parser: only ``key: value`` lines are recorded, and lines starting with
    ``-`` (list items) are skipped.

    Args:
        content: Full text of a values.yaml file.

    Returns:
        A list of dicts, one per key, with the fields ``key``, ``full_path``
        (dot-joined parent keys plus the key), ``value`` (raw text without
        any inline comment), ``value_type`` (``map``, ``boolean``, ``null``,
        ``empty_map``, ``empty_list``, ``integer``, ``float`` or ``string``),
        ``depth`` (1 for top-level keys), ``line`` (1-based),
        ``has_documentation`` and ``comment``.
    """
    entries = []
    key_stack = []
    indent_stack = [0]
    prev_comment = None

    for line_num, line in enumerate(content.splitlines(), 1):
        stripped = line.strip()

        # A full-line comment documents the key on the next line.
        if stripped.startswith("#"):
            prev_comment = stripped
            continue

        # A blank line detaches any pending comment from what follows.
        if not stripped:
            prev_comment = None
            continue

        indent = len(line) - len(line.lstrip())

        # Leave every parent map that sits at the same or deeper indentation.
        while len(indent_stack) > 1 and indent <= indent_stack[-1]:
            indent_stack.pop()
            if key_stack:
                key_stack.pop()

        match = _KEY_VALUE_RE.match(stripped)
        if not match or stripped.startswith("-"):
            prev_comment = None
            continue

        key = match.group(1)
        raw_value, inline_comment = _split_inline_comment(match.group(2).strip())

        # Path and depth describe this key, so compute them before the key
        # itself is pushed as a parent.
        full_path = ".".join(key_stack + [key])
        depth = len(key_stack) + 1

        value_type = _classify_value(raw_value)
        if value_type == "map":
            key_stack.append(key)
            indent_stack.append(indent)

        entries.append({
            "key": key,
            "full_path": full_path,
            "value": raw_value,
            "value_type": value_type,
            "depth": depth,
            "line": line_num,
            "has_documentation": prev_comment is not None or inline_comment is not None,
            "comment": prev_comment or inline_comment,
        })
        prev_comment = None

    return entries
|
|
|
|
|
|
def validate_naming(entries):
    """Report keys that break the camelCase naming convention.

    Each key is tested against three rules in order, and only the first hit
    is reported: snake_case, a leading uppercase letter (keys written
    entirely in uppercase are exempt from this rule), and kebab-case.
    Parent (map) keys are checked like any other key.

    Args:
        entries: Parsed entries providing ``key``, ``full_path`` and ``line``.

    Returns:
        A list of medium-severity findings in the ``naming`` category.
    """
    findings = []

    for entry in entries:
        key = entry["key"]

        if SNAKE_CASE_PATTERN.match(key):
            message = f"Key '{entry['full_path']}' uses snake_case — Helm convention is camelCase"
            fix = f"Rename to camelCase: {to_camel_case(key)}"
        elif UPPER_CASE_PATTERN.match(key) and not key.isupper():
            message = f"Key '{entry['full_path']}' starts with uppercase — use camelCase"
            fix = f"Rename: {key[0].lower() + key[1:]}"
        elif "-" in key:
            message = f"Key '{entry['full_path']}' uses kebab-case — Helm convention is camelCase"
            fix = f"Rename to camelCase: {to_camel_case(key)}"
        else:
            # Nothing to report for this key.
            continue

        findings.append({
            "severity": "medium",
            "category": "naming",
            "message": message,
            "fix": fix,
            "line": entry["line"],
        })

    return findings
|
|
|
|
|
|
def validate_documentation(entries):
    """Report on how well the values are covered by comments.

    Emits at most one file-wide coverage finding (``high`` below 50%,
    ``medium`` below 80%, reported with ``line`` 0), followed by one ``low``
    finding for every undocumented top-level key.

    Args:
        entries: Parsed entries providing ``key``, ``depth``, ``line`` and
            ``has_documentation``.

    Returns:
        A list of findings in the ``documentation`` category.
    """
    findings = []
    total = len(entries)
    documented = len([e for e in entries if e["has_documentation"]])

    if total > 0:
        coverage = (documented / total) * 100
        summary = None
        if coverage < 50:
            summary = (
                "high",
                f"Only {coverage:.0f}% of values have comments ({documented}/{total})",
                "Add inline YAML comments explaining purpose, type, and valid options for each value",
            )
        elif coverage < 80:
            summary = (
                "medium",
                f"{coverage:.0f}% documentation coverage ({documented}/{total}) — aim for 80%+",
                "Add comments for undocumented values",
            )
        if summary is not None:
            severity, message, fix = summary
            findings.append({
                "severity": severity,
                "category": "documentation",
                "message": message,
                "fix": fix,
                "line": 0,
            })

    # Point at each undocumented top-level key individually.
    findings.extend(
        {
            "severity": "low",
            "category": "documentation",
            "message": f"Top-level key '{e['key']}' has no comment",
            "fix": f"Add a comment above '{e['key']}' explaining its purpose",
            "line": e["line"],
        }
        for e in entries
        if e["depth"] == 1 and not e["has_documentation"]
    )

    return findings
|
|
|
|
|
|
def validate_defaults(entries):
    """Report risky or low-quality default values.

    Three rules are applied to every entry, in this order: a ``tag`` key
    defaulting to ``latest``, a ``pullPolicy`` key defaulting to ``Always``
    (both matched bare or quoted), and a ``resources`` key set to ``{}``.

    Args:
        entries: Parsed entries providing ``key``, ``value``, ``value_type``
            and ``line``.

    Returns:
        A list of findings in the ``defaults`` category.
    """
    # (predicate, severity, message, fix) — evaluated top to bottom per entry.
    rules = (
        (
            lambda e: e["key"] == "tag" and e["value"] in ("latest", '"latest"', "'latest'"),
            "high",
            "image.tag defaults to 'latest' — not reproducible",
            "Use a specific version tag or reference .Chart.AppVersion in template",
        ),
        (
            lambda e: e["key"] == "pullPolicy" and e["value"] in ("Always", '"Always"', "'Always'"),
            "low",
            "imagePullPolicy defaults to 'Always' — 'IfNotPresent' is better for production",
            "Change default to IfNotPresent (Always is appropriate for :latest only)",
        ),
        (
            lambda e: e["key"] == "resources" and e["value_type"] == "empty_map",
            "medium",
            "resources defaults to {} — no requests or limits set",
            "Provide default resource requests (e.g., cpu: 100m, memory: 128Mi)",
        ),
    )

    findings = []
    for entry in entries:
        for applies, severity, message, fix in rules:
            if applies(entry):
                findings.append({
                    "severity": severity,
                    "category": "defaults",
                    "message": message,
                    "fix": fix,
                    "line": entry["line"],
                })

    return findings
|
|
|
|
|
|
def validate_secrets(entries):
    """Report secret-looking keys that ship with a real default value.

    A key is secret-looking when its full dotted path matches one of
    ``SECRET_KEY_PATTERNS``. Such a key is reported unless its value is
    empty, a known placeholder, or a boolean. Booleans are feature flags,
    not credentials; without this exemption, switches such as
    ``automountServiceAccountToken: true`` or ``secret.enabled: false``
    would be reported as critical leaks.

    Args:
        entries: Parsed entries providing ``full_path``, ``value``, ``line``
            and (optionally) ``value_type``.

    Returns:
        A list of critical findings in the ``security`` category, at most
        one per entry.
    """
    # Values that mean "not set"; compared after surrounding quotes are stripped.
    placeholders = {"null", "~", "{}", "[]", "changeme", "CHANGEME", "TODO"}
    findings = []

    for entry in entries:
        path = entry["full_path"]
        if not any(pattern.search(path) for pattern in SECRET_KEY_PATTERNS):
            continue

        # On/off switches cannot hold a credential.
        if entry.get("value_type") == "boolean":
            continue

        val = entry["value"].strip("'\"")
        if not val or val in placeholders:
            continue

        findings.append({
            "severity": "critical",
            "category": "security",
            # Only the first 30 characters of the value are echoed.
            "message": f"Potential secret with default value: {path} = {val[:30]}...",
            "fix": "Remove default. Use empty string, null, or 'changeme' placeholder with comment",
            "line": entry["line"],
        })

    return findings
|
|
|
|
|
|
def validate_depth(entries):
    """Report keys nested more than four levels deep.

    Only the first three offending entries (in input order) are reported.

    Args:
        entries: Parsed entries providing ``depth``, ``full_path`` and ``line``.

    Returns:
        A list of up to three medium-severity findings in the ``structure``
        category.
    """
    too_deep = [entry for entry in entries if entry["depth"] > 4]

    return [
        {
            "severity": "medium",
            "category": "structure",
            "message": f"Deeply nested key ({entry['depth']} levels): {entry['full_path']}",
            "fix": "Flatten structure — max 3-4 levels deep for usability",
            "line": entry["line"],
        }
        for entry in too_deep[:3]  # cap the report at three entries
    ]
|
|
|
|
|
|
def to_camel_case(name):
    """Return *name* rewritten from snake_case or kebab-case to camelCase.

    The name is split on ``-`` and ``_``; the first piece is lowercased and
    every later piece is capitalized before the pieces are joined.
    """
    head, *tail = re.split(r"[-_]", name)
    return head.lower() + "".join(map(str.capitalize, tail))
|
|
|
|
|
|
def generate_report(content, output_format="text", strict=False):
    """Validate values.yaml text, print a report and return the result.

    Args:
        content: Text of the values.yaml file to validate.
        output_format: ``"json"`` prints the result as indented JSON; any
            other value prints the human-readable text report.
        strict: When true, ``medium`` findings are raised to ``high`` and
            ``low`` findings to ``medium`` before scoring.

    Returns:
        A dict with ``score`` (0-100), ``total_keys``, ``documented_keys``,
        ``documentation_coverage``, ``max_depth``, ``findings`` (sorted from
        most to least severe) and ``finding_counts``.
    """
    severities = ("critical", "high", "medium", "low")
    entries = parse_values(content)

    findings = []
    for check in (
        validate_naming,
        validate_documentation,
        validate_defaults,
        validate_secrets,
        validate_depth,
    ):
        findings.extend(check(entries))

    if strict:
        # Strict mode bumps the two lowest severities up by one step.
        promoted = {"medium": "high", "low": "medium"}
        for finding in findings:
            finding["severity"] = promoted.get(finding["severity"], finding["severity"])

    # Most severe first; the sort is stable, so ties keep validator order.
    rank = {name: position for position, name in enumerate(severities)}
    findings.sort(key=lambda finding: rank.get(finding["severity"], 4))

    # Each finding costs points according to its severity; floor at zero.
    penalty = {"critical": 25, "high": 15, "medium": 5, "low": 2}
    score = max(0, 100 - sum(penalty.get(finding["severity"], 0) for finding in findings))

    counts = {
        name: sum(1 for finding in findings if finding["severity"] == name)
        for name in severities
    }

    total_keys = len(entries)
    documented = sum(1 for entry in entries if entry["has_documentation"])
    max_depth = max((entry["depth"] for entry in entries), default=0)

    if total_keys > 0:
        coverage_text = f"{(documented / total_keys * 100):.0f}%"
    else:
        coverage_text = "N/A"

    result = {
        "score": score,
        "total_keys": total_keys,
        "documented_keys": documented,
        "documentation_coverage": coverage_text,
        "max_depth": max_depth,
        "findings": findings,
        "finding_counts": counts,
    }

    if output_format == "json":
        print(json.dumps(result, indent=2))
        return result

    # Human-readable report.
    heavy_rule = "=" * 60
    icons = {"critical": "!!!", "high": "!!", "medium": "!", "low": "~"}

    print(f"\n{heavy_rule}")
    print(" Values.yaml Validation Report")
    print(heavy_rule)
    print(f" Score: {score}/100")
    print(f" Keys: {total_keys} | Documented: {documented} ({result['documentation_coverage']})")
    print(f" Max Depth: {max_depth}")
    print()
    print(f" Findings: {counts['critical']} critical | {counts['high']} high | {counts['medium']} medium | {counts['low']} low")
    print("─" * 60)

    for finding in findings:
        icon = icons.get(finding["severity"], "?")
        print(f"\n {icon} {finding['severity'].upper()} [{finding['category']}]")
        print(f" {finding['message']}")
        # File-wide findings carry line 0 and get no line reference.
        if finding.get("line", 0) > 0:
            print(f" Line: {finding['line']}")
        print(f" Fix: {finding['fix']}")

    if not findings:
        print("\n No issues found. Values file looks good.")

    print(f"\n{heavy_rule}\n")
    return result
|
|
|
|
|
|
def main():
    """Command-line entry point: parse arguments and run the validation report.

    With a ``valuesfile`` argument the file is read as UTF-8 and validated;
    without one the built-in demo values are validated instead. A missing or
    unreadable path (for example a directory, a file without read permission
    or one that is not valid UTF-8) prints an error to stderr and exits with
    status 1 instead of raising a traceback.
    """
    parser = argparse.ArgumentParser(
        description="helm-chart-builder: values.yaml best-practice validator"
    )
    parser.add_argument("valuesfile", nargs="?", help="Path to values.yaml (omit for demo)")
    parser.add_argument(
        "--output", "-o",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Strict mode — elevate warnings to higher severity",
    )
    args = parser.parse_args()

    if args.valuesfile:
        # Read first and handle failure: an existence check alone would let
        # directories and unreadable files through to an uncaught exception.
        try:
            content = Path(args.valuesfile).read_text(encoding="utf-8")
        except (FileNotFoundError, NotADirectoryError):
            print(f"Error: File not found: {args.valuesfile}", file=sys.stderr)
            sys.exit(1)
        except (OSError, UnicodeDecodeError) as exc:
            print(f"Error: Cannot read {args.valuesfile}: {exc}", file=sys.stderr)
            sys.exit(1)
    else:
        print("No values file provided. Running demo validation...\n")
        content = DEMO_VALUES

    generate_report(content, args.output, args.strict)
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|