#!/usr/bin/env python3 """ helm-chart-builder: Values Validator Validate values.yaml files against Helm best practices — documentation coverage, type consistency, naming conventions, default quality, and security. Usage: python scripts/values_validator.py values.yaml python scripts/values_validator.py values.yaml --output json python scripts/values_validator.py values.yaml --strict """ import argparse import json import re import sys from pathlib import Path # --- Demo values.yaml --- DEMO_VALUES = """# Default values for demo-app replicaCount: 1 image: repository: nginx tag: latest pullPolicy: Always service: type: ClusterIP port: 80 ingress: enabled: false resources: {} PASSWORD: supersecret123 db_password: changeme api-key: sk-12345 deeply: nested: structure: that: goes: too: deep: true undocumented_value: something AnotherValue: 42 snake_case_key: bad """ # --- Validation Rules --- NAMING_PATTERN = re.compile(r"^[a-z][a-zA-Z0-9]*$") # camelCase SNAKE_CASE_PATTERN = re.compile(r"^[a-z][a-z0-9]*(_[a-z0-9]+)+$") # snake_case UPPER_CASE_PATTERN = re.compile(r"^[A-Z]") # Starts with uppercase SECRET_KEY_PATTERNS = [ re.compile(r"(?:password|secret|token|apiKey|api_key|api-key|private_key|credentials)", re.IGNORECASE), ] KNOWN_STRUCTURES = { "image": ["repository", "tag", "pullPolicy"], "service": ["type", "port"], "ingress": ["enabled"], "resources": [], "serviceAccount": ["create", "name"], "autoscaling": ["enabled", "minReplicas", "maxReplicas"], } def parse_values(content): """Parse values.yaml into structured data with metadata. Returns a list of entries with key paths, values, depth, and comment info. """ entries = [] key_stack = [] indent_stack = [0] prev_comment = None for line_num, line in enumerate(content.splitlines(), 1): stripped = line.strip() # Track comments for documentation coverage if stripped.startswith("#"): prev_comment = stripped continue if not stripped: prev_comment = None continue indent = len(line) - len(line.lstrip()) # Pop stack for dedented lines while len(indent_stack) > 1 and indent <= indent_stack[-1]: indent_stack.pop() if key_stack: key_stack.pop() # Parse key: value match = re.match(r"^(\S+)\s*:\s*(.*)", stripped) if match and not stripped.startswith("-"): key = match.group(1) raw_value = match.group(2).strip() # Check for inline comment inline_comment = None if "#" in raw_value: val_part, _, comment_part = raw_value.partition("#") raw_value = val_part.strip() inline_comment = comment_part.strip() # Build full key path full_path = ".".join(key_stack + [key]) depth = len(key_stack) + 1 # Determine value type value_type = "unknown" if not raw_value or raw_value == "": value_type = "map" key_stack.append(key) indent_stack.append(indent) elif raw_value in ("true", "false"): value_type = "boolean" elif raw_value == "null" or raw_value == "~": value_type = "null" elif raw_value == "{}": value_type = "empty_map" elif raw_value == "[]": value_type = "empty_list" elif re.match(r"^-?\d+$", raw_value): value_type = "integer" elif re.match(r"^-?\d+\.\d+$", raw_value): value_type = "float" elif raw_value.startswith('"') or raw_value.startswith("'"): value_type = "string" else: value_type = "string" has_doc = prev_comment is not None or inline_comment is not None entries.append({ "key": key, "full_path": full_path, "value": raw_value, "value_type": value_type, "depth": depth, "line": line_num, "has_documentation": has_doc, "comment": prev_comment or inline_comment, }) prev_comment = None else: prev_comment = None return entries def validate_naming(entries): """Check key naming conventions.""" findings = [] for entry in entries: key = entry["key"] # Skip map entries (they're parent keys) if entry["value_type"] == "map": # Parent keys should still be camelCase pass if SNAKE_CASE_PATTERN.match(key): findings.append({ "severity": "medium", "category": "naming", "message": f"Key '{entry['full_path']}' uses snake_case — Helm convention is camelCase", "fix": f"Rename to camelCase: {to_camel_case(key)}", "line": entry["line"], }) elif UPPER_CASE_PATTERN.match(key) and not key.isupper(): findings.append({ "severity": "medium", "category": "naming", "message": f"Key '{entry['full_path']}' starts with uppercase — use camelCase", "fix": f"Rename: {key[0].lower() + key[1:]}", "line": entry["line"], }) elif "-" in key: findings.append({ "severity": "medium", "category": "naming", "message": f"Key '{entry['full_path']}' uses kebab-case — Helm convention is camelCase", "fix": f"Rename to camelCase: {to_camel_case(key)}", "line": entry["line"], }) return findings def validate_documentation(entries): """Check documentation coverage.""" findings = [] total = len(entries) documented = sum(1 for e in entries if e["has_documentation"]) if total > 0: coverage = (documented / total) * 100 if coverage < 50: findings.append({ "severity": "high", "category": "documentation", "message": f"Only {coverage:.0f}% of values have comments ({documented}/{total})", "fix": "Add inline YAML comments explaining purpose, type, and valid options for each value", "line": 0, }) elif coverage < 80: findings.append({ "severity": "medium", "category": "documentation", "message": f"{coverage:.0f}% documentation coverage ({documented}/{total}) — aim for 80%+", "fix": "Add comments for undocumented values", "line": 0, }) # Flag specific undocumented top-level keys for entry in entries: if entry["depth"] == 1 and not entry["has_documentation"]: findings.append({ "severity": "low", "category": "documentation", "message": f"Top-level key '{entry['key']}' has no comment", "fix": f"Add a comment above '{entry['key']}' explaining its purpose", "line": entry["line"], }) return findings def validate_defaults(entries): """Check default value quality.""" findings = [] for entry in entries: # Check for :latest tag if entry["key"] == "tag" and entry["value"] in ("latest", '"latest"', "'latest'"): findings.append({ "severity": "high", "category": "defaults", "message": f"image.tag defaults to 'latest' — not reproducible", "fix": "Use a specific version tag or reference .Chart.AppVersion in template", "line": entry["line"], }) # Check pullPolicy if entry["key"] == "pullPolicy" and entry["value"] in ("Always", '"Always"', "'Always'"): findings.append({ "severity": "low", "category": "defaults", "message": "imagePullPolicy defaults to 'Always' — 'IfNotPresent' is better for production", "fix": "Change default to IfNotPresent (Always is appropriate for :latest only)", "line": entry["line"], }) # Check empty resources if entry["key"] == "resources" and entry["value_type"] == "empty_map": findings.append({ "severity": "medium", "category": "defaults", "message": "resources defaults to {} — no requests or limits set", "fix": "Provide default resource requests (e.g., cpu: 100m, memory: 128Mi)", "line": entry["line"], }) return findings def validate_secrets(entries): """Check for secrets in default values.""" findings = [] for entry in entries: for pattern in SECRET_KEY_PATTERNS: if pattern.search(entry["full_path"]): val = entry["value"].strip("'\"") if val and val not in ("", "null", "~", "{}", "[]", "changeme", "CHANGEME", "TODO", '""', "''"): findings.append({ "severity": "critical", "category": "security", "message": f"Potential secret with default value: {entry['full_path']} = {val[:30]}...", "fix": "Remove default. Use empty string, null, or 'changeme' placeholder with comment", "line": entry["line"], }) break return findings def validate_depth(entries): """Check nesting depth.""" findings = [] max_depth = max((e["depth"] for e in entries), default=0) if max_depth > 4: deep_entries = [e for e in entries if e["depth"] > 4] for entry in deep_entries[:3]: # Report first 3 findings.append({ "severity": "medium", "category": "structure", "message": f"Deeply nested key ({entry['depth']} levels): {entry['full_path']}", "fix": "Flatten structure — max 3-4 levels deep for usability", "line": entry["line"], }) return findings def to_camel_case(name): """Convert snake_case or kebab-case to camelCase.""" parts = re.split(r"[-_]", name) return parts[0].lower() + "".join(p.capitalize() for p in parts[1:]) def generate_report(content, output_format="text", strict=False): """Generate full validation report.""" entries = parse_values(content) findings = [] findings.extend(validate_naming(entries)) findings.extend(validate_documentation(entries)) findings.extend(validate_defaults(entries)) findings.extend(validate_secrets(entries)) findings.extend(validate_depth(entries)) if strict: # Elevate medium to high, low to medium for f in findings: if f["severity"] == "medium": f["severity"] = "high" elif f["severity"] == "low": f["severity"] = "medium" # Sort by severity severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3} findings.sort(key=lambda f: severity_order.get(f["severity"], 4)) # Score deductions = {"critical": 25, "high": 15, "medium": 5, "low": 2} score = max(0, 100 - sum(deductions.get(f["severity"], 0) for f in findings)) counts = { "critical": sum(1 for f in findings if f["severity"] == "critical"), "high": sum(1 for f in findings if f["severity"] == "high"), "medium": sum(1 for f in findings if f["severity"] == "medium"), "low": sum(1 for f in findings if f["severity"] == "low"), } # Stats total_keys = len(entries) documented = sum(1 for e in entries if e["has_documentation"]) max_depth = max((e["depth"] for e in entries), default=0) result = { "score": score, "total_keys": total_keys, "documented_keys": documented, "documentation_coverage": f"{(documented / total_keys * 100):.0f}%" if total_keys > 0 else "N/A", "max_depth": max_depth, "findings": findings, "finding_counts": counts, } if output_format == "json": print(json.dumps(result, indent=2)) return result # Text output print(f"\n{'=' * 60}") print(f" Values.yaml Validation Report") print(f"{'=' * 60}") print(f" Score: {score}/100") print(f" Keys: {total_keys} | Documented: {documented} ({result['documentation_coverage']})") print(f" Max Depth: {max_depth}") print() print(f" Findings: {counts['critical']} critical | {counts['high']} high | {counts['medium']} medium | {counts['low']} low") print(f"{'─' * 60}") for f in findings: icon = {"critical": "!!!", "high": "!!", "medium": "!", "low": "~"}.get(f["severity"], "?") print(f"\n {icon} {f['severity'].upper()} [{f['category']}]") print(f" {f['message']}") if f.get("line", 0) > 0: print(f" Line: {f['line']}") print(f" Fix: {f['fix']}") if not findings: print("\n No issues found. Values file looks good.") print(f"\n{'=' * 60}\n") return result def main(): parser = argparse.ArgumentParser( description="helm-chart-builder: values.yaml best-practice validator" ) parser.add_argument("valuesfile", nargs="?", help="Path to values.yaml (omit for demo)") parser.add_argument( "--output", "-o", choices=["text", "json"], default="text", help="Output format (default: text)", ) parser.add_argument( "--strict", action="store_true", help="Strict mode — elevate warnings to higher severity", ) args = parser.parse_args() if args.valuesfile: path = Path(args.valuesfile) if not path.exists(): print(f"Error: File not found: {args.valuesfile}", file=sys.stderr) sys.exit(1) content = path.read_text(encoding="utf-8") else: print("No values file provided. Running demo validation...\n") content = DEMO_VALUES generate_report(content, args.output, args.strict) if __name__ == "__main__": main()