#!/usr/bin/env python3
"""
terraform-patterns: Terraform Module Analyzer

Analyze a Terraform directory structure for module quality, resource counts,
naming conventions, and structural best practices. Reports variable/output
coverage, file organization, and actionable recommendations.

Usage:
    python scripts/tf_module_analyzer.py ./terraform
    python scripts/tf_module_analyzer.py ./terraform --output json
    python scripts/tf_module_analyzer.py ./modules/vpc
"""
import argparse
import json
import os
import re
import sys
from pathlib import Path

# --- Demo Terraform Files ---
DEMO_FILES = {
    "main.tf": """
resource "aws_instance" "web_server" {
  ami           = var.ami_id
  instance_type = var.instance_type

  tags = {
    Name = "web-server"
  }
}

resource "aws_s3_bucket" "data" {
  bucket = "my-data-bucket-12345"
}

resource "aws_security_group" "web" {
  name = "web-sg"

  ingress {
    from_port   = 80
    to_port     = 80
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  ingress {
    from_port   = 22
    to_port     = 22
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

data "aws_ami" "ubuntu" {
  most_recent = true
  owners      = ["099720109477"]
}

module "vpc" {
  source = "./modules/vpc"
  cidr   = var.vpc_cidr
}
""",
    "variables.tf": """
variable "ami_id" {
  type = string
}

variable "instance_type" {
  default = "t3.micro"
}

variable "vpc_cidr" {
  description = "CIDR block for the VPC"
  type        = string
  default     = "10.0.0.0/16"
}

variable "environment" {
  description = "Deployment environment"
  type        = string

  validation {
    condition     = contains(["dev", "staging", "prod"], var.environment)
    error_message = "Environment must be dev, staging, or prod."
  }
}
""",
    "outputs.tf": """
output "instance_id" {
  value = aws_instance.web_server.id
}

output "bucket_arn" {
  value       = aws_s3_bucket.data.arn
  description = "ARN of the data S3 bucket"
}
""",
}
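
# The demo files above intentionally include gaps (no versions.tf, variables
# missing descriptions or type constraints, no backend block) so that a demo
# run produces representative findings.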

# --- Naming convention patterns ---
# Terraform resource naming: lowercase, underscores, alphanumeric
VALID_RESOURCE_NAME = re.compile(r'^[a-z][a-z0-9_]*$')

# Expected files in a well-structured module
EXPECTED_FILES = {
    "main.tf": "Primary resources",
    "variables.tf": "Input variables",
    "outputs.tf": "Output values",
    "versions.tf": "Provider and Terraform version requirements",
}

OPTIONAL_FILES = {
    "locals.tf": "Computed local values",
    "data.tf": "Data sources",
    "backend.tf": "Remote state backend configuration",
    "providers.tf": "Provider configuration",
    "README.md": "Module documentation",
}
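
# OPTIONAL_FILES is informational only: check_file_structure flags missing
# EXPECTED_FILES entries but does not penalize absent optional files.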


def find_tf_files(directory):
    """Find all .tf files in a directory (non-recursive)."""
    tf_files = {}
    for entry in sorted(os.listdir(directory)):
        if entry.endswith(".tf"):
            filepath = os.path.join(directory, entry)
            with open(filepath, encoding="utf-8") as f:
                tf_files[entry] = f.read()
    return tf_files


def parse_resources(content):
    """Extract resource declarations from HCL content."""
    resources = []
    for match in re.finditer(
        r'^resource\s+"([^"]+)"\s+"([^"]+)"', content, re.MULTILINE
    ):
        resources.append({
            "type": match.group(1),
            "name": match.group(2),
            "provider": match.group(1).split("_")[0],
        })
    return resources
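
# For example, the line 'resource "aws_instance" "web" {' yields
#   {"type": "aws_instance", "name": "web", "provider": "aws"}
# where the provider is inferred from the prefix of the resource type.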


def parse_data_sources(content):
    """Extract data source declarations."""
    sources = []
    for match in re.finditer(
        r'^data\s+"([^"]+)"\s+"([^"]+)"', content, re.MULTILINE
    ):
        sources.append({"type": match.group(1), "name": match.group(2)})
    return sources


def parse_variables(content):
    """Extract variable declarations with metadata."""
    variables = []
    # Match variable blocks
    for match in re.finditer(
        r'^variable\s+"([^"]+)"\s*\{(.*?)\n\}',
        content,
        re.MULTILINE | re.DOTALL,
    ):
        name = match.group(1)
        body = match.group(2)
        var = {
            "name": name,
            "has_description": "description" in body,
            "has_type": bool(re.search(r'\btype\s*=', body)),
            "has_default": bool(re.search(r'\bdefault\s*=', body)),
            "has_validation": "validation" in body,
            "is_sensitive": "sensitive" in body and bool(
                re.search(r'\bsensitive\s*=\s*true', body)
            ),
        }
        variables.append(var)
    return variables
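
# Note: the block regexes here are a heuristic, not a full HCL parser. The
# pattern captures everything up to the first closing brace at column 0, so
# nested blocks (such as validation) parse correctly only while their closing
# braces stay indented.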


def parse_outputs(content):
    """Extract output declarations with metadata."""
    outputs = []
    for match in re.finditer(
        r'^output\s+"([^"]+)"\s*\{(.*?)\n\}',
        content,
        re.MULTILINE | re.DOTALL,
    ):
        name = match.group(1)
        body = match.group(2)
        out = {
            "name": name,
            "has_description": "description" in body,
            "is_sensitive": "sensitive" in body and bool(
                re.search(r'\bsensitive\s*=\s*true', body)
            ),
        }
        outputs.append(out)
    return outputs


def parse_modules(content):
    """Extract module calls."""
    modules = []
    for match in re.finditer(
        r'^module\s+"([^"]+)"\s*\{(.*?)\n\}',
        content,
        re.MULTILINE | re.DOTALL,
    ):
        name = match.group(1)
        body = match.group(2)
        source_match = re.search(r'source\s*=\s*"([^"]+)"', body)
        source = source_match.group(1) if source_match else "unknown"
        modules.append({"name": name, "source": source})
    return modules


def check_naming(resources, data_sources):
    """Check naming conventions."""
    issues = []
    for r in resources:
        if not VALID_RESOURCE_NAME.match(r["name"]):
            issues.append({
                "severity": "medium",
                "message": f"Resource '{r['type']}.{r['name']}' uses non-standard naming — use lowercase with underscores",
            })
        if r["name"].startswith(r["provider"] + "_"):
            issues.append({
                "severity": "low",
                "message": f"Resource '{r['type']}.{r['name']}' name repeats the provider prefix — redundant",
            })
    for d in data_sources:
        if not VALID_RESOURCE_NAME.match(d["name"]):
            issues.append({
                "severity": "medium",
                "message": f"Data source '{d['type']}.{d['name']}' uses non-standard naming",
            })
    return issues
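
# For example, a resource named "WebServer" fails VALID_RESOURCE_NAME (capital
# letters) and is flagged medium, while an aws_instance named "aws_web" repeats
# the "aws" provider prefix and is flagged low.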


def check_variables(variables):
    """Check variable quality."""
    issues = []
    # Names that suggest the variable holds a secret
    secret_patterns = ["password", "secret", "token", "key", "api_key", "credentials"]
    for v in variables:
        if not v["has_description"]:
            issues.append({
                "severity": "medium",
                "message": f"Variable '{v['name']}' missing description — consumers won't know what to provide",
            })
        if not v["has_type"]:
            issues.append({
                "severity": "high",
                "message": f"Variable '{v['name']}' missing type constraint — accepts any value",
            })
        # Check if the name suggests a secret
        name_lower = v["name"].lower()
        if any(p in name_lower for p in secret_patterns) and not v["is_sensitive"]:
            issues.append({
                "severity": "high",
                "message": f"Variable '{v['name']}' looks like a secret but is not marked sensitive = true",
            })
    return issues
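
# For example, a variable named "db_password" without sensitive = true matches
# the "password" pattern above and produces a high-severity finding.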


def check_outputs(outputs):
    """Check output quality."""
    issues = []
    for o in outputs:
        if not o["has_description"]:
            issues.append({
                "severity": "low",
                "message": f"Output '{o['name']}' missing description",
            })
    return issues


def check_file_structure(tf_files):
    """Check if expected files are present."""
    issues = []
    filenames = set(tf_files.keys())
    for expected, purpose in EXPECTED_FILES.items():
        if expected not in filenames:
            issues.append({
                "severity": "high" if expected == "versions.tf" else "medium",
                "message": f"Missing '{expected}' — {purpose}",
            })
    return issues


def analyze_directory(tf_files):
    """Run full analysis on a set of .tf files."""
    all_content = "\n".join(tf_files.values())
    resources = parse_resources(all_content)
    data_sources = parse_data_sources(all_content)
    variables = parse_variables(all_content)
    outputs = parse_outputs(all_content)
    modules = parse_modules(all_content)

    # Collect findings
    findings = []
    findings.extend(check_file_structure(tf_files))
    findings.extend(check_naming(resources, data_sources))
    findings.extend(check_variables(variables))
    findings.extend(check_outputs(outputs))

    # Check for backend configuration
    has_backend = any(
        re.search(r'\bbackend\s+"', content)
        for content in tf_files.values()
    )
    if not has_backend:
        findings.append({
            "severity": "high",
            "message": "No remote backend configured — state is stored locally",
        })

    # Check for a terraform required_version constraint
    has_tf_version = any(
        re.search(r'required_version\s*=', content)
        for content in tf_files.values()
    )
    if not has_tf_version:
        findings.append({
            "severity": "medium",
            "message": "No required_version constraint — any Terraform version can be used",
        })

    # Provider blocks belong in the root module, not in child modules
    for filename, content in tf_files.items():
        if filename not in ("providers.tf", "versions.tf", "backend.tf"):
            if re.search(r'^provider\s+"', content, re.MULTILINE):
                findings.append({
                    "severity": "medium",
                    "message": f"Provider configuration found in '{filename}' — keep providers in root module only",
                })

    # Sort findings by severity
    severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    findings.sort(key=lambda f: severity_order.get(f["severity"], 4))

    # Unique providers
    providers = sorted(set(r["provider"] for r in resources))

    return {
        "files": sorted(tf_files.keys()),
        "file_count": len(tf_files),
        "resources": resources,
        "resource_count": len(resources),
        "data_sources": data_sources,
        "data_source_count": len(data_sources),
        "variables": variables,
        "variable_count": len(variables),
        "outputs": outputs,
        "output_count": len(outputs),
        "modules": modules,
        "module_count": len(modules),
        "providers": providers,
        "findings": findings,
    }
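
# Note: analyze_directory joins all file contents before parsing, so resource,
# variable, and output findings are reported per module rather than attributed
# to the specific file they appear in.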


def generate_report(analysis, output_format="text"):
    """Generate analysis report."""
    findings = analysis["findings"]

    # Score: start at 100 and deduct points per finding by severity
    deductions = {"critical": 25, "high": 15, "medium": 5, "low": 2}
    score = max(0, 100 - sum(deductions.get(f["severity"], 0) for f in findings))

    counts = {
        "critical": sum(1 for f in findings if f["severity"] == "critical"),
        "high": sum(1 for f in findings if f["severity"] == "high"),
        "medium": sum(1 for f in findings if f["severity"] == "medium"),
        "low": sum(1 for f in findings if f["severity"] == "low"),
    }

    result = {
        "score": score,
        "files": analysis["files"],
        "resource_count": analysis["resource_count"],
        "data_source_count": analysis["data_source_count"],
        "variable_count": analysis["variable_count"],
        "output_count": analysis["output_count"],
        "module_count": analysis["module_count"],
        "providers": analysis["providers"],
        "findings": findings,
        "finding_counts": counts,
    }

    if output_format == "json":
        print(json.dumps(result, indent=2))
        return result

    # Text output
    print(f"\n{'=' * 60}")
    print(" Terraform Module Analysis Report")
    print(f"{'=' * 60}")
    print(f" Score: {score}/100")
    print(f" Files: {', '.join(analysis['files'])}")
    print(f" Providers: {', '.join(analysis['providers']) if analysis['providers'] else 'none detected'}")
    print()
    print(f" Resources: {analysis['resource_count']} | Data Sources: {analysis['data_source_count']}")
    print(f" Variables: {analysis['variable_count']} | Outputs: {analysis['output_count']} | Modules: {analysis['module_count']}")
    print()
    print(f" Findings: {counts['critical']} critical | {counts['high']} high | {counts['medium']} medium | {counts['low']} low")
    print(f"{'-' * 60}")

    for f in findings:
        icon = {"critical": "!!!", "high": "!!", "medium": "!", "low": "~"}.get(f["severity"], "?")
        print(f"\n {icon} {f['severity'].upper()}")
        print(f"     {f['message']}")

    if not findings:
        print("\n No issues found. Module structure looks good.")

    print(f"\n{'=' * 60}\n")
    return result
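
# Scoring example: with the deductions above, a report with two high findings
# and one medium finding scores 100 - (15 + 15 + 5) = 65; max(0, ...) keeps the
# score from going negative.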


def main():
    parser = argparse.ArgumentParser(
        description="terraform-patterns: Terraform module analyzer"
    )
    parser.add_argument(
        "directory", nargs="?",
        help="Path to Terraform directory (omit for demo)",
    )
    parser.add_argument(
        "--output", "-o",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)",
    )
    args = parser.parse_args()

    if args.directory:
        dirpath = Path(args.directory)
        if not dirpath.is_dir():
            print(f"Error: Not a directory: {args.directory}", file=sys.stderr)
            sys.exit(1)
        tf_files = find_tf_files(str(dirpath))
        if not tf_files:
            print(f"Error: No .tf files found in {args.directory}", file=sys.stderr)
            sys.exit(1)
    else:
        print("No directory provided. Running demo analysis...\n")
        tf_files = DEMO_FILES

    analysis = analyze_directory(tf_files)
    generate_report(analysis, args.output)


if __name__ == "__main__":
    main()