feat(skills): add terraform-patterns agent skill
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
409
engineering/terraform-patterns/references/module-patterns.md
Normal file
409
engineering/terraform-patterns/references/module-patterns.md
Normal file
@@ -0,0 +1,409 @@
|
||||
# Terraform Module Design Patterns Reference
|
||||
|
||||
## Pattern 1: Flat Module (Single Directory)
|
||||
|
||||
Best for: Small projects, < 20 resources, single team ownership.
|
||||
|
||||
```
|
||||
project/
|
||||
├── main.tf
|
||||
├── variables.tf
|
||||
├── outputs.tf
|
||||
├── versions.tf
|
||||
├── locals.tf
|
||||
├── backend.tf
|
||||
└── terraform.tfvars
|
||||
```
|
||||
|
||||
### Example: Simple VPC + EC2
|
||||
|
||||
```hcl
|
||||
# versions.tf
|
||||
terraform {
|
||||
required_version = ">= 1.5.0"
|
||||
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 5.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# locals.tf
|
||||
locals {
|
||||
name_prefix = "${var.project}-${var.environment}"
|
||||
common_tags = {
|
||||
Project = var.project
|
||||
Environment = var.environment
|
||||
ManagedBy = "terraform"
|
||||
}
|
||||
}
|
||||
|
||||
# main.tf
|
||||
resource "aws_vpc" "main" {
|
||||
cidr_block = var.vpc_cidr
|
||||
enable_dns_hostnames = true
|
||||
enable_dns_support = true
|
||||
|
||||
tags = merge(local.common_tags, {
|
||||
Name = "${local.name_prefix}-vpc"
|
||||
})
|
||||
}
|
||||
|
||||
resource "aws_subnet" "public" {
|
||||
count = length(var.public_subnet_cidrs)
|
||||
vpc_id = aws_vpc.main.id
|
||||
cidr_block = var.public_subnet_cidrs[count.index]
|
||||
availability_zone = var.availability_zones[count.index]
|
||||
|
||||
tags = merge(local.common_tags, {
|
||||
Name = "${local.name_prefix}-public-${count.index + 1}"
|
||||
Tier = "public"
|
||||
})
|
||||
}
|
||||
|
||||
# variables.tf
|
||||
variable "project" {
|
||||
description = "Project name used for resource naming"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "environment" {
|
||||
description = "Deployment environment"
|
||||
type = string
|
||||
validation {
|
||||
condition = contains(["dev", "staging", "prod"], var.environment)
|
||||
error_message = "Environment must be dev, staging, or prod."
|
||||
}
|
||||
}
|
||||
|
||||
variable "vpc_cidr" {
|
||||
description = "CIDR block for the VPC"
|
||||
type = string
|
||||
default = "10.0.0.0/16"
|
||||
validation {
|
||||
condition = can(cidrhost(var.vpc_cidr, 0))
|
||||
error_message = "Must be a valid CIDR block."
|
||||
}
|
||||
}
|
||||
|
||||
variable "public_subnet_cidrs" {
|
||||
description = "CIDR blocks for public subnets"
|
||||
type = list(string)
|
||||
default = ["10.0.1.0/24", "10.0.2.0/24"]
|
||||
}
|
||||
|
||||
variable "availability_zones" {
|
||||
description = "AZs for subnet placement"
|
||||
type = list(string)
|
||||
default = ["us-east-1a", "us-east-1b"]
|
||||
}
|
||||
|
||||
# outputs.tf
|
||||
output "vpc_id" {
|
||||
description = "ID of the created VPC"
|
||||
value = aws_vpc.main.id
|
||||
}
|
||||
|
||||
output "public_subnet_ids" {
|
||||
description = "IDs of public subnets"
|
||||
value = aws_subnet.public[*].id
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Pattern 2: Nested Modules (Composition)
|
||||
|
||||
Best for: Multiple environments, shared patterns, team collaboration.
|
||||
|
||||
```
|
||||
infrastructure/
|
||||
├── environments/
|
||||
│ ├── dev/
|
||||
│ │ ├── main.tf
|
||||
│ │ ├── backend.tf
|
||||
│ │ └── terraform.tfvars
|
||||
│ ├── staging/
|
||||
│ │ └── ...
|
||||
│ └── prod/
|
||||
│ └── ...
|
||||
└── modules/
|
||||
├── networking/
|
||||
│ ├── main.tf
|
||||
│ ├── variables.tf
|
||||
│ └── outputs.tf
|
||||
├── compute/
|
||||
│ └── ...
|
||||
└── database/
|
||||
└── ...
|
||||
```
|
||||
|
||||
### Root Module (environments/dev/main.tf)
|
||||
|
||||
```hcl
|
||||
module "networking" {
|
||||
source = "../../modules/networking"
|
||||
|
||||
project = var.project
|
||||
environment = "dev"
|
||||
vpc_cidr = "10.0.0.0/16"
|
||||
public_subnet_cidrs = ["10.0.1.0/24", "10.0.2.0/24"]
|
||||
private_subnet_cidrs = ["10.0.10.0/24", "10.0.11.0/24"]
|
||||
}
|
||||
|
||||
module "compute" {
|
||||
source = "../../modules/compute"
|
||||
|
||||
project = var.project
|
||||
environment = "dev"
|
||||
vpc_id = module.networking.vpc_id
|
||||
subnet_ids = module.networking.private_subnet_ids
|
||||
instance_type = "t3.micro"
|
||||
instance_count = 1
|
||||
}
|
||||
|
||||
module "database" {
|
||||
source = "../../modules/database"
|
||||
|
||||
project = var.project
|
||||
environment = "dev"
|
||||
vpc_id = module.networking.vpc_id
|
||||
subnet_ids = module.networking.private_subnet_ids
|
||||
instance_class = "db.t3.micro"
|
||||
allocated_storage = 20
|
||||
db_password = var.db_password
|
||||
}
|
||||
```
|
||||
|
||||
### Key Rules
|
||||
- Child modules never call other child modules
|
||||
- Pass values explicitly — no hidden data source lookups in children
|
||||
- Provider configuration only in root module
|
||||
- Each module has its own variables.tf, outputs.tf, main.tf
|
||||
|
||||
---
|
||||
|
||||
## Pattern 3: Registry Module Pattern
|
||||
|
||||
Best for: Reusable modules shared across teams or organizations.
|
||||
|
||||
```
|
||||
terraform-aws-vpc/
|
||||
├── main.tf
|
||||
├── variables.tf
|
||||
├── outputs.tf
|
||||
├── versions.tf
|
||||
├── README.md
|
||||
├── examples/
|
||||
│ ├── simple/
|
||||
│ │ └── main.tf
|
||||
│ └── complete/
|
||||
│ └── main.tf
|
||||
└── modules/
|
||||
├── subnet/
|
||||
│ ├── main.tf
|
||||
│ ├── variables.tf
|
||||
│ └── outputs.tf
|
||||
└── nat-gateway/
|
||||
└── ...
|
||||
```
|
||||
|
||||
### Publishing Conventions
|
||||
|
||||
```hcl
|
||||
# Consumer usage
|
||||
module "vpc" {
|
||||
source = "terraform-aws-modules/vpc/aws"
|
||||
version = "~> 5.0"
|
||||
|
||||
name = "my-vpc"
|
||||
cidr = "10.0.0.0/16"
|
||||
|
||||
azs = ["us-east-1a", "us-east-1b"]
|
||||
private_subnets = ["10.0.1.0/24", "10.0.2.0/24"]
|
||||
public_subnets = ["10.0.101.0/24", "10.0.102.0/24"]
|
||||
|
||||
enable_nat_gateway = true
|
||||
single_nat_gateway = true
|
||||
}
|
||||
```
|
||||
|
||||
### Registry Module Requirements
|
||||
- Repository named `terraform-<PROVIDER>-<NAME>`
|
||||
- README.md with usage examples
|
||||
- Semantic versioning via git tags
|
||||
- examples/ directory with working configurations
|
||||
- No provider configuration in the module itself
|
||||
|
||||
---
|
||||
|
||||
## Pattern 4: Mono-Repo with Workspaces
|
||||
|
||||
Best for: Teams that prefer single-repo with workspace-based isolation.
|
||||
|
||||
```hcl
|
||||
# backend.tf
|
||||
terraform {
|
||||
backend "s3" {
|
||||
bucket = "my-terraform-state"
|
||||
key = "project/terraform.tfstate"
|
||||
region = "us-east-1"
|
||||
dynamodb_table = "terraform-locks"
|
||||
encrypt = true
|
||||
}
|
||||
}
|
||||
|
||||
# main.tf
|
||||
locals {
|
||||
env_config = {
|
||||
dev = {
|
||||
instance_type = "t3.micro"
|
||||
instance_count = 1
|
||||
db_class = "db.t3.micro"
|
||||
}
|
||||
staging = {
|
||||
instance_type = "t3.small"
|
||||
instance_count = 2
|
||||
db_class = "db.t3.small"
|
||||
}
|
||||
prod = {
|
||||
instance_type = "t3.large"
|
||||
instance_count = 3
|
||||
db_class = "db.r5.large"
|
||||
}
|
||||
}
|
||||
config = local.env_config[terraform.workspace]
|
||||
}
|
||||
```
|
||||
|
||||
### Usage
|
||||
```bash
|
||||
terraform workspace new dev
|
||||
terraform workspace new staging
|
||||
terraform workspace new prod
|
||||
|
||||
terraform workspace select dev
|
||||
terraform apply
|
||||
|
||||
terraform workspace select prod
|
||||
terraform apply
|
||||
```
|
||||
|
||||
### Workspace Caveats
|
||||
- All environments share the same backend — less isolation than separate directories
|
||||
- A mistake in the code affects all environments
|
||||
- Can't have different provider versions per workspace
|
||||
- Recommended only for simple setups; prefer separate directories for production
|
||||
|
||||
---
|
||||
|
||||
## Pattern 5: for_each vs count
|
||||
|
||||
### Use `count` for identical resources
|
||||
```hcl
|
||||
resource "aws_subnet" "public" {
|
||||
count = 3
|
||||
vpc_id = aws_vpc.main.id
|
||||
cidr_block = cidrsubnet(var.vpc_cidr, 8, count.index)
|
||||
availability_zone = data.aws_availability_zones.available.names[count.index]
|
||||
}
|
||||
```
|
||||
|
||||
### Use `for_each` for distinct resources
|
||||
```hcl
|
||||
variable "buckets" {
|
||||
type = map(object({
|
||||
versioning = bool
|
||||
lifecycle_days = number
|
||||
}))
|
||||
default = {
|
||||
logs = { versioning = false, lifecycle_days = 30 }
|
||||
backups = { versioning = true, lifecycle_days = 90 }
|
||||
assets = { versioning = true, lifecycle_days = 0 }
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_s3_bucket" "this" {
|
||||
for_each = var.buckets
|
||||
bucket = "${var.project}-${each.key}"
|
||||
}
|
||||
|
||||
resource "aws_s3_bucket_versioning" "this" {
|
||||
for_each = { for k, v in var.buckets : k => v if v.versioning }
|
||||
bucket = aws_s3_bucket.this[each.key].id
|
||||
|
||||
versioning_configuration {
|
||||
status = "Enabled"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Why `for_each` > `count`
|
||||
- `count` uses index — removing item 0 shifts all others, causing destroy/recreate
|
||||
- `for_each` uses keys — removing a key only affects that resource
|
||||
- Use `count` only for identical resources where order doesn't matter
|
||||
|
||||
---
|
||||
|
||||
## Variable Design Patterns
|
||||
|
||||
### Object Variables for Related Settings
|
||||
```hcl
|
||||
variable "database" {
|
||||
description = "Database configuration"
|
||||
type = object({
|
||||
engine = string
|
||||
instance_class = string
|
||||
storage_gb = number
|
||||
multi_az = bool
|
||||
backup_days = number
|
||||
})
|
||||
default = {
|
||||
engine = "postgres"
|
||||
instance_class = "db.t3.micro"
|
||||
storage_gb = 20
|
||||
multi_az = false
|
||||
backup_days = 7
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Validation Blocks
|
||||
```hcl
|
||||
variable "instance_type" {
|
||||
description = "EC2 instance type"
|
||||
type = string
|
||||
|
||||
validation {
|
||||
condition = can(regex("^t[23]\\.", var.instance_type))
|
||||
error_message = "Only t2 or t3 instance types are allowed."
|
||||
}
|
||||
}
|
||||
|
||||
variable "cidr_block" {
|
||||
description = "VPC CIDR block"
|
||||
type = string
|
||||
|
||||
validation {
|
||||
condition = can(cidrhost(var.cidr_block, 0))
|
||||
error_message = "Must be a valid IPv4 CIDR block."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Anti-Patterns to Avoid
|
||||
|
||||
| Anti-Pattern | Problem | Solution |
|
||||
|-------------|---------|----------|
|
||||
| God module (100+ resources) | Impossible to reason about, slow plan/apply | Split into focused child modules |
|
||||
| Circular module dependencies | Terraform can't resolve dependency graph | Flatten or restructure module boundaries |
|
||||
| Data sources in child modules | Hidden dependencies, hard to test | Pass values as variables from root module |
|
||||
| Provider config in child modules | Can't reuse module across accounts/regions | Configure providers in root only |
|
||||
| Hardcoded values | Not reusable across environments | Use variables with defaults and validation |
|
||||
| No outputs | Consumer modules can't reference resources | Output IDs, ARNs, endpoints |
|
||||
| No variable descriptions | Users don't know what to provide | Every variable gets a description |
|
||||
| `terraform.tfvars` committed | Secrets leak to version control | Use `.gitignore`, env vars, or Vault |
|
||||
472
engineering/terraform-patterns/references/state-management.md
Normal file
472
engineering/terraform-patterns/references/state-management.md
Normal file
@@ -0,0 +1,472 @@
|
||||
# Terraform State Management Reference
|
||||
|
||||
## Backend Configuration Patterns
|
||||
|
||||
### AWS: S3 + DynamoDB (Recommended)
|
||||
|
||||
```hcl
|
||||
terraform {
|
||||
backend "s3" {
|
||||
bucket = "mycompany-terraform-state"
|
||||
key = "project/env/terraform.tfstate"
|
||||
region = "us-east-1"
|
||||
encrypt = true
|
||||
dynamodb_table = "terraform-locks"
|
||||
# Optional: KMS key for encryption
|
||||
# kms_key_id = "arn:aws:kms:us-east-1:ACCOUNT:key/KEY_ID"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Prerequisites:**
|
||||
```hcl
|
||||
# Bootstrap these resources manually or with a separate Terraform config
|
||||
resource "aws_s3_bucket" "state" {
|
||||
bucket = "mycompany-terraform-state"
|
||||
|
||||
lifecycle {
|
||||
prevent_destroy = true
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_s3_bucket_versioning" "state" {
|
||||
bucket = aws_s3_bucket.state.id
|
||||
versioning_configuration {
|
||||
status = "Enabled"
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_s3_bucket_server_side_encryption_configuration" "state" {
|
||||
bucket = aws_s3_bucket.state.id
|
||||
rule {
|
||||
apply_server_side_encryption_by_default {
|
||||
sse_algorithm = "aws:kms"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_s3_bucket_public_access_block" "state" {
|
||||
bucket = aws_s3_bucket.state.id
|
||||
block_public_acls = true
|
||||
block_public_policy = true
|
||||
ignore_public_acls = true
|
||||
restrict_public_buckets = true
|
||||
}
|
||||
|
||||
resource "aws_dynamodb_table" "locks" {
|
||||
name = "terraform-locks"
|
||||
billing_mode = "PAY_PER_REQUEST"
|
||||
hash_key = "LockID"
|
||||
|
||||
attribute {
|
||||
name = "LockID"
|
||||
type = "S"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### GCP: Google Cloud Storage
|
||||
|
||||
```hcl
|
||||
terraform {
|
||||
backend "gcs" {
|
||||
bucket = "mycompany-terraform-state"
|
||||
prefix = "project/env"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Key features:**
|
||||
- Native locking (no separate lock table needed)
|
||||
- Object versioning for state history
|
||||
- IAM-based access control
|
||||
- Encryption at rest by default
|
||||
|
||||
---
|
||||
|
||||
### Azure: Blob Storage
|
||||
|
||||
```hcl
|
||||
terraform {
|
||||
backend "azurerm" {
|
||||
resource_group_name = "terraform-state-rg"
|
||||
storage_account_name = "mycompanytfstate"
|
||||
container_name = "tfstate"
|
||||
key = "project/env/terraform.tfstate"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Key features:**
|
||||
- Native blob locking
|
||||
- Encryption at rest with Microsoft-managed or customer-managed keys
|
||||
- RBAC-based access control
|
||||
|
||||
---
|
||||
|
||||
### Terraform Cloud / Enterprise
|
||||
|
||||
```hcl
|
||||
terraform {
|
||||
cloud {
|
||||
organization = "mycompany"
|
||||
workspaces {
|
||||
name = "project-dev"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Key features:**
|
||||
- Built-in state locking, encryption, and versioning
|
||||
- RBAC and team-based access control
|
||||
- Remote execution (plan/apply run in TF Cloud)
|
||||
- Sentinel policy-as-code integration
|
||||
- Cost estimation on plans
|
||||
|
||||
---
|
||||
|
||||
## Environment Isolation Strategies
|
||||
|
||||
### Strategy 1: Separate Directories (Recommended)
|
||||
|
||||
```
|
||||
infrastructure/
|
||||
├── environments/
|
||||
│ ├── dev/
|
||||
│ │ ├── main.tf
|
||||
│ │ ├── backend.tf # key = "project/dev/terraform.tfstate"
|
||||
│ │ └── terraform.tfvars
|
||||
│ ├── staging/
|
||||
│ │ ├── main.tf
|
||||
│ │ ├── backend.tf # key = "project/staging/terraform.tfstate"
|
||||
│ │ └── terraform.tfvars
|
||||
│ └── prod/
|
||||
│ ├── main.tf
|
||||
│ ├── backend.tf # key = "project/prod/terraform.tfstate"
|
||||
│ └── terraform.tfvars
|
||||
└── modules/
|
||||
└── ...
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Complete isolation — a mistake in dev can't affect prod
|
||||
- Different provider versions per environment
|
||||
- Different module versions per environment (pin prod, iterate in dev)
|
||||
- Clear audit trail — who changed what, where
|
||||
|
||||
**Cons:**
|
||||
- Some duplication across environment directories
|
||||
- Must update modules in each environment separately
|
||||
|
||||
### Strategy 2: Terraform Workspaces
|
||||
|
||||
```hcl
|
||||
# Single directory, multiple workspaces
|
||||
terraform {
|
||||
backend "s3" {
|
||||
bucket = "mycompany-terraform-state"
|
||||
key = "project/terraform.tfstate"
|
||||
region = "us-east-1"
|
||||
dynamodb_table = "terraform-locks"
|
||||
encrypt = true
|
||||
}
|
||||
}
|
||||
|
||||
# State files stored at:
|
||||
# env:/dev/project/terraform.tfstate
|
||||
# env:/staging/project/terraform.tfstate
|
||||
# env:/prod/project/terraform.tfstate
|
||||
```
|
||||
|
||||
```bash
|
||||
terraform workspace new dev
|
||||
terraform workspace select dev
|
||||
terraform plan -var-file="env/dev.tfvars"
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Less duplication — single set of .tf files
|
||||
- Quick to switch between environments
|
||||
- Built-in workspace support in backends
|
||||
|
||||
**Cons:**
|
||||
- Shared code means a bug affects all environments simultaneously
|
||||
- Can't have different provider versions per workspace
|
||||
- Easy to accidentally apply to wrong workspace
|
||||
- Less isolation than separate directories
|
||||
|
||||
### Strategy 3: Terragrunt (DRY Configuration)
|
||||
|
||||
```
|
||||
infrastructure/
|
||||
├── terragrunt.hcl # Root — defines remote state pattern
|
||||
├── modules/
|
||||
│ └── vpc/
|
||||
│ ├── main.tf
|
||||
│ ├── variables.tf
|
||||
│ └── outputs.tf
|
||||
├── dev/
|
||||
│ ├── terragrunt.hcl # env = "dev"
|
||||
│ └── vpc/
|
||||
│ └── terragrunt.hcl # inputs for dev VPC
|
||||
├── staging/
|
||||
│ └── ...
|
||||
└── prod/
|
||||
└── ...
|
||||
```
|
||||
|
||||
```hcl
|
||||
# Root terragrunt.hcl
|
||||
remote_state {
|
||||
backend = "s3"
|
||||
generate = {
|
||||
path = "backend.tf"
|
||||
if_exists = "overwrite_terragrunt"
|
||||
}
|
||||
config = {
|
||||
bucket = "mycompany-terraform-state"
|
||||
key = "${path_relative_to_include()}/terraform.tfstate"
|
||||
region = "us-east-1"
|
||||
encrypt = true
|
||||
dynamodb_table = "terraform-locks"
|
||||
}
|
||||
}
|
||||
|
||||
# dev/vpc/terragrunt.hcl
|
||||
terraform {
|
||||
source = "../../modules/vpc"
|
||||
}
|
||||
|
||||
inputs = {
|
||||
environment = "dev"
|
||||
vpc_cidr = "10.0.0.0/16"
|
||||
}
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Maximum DRY — define module once, parameterize per environment
|
||||
- Automatic state key generation from directory structure
|
||||
- Dependency management between modules (`dependency` blocks)
|
||||
- `run-all` for applying multiple modules at once
|
||||
|
||||
**Cons:**
|
||||
- Additional tool dependency (Terragrunt)
|
||||
- Learning curve
|
||||
- Debugging can be harder (generated files)
|
||||
|
||||
---
|
||||
|
||||
## State Migration Patterns
|
||||
|
||||
### Local to Remote (S3)
|
||||
|
||||
```bash
|
||||
# 1. Add backend configuration to backend.tf
|
||||
# 2. Run init with migration flag
|
||||
terraform init -migrate-state
|
||||
|
||||
# Terraform will prompt:
|
||||
# "Do you want to copy existing state to the new backend?"
|
||||
# Answer: yes
|
||||
```
|
||||
|
||||
### Between Remote Backends
|
||||
|
||||
```bash
|
||||
# 1. Pull current state
|
||||
terraform state pull > terraform.tfstate.backup
|
||||
|
||||
# 2. Update backend configuration in backend.tf
|
||||
|
||||
# 3. Reinitialize with migration
|
||||
terraform init -migrate-state
|
||||
|
||||
# 4. Verify
|
||||
terraform plan # Should show no changes
|
||||
```
|
||||
|
||||
### State Import (Existing Resources)
|
||||
|
||||
```bash
|
||||
# Import a single resource
|
||||
terraform import aws_instance.web i-1234567890abcdef0
|
||||
|
||||
# Import with for_each key
|
||||
terraform import 'aws_subnet.public["us-east-1a"]' subnet-0123456789abcdef0
|
||||
|
||||
# Declarative import (Terraform 1.5+): add this HCL block to a .tf file (one block per resource), then run `terraform plan` / `terraform apply`
|
||||
import {
|
||||
to = aws_instance.web
|
||||
id = "i-1234567890abcdef0"
|
||||
}
|
||||
```
|
||||
|
||||
### State Move (Refactoring)
|
||||
|
||||
```bash
|
||||
# Rename a resource (avoids destroy/recreate)
|
||||
terraform state mv aws_instance.old_name aws_instance.new_name
|
||||
|
||||
# Move into a module
|
||||
terraform state mv aws_instance.web module.compute.aws_instance.web
|
||||
|
||||
# Move between state files
|
||||
terraform state mv -state-out=other.tfstate aws_instance.web aws_instance.web
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## State Locking
|
||||
|
||||
### Why Locking Matters
|
||||
Without locking, two concurrent `terraform apply` runs can corrupt state. The second apply reads stale state and may create duplicate resources or lose track of existing ones.
|
||||
|
||||
### Lock Behavior by Backend
|
||||
|
||||
| Backend | Lock Mechanism | Auto-Lock | Force Unlock |
|
||||
|---------|---------------|-----------|--------------|
|
||||
| S3 | DynamoDB table | Yes (if table configured) | `terraform force-unlock LOCK_ID` |
|
||||
| GCS | Native blob locking | Yes | `terraform force-unlock LOCK_ID` |
|
||||
| Azure Blob | Native blob lease | Yes | `terraform force-unlock LOCK_ID` |
|
||||
| TF Cloud | Built-in | Always | Via UI or API |
|
||||
| Consul | Key-value lock | Yes | `terraform force-unlock LOCK_ID` |
|
||||
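| Local | OS-level file lock on the state file (lock metadata in `.terraform.tfstate.lock.info`) | Yes (single machine) | Released when the owning process exits; never delete `.terraform.lock.hcl` (that is the provider dependency lock file, not a state lock) |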
|
||||
|
||||
### Force Unlock (Emergency Only)
|
||||
|
||||
```bash
|
||||
# Only use when you're certain no other process is running
|
||||
terraform force-unlock LOCK_ID
|
||||
|
||||
# The LOCK_ID is shown in the error message when lock fails:
|
||||
# Error: Error locking state: Error acquiring the state lock
|
||||
# Lock Info:
|
||||
# ID: 12345678-abcd-1234-abcd-1234567890ab
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## State Security Best Practices
|
||||
|
||||
### 1. Encrypt at Rest
|
||||
```hcl
|
||||
# S3 — server-side encryption
|
||||
backend "s3" {
|
||||
encrypt = true
|
||||
kms_key_id = "arn:aws:kms:us-east-1:ACCOUNT:key/KEY_ID"
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Restrict Access
|
||||
```json
|
||||
{
|
||||
"Version": "2012-10-17",
|
||||
"Statement": [
|
||||
{
|
||||
"Effect": "Allow",
|
||||
"Action": [
|
||||
"s3:GetObject",
|
||||
"s3:PutObject",
|
||||
"s3:DeleteObject"
|
||||
],
|
||||
"Resource": "arn:aws:s3:::mycompany-terraform-state/project/*",
|
||||
"Condition": {
|
||||
"StringEquals": {
|
||||
"aws:PrincipalTag/Team": "platform"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"Effect": "Allow",
|
||||
"Action": [
|
||||
"dynamodb:GetItem",
|
||||
"dynamodb:PutItem",
|
||||
"dynamodb:DeleteItem"
|
||||
],
|
||||
"Resource": "arn:aws:dynamodb:us-east-1:ACCOUNT:table/terraform-locks"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Enable Versioning (State History)
|
||||
```hcl
|
||||
resource "aws_s3_bucket_versioning" "state" {
|
||||
bucket = aws_s3_bucket.state.id
|
||||
versioning_configuration {
|
||||
status = "Enabled"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Versioning lets you recover from state corruption by restoring a previous version.
|
||||
|
||||
### 4. Audit Access
|
||||
- Enable S3 access logging or CloudTrail data events
|
||||
- Monitor for unexpected state reads (potential secret extraction)
|
||||
- State files contain sensitive values — treat them like credentials
|
||||
|
||||
### 5. Sensitive Values in State
|
||||
Terraform stores all resource attributes in state, including passwords, private keys, and tokens. This is unavoidable. Mitigate by:
|
||||
- Encrypting state at rest (KMS)
|
||||
- Restricting state file access (IAM)
|
||||
- Using `sensitive = true` on variables and outputs (prevents display, not storage)
|
||||
- Rotating secrets regularly (state contains the value at apply time)
|
||||
|
||||
---
|
||||
|
||||
## Drift Detection and Reconciliation
|
||||
|
||||
### Detect Drift
|
||||
```bash
|
||||
# Plan with detailed exit code
|
||||
terraform plan -detailed-exitcode
|
||||
# Exit 0 = no changes
|
||||
# Exit 1 = error
|
||||
# Exit 2 = changes detected (drift)
|
||||
```
|
||||
|
||||
### Common Drift Sources
|
||||
| Source | Example | Prevention |
|
||||
|--------|---------|------------|
|
||||
| Console changes | Someone edits SG rules in AWS Console | SCPs to restrict console access, or accept and reconcile |
|
||||
| Auto-scaling | ASG launches instances not in state | Don't manage individual instances; manage ASG |
|
||||
| External tools | Ansible modifies EC2 tags | Agree on ownership boundaries |
|
||||
| Dependent resource changes | AMI deregistered | Use data sources to detect, lifecycle ignore_changes |
|
||||
|
||||
### Reconciliation Options
|
||||
```hcl
|
||||
# Option 1: Apply to restore desired state
|
||||
terraform apply
|
||||
|
||||
# Option 2: Refresh state to match reality
|
||||
terraform apply -refresh-only
|
||||
|
||||
# Option 3: Ignore specific attribute drift
|
||||
resource "aws_instance" "web" {
|
||||
lifecycle {
|
||||
ignore_changes = [tags["LastModifiedBy"], ami]
|
||||
}
|
||||
}
|
||||
|
||||
# Option 4: Import the manually-created resource
|
||||
terraform import aws_security_group_rule.new sg-12345_ingress_tcp_443_443_0.0.0.0/0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting Checklist
|
||||
|
||||
| Symptom | Likely Cause | Fix |
|
||||
|---------|-------------|-----|
|
||||
| "Error acquiring state lock" | Concurrent run or crashed process | Wait for other run to finish, or `force-unlock` |
|
||||
| "Backend configuration changed" | Backend config modified | Run `terraform init -reconfigure` or `-migrate-state` |
|
||||
| "Resource already exists" | Resource created outside Terraform | `terraform import` the resource |
|
||||
| "No matching resource found" | Resource deleted outside Terraform | `terraform state rm` the resource |
|
||||
| State file growing very large | Too many resources in one state | Split into separate root configurations (one state file per component or environment); modules called from the same root still share one state |
|
||||
| Slow plan/apply | Large state file, many resources | Split state, use `-target` for urgent changes |
|
||||
| "Provider produced inconsistent result" | Provider bug or API race condition | Retry, or pin provider version |
|
||||
| Workspace confusion | Applied to wrong workspace | Always check `terraform workspace show` before apply |
|
||||
Reference in New Issue
Block a user