Merge pull request #364 from alirezarezvani/feat/autoresearch-plugin

Feat/autoresearch plugin
This commit is contained in:
Alireza Rezvani
2026-03-15 23:39:30 +01:00
committed by GitHub
33 changed files with 4805 additions and 29 deletions

View File

@@ -3,7 +3,7 @@
"name": "claude-code-skills",
"description": "Production-ready skill packages for AI agents - Marketing, Engineering, Product, C-Level, PM, and RA/QM",
"repository": "https://github.com/alirezarezvani/claude-skills",
"total_skills": 160,
"total_skills": 162,
"skills": [
{
"name": "contract-and-proposal-writer",
@@ -431,6 +431,12 @@
"category": "engineering-advanced",
"description": "Git Worktree Manager"
},
{
"name": "helm-chart-builder",
"source": "../../engineering/helm-chart-builder",
"category": "engineering-advanced",
"description": "Helm chart development agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw \u2014 chart scaffolding, values design, template patterns, dependency management, security hardening, and chart testing. Use when: user wants to create or improve Helm charts, design values.yaml files, implement template helpers, audit chart security (RBAC, network policies, pod security), manage subcharts, or run helm lint/test."
},
{
"name": "interview-system-designer",
"source": "../../engineering/interview-system-designer",
@@ -509,6 +515,12 @@
"category": "engineering-advanced",
"description": "Scan codebases for technical debt, score severity, track trends, and generate prioritized remediation plans. Use when users mention tech debt, code quality, refactoring priority, debt scoring, cleanup sprints, or code health assessment. Also use for legacy code modernization planning and maintenance cost estimation."
},
{
"name": "terraform-patterns",
"source": "../../engineering/terraform-patterns",
"category": "engineering-advanced",
"description": "Terraform infrastructure-as-code agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw. Covers module design patterns, state management strategies, provider configuration, security hardening, policy-as-code with Sentinel/OPA, and CI/CD plan/apply workflows. Use when: user wants to design Terraform modules, manage state backends, review Terraform security, implement multi-region deployments, or follow IaC best practices."
},
{
"name": "financial-analyst",
"source": "../../finance/financial-analyst",
@@ -983,7 +995,7 @@
"description": "Software engineering and technical skills"
},
"engineering-advanced": {
"count": 27,
"count": 29,
"source": "../../engineering",
"description": "Advanced engineering skills - agents, RAG, MCP, CI/CD, databases, observability"
},

View File

@@ -0,0 +1 @@
../../engineering/helm-chart-builder

View File

@@ -0,0 +1 @@
../../engineering/terraform-patterns

View File

@@ -1,6 +1,6 @@
{
"name": "business-growth-skills",
"description": "4 business & growth skills: customer success manager, sales engineer, revenue operations, and contract & proposal writer",
"description": "4 business & growth skills: customer success manager, sales engineer, revenue operations, and contract & proposal writer. Agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw.",
"version": "2.1.2",
"author": {
"name": "Alireza Rezvani",
@@ -10,4 +10,4 @@
"repository": "https://github.com/alirezarezvani/claude-skills",
"license": "MIT",
"skills": "./"
}
}

View File

@@ -1,6 +1,6 @@
---
name: "business-growth-skills"
description: "4 production-ready business and growth skills: customer success manager with health scoring and churn prediction, sales engineer with RFP analysis, revenue operations with pipeline and GTM metrics, and contract & proposal writer. Python tools included (all stdlib-only). Works with Claude Code, Codex CLI, and OpenClaw."
description: "4 business growth agent skills and plugins for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw. Customer success (health scoring, churn), sales engineer (RFP), revenue operations (pipeline, GTM), contract & proposal writer. Python tools (stdlib-only)."
version: 1.1.0
author: Alireza Rezvani
license: MIT

View File

@@ -1,6 +1,6 @@
{
"name": "c-level-skills",
"description": "28 C-level advisory skills: complete virtual board of directors with CEO, CTO, COO, CPO, CMO, CFO, CRO, CISO, CHRO advisors, executive mentor, founder coach, Chief of Staff router, board meetings, decision logger, board deck builder, scenario war room, competitive intel, org health diagnostic, M&A playbook, international expansion, culture architect, change management, strategic alignment, and more",
"description": "28 C-level advisory skills: complete virtual board of directors with CEO, CTO, COO, CPO, CMO, CFO, CRO, CISO, CHRO advisors, executive mentor, founder coach, Chief of Staff router, board meetings, decision logger, board deck builder, scenario war room, competitive intel, org health diagnostic, M&A playbook, international expansion, culture architect, change management, strategic alignment, and more. Agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw.",
"version": "2.1.2",
"author": {
"name": "Alireza Rezvani",
@@ -10,4 +10,4 @@
"repository": "https://github.com/alirezarezvani/claude-skills",
"license": "MIT",
"skills": "./"
}
}

View File

@@ -1,6 +1,6 @@
---
name: "c-level-advisor"
description: "Provides strategic business advice by channelling the perspectives of 10 executive roles — CEO, CTO, COO, CPO, CMO, CFO, CRO, CISO, CHRO, and Executive Mentor — across decisions, trade-offs, and org challenges. Runs multi-role board meetings, routes questions to the right executive voice, and delivers structured recommendations (Bottom Line → What → Why → How to Act → Your Decision). Use when a founder or executive needs business strategy advice, leadership perspective, executive decision support, board-level input, fundraising guidance, product-market fit review, hiring or culture frameworks, risk assessment, or competitive analysis."
description: "10 C-level advisory agent skills and plugins for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw. CEO, CTO, COO, CPO, CMO, CFO, CRO, CISO, CHRO, Executive Mentor. Multi-role board meetings, strategy routing, structured recommendations. For founders needing executive-level decision support."
license: MIT
metadata:
version: 2.0.0

View File

@@ -1,6 +1,6 @@
{
"name": "engineering-skills",
"description": "24 production-ready engineering skills: architecture, frontend, backend, fullstack, QA, DevOps, security, AI/ML, data engineering, Playwright (9 sub-skills), self-improving agent, Stripe integration, TDD guide, Google Workspace CLI, and more",
"description": "24 production-ready engineering skills: architecture, frontend, backend, fullstack, QA, DevOps, security, AI/ML, data engineering, Playwright (9 sub-skills), self-improving agent, Stripe integration, TDD guide, Google Workspace CLI, and more. Agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw.",
"version": "2.1.2",
"author": {
"name": "Alireza Rezvani",
@@ -10,4 +10,4 @@
"repository": "https://github.com/alirezarezvani/claude-skills",
"license": "MIT",
"skills": "./"
}
}

View File

@@ -1,6 +1,6 @@
---
name: "engineering-skills"
description: "23 production-ready engineering skills covering architecture, frontend, backend, fullstack, QA, DevOps, security, AI/ML, data engineering, computer vision, and specialized tools like Playwright Pro, Stripe integration, AWS, and MS365. 30+ Python automation tools (all stdlib-only). Works with Claude Code, Codex CLI, and OpenClaw."
description: "23 engineering agent skills and plugins for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw, and 6 more tools. Architecture, frontend, backend, QA, DevOps, security, AI/ML, data engineering, Playwright, Stripe, AWS, MS365. 30+ Python tools (stdlib-only)."
version: 1.1.0
author: Alireza Rezvani
license: MIT

View File

@@ -1,6 +1,6 @@
{
"name": "engineering-advanced-skills",
"description": "25 advanced engineering skills: agent designer, RAG architect, database designer, migration architect, observability designer, dependency auditor, release manager, API reviewer, CI/CD pipeline builder, MCP server builder, skill security auditor, performance profiler, and more",
"description": "25 advanced engineering skills: agent designer, RAG architect, database designer, migration architect, observability designer, dependency auditor, release manager, API reviewer, CI/CD pipeline builder, MCP server builder, skill security auditor, performance profiler, and more. Agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw.",
"version": "2.1.2",
"author": {
"name": "Alireza Rezvani",
@@ -10,4 +10,4 @@
"repository": "https://github.com/alirezarezvani/claude-skills",
"license": "MIT",
"skills": "./"
}
}

View File

@@ -1,6 +1,6 @@
---
name: "engineering-advanced-skills"
description: "25 advanced POWERFUL-tier engineering skills covering agent design, RAG architecture, MCP servers, CI/CD pipelines, database design, observability, security auditing, release management, and platform operations. Works with Claude Code, Codex CLI, and OpenClaw."
description: "25 advanced engineering agent skills and plugins for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw. Agent design, RAG, MCP servers, CI/CD, database design, observability, security auditing, release management, platform ops."
version: 1.1.0
author: Alireza Rezvani
license: MIT

View File

@@ -0,0 +1,13 @@
{
"name": "helm-chart-builder",
"description": "Helm chart development agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw — chart scaffolding, values design, template patterns, dependency management, security hardening, and chart testing.",
"version": "1.0.0",
"author": {
"name": "Alireza Rezvani",
"url": "https://alirezarezvani.com"
},
"homepage": "https://github.com/alirezarezvani/claude-skills/tree/main/engineering/helm-chart-builder",
"repository": "https://github.com/alirezarezvani/claude-skills",
"license": "MIT",
"skills": "./"
}

View File

@@ -0,0 +1,449 @@
---
name: "helm-chart-builder"
description: "Helm chart development agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw — chart scaffolding, values design, template patterns, dependency management, security hardening, and chart testing. Use when: user wants to create or improve Helm charts, design values.yaml files, implement template helpers, audit chart security (RBAC, network policies, pod security), manage subcharts, or run helm lint/test."
license: MIT
metadata:
version: 1.0.0
author: Alireza Rezvani
category: engineering
updated: 2026-03-15
---
# Helm Chart Builder
> Production-grade Helm charts. Sensible defaults. Secure by design. No cargo-culting.
Opinionated Helm workflow that turns ad-hoc Kubernetes manifests into maintainable, testable, reusable charts. Covers chart structure, values design, template patterns, dependency management, and security hardening.
Not a Helm tutorial — a set of concrete decisions about how to build charts that operators trust and developers don't fight.
---
## Slash Commands
| Command | What it does |
|---------|-------------|
| `/helm:create` | Scaffold a production-ready Helm chart with best-practice structure |
| `/helm:review` | Analyze an existing chart for issues — missing labels, hardcoded values, template anti-patterns |
| `/helm:security` | Audit chart for security issues — RBAC, network policies, pod security, secrets handling |
---
## When This Skill Activates
Recognize these patterns from the user:
- "Create a Helm chart for this service"
- "Review my Helm chart"
- "Is this chart secure?"
- "Design a values.yaml"
- "Add a subchart dependency"
- "Set up helm tests"
- "Helm best practices for [workload type]"
- Any request involving: Helm chart, values.yaml, Chart.yaml, templates, helpers, _helpers.tpl, subcharts, helm lint, helm test
If the user has a Helm chart or wants to package Kubernetes resources → this skill applies.
---
## Workflow
### `/helm:create` — Chart Scaffolding
1. **Identify workload type**
- Web service (Deployment + Service + Ingress)
- Worker (Deployment, no Service)
- CronJob (CronJob + ServiceAccount)
- Stateful service (StatefulSet + PVC + Headless Service)
- Library chart (no templates, only helpers)
2. **Scaffold chart structure**
```
mychart/
├── Chart.yaml # Chart metadata and dependencies
├── values.yaml # Default configuration
├── values.schema.json # Optional: JSON Schema for values validation
├── .helmignore # Files to exclude from packaging
├── templates/
│ ├── _helpers.tpl # Named templates and helper functions
│ ├── deployment.yaml # Workload resource
│ ├── service.yaml # Service exposure
│ ├── ingress.yaml # Ingress (if applicable)
│ ├── serviceaccount.yaml # ServiceAccount
│ ├── hpa.yaml # HorizontalPodAutoscaler
│ ├── pdb.yaml # PodDisruptionBudget
│ ├── networkpolicy.yaml # NetworkPolicy
│ ├── configmap.yaml # ConfigMap (if needed)
│ ├── secret.yaml # Secret (if needed)
│ ├── NOTES.txt # Post-install usage instructions
│ └── tests/
│ └── test-connection.yaml
└── charts/ # Subcharts (dependencies)
```
3. **Apply Chart.yaml best practices**
```
METADATA
├── apiVersion: v2 (Helm 3 only — never v1)
├── name: matches directory name exactly
├── version: semver (chart version, not app version)
├── appVersion: application version string
├── description: one-line summary of what the chart deploys
└── type: application (or library for shared helpers)
DEPENDENCIES
├── Pin dependency versions with ~X.Y.Z (patch-level float)
├── Use condition field to make subcharts optional
├── Use alias for multiple instances of same subchart
└── Run helm dependency update after changes
```
4. **Generate values.yaml with documentation**
- Every value has an inline comment explaining purpose and type
- Sensible defaults that work for development
- Override-friendly structure (flat where possible, nested only when logical)
- No hardcoded cluster-specific values (image registry, domain, storage class)
5. **Validate**
```bash
python3 scripts/chart_analyzer.py mychart/
helm lint mychart/
helm template mychart/ --debug
```
### `/helm:review` — Chart Analysis
1. **Check chart structure**
| Check | Severity | Fix |
|-------|----------|-----|
| Missing _helpers.tpl | High | Create helpers for common labels and selectors |
| No NOTES.txt | Medium | Add post-install instructions |
| No .helmignore | Low | Create one to exclude .git, CI files, tests |
| Missing Chart.yaml fields | Medium | Add description, appVersion, maintainers |
| Hardcoded values in templates | High | Extract to values.yaml with defaults |
2. **Check template quality**
| Check | Severity | Fix |
|-------|----------|-----|
| Missing standard labels | High | Use `app.kubernetes.io/*` labels via _helpers.tpl |
| No resource requests/limits | Critical | Add resources section with defaults in values.yaml |
| Hardcoded image tag | High | Use `{{ .Values.image.repository }}:{{ .Values.image.tag }}` |
| No imagePullPolicy | Medium | Default to `IfNotPresent`, overridable |
| Missing liveness/readiness probes | High | Add probes with configurable paths and ports |
| No pod anti-affinity | Medium | Add preferred anti-affinity for HA |
| Duplicate template code | Medium | Extract into named templates in _helpers.tpl |
3. **Check values.yaml quality**
```bash
python3 scripts/values_validator.py mychart/values.yaml
```
4. **Generate review report**
```
HELM CHART REVIEW — [chart name]
Date: [timestamp]
CRITICAL: [count]
HIGH: [count]
MEDIUM: [count]
LOW: [count]
[Detailed findings with fix recommendations]
```
### `/helm:security` — Security Audit
1. **Pod security audit**
| Check | Severity | Fix |
|-------|----------|-----|
| No securityContext | Critical | Add runAsNonRoot, readOnlyRootFilesystem |
| Running as root | Critical | Set `runAsNonRoot: true`, `runAsUser: 1000` |
| Writable root filesystem | High | Set `readOnlyRootFilesystem: true` + emptyDir for tmp |
| All capabilities retained | High | Drop ALL, add only specific needed caps |
| Privileged container | Critical | Set `privileged: false`, use specific capabilities |
| No seccomp profile | Medium | Set `seccompProfile.type: RuntimeDefault` |
| allowPrivilegeEscalation true | High | Set `allowPrivilegeEscalation: false` |
2. **RBAC audit**
| Check | Severity | Fix |
|-------|----------|-----|
| No ServiceAccount | Medium | Create dedicated SA, don't use default |
| automountServiceAccountToken true | Medium | Set to false unless pod needs K8s API access |
| ClusterRole instead of Role | Medium | Use namespace-scoped Role unless cluster-wide needed |
| Wildcard permissions | Critical | Use specific resource names and verbs |
| No RBAC at all | Low | Acceptable if pod doesn't need K8s API access |
3. **Network and secrets audit**
| Check | Severity | Fix |
|-------|----------|-----|
| No NetworkPolicy | Medium | Add default-deny ingress + explicit allow rules |
| Secrets in values.yaml | Critical | Use external secrets operator or sealed-secrets |
| No PodDisruptionBudget | Medium | Add PDB with minAvailable for HA workloads |
| hostNetwork: true | High | Remove unless absolutely required (e.g., CNI plugin) |
| hostPID or hostIPC | Critical | Never use in application charts |
4. **Generate security report**
```
SECURITY AUDIT — [chart name]
Date: [timestamp]
CRITICAL: [count]
HIGH: [count]
MEDIUM: [count]
LOW: [count]
[Detailed findings with remediation steps]
```
---
## Tooling
### `scripts/chart_analyzer.py`
CLI utility for static analysis of Helm chart directories.
**Features:**
- Chart structure validation (required files, directory layout)
- Template anti-pattern detection (hardcoded values, missing labels, no resource limits)
- Chart.yaml metadata checks
- Standard labels verification (app.kubernetes.io/*)
- Security baseline checks
- JSON and text output
**Usage:**
```bash
# Analyze a chart directory
python3 scripts/chart_analyzer.py mychart/
# JSON output
python3 scripts/chart_analyzer.py mychart/ --output json
# Security-focused analysis
python3 scripts/chart_analyzer.py mychart/ --security
```
### `scripts/values_validator.py`
CLI utility for validating values.yaml against best practices.
**Features:**
- Documentation coverage (inline comments)
- Type consistency checks
- Hardcoded secrets detection
- Default value quality analysis
- Structure depth analysis
- Naming convention validation
- JSON and text output
**Usage:**
```bash
# Validate values.yaml
python3 scripts/values_validator.py values.yaml
# JSON output
python3 scripts/values_validator.py values.yaml --output json
# Strict mode (fail on warnings)
python3 scripts/values_validator.py values.yaml --strict
```
---
## Template Patterns
### Pattern 1: Standard Labels (_helpers.tpl)
```yaml
{{/*
Common labels for all resources.
*/}}
{{- define "mychart.labels" -}}
helm.sh/chart: {{ include "mychart.chart" . }}
app.kubernetes.io/name: {{ include "mychart.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels (subset of common labels — must be immutable).
*/}}
{{- define "mychart.selectorLabels" -}}
app.kubernetes.io/name: {{ include "mychart.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
```
### Pattern 2: Conditional Resources
```yaml
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ include "mychart.fullname" . }}
labels:
{{- include "mychart.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
pathType: {{ .pathType }}
backend:
service:
name: {{ include "mychart.fullname" $ }}
port:
number: {{ $.Values.service.port }}
{{- end }}
{{- end }}
{{- end }}
```
### Pattern 3: Security-Hardened Pod Spec
```yaml
spec:
serviceAccountName: {{ include "mychart.serviceAccountName" . }}
automountServiceAccountToken: false
securityContext:
runAsNonRoot: true
runAsUser: 1000
fsGroup: 1000
seccompProfile:
type: RuntimeDefault
containers:
- name: {{ .Chart.Name }}
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
resources:
{{- toYaml .Values.resources | nindent 8 }}
volumeMounts:
- name: tmp
mountPath: /tmp
volumes:
- name: tmp
emptyDir: {}
```
---
## Values Design Principles
```
STRUCTURE
├── Flat over nested (image.tag > container.spec.image.tag)
├── Group by resource (service.*, ingress.*, resources.*)
├── Use enabled: true/false for optional resources
├── Document every key with inline YAML comments
└── Provide sensible development defaults
NAMING
├── camelCase for keys (replicaCount, not replica_count)
├── Boolean keys: use adjectives (enabled, required) not verbs
├── Nested keys: max 3 levels deep
└── Match upstream conventions (image.repository, image.tag, image.pullPolicy)
ANTI-PATTERNS
├── Hardcoded cluster URLs or domains
├── Secrets as default values
├── Empty strings where null is correct
├── Deeply nested structures (>3 levels)
├── Undocumented values
└── values.yaml that doesn't work without overrides
```
---
## Dependency Management
```
SUBCHARTS
├── Use Chart.yaml dependencies (not requirements.yaml — Helm 3)
├── Pin versions: version: ~X.Y.Z (tilde range allows patch-level float)
├── Use condition: to make subcharts optional (e.g. condition: postgresql.enabled)
├── Use alias: for multiple instances of same chart
├── Override subchart values under subchart name key in values.yaml
└── Run helm dependency update before packaging
LIBRARY CHARTS
├── type: library in Chart.yaml — no templates directory
├── Export named templates only — no rendered resources
├── Use for shared labels, annotations, security contexts
└── Version independently from application charts
```
---
## Proactive Triggers
Flag these without being asked:
- **No _helpers.tpl** → Create one. Every chart needs standard labels and fullname helpers.
- **Hardcoded image tag in template** → Extract to values.yaml. Tags must be overridable.
- **No resource requests/limits** → Add them. Pods without limits can starve the node.
- **Running as root** → Add securityContext. No exceptions for production charts.
- **No NOTES.txt** → Create one. Users need post-install instructions.
- **Secrets in values.yaml defaults** → Remove them. Use placeholders with comments explaining how to provide secrets.
- **No liveness/readiness probes** → Add them. Kubernetes needs to know if the pod is healthy.
- **Missing app.kubernetes.io labels** → Add via _helpers.tpl. Required for proper resource tracking.
---
## Installation
### One-liner (any tool)
```bash
git clone https://github.com/alirezarezvani/claude-skills.git
cp -r claude-skills/engineering/helm-chart-builder ~/.claude/skills/
```
### Multi-tool install
```bash
./scripts/convert.sh --skill helm-chart-builder --tool codex|gemini|cursor|windsurf|openclaw
```
### OpenClaw
```bash
clawhub install cs-helm-chart-builder
```
---
## Related Skills
- **senior-devops** — Broader DevOps scope (CI/CD, IaC, monitoring). Complementary — use helm-chart-builder for chart-specific work, senior-devops for pipeline and infrastructure.
- **docker-development** — Container building. Complementary — docker-development builds the images, helm-chart-builder deploys them to Kubernetes.
- **ci-cd-pipeline-builder** — Pipeline construction. Complementary — helm-chart-builder defines the deployment artifact, ci-cd-pipeline-builder automates its delivery.
- **senior-security** — Application security. Complementary — helm-chart-builder covers Kubernetes-level security (RBAC, pod security), senior-security covers application-level threats.

View File

@@ -0,0 +1,435 @@
# Helm Chart Patterns Reference
## Standard Chart Structure
### Minimal Production Chart
```
mychart/
├── Chart.yaml
├── values.yaml
├── .helmignore
└── templates/
├── _helpers.tpl
├── deployment.yaml
├── service.yaml
├── serviceaccount.yaml
├── NOTES.txt
└── tests/
└── test-connection.yaml
```
### Full Production Chart
```
mychart/
├── Chart.yaml
├── values.yaml
├── values.schema.json # JSON Schema validation
├── .helmignore
├── templates/
│ ├── _helpers.tpl
│ ├── deployment.yaml
│ ├── service.yaml
│ ├── ingress.yaml
│ ├── serviceaccount.yaml
│ ├── hpa.yaml
│ ├── pdb.yaml
│ ├── networkpolicy.yaml
│ ├── configmap.yaml
│ ├── secret.yaml
│ ├── NOTES.txt
│ └── tests/
│ └── test-connection.yaml
└── charts/ # Managed by helm dependency update
```
---
## _helpers.tpl — Standard Helpers
Every chart needs these. Copy and adapt.
```yaml
{{/*
Expand the name of the chart.
*/}}
{{- define "mychart.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
Truncated at 63 chars because some Kubernetes name fields are limited.
*/}}
{{- define "mychart.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "mychart.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels.
*/}}
{{- define "mychart.labels" -}}
helm.sh/chart: {{ include "mychart.chart" . }}
{{ include "mychart.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels (immutable — used in matchLabels).
*/}}
{{- define "mychart.selectorLabels" -}}
app.kubernetes.io/name: {{ include "mychart.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use.
*/}}
{{- define "mychart.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "mychart.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
```
### Why These Helpers Matter
- **Name truncation** — Kubernetes resource names are limited to 63 characters. Always truncate with `trunc 63`.
- **Selector labels separate from common labels** — selectors are immutable after creation. Adding `app.kubernetes.io/version` to selectors breaks upgrades.
- **nameOverride vs fullnameOverride** — `nameOverride` replaces the chart name portion, `fullnameOverride` replaces everything.
---
## Deployment Patterns
### Standard Web Service
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "mychart.fullname" . }}
labels:
{{- include "mychart.labels" . | nindent 4 }}
spec:
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "mychart.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "mychart.labels" . | nindent 8 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
serviceAccountName: {{ include "mychart.serviceAccountName" . }}
automountServiceAccountToken: false
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
containerPort: {{ .Values.service.port }}
protocol: TCP
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
readinessProbe:
{{- toYaml .Values.readinessProbe | nindent 12 }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- with .Values.volumeMounts }}
volumeMounts:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.volumes }}
volumes:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
```
### Worker (No Service)
```yaml
# Same as above but without ports, probes, or Service resource
# Use for background workers, queue consumers, cron jobs
spec:
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
command: {{ toYaml .Values.command | nindent 8 }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
```
---
## Conditional Resource Patterns
### Optional Ingress
```yaml
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ include "mychart.fullname" . }}
labels:
{{- include "mychart.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
ingressClassName: {{ .Values.ingress.className }}
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
pathType: {{ .pathType }}
backend:
service:
name: {{ include "mychart.fullname" $ }}
port:
number: {{ $.Values.service.port }}
{{- end }}
{{- end }}
{{- end }}
```
### Optional HPA
```yaml
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: {{ include "mychart.fullname" . }}
labels:
{{- include "mychart.labels" . | nindent 4 }}
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ include "mychart.fullname" . }}
minReplicas: {{ .Values.autoscaling.minReplicas }}
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
metrics:
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
{{- end }}
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
{{- end }}
{{- end }}
```
---
## PodDisruptionBudget
```yaml
{{- if .Values.pdb.enabled }}
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: {{ include "mychart.fullname" . }}
labels:
{{- include "mychart.labels" . | nindent 4 }}
spec:
{{- if .Values.pdb.minAvailable }}
minAvailable: {{ .Values.pdb.minAvailable }}
{{- end }}
{{- if .Values.pdb.maxUnavailable }}
maxUnavailable: {{ .Values.pdb.maxUnavailable }}
{{- end }}
selector:
matchLabels:
{{- include "mychart.selectorLabels" . | nindent 6 }}
{{- end }}
```
---
## Test Connection Template
```yaml
apiVersion: v1
kind: Pod
metadata:
name: "{{ include "mychart.fullname" . }}-test-connection"
labels:
{{- include "mychart.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": test
spec:
containers:
- name: wget
image: busybox
command: ['wget']
args: ['{{ include "mychart.fullname" . }}:{{ .Values.service.port }}']
restartPolicy: Never
```
---
## NOTES.txt Pattern
```
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "mychart.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
kubectl get --namespace {{ .Release.Namespace }} svc {{ include "mychart.fullname" . }} -w
{{- else if contains "ClusterIP" .Values.service.type }}
kubectl --namespace {{ .Release.Namespace }} port-forward svc/{{ include "mychart.fullname" . }} {{ .Values.service.port }}:{{ .Values.service.port }}
echo "Visit http://127.0.0.1:{{ .Values.service.port }}"
{{- end }}
```
---
## Dependency Management
### Chart.yaml with Dependencies
```yaml
apiVersion: v2
name: myapp
version: 1.0.0
appVersion: "2.5.0"
dependencies:
- name: postgresql
version: ~15.5.0
repository: https://charts.bitnami.com/bitnami
condition: postgresql.enabled
- name: redis
version: ~19.0.0
repository: https://charts.bitnami.com/bitnami
condition: redis.enabled
- name: common
version: ~2.0.0
repository: https://charts.bitnami.com/bitnami
tags:
- bitnami-common
```
### Overriding Subchart Values
```yaml
# values.yaml — subchart values go under the dependency name
postgresql:
enabled: true
auth:
database: myapp
username: myapp
primary:
resources:
requests:
cpu: 250m
memory: 256Mi
redis:
enabled: false
```
### Commands
```bash
# Download dependencies
helm dependency update mychart/
# List dependencies
helm dependency list mychart/
# Build charts/ from the existing Chart.lock without re-resolving versions (update rewrites the lock)
helm dependency build mychart/
```
---
## Troubleshooting Checklist
| Symptom | Likely Cause | Fix |
|---------|-------------|-----|
| Template renders empty | Missing `{{- if }}` or wrong value path | `helm template --debug` to see rendered output |
| Upgrade fails on selector change | Selector labels changed between versions | Never change selectorLabels — they're immutable |
| Values not applying | Wrong nesting in values override | Check indentation and key paths |
| Subchart not rendering | Missing `condition:` or dependency not updated | Run `helm dependency update` |
| Name too long | Kubernetes 63-char limit | Apply `trunc 63` then `trimSuffix "-"` in _helpers.tpl |
| RBAC permission denied | ServiceAccount missing or wrong Role | Check SA exists and RoleBinding is correct |

View File

@@ -0,0 +1,462 @@
# Values.yaml Design Reference
## Design Principles
### 1. Every Value Is Documented
```yaml
# Bad — what does this mean?
replicaCount: 1
maxSurge: 25%
# Good — clear purpose, type, and constraints
# -- Number of pod replicas. Ignored when autoscaling.enabled is true.
replicaCount: 1
# -- Maximum number of pods above desired count during rolling update (int or percentage).
maxSurge: 25%
```
### 2. Sensible Defaults That Work
A user should be able to `helm install mychart .` with zero overrides and get a working deployment.
```yaml
# Bad — broken without override
image:
repository: "" # Fails: no image
tag: "" # Fails: no tag
# Good — works out of the box
image:
repository: nginx # Default image for development
tag: "" # Defaults to .Chart.AppVersion in template
pullPolicy: IfNotPresent
```
### 3. Flat Over Nested
```yaml
# Bad — 5 levels deep, painful to override
container:
spec:
security:
context:
runAsNonRoot: true
# Good — 2 levels, easy to override with --set
securityContext:
runAsNonRoot: true
```
**Rule of thumb:** Max 3 levels of nesting. If you need more, redesign.
### 4. Group by Resource
```yaml
# Good — grouped by Kubernetes resource
service:
type: ClusterIP
port: 80
ingress:
enabled: false
className: ""
hosts: []
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 10
```
---
## Standard Values Structure
### Recommended Layout Order
```yaml
# -- Number of pod replicas
replicaCount: 1
# -- Override chart name
nameOverride: ""
# -- Override fully qualified app name
fullnameOverride: ""
image:
# -- Container image repository
repository: myapp
# -- Image pull policy
pullPolicy: IfNotPresent
# -- Image tag (defaults to .Chart.AppVersion)
tag: ""
# -- Image pull secrets for private registries
imagePullSecrets: []
serviceAccount:
# -- Create a ServiceAccount
create: true
# -- Annotations for the ServiceAccount
annotations: {}
# -- ServiceAccount name (generated from fullname if not set)
name: ""
# -- Automount the service account token
automount: false
# -- Pod annotations
podAnnotations: {}
# -- Additional pod labels
podLabels: {}
# -- Pod security context
podSecurityContext:
runAsNonRoot: true
runAsUser: 1000
fsGroup: 1000
seccompProfile:
type: RuntimeDefault
# -- Container security context
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
service:
# -- Service type
type: ClusterIP
# -- Service port
port: 80
ingress:
# -- Enable ingress
enabled: false
# -- Ingress class name
className: ""
# -- Ingress annotations
annotations: {}
# -- Ingress hosts
hosts:
- host: chart-example.local
paths:
- path: /
pathType: ImplementationSpecific
# -- Ingress TLS configuration
tls: []
# -- Container resource requests and limits
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 256Mi
# -- Liveness probe configuration
livenessProbe:
httpGet:
path: /healthz
port: http
initialDelaySeconds: 15
periodSeconds: 20
# -- Readiness probe configuration
readinessProbe:
httpGet:
path: /readyz
port: http
initialDelaySeconds: 5
periodSeconds: 10
autoscaling:
# -- Enable horizontal pod autoscaler
enabled: false
# -- Minimum replicas
minReplicas: 1
# -- Maximum replicas
maxReplicas: 10
# -- Target CPU utilization percentage
targetCPUUtilizationPercentage: 80
# -- Target memory utilization percentage (optional)
# targetMemoryUtilizationPercentage: 80
pdb:
# -- Enable PodDisruptionBudget
enabled: false
# -- Minimum available pods
minAvailable: 1
# -- Maximum unavailable pods (alternative to minAvailable)
# maxUnavailable: 1
# -- Node selector constraints
nodeSelector: {}
# -- Tolerations for pod scheduling
tolerations: []
# -- Affinity rules for pod scheduling
affinity: {}
# -- Additional volumes
volumes: []
# -- Additional volume mounts
volumeMounts: []
```
---
## Anti-Patterns
### 1. Secrets in Default Values
```yaml
# BAD — secret visible in chart package, git history, Helm release
database:
password: "mysecretpassword"
apiKey: "sk-abc123"
# GOOD — empty defaults with documentation
database:
# -- Database password (required). Provide via --set or external secret.
password: ""
# -- API key. Use external-secrets or sealed-secrets in production.
apiKey: ""
```
### 2. Cluster-Specific Defaults
```yaml
# BAD — won't work on any other cluster
ingress:
host: app.my-company.internal
storageClass: gp3
registry: 123456789.dkr.ecr.us-east-1.amazonaws.com
# GOOD — generic defaults
ingress:
host: chart-example.local
storageClass: "" # Uses cluster default
image:
repository: myapp # Override for private registry
```
### 3. Boolean Naming
```yaml
# BAD — unclear, verb-based
createServiceAccount: true
doAutoScale: false
skipTLS: true
# GOOD — adjective-based, consistent
serviceAccount:
create: true # "Is it created?" reads naturally
autoscaling:
enabled: false # "Is it enabled?" reads naturally
tls:
insecureSkipVerify: false # Matches Go/K8s convention
```
### 4. Undocumented Values
```yaml
# BAD — what are these? What types? What are valid options?
foo: bar
maxRetries: 3
mode: advanced
workers: 4
# GOOD — purpose, type, and constraints are clear
# -- Operation mode. Options: "simple", "advanced", "debug"
mode: advanced
# -- Number of background worker threads (1-16)
workers: 4
# -- Maximum retry attempts for failed API calls
maxRetries: 3
```
### 5. Empty String vs Null
```yaml
# BAD — ambiguous: is empty string intentional?
annotations: ""
nodeSelector: ""
# GOOD — null/empty map means "not set"
annotations: {}
nodeSelector: {}
# Or simply omit optional values
```
---
## Override Patterns
### Hierarchy (lowest to highest priority)
1. `values.yaml` in chart
2. Parent chart's `values.yaml` (for subcharts)
3. `-f custom-values.yaml` (left to right, last wins)
4. `--set key=value` (highest priority)
### Common Override Scenarios
```bash
# Production override file
helm install myapp . -f values-production.yaml
# Quick override with --set
helm install myapp . --set replicaCount=3 --set image.tag=v2.1.0
# Multiple value files (last wins)
helm install myapp . -f values-base.yaml -f values-production.yaml -f values-secrets.yaml
```
### values-production.yaml Pattern
```yaml
# Production overrides only — don't repeat defaults
replicaCount: 3
image:
tag: "v2.1.0"
pullPolicy: IfNotPresent
resources:
requests:
cpu: 500m
memory: 512Mi
limits:
cpu: "2"
memory: 1Gi
autoscaling:
enabled: true
minReplicas: 3
maxReplicas: 20
ingress:
enabled: true
className: nginx
hosts:
- host: app.example.com
paths:
- path: /
pathType: Prefix
tls:
- secretName: app-tls
hosts:
- app.example.com
```
---
## Type Safety with values.schema.json
### Basic Schema
```json
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"required": ["replicaCount", "image"],
"properties": {
"replicaCount": {
"type": "integer",
"minimum": 1,
"description": "Number of pod replicas"
},
"image": {
"type": "object",
"required": ["repository"],
"properties": {
"repository": {
"type": "string",
"minLength": 1,
"description": "Container image repository"
},
"tag": {
"type": "string",
"description": "Image tag"
},
"pullPolicy": {
"type": "string",
"enum": ["Always", "IfNotPresent", "Never"],
"description": "Image pull policy"
}
}
},
"service": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": ["ClusterIP", "NodePort", "LoadBalancer"],
"description": "Kubernetes service type"
},
"port": {
"type": "integer",
"minimum": 1,
"maximum": 65535,
"description": "Service port number"
}
}
}
}
}
```
### Why Use Schema
- **Fails fast** — `helm install` rejects invalid values before rendering templates
- **Documents types** — self-documenting valid options (enums, ranges)
- **IDE support** — editors can autocomplete and validate values files
- **CI safety** — catches typos in value overrides early
---
## Testing Values
### helm lint
```bash
# Basic lint
helm lint mychart/
# Lint with override values
helm lint mychart/ -f values-production.yaml
# Lint with --set
helm lint mychart/ --set replicaCount=0 # Should fail schema
```
### helm template
```bash
# Render templates locally
helm template myrelease mychart/
# Render with overrides to verify
helm template myrelease mychart/ -f values-production.yaml
# Debug mode (shows computed values)
helm template myrelease mychart/ --debug
# Render specific template
helm template myrelease mychart/ -s templates/deployment.yaml
```
### Checklist for New Values
| Check | Question |
|-------|----------|
| Documented? | Does the key have an inline comment? |
| Default works? | Can you helm install without overriding? |
| Type clear? | Is it obvious if this is string, int, bool, list, map? |
| Overridable? | Can it be set with `--set`? (avoid deeply nested) |
| No secrets? | Are default values free of passwords/tokens? |
| camelCase? | Does it follow Helm naming convention? |
| Flat enough? | Is nesting 3 levels or less? |

View File

@@ -0,0 +1,542 @@
#!/usr/bin/env python3
"""
helm-chart-builder: Chart Analyzer
Static analysis of Helm chart directories for structural issues, template
anti-patterns, missing labels, hardcoded values, and security baseline checks.
Usage:
python scripts/chart_analyzer.py mychart/
python scripts/chart_analyzer.py mychart/ --output json
python scripts/chart_analyzer.py mychart/ --security
"""
import argparse
import json
import re
import sys
from pathlib import Path
# --- Analysis Rules ---
REQUIRED_FILES = [
{"path": "Chart.yaml", "severity": "critical", "message": "Missing Chart.yaml — not a valid Helm chart"},
{"path": "values.yaml", "severity": "high", "message": "Missing values.yaml — chart has no configurable defaults"},
{"path": "templates/_helpers.tpl", "severity": "high", "message": "Missing _helpers.tpl — no shared label/name helpers"},
{"path": "templates/NOTES.txt", "severity": "medium", "message": "Missing NOTES.txt — no post-install instructions for users"},
{"path": ".helmignore", "severity": "low", "message": "Missing .helmignore — CI files, .git, tests may be packaged"},
]
CHART_YAML_CHECKS = [
{"field": "apiVersion", "severity": "critical", "message": "Missing apiVersion in Chart.yaml"},
{"field": "name", "severity": "critical", "message": "Missing name in Chart.yaml"},
{"field": "version", "severity": "critical", "message": "Missing version in Chart.yaml"},
{"field": "description", "severity": "medium", "message": "Missing description in Chart.yaml"},
{"field": "appVersion", "severity": "medium", "message": "Missing appVersion in Chart.yaml — operators won't know what app version is deployed"},
{"field": "type", "severity": "low", "message": "Missing type in Chart.yaml — defaults to 'application'"},
]
TEMPLATE_ANTI_PATTERNS = [
{
"id": "TP001",
"severity": "high",
"pattern": r'image:\s*["\']?[a-z][a-z0-9./-]+:[a-z0-9][a-z0-9._-]*["\']?\s*$',
"message": "Hardcoded image tag in template — must use .Values.image.repository and .Values.image.tag",
"fix": 'Use: image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"',
},
{
"id": "TP002",
"severity": "high",
"pattern": r'replicas:\s*\d+\s*$',
"message": "Hardcoded replica count — must be configurable via values",
"fix": "Use: replicas: {{ .Values.replicaCount }}",
},
{
"id": "TP003",
"severity": "medium",
"pattern": r'port:\s*\d+\s*$',
"message": "Hardcoded port number — should be configurable via values",
"fix": "Use: port: {{ .Values.service.port }}",
},
{
"id": "TP004",
"severity": "high",
"pattern": r'(?:name|namespace):\s*[a-z][a-z0-9-]+\s*$',
"message": "Hardcoded name/namespace — should use template helpers",
"fix": 'Use: name: {{ include "mychart.fullname" . }}',
},
{
"id": "TP005",
"severity": "medium",
"pattern": r'nodePort:\s*\d+',
"message": "Hardcoded nodePort — should be configurable or avoided",
"fix": "Use: nodePort: {{ .Values.service.nodePort }} with conditional",
},
]
SECURITY_CHECKS = [
{
"id": "SC001",
"severity": "critical",
"check": "no_security_context",
"message": "No securityContext found in any template — pods run as root with full capabilities",
"fix": "Add pod and container securityContext with runAsNonRoot, readOnlyRootFilesystem, drop ALL capabilities",
},
{
"id": "SC002",
"severity": "critical",
"check": "privileged_container",
"message": "Privileged container detected — full host access",
"fix": "Remove privileged: true. Use specific capabilities instead",
},
{
"id": "SC003",
"severity": "high",
"check": "no_run_as_non_root",
"message": "No runAsNonRoot: true — container may run as root",
"fix": "Add runAsNonRoot: true to pod securityContext",
},
{
"id": "SC004",
"severity": "high",
"check": "no_readonly_rootfs",
"message": "No readOnlyRootFilesystem — container filesystem is writable",
"fix": "Add readOnlyRootFilesystem: true and use emptyDir for writable paths",
},
{
"id": "SC005",
"severity": "medium",
"check": "no_network_policy",
"message": "No NetworkPolicy template — all pod-to-pod traffic allowed",
"fix": "Add a NetworkPolicy template with default-deny ingress and explicit allow rules",
},
{
"id": "SC006",
"severity": "medium",
"check": "automount_sa_token",
"message": "automountServiceAccountToken not set to false — pod can access K8s API",
"fix": "Set automountServiceAccountToken: false unless the pod needs K8s API access",
},
{
"id": "SC007",
"severity": "high",
"check": "host_network",
"message": "hostNetwork: true — pod shares host network namespace",
"fix": "Remove hostNetwork unless absolutely required (e.g., CNI plugin)",
},
{
"id": "SC008",
"severity": "critical",
"check": "host_pid_ipc",
"message": "hostPID or hostIPC enabled — pod can see host processes/IPC",
"fix": "Remove hostPID and hostIPC — never needed in application charts",
},
]
LABEL_PATTERNS = [
r"app\.kubernetes\.io/name",
r"app\.kubernetes\.io/instance",
r"app\.kubernetes\.io/version",
r"app\.kubernetes\.io/managed-by",
r"helm\.sh/chart",
]
# --- Demo Chart ---
DEMO_CHART_YAML = """apiVersion: v2
name: demo-app
version: 0.1.0
"""
DEMO_VALUES_YAML = """replicaCount: 1
image:
repository: nginx
tag: latest
pullPolicy: Always
service:
type: ClusterIP
port: 80
"""
DEMO_DEPLOYMENT = """apiVersion: apps/v1
kind: Deployment
metadata:
name: demo-app
spec:
replicas: 3
template:
spec:
containers:
- name: demo-app
image: nginx:1.25
ports:
- containerPort: 80
"""
def parse_yaml_simple(content):
    """Extract flat `key: value` pairs from YAML text (stdlib only).

    This is deliberately naive: comments, blank lines, and list items are
    ignored, keys without an inline value (nested maps) are skipped, and
    nested keys end up flattened into the same dict.

    Args:
        content: Raw YAML text.

    Returns:
        Dict mapping each key to its unquoted string value.
    """
    parsed = {}
    for raw_line in content.splitlines():
        text = raw_line.strip()
        # Skip blanks, comments, and sequence items outright.
        if not text or text.startswith("#") or text.startswith("-"):
            continue
        if ":" not in text:
            continue
        key, _, value = text.partition(":")
        value = value.strip().strip("'\"")
        # Keys with no inline value (e.g. "image:") are map headers — skip.
        if value:
            parsed[key.strip()] = value
    return parsed
def check_structure(chart_dir):
    """Check the chart directory for required standard files.

    Args:
        chart_dir: Path to the chart root directory.

    Returns:
        List of finding dicts (id ST001..ST005, severity, message, fix,
        file) — one per missing required file.
    """
    findings = []
    # enumerate instead of REQUIRED_FILES.index(check): the original did a
    # linear .index() lookup on every iteration (O(n^2) and non-idiomatic).
    for idx, check in enumerate(REQUIRED_FILES, start=1):
        path = chart_dir / check["path"]
        if not path.exists():
            findings.append({
                "id": f"ST{idx:03d}",
                "severity": check["severity"],
                "message": check["message"],
                "fix": f"Create {check['path']}",
                "file": check["path"],
            })
    return findings
def check_chart_yaml(chart_dir):
    """Validate Chart.yaml metadata.

    Checks for required/recommended fields (CY001..CY006), Helm 2 style
    apiVersion (CY007), and non-semver version strings (CY008).

    Args:
        chart_dir: Path to the chart root directory.

    Returns:
        List of finding dicts; empty if Chart.yaml does not exist (the
        missing file itself is reported by check_structure).
    """
    findings = []
    chart_path = chart_dir / "Chart.yaml"
    if not chart_path.exists():
        return findings
    content = chart_path.read_text(encoding="utf-8")
    parsed = parse_yaml_simple(content)
    # enumerate instead of CHART_YAML_CHECKS.index(check): avoids a linear
    # .index() lookup on every iteration.
    for idx, check in enumerate(CHART_YAML_CHECKS, start=1):
        if check["field"] not in parsed:
            findings.append({
                "id": f"CY{idx:03d}",
                "severity": check["severity"],
                "message": check["message"],
                "fix": f"Add '{check['field']}:' to Chart.yaml",
                "file": "Chart.yaml",
            })
    # Check apiVersion value (v1 is the legacy Helm 2 chart format)
    if parsed.get("apiVersion") == "v1":
        findings.append({
            "id": "CY007",
            "severity": "medium",
            "message": "apiVersion: v1 is Helm 2 format — use v2 for Helm 3",
            "fix": "Change apiVersion to v2",
            "file": "Chart.yaml",
        })
    # Check version is semver (MAJOR.MINOR.PATCH prefix)
    version = parsed.get("version", "")
    if version and not re.match(r"^\d+\.\d+\.\d+", version):
        findings.append({
            "id": "CY008",
            "severity": "high",
            "message": f"Version '{version}' is not valid semver",
            "fix": "Use semver format: MAJOR.MINOR.PATCH (e.g., 1.0.0)",
            "file": "Chart.yaml",
        })
    return findings
def check_templates(chart_dir):
    """Scan templates for anti-patterns.

    Runs the TEMPLATE_ANTI_PATTERNS regexes over every template file,
    verifies the standard Kubernetes labels appear somewhere, and checks
    for missing resource limits and health probes.

    Args:
        chart_dir: Path to the chart root directory.

    Returns:
        List of finding dicts; empty if templates/ does not exist.
    """
    findings = []
    templates_dir = chart_dir / "templates"
    if not templates_dir.exists():
        return findings
    template_files = list(templates_dir.glob("*.yaml")) + list(templates_dir.glob("*.yml")) + list(templates_dir.glob("*.tpl"))
    all_content = ""
    for tpl_file in template_files:
        content = tpl_file.read_text(encoding="utf-8")
        all_content += content + "\n"
        rel_path = tpl_file.relative_to(chart_dir)
        for rule in TEMPLATE_ANTI_PATTERNS:
            # Skip patterns that would false-positive on template expressions
            for match in re.finditer(rule["pattern"], content, re.MULTILINE):
                line = match.group(0).strip()
                # Skip if the line contains a template expression
                if "{{" in line or "}}" in line:
                    continue
                findings.append({
                    "id": rule["id"],
                    "severity": rule["severity"],
                    "message": rule["message"],
                    "fix": rule["fix"],
                    "file": str(rel_path),
                    "line": line[:80],
                })
    # Check for standard labels
    helpers_file = templates_dir / "_helpers.tpl"
    if helpers_file.exists():
        helpers_content = helpers_file.read_text(encoding="utf-8")
        for label_idx, label_pattern in enumerate(LABEL_PATTERNS, start=1):
            if not re.search(label_pattern, helpers_content) and not re.search(label_pattern, all_content):
                label_name = label_pattern.replace("\\.", ".")
                findings.append({
                    # BUG FIX: every missing label previously reused id
                    # "LB001" with identical line/file, so analyze_chart's
                    # (id, line, file) dedup collapsed multiple missing
                    # labels into a single finding. Unique ids per label
                    # keep each one visible.
                    "id": f"LB{label_idx:03d}",
                    "severity": "high",
                    "message": f"Standard label '{label_name}' not found in helpers or templates",
                    "fix": f"Add {label_name} to the labels helper in _helpers.tpl",
                    "file": "templates/_helpers.tpl",
                    "line": "(label not found)",
                })
    # Check for resource limits
    if "resources:" not in all_content and template_files:
        findings.append({
            "id": "TP006",
            "severity": "critical",
            "message": "No resource requests/limits in any template — pods can consume unlimited node resources",
            "fix": "Add resources section: {{ toYaml .Values.resources | nindent 12 }}",
            "file": "templates/",
            "line": "(no resources block found)",
        })
    # Check for probes — only relevant if a Deployment exists
    if "livenessProbe" not in all_content and "readinessProbe" not in all_content and template_files:
        has_deployment = any("Deployment" in f.read_text(encoding="utf-8") for f in template_files if f.suffix in (".yaml", ".yml"))
        if has_deployment:
            findings.append({
                "id": "TP007",
                "severity": "high",
                "message": "No liveness/readiness probes — Kubernetes cannot detect unhealthy pods",
                "fix": "Add livenessProbe and readinessProbe with configurable values",
                "file": "templates/deployment.yaml",
                "line": "(no probes found)",
            })
    return findings
def check_security(chart_dir):
    """Run security-focused checks.

    Evaluates the SECURITY_CHECKS baseline (missing securityContext,
    privileged containers, host namespaces, ...) over the template sources
    and flags secret-looking default values committed to values.yaml.

    Args:
        chart_dir: Path to the chart root directory.

    Returns:
        List of finding dicts; empty if templates/ does not exist.
    """
    findings = []
    templates_dir = chart_dir / "templates"
    if not templates_dir.exists():
        return findings
    template_files = list(templates_dir.glob("*.yaml")) + list(templates_dir.glob("*.yml"))
    all_content = "".join(tpl.read_text(encoding="utf-8") + "\n" for tpl in template_files)

    def _is_triggered(name):
        # Detection logic keyed by the "check" field of a SECURITY_CHECKS
        # entry; returns True when the corresponding issue is present.
        if name == "no_security_context":
            return bool(template_files) and "securityContext" not in all_content
        if name == "privileged_container":
            return re.search(r"privileged:\s*true", all_content) is not None
        if name == "no_run_as_non_root":
            return "securityContext" in all_content and "runAsNonRoot" not in all_content
        if name == "no_readonly_rootfs":
            return "securityContext" in all_content and "readOnlyRootFilesystem" not in all_content
        if name == "no_network_policy":
            return (not (templates_dir / "networkpolicy.yaml").exists()
                    and "NetworkPolicy" not in all_content)
        if name == "automount_sa_token":
            return bool(template_files) and "automountServiceAccountToken" not in all_content
        if name == "host_network":
            return re.search(r"hostNetwork:\s*true", all_content) is not None
        if name == "host_pid_ipc":
            return re.search(r"host(?:PID|IPC):\s*true", all_content) is not None
        return False

    for check in SECURITY_CHECKS:
        if _is_triggered(check["check"]):
            findings.append({
                "id": check["id"],
                "severity": check["severity"],
                "message": check["message"],
                "fix": check["fix"],
                "file": "templates/",
            })

    # Check for secrets in values.yaml: secret-ish key with a non-placeholder value
    values_path = chart_dir / "values.yaml"
    if values_path.exists():
        values_content = values_path.read_text(encoding="utf-8")
        placeholders = ("null", "~", '""', "''", "changeme", "CHANGEME", "TODO")
        secret_key_re = r"^(\s*\S*(?:password|secret|token|apiKey|api_key)\s*:\s*)(\S+)"
        for match in re.finditer(secret_key_re, values_content, re.MULTILINE | re.IGNORECASE):
            candidate = match.group(2).strip("'\"")
            if candidate and candidate not in placeholders:
                findings.append({
                    "id": "SC009",
                    "severity": "critical",
                    "message": f"Potential secret in values.yaml default: {match.group(0).strip()[:60]}",
                    "fix": "Remove default secret values. Use empty string or null with documentation",
                    "file": "values.yaml",
                    "line": match.group(0).strip()[:80],
                })
    return findings
def analyze_chart(chart_dir, output_format="text", security_focus=False):
    """Run full chart analysis and print a report.

    Args:
        chart_dir: Path to the chart root directory.
        output_format: "text" for a human-readable report, "json" for
            machine-readable output.
        security_focus: When True, restrict the report to security findings
            plus any critical/high findings from the other analyzers.

    Returns:
        Result dict with score, chart metadata, findings, and counts
        (also printed in the requested format).
    """
    findings = []
    findings.extend(check_structure(chart_dir))
    findings.extend(check_chart_yaml(chart_dir))
    findings.extend(check_templates(chart_dir))
    if security_focus:
        findings.extend(check_security(chart_dir))
        # Filter to security-relevant items only (security checks plus any
        # critical/high findings from the structural analyzers)
        security_ids = {"SC001", "SC002", "SC003", "SC004", "SC005", "SC006", "SC007", "SC008", "SC009"}
        security_severities = {"critical", "high"}
        findings = [f for f in findings if f["id"] in security_ids or f["severity"] in security_severities]
    else:
        findings.extend(check_security(chart_dir))
    # Deduplicate findings that fired on the same rule/line/file
    seen = set()
    unique = []
    for f in findings:
        key = (f["id"], f.get("line", ""), f.get("file", ""))
        if key not in seen:
            seen.add(key)
            unique.append(f)
    findings = unique
    # Sort by severity, critical first
    severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    findings.sort(key=lambda f: severity_order.get(f["severity"], 4))
    # Score: start at 100 and deduct per finding by severity
    deductions = {"critical": 25, "high": 15, "medium": 5, "low": 2}
    score = max(0, 100 - sum(deductions.get(f["severity"], 0) for f in findings))
    counts = {
        "critical": sum(1 for f in findings if f["severity"] == "critical"),
        "high": sum(1 for f in findings if f["severity"] == "high"),
        "medium": sum(1 for f in findings if f["severity"] == "medium"),
        "low": sum(1 for f in findings if f["severity"] == "low"),
    }
    # Chart metadata for the report header
    chart_yaml_path = chart_dir / "Chart.yaml"
    chart_meta = parse_yaml_simple(chart_yaml_path.read_text(encoding="utf-8")) if chart_yaml_path.exists() else {}
    result = {
        "score": score,
        "chart_name": chart_meta.get("name", chart_dir.name),
        "chart_version": chart_meta.get("version", "unknown"),
        "app_version": chart_meta.get("appVersion", "unknown"),
        "findings": findings,
        "finding_counts": counts,
    }
    if output_format == "json":
        print(json.dumps(result, indent=2))
        return result
    # Text output
    print(f"\n{'=' * 60}")
    print(" Helm Chart Analysis Report")
    print(f"{'=' * 60}")
    print(f" Score: {score}/100")
    print(f" Chart: {result['chart_name']} v{result['chart_version']}")
    print(f" App Version: {result['app_version']}")
    print()
    print(f" Findings: {counts['critical']} critical | {counts['high']} high | {counts['medium']} medium | {counts['low']} low")
    # BUG FIX: was "'' * 60" — repeating the EMPTY string, which printed a
    # blank line instead of a separator rule (a dash was evidently lost).
    print(f"{'-' * 60}")
    for f in findings:
        icon = {"critical": "!!!", "high": "!!", "medium": "!", "low": "~"}.get(f["severity"], "?")
        print(f"\n [{f['id']}] {icon} {f['severity'].upper()}")
        print(f" {f['message']}")
        if "file" in f:
            print(f" File: {f['file']}")
        if "line" in f:
            print(f" Line: {f['line']}")
        print(f" Fix: {f['fix']}")
    if not findings:
        print("\n No issues found. Chart looks good.")
    print(f"\n{'=' * 60}\n")
    return result
def run_demo():
    """Materialize the bundled demo chart on disk for analysis.

    Returns:
        Tuple of (chart_dir, analyze_chart) where chart_dir is a Path to
        the freshly written demo chart. The caller is responsible for
        deleting the temporary directory when done.
    """
    # BUG FIX: the original used `with tempfile.TemporaryDirectory()` and
    # returned a path inside it — the directory was deleted when the `with`
    # block exited, so the returned chart_dir never existed for the caller.
    # mkdtemp keeps the directory alive past the return.
    import tempfile
    chart_dir = Path(tempfile.mkdtemp()) / "demo-app"
    chart_dir.mkdir()
    (chart_dir / "Chart.yaml").write_text(DEMO_CHART_YAML)
    (chart_dir / "values.yaml").write_text(DEMO_VALUES_YAML)
    templates_dir = chart_dir / "templates"
    templates_dir.mkdir()
    (templates_dir / "deployment.yaml").write_text(DEMO_DEPLOYMENT)
    return chart_dir, analyze_chart
def main():
    """CLI entry point: parse arguments and run the chart analyzer.

    When no chart directory is given, a throwaway demo chart is written to
    a temporary directory and analyzed instead.
    """
    parser = argparse.ArgumentParser(
        description="helm-chart-builder: Helm chart static analyzer"
    )
    parser.add_argument("chartdir", nargs="?", help="Path to Helm chart directory (omit for demo)")
    parser.add_argument(
        "--output", "-o",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)",
    )
    parser.add_argument(
        "--security",
        action="store_true",
        help="Security-focused analysis only",
    )
    args = parser.parse_args()

    if not args.chartdir:
        # Demo mode: build the bundled demo chart in a temp dir and analyze it.
        print("No chart directory provided. Running demo analysis...\n")
        import tempfile
        with tempfile.TemporaryDirectory() as tmpdir:
            demo_dir = Path(tmpdir) / "demo-app"
            templates_dir = demo_dir / "templates"
            templates_dir.mkdir(parents=True)
            (demo_dir / "Chart.yaml").write_text(DEMO_CHART_YAML)
            (demo_dir / "values.yaml").write_text(DEMO_VALUES_YAML)
            (templates_dir / "deployment.yaml").write_text(DEMO_DEPLOYMENT)
            analyze_chart(demo_dir, args.output, args.security)
        return

    target = Path(args.chartdir)
    if not target.is_dir():
        print(f"Error: Not a directory: {args.chartdir}", file=sys.stderr)
        sys.exit(1)
    analyze_chart(target, args.output, args.security)
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,442 @@
#!/usr/bin/env python3
"""
helm-chart-builder: Values Validator
Validate values.yaml files against Helm best practices — documentation coverage,
type consistency, naming conventions, default quality, and security.
Usage:
python scripts/values_validator.py values.yaml
python scripts/values_validator.py values.yaml --output json
python scripts/values_validator.py values.yaml --strict
"""
import argparse
import json
import re
import sys
from pathlib import Path
# --- Demo values.yaml ---
DEMO_VALUES = """# Default values for demo-app
replicaCount: 1
image:
repository: nginx
tag: latest
pullPolicy: Always
service:
type: ClusterIP
port: 80
ingress:
enabled: false
resources: {}
PASSWORD: supersecret123
db_password: changeme
api-key: sk-12345
deeply:
nested:
structure:
that:
goes:
too:
deep: true
undocumented_value: something
AnotherValue: 42
snake_case_key: bad
"""
# --- Validation Rules ---
NAMING_PATTERN = re.compile(r"^[a-z][a-zA-Z0-9]*$") # camelCase
SNAKE_CASE_PATTERN = re.compile(r"^[a-z][a-z0-9]*(_[a-z0-9]+)+$") # snake_case
UPPER_CASE_PATTERN = re.compile(r"^[A-Z]") # Starts with uppercase
SECRET_KEY_PATTERNS = [
re.compile(r"(?:password|secret|token|apiKey|api_key|api-key|private_key|credentials)", re.IGNORECASE),
]
KNOWN_STRUCTURES = {
"image": ["repository", "tag", "pullPolicy"],
"service": ["type", "port"],
"ingress": ["enabled"],
"resources": [],
"serviceAccount": ["create", "name"],
"autoscaling": ["enabled", "minReplicas", "maxReplicas"],
}
def parse_values(content):
    """Parse values.yaml into structured data with metadata.

    A lightweight, stdlib-only line scanner (not a full YAML parser):
    nesting is inferred from indentation, list items are ignored, and
    multi-line values are not supported.

    Args:
        content: Raw values.yaml text.

    Returns:
        A list of entry dicts, one per `key: value` line, each with:
        key, full_path (dot-joined), value (raw string), value_type,
        depth (1 = top level), line (1-based), has_documentation,
        and comment (preceding block comment or inline comment).
    """
    entries = []
    key_stack = []          # parent keys of the current position
    indent_stack = [0]      # indentation columns matching key_stack levels
    prev_comment = None     # most recent full-line comment, consumed by next key
    for line_num, line in enumerate(content.splitlines(), 1):
        stripped = line.strip()
        # Track comments for documentation coverage
        if stripped.startswith("#"):
            prev_comment = stripped
            continue
        # A blank line breaks the comment-to-key association
        if not stripped:
            prev_comment = None
            continue
        indent = len(line) - len(line.lstrip())
        # Pop stack for dedented lines (back out of closed parent maps)
        while len(indent_stack) > 1 and indent <= indent_stack[-1]:
            indent_stack.pop()
            if key_stack:
                key_stack.pop()
        # Parse key: value
        match = re.match(r"^(\S+)\s*:\s*(.*)", stripped)
        if match and not stripped.startswith("-"):
            key = match.group(1)
            raw_value = match.group(2).strip()
            # Check for inline comment
            # NOTE(review): a bare "#" inside a quoted value would also be
            # treated as a comment delimiter here — acceptable for this
            # simple parser, but worth confirming for quoted values.
            inline_comment = None
            if "#" in raw_value:
                val_part, _, comment_part = raw_value.partition("#")
                raw_value = val_part.strip()
                inline_comment = comment_part.strip()
            # Build full key path (dot-joined ancestry, e.g. "image.tag")
            full_path = ".".join(key_stack + [key])
            depth = len(key_stack) + 1
            # Determine value type; an empty value means this key opens a
            # nested map, so push it onto the stacks
            value_type = "unknown"
            if not raw_value or raw_value == "":
                value_type = "map"
                key_stack.append(key)
                indent_stack.append(indent)
            elif raw_value in ("true", "false"):
                value_type = "boolean"
            elif raw_value == "null" or raw_value == "~":
                value_type = "null"
            elif raw_value == "{}":
                value_type = "empty_map"
            elif raw_value == "[]":
                value_type = "empty_list"
            elif re.match(r"^-?\d+$", raw_value):
                value_type = "integer"
            elif re.match(r"^-?\d+\.\d+$", raw_value):
                value_type = "float"
            elif raw_value.startswith('"') or raw_value.startswith("'"):
                value_type = "string"
            else:
                value_type = "string"
            has_doc = prev_comment is not None or inline_comment is not None
            entries.append({
                "key": key,
                "full_path": full_path,
                "value": raw_value,
                "value_type": value_type,
                "depth": depth,
                "line": line_num,
                "has_documentation": has_doc,
                "comment": prev_comment or inline_comment,
            })
            prev_comment = None
        else:
            prev_comment = None
    return entries
def validate_naming(entries):
    """Check key naming conventions (Helm prefers camelCase).

    Flags snake_case, Uppercase-leading, and kebab-case keys. Parent (map)
    keys go through the same checks as leaf keys.

    Args:
        entries: Parsed entry dicts from parse_values.

    Returns:
        List of finding dicts (severity, category, message, fix, line).
    """
    findings = []
    # Removed a dead `if entry["value_type"] == "map": pass` branch from the
    # original — it had no effect; map keys already fall through to the
    # same naming checks below.
    for entry in entries:
        key = entry["key"]
        if SNAKE_CASE_PATTERN.match(key):
            findings.append({
                "severity": "medium",
                "category": "naming",
                "message": f"Key '{entry['full_path']}' uses snake_case — Helm convention is camelCase",
                "fix": f"Rename to camelCase: {to_camel_case(key)}",
                "line": entry["line"],
            })
        elif UPPER_CASE_PATTERN.match(key) and not key.isupper():
            findings.append({
                "severity": "medium",
                "category": "naming",
                "message": f"Key '{entry['full_path']}' starts with uppercase — use camelCase",
                "fix": f"Rename: {key[0].lower() + key[1:]}",
                "line": entry["line"],
            })
        elif "-" in key:
            findings.append({
                "severity": "medium",
                "category": "naming",
                "message": f"Key '{entry['full_path']}' uses kebab-case — Helm convention is camelCase",
                "fix": f"Rename to camelCase: {to_camel_case(key)}",
                "line": entry["line"],
            })
    return findings
def validate_documentation(entries):
    """Report on comment coverage across the parsed values entries.

    Emits an aggregate finding when overall coverage is below 80% (high
    severity below 50%), plus one low-severity finding per undocumented
    top-level key.
    """
    findings = []
    documented = sum(1 for e in entries if e["has_documentation"])
    total = len(entries)
    if total:
        coverage = (documented / total) * 100
        if coverage < 50:
            findings.append({
                "severity": "high",
                "category": "documentation",
                "message": f"Only {coverage:.0f}% of values have comments ({documented}/{total})",
                "fix": "Add inline YAML comments explaining purpose, type, and valid options for each value",
                "line": 0,
            })
        elif coverage < 80:
            findings.append({
                "severity": "medium",
                "category": "documentation",
                "message": f"{coverage:.0f}% documentation coverage ({documented}/{total}) — aim for 80%+",
                "fix": "Add comments for undocumented values",
                "line": 0,
            })
    # Each undocumented top-level key gets its own (low-severity) finding.
    undocumented_roots = (
        e for e in entries if e["depth"] == 1 and not e["has_documentation"]
    )
    for entry in undocumented_roots:
        findings.append({
            "severity": "low",
            "category": "documentation",
            "message": f"Top-level key '{entry['key']}' has no comment",
            "fix": f"Add a comment above '{entry['key']}' explaining its purpose",
            "line": entry["line"],
        })
    return findings
def validate_defaults(entries):
    """Check default value quality.

    Flags three common defaults problems: image tag pinned to 'latest'
    (not reproducible), pullPolicy 'Always' (wasteful in production),
    and an empty resources map (no requests/limits).

    Args:
        entries: Parsed value entries (dicts with 'key', 'value',
            'value_type', 'line').

    Returns:
        list of finding dicts (severity/category/message/fix/line).
    """
    findings = []
    for entry in entries:
        # ':latest' means deployments can silently change between rollouts.
        if entry["key"] == "tag" and entry["value"] in ("latest", '"latest"', "'latest'"):
            findings.append({
                "severity": "high",
                "category": "defaults",
                "message": "image.tag defaults to 'latest' — not reproducible",
                "fix": "Use a specific version tag or reference .Chart.AppVersion in template",
                "line": entry["line"],
            })
        # 'Always' re-pulls the image on every pod start; only sensible
        # when the tag is ':latest'.
        if entry["key"] == "pullPolicy" and entry["value"] in ("Always", '"Always"', "'Always'"):
            findings.append({
                "severity": "low",
                "category": "defaults",
                # BUG FIX: the separator between 'Always' and 'IfNotPresent'
                # was missing from this message.
                "message": "imagePullPolicy defaults to 'Always' — 'IfNotPresent' is better for production",
                "fix": "Change default to IfNotPresent (Always is appropriate for :latest only)",
                "line": entry["line"],
            })
        # An empty resources block means no scheduling requests or limits.
        if entry["key"] == "resources" and entry["value_type"] == "empty_map":
            findings.append({
                "severity": "medium",
                "category": "defaults",
                "message": "resources defaults to {} — no requests or limits set",
                "fix": "Provide default resource requests (e.g., cpu: 100m, memory: 128Mi)",
                "line": entry["line"],
            })
    return findings
def validate_secrets(entries):
    """Flag keys that look like secrets but carry a real default value.

    A key whose full path matches any SECRET_KEY_PATTERNS entry and whose
    (unquoted) value is neither empty nor a recognized placeholder is
    reported as a critical security finding. At most one finding per entry.
    """
    placeholders = ("", "null", "~", "{}", "[]", "changeme", "CHANGEME", "TODO", '""', "''")
    findings = []
    for entry in entries:
        # Skip entries whose path doesn't resemble a secret key at all.
        if not any(p.search(entry["full_path"]) for p in SECRET_KEY_PATTERNS):
            continue
        val = entry["value"].strip("'\"")
        if val and val not in placeholders:
            findings.append({
                "severity": "critical",
                "category": "security",
                "message": f"Potential secret with default value: {entry['full_path']} = {val[:30]}...",
                "fix": "Remove default. Use empty string, null, or 'changeme' placeholder with comment",
                "line": entry["line"],
            })
    return findings
def validate_depth(entries):
    """Warn about keys nested deeper than four levels.

    Reports at most the first three offending entries to keep output short.
    """
    too_deep = [e for e in entries if e["depth"] > 4]
    return [
        {
            "severity": "medium",
            "category": "structure",
            "message": f"Deeply nested key ({entry['depth']} levels): {entry['full_path']}",
            "fix": "Flatten structure — max 3-4 levels deep for usability",
            "line": entry["line"],
        }
        for entry in too_deep[:3]  # cap the report at three findings
    ]
def to_camel_case(name):
    """Convert a snake_case or kebab-case name to camelCase."""
    first, *rest = re.split(r"[-_]", name)
    return first.lower() + "".join(word.capitalize() for word in rest)
def generate_report(content, output_format="text", strict=False):
    """Generate full validation report.

    Parses the values.yaml content, runs all validators, scores the result
    out of 100, and emits either a human-readable text report or JSON.

    Args:
        content: Raw values.yaml text.
        output_format: "text" (default) or "json".
        strict: If True, elevate medium findings to high and low to medium.

    Returns:
        dict with score, key/documentation stats, sorted findings, and
        per-severity counts.
    """
    entries = parse_values(content)
    findings = []
    findings.extend(validate_naming(entries))
    findings.extend(validate_documentation(entries))
    findings.extend(validate_defaults(entries))
    findings.extend(validate_secrets(entries))
    findings.extend(validate_depth(entries))
    if strict:
        # Elevate medium to high, low to medium
        for f in findings:
            if f["severity"] == "medium":
                f["severity"] = "high"
            elif f["severity"] == "low":
                f["severity"] = "medium"
    # Sort by severity, most severe first
    severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    findings.sort(key=lambda f: severity_order.get(f["severity"], 4))
    # Score: start at 100 and deduct a fixed amount per finding by severity
    deductions = {"critical": 25, "high": 15, "medium": 5, "low": 2}
    score = max(0, 100 - sum(deductions.get(f["severity"], 0) for f in findings))
    counts = {
        "critical": sum(1 for f in findings if f["severity"] == "critical"),
        "high": sum(1 for f in findings if f["severity"] == "high"),
        "medium": sum(1 for f in findings if f["severity"] == "medium"),
        "low": sum(1 for f in findings if f["severity"] == "low"),
    }
    # Stats
    total_keys = len(entries)
    documented = sum(1 for e in entries if e["has_documentation"])
    max_depth = max((e["depth"] for e in entries), default=0)
    result = {
        "score": score,
        "total_keys": total_keys,
        "documented_keys": documented,
        "documentation_coverage": f"{(documented / total_keys * 100):.0f}%" if total_keys > 0 else "N/A",
        "max_depth": max_depth,
        "findings": findings,
        "finding_counts": counts,
    }
    if output_format == "json":
        print(json.dumps(result, indent=2))
        return result
    # Text output
    print(f"\n{'=' * 60}")
    print(f" Values.yaml Validation Report")
    print(f"{'=' * 60}")
    print(f" Score: {score}/100")
    print(f" Keys: {total_keys} | Documented: {documented} ({result['documentation_coverage']})")
    print(f" Max Depth: {max_depth}")
    print()
    print(f" Findings: {counts['critical']} critical | {counts['high']} high | {counts['medium']} medium | {counts['low']} low")
    # BUG FIX: this previously printed '' * 60 (a blank line); a dashed
    # separator rule was intended, matching the '=' rules above.
    print(f"{'-' * 60}")
    for f in findings:
        icon = {"critical": "!!!", "high": "!!", "medium": "!", "low": "~"}.get(f["severity"], "?")
        print(f"\n {icon} {f['severity'].upper()} [{f['category']}]")
        print(f"    {f['message']}")
        if f.get("line", 0) > 0:
            print(f"    Line: {f['line']}")
        print(f"    Fix: {f['fix']}")
    if not findings:
        print("\n No issues found. Values file looks good.")
    print(f"\n{'=' * 60}\n")
    return result
def main():
    """Command-line entry point for the values.yaml validator.

    Reads the target file (or falls back to the built-in demo content when
    no path is given) and hands it to generate_report.
    """
    parser = argparse.ArgumentParser(
        description="helm-chart-builder: values.yaml best-practice validator"
    )
    parser.add_argument("valuesfile", nargs="?", help="Path to values.yaml (omit for demo)")
    parser.add_argument(
        "--output", "-o",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Strict mode — elevate warnings to higher severity",
    )
    args = parser.parse_args()
    if not args.valuesfile:
        # No file given — validate the bundled demo values instead.
        print("No values file provided. Running demo validation...\n")
        content = DEMO_VALUES
    else:
        path = Path(args.valuesfile)
        if not path.exists():
            print(f"Error: File not found: {args.valuesfile}", file=sys.stderr)
            sys.exit(1)
        content = path.read_text(encoding="utf-8")
    generate_report(content, args.output, args.strict)
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,13 @@
{
"name": "terraform-patterns",
"description": "Terraform infrastructure-as-code agent skill and plugin for module design patterns, state management strategies, provider configuration, security hardening, and CI/CD plan/apply workflows. Covers mono-repo vs multi-repo, workspaces, policy-as-code, and drift detection.",
"version": "1.0.0",
"author": {
"name": "Alireza Rezvani",
"url": "https://alirezarezvani.com"
},
"homepage": "https://github.com/alirezarezvani/claude-skills/tree/main/engineering/terraform-patterns",
"repository": "https://github.com/alirezarezvani/claude-skills",
"license": "MIT",
"skills": "./"
}

View File

@@ -0,0 +1,487 @@
---
name: "terraform-patterns"
description: "Terraform infrastructure-as-code agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw. Covers module design patterns, state management strategies, provider configuration, security hardening, policy-as-code with Sentinel/OPA, and CI/CD plan/apply workflows. Use when: user wants to design Terraform modules, manage state backends, review Terraform security, implement multi-region deployments, or follow IaC best practices."
license: MIT
metadata:
version: 1.0.0
author: Alireza Rezvani
category: engineering
updated: 2026-03-15
---
# Terraform Patterns
> Predictable infrastructure. Secure state. Modules that compose. No drift.
Opinionated Terraform workflow that turns sprawling HCL into well-structured, secure, production-grade infrastructure code. Covers module design, state management, provider patterns, security hardening, and CI/CD integration.
Not a Terraform tutorial — a set of concrete decisions about how to write infrastructure code that doesn't break at 3 AM.
---
## Slash Commands
| Command | What it does |
|---------|-------------|
| `/terraform:review` | Analyze Terraform code for anti-patterns, security issues, and structure problems |
| `/terraform:module` | Design or refactor a Terraform module with proper inputs, outputs, and composition |
| `/terraform:security` | Audit Terraform code for security vulnerabilities, secrets exposure, and IAM misconfigurations |
---
## When This Skill Activates
Recognize these patterns from the user:
- "Review this Terraform code"
- "Design a Terraform module for..."
- "My Terraform state is..."
- "Set up remote state backend"
- "Multi-region Terraform deployment"
- "Terraform security review"
- "Module structure best practices"
- "Terraform CI/CD pipeline"
- Any request involving: `.tf` files, HCL, Terraform modules, state management, provider configuration, infrastructure-as-code
If the user has `.tf` files or wants to provision infrastructure with Terraform → this skill applies.
---
## Workflow
### `/terraform:review` — Terraform Code Review
1. **Analyze current state**
- Read all `.tf` files in the target directory
- Identify module structure (flat vs nested)
- Count resources, data sources, variables, outputs
- Check naming conventions
2. **Apply review checklist**
```
MODULE STRUCTURE
├── Variables have descriptions and type constraints
├── Outputs expose only what consumers need
├── Resources use consistent naming: {provider}_{type}_{purpose}
├── Locals used for computed values and DRY expressions
└── No hardcoded values — everything parameterized or in locals
STATE & BACKEND
├── Remote backend configured (S3, GCS, Azure Blob, Terraform Cloud)
├── State locking enabled (DynamoDB for S3, native for others)
├── State encryption at rest enabled
├── No secrets stored in state (or state access is restricted)
└── Workspaces or directory isolation for environments
PROVIDERS
├── Version constraints use pessimistic operator: ~> 5.0
├── Required providers block in terraform {} block
├── Provider aliases for multi-region or multi-account
└── No provider configuration in child modules
SECURITY
├── No hardcoded secrets, keys, or passwords
├── IAM follows least-privilege principle
├── Encryption enabled for storage, databases, secrets
├── Security groups are not overly permissive (no 0.0.0.0/0 ingress on sensitive ports)
└── Sensitive variables marked with sensitive = true
```
3. **Generate report**
```bash
python3 scripts/tf_module_analyzer.py ./terraform
```
4. **Run security scan**
```bash
python3 scripts/tf_security_scanner.py ./terraform
```
### `/terraform:module` — Module Design
1. **Identify module scope**
- Single responsibility: one module = one logical grouping
- Determine inputs (variables), outputs, and resource boundaries
- Decide: flat module (single directory) vs nested (calling child modules)
2. **Apply module design checklist**
```
STRUCTURE
├── main.tf — Primary resources
├── variables.tf — All input variables with descriptions and types
├── outputs.tf — All outputs with descriptions
├── versions.tf — terraform {} block with required_providers
├── locals.tf — Computed values and naming conventions
├── data.tf — Data sources (if any)
└── README.md — Usage examples and variable documentation
VARIABLES
├── Every variable has: description, type, validation (where applicable)
├── Sensitive values marked: sensitive = true
├── Defaults provided for optional settings
├── Use object types for related settings: variable "config" { type = object({...}) }
└── Validate with: validation { condition = ... }
OUTPUTS
├── Output IDs, ARNs, endpoints — things consumers need
├── Include description on every output
├── Mark sensitive outputs: sensitive = true
└── Don't output entire resources — only specific attributes
COMPOSITION
├── Root module calls child modules
├── Child modules never call other child modules
├── Pass values explicitly — no hidden data source lookups in child modules
├── Provider configuration only in root module
└── Use module "name" { source = "./modules/name" }
```
3. **Generate module scaffold**
- Output file structure with boilerplate
- Include variable validation blocks
- Add lifecycle rules where appropriate
### `/terraform:security` — Security Audit
1. **Code-level audit**
| Check | Severity | Fix |
|-------|----------|-----|
| Hardcoded secrets in `.tf` files | Critical | Use variables with sensitive = true or vault |
| IAM policy with `*` actions | Critical | Scope to specific actions and resources |
| Security group with 0.0.0.0/0 on port 22/3389 | Critical | Restrict to known CIDR blocks or use SSM/bastion |
| S3 bucket without encryption | High | Add `server_side_encryption_configuration` block |
| S3 bucket with public access | High | Add `aws_s3_bucket_public_access_block` |
| RDS without encryption | High | Set `storage_encrypted = true` |
| RDS publicly accessible | High | Set `publicly_accessible = false` |
| CloudTrail not enabled | Medium | Add `aws_cloudtrail` resource |
| Missing `prevent_destroy` on stateful resources | Medium | Add `lifecycle { prevent_destroy = true }` |
| Variables without `sensitive = true` for secrets | Medium | Add `sensitive = true` to secret variables |
2. **State security audit**
| Check | Severity | Fix |
|-------|----------|-----|
| Local state file | Critical | Migrate to remote backend with encryption |
| Remote state without encryption | High | Enable encryption on backend (SSE-S3, KMS) |
| No state locking | High | Enable DynamoDB for S3, native for TF Cloud |
| State accessible to all team members | Medium | Restrict via IAM policies or TF Cloud teams |
3. **Generate security report**
```bash
python3 scripts/tf_security_scanner.py ./terraform
python3 scripts/tf_security_scanner.py ./terraform --output json
```
---
## Tooling
### `scripts/tf_module_analyzer.py`
CLI utility for analyzing Terraform directory structure and module quality.
**Features:**
- Resource and data source counting
- Variable and output analysis (missing descriptions, types, validation)
- Naming convention checks
- Module composition detection
- File structure validation
- JSON and text output
**Usage:**
```bash
# Analyze a Terraform directory
python3 scripts/tf_module_analyzer.py ./terraform
# JSON output
python3 scripts/tf_module_analyzer.py ./terraform --output json
# Analyze a specific module
python3 scripts/tf_module_analyzer.py ./modules/vpc
```
### `scripts/tf_security_scanner.py`
CLI utility for scanning `.tf` files for common security issues.
**Features:**
- Hardcoded secret detection (AWS keys, passwords, tokens)
- Overly permissive IAM policy detection
- Open security group detection (0.0.0.0/0 on sensitive ports)
- Missing encryption checks (S3, RDS, EBS)
- Public access detection (S3, RDS, EC2)
- Sensitive variable audit
- JSON and text output
**Usage:**
```bash
# Scan a Terraform directory
python3 scripts/tf_security_scanner.py ./terraform
# JSON output
python3 scripts/tf_security_scanner.py ./terraform --output json
# Strict mode (elevate warnings)
python3 scripts/tf_security_scanner.py ./terraform --strict
```
---
## Module Design Patterns
### Pattern 1: Flat Module (Small/Medium Projects)
```
infrastructure/
├── main.tf # All resources
├── variables.tf # All inputs
├── outputs.tf # All outputs
├── versions.tf # Provider requirements
├── terraform.tfvars # Environment values (not committed)
└── backend.tf # Remote state configuration
```
Best for: Single application, < 20 resources, one team owns everything.
### Pattern 2: Nested Modules (Medium/Large Projects)
```
infrastructure/
├── environments/
│ ├── dev/
│ │ ├── main.tf # Calls modules with dev params
│ │ ├── backend.tf # Dev state backend
│ │ └── terraform.tfvars
│ ├── staging/
│ │ └── ...
│ └── prod/
│ └── ...
├── modules/
│ ├── networking/
│ │ ├── main.tf
│ │ ├── variables.tf
│ │ └── outputs.tf
│ ├── compute/
│ │ └── ...
│ └── database/
│ └── ...
└── versions.tf
```
Best for: Multiple environments, shared infrastructure patterns, team collaboration.
### Pattern 3: Mono-Repo with Terragrunt
```
infrastructure/
├── terragrunt.hcl # Root config
├── modules/ # Reusable modules
│ ├── vpc/
│ ├── eks/
│ └── rds/
├── dev/
│ ├── terragrunt.hcl # Dev overrides
│ ├── vpc/
│ │ └── terragrunt.hcl # Module invocation
│ └── eks/
│ └── terragrunt.hcl
└── prod/
├── terragrunt.hcl
└── ...
```
Best for: Large-scale, many environments, DRY configuration, team-level isolation.
---
## Provider Configuration Patterns
### Version Pinning
```hcl
terraform {
required_version = ">= 1.5.0"
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0" # Allow 5.x, block 6.0
}
random = {
source = "hashicorp/random"
version = "~> 3.5"
}
}
}
```
### Multi-Region with Aliases
```hcl
provider "aws" {
region = "us-east-1"
}
provider "aws" {
alias = "west"
region = "us-west-2"
}
resource "aws_s3_bucket" "primary" {
bucket = "my-app-primary"
}
resource "aws_s3_bucket" "replica" {
provider = aws.west
bucket = "my-app-replica"
}
```
### Multi-Account with Assume Role
```hcl
provider "aws" {
alias = "production"
region = "us-east-1"
assume_role {
role_arn = "arn:aws:iam::PROD_ACCOUNT_ID:role/TerraformRole"
}
}
```
---
## State Management Decision Tree
```
Single developer, small project?
├── Yes → Local state (but migrate to remote ASAP)
└── No
├── Using Terraform Cloud/Enterprise?
│ └── Yes → TF Cloud native backend (built-in locking, encryption, RBAC)
└── No
├── AWS?
│ └── S3 + DynamoDB (encryption, locking, versioning)
├── GCP?
│ └── GCS bucket (native locking, encryption)
├── Azure?
│ └── Azure Blob Storage (native locking, encryption)
└── Other?
└── Consul or PostgreSQL backend
Environment isolation strategy:
├── Separate state files per environment (recommended)
│ ├── Option A: Separate directories (dev/, staging/, prod/)
│ └── Option B: Terraform workspaces (simpler but less isolation)
└── Single state file for all environments (never do this)
```
---
## CI/CD Integration Patterns
### GitHub Actions Plan/Apply
```yaml
# .github/workflows/terraform.yml
name: Terraform
on:
pull_request:
paths: ['terraform/**']
push:
branches: [main]
paths: ['terraform/**']
jobs:
plan:
runs-on: ubuntu-latest
if: github.event_name == 'pull_request'
steps:
- uses: actions/checkout@v4
- uses: hashicorp/setup-terraform@v3
- run: terraform init
- run: terraform validate
- run: terraform plan -out=tfplan
- run: terraform show -json tfplan > plan.json
# Post plan as PR comment
apply:
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
environment: production
steps:
- uses: actions/checkout@v4
- uses: hashicorp/setup-terraform@v3
- run: terraform init
- run: terraform apply -auto-approve
```
### Drift Detection
```yaml
# Run on schedule to detect drift
name: Drift Detection
on:
schedule:
- cron: '0 6 * * 1-5' # Weekdays at 6 AM
jobs:
detect:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: hashicorp/setup-terraform@v3
- run: terraform init
      - run: |
          # -detailed-exitcode: 0 = no changes, 1 = error, 2 = drift.
          # Disable -e so exit code 2 doesn't abort the step, and read
          # terraform's status via PIPESTATUS ($? after the pipe would be tee's).
          set +e
          terraform plan -detailed-exitcode -out=drift.tfplan 2>&1 | tee drift.log
          EXIT_CODE=${PIPESTATUS[0]}
          set -e
          if [ "$EXIT_CODE" -eq 2 ]; then
            echo "DRIFT DETECTED — review drift.log"
            # Send alert (Slack, PagerDuty, etc.)
          fi
```
---
## Proactive Triggers
Flag these without being asked:
- **No remote backend configured** → Migrate to S3/GCS/Azure Blob with locking and encryption.
- **Provider without version constraint** → Add `version = "~> X.0"` to prevent breaking upgrades.
- **Hardcoded secrets in .tf files** → Use variables with `sensitive = true`, or integrate Vault/SSM.
- **IAM policy with `"Action": "*"`** → Scope to specific actions. No wildcard actions in production.
- **Security group open to 0.0.0.0/0 on SSH/RDP** → Restrict to bastion CIDR or use SSM Session Manager.
- **No state locking** → Enable DynamoDB table for S3 backend, or use TF Cloud.
- **Resources without tags** → Add default_tags in provider block. Tags are mandatory for cost tracking.
- **Missing `prevent_destroy` on databases/storage** → Add lifecycle block to prevent accidental deletion.
---
## Installation
### One-liner (any tool)
```bash
git clone https://github.com/alirezarezvani/claude-skills.git
cp -r claude-skills/engineering/terraform-patterns ~/.claude/skills/
```
### Multi-tool install
```bash
./scripts/convert.sh --skill terraform-patterns --tool <codex|gemini|cursor|windsurf|openclaw>
```
### OpenClaw
```bash
clawhub install terraform-patterns
```
---
## Related Skills
- **senior-devops** — Broader DevOps scope (CI/CD, monitoring, containerization). Complementary — use terraform-patterns for IaC-specific work, senior-devops for pipeline and infrastructure operations.
- **aws-solution-architect** — AWS architecture design. Complementary — terraform-patterns implements the infrastructure, aws-solution-architect designs it.
- **senior-security** — Application security. Complementary — terraform-patterns covers infrastructure security posture, senior-security covers application-level threats.
- **ci-cd-pipeline-builder** — Pipeline construction. Complementary — terraform-patterns defines infrastructure, ci-cd-pipeline-builder automates deployment.

View File

@@ -0,0 +1,409 @@
# Terraform Module Design Patterns Reference
## Pattern 1: Flat Module (Single Directory)
Best for: Small projects, < 20 resources, single team ownership.
```
project/
├── main.tf
├── variables.tf
├── outputs.tf
├── versions.tf
├── locals.tf
├── backend.tf
└── terraform.tfvars
```
### Example: Simple VPC + EC2
```hcl
# versions.tf
terraform {
required_version = ">= 1.5.0"
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
}
}
# locals.tf
locals {
name_prefix = "${var.project}-${var.environment}"
common_tags = {
Project = var.project
Environment = var.environment
ManagedBy = "terraform"
}
}
# main.tf
resource "aws_vpc" "main" {
cidr_block = var.vpc_cidr
enable_dns_hostnames = true
enable_dns_support = true
tags = merge(local.common_tags, {
Name = "${local.name_prefix}-vpc"
})
}
resource "aws_subnet" "public" {
count = length(var.public_subnet_cidrs)
vpc_id = aws_vpc.main.id
cidr_block = var.public_subnet_cidrs[count.index]
availability_zone = var.availability_zones[count.index]
tags = merge(local.common_tags, {
Name = "${local.name_prefix}-public-${count.index + 1}"
Tier = "public"
})
}
# variables.tf
variable "project" {
description = "Project name used for resource naming"
type = string
}
variable "environment" {
description = "Deployment environment"
type = string
validation {
condition = contains(["dev", "staging", "prod"], var.environment)
error_message = "Environment must be dev, staging, or prod."
}
}
variable "vpc_cidr" {
description = "CIDR block for the VPC"
type = string
default = "10.0.0.0/16"
validation {
condition = can(cidrhost(var.vpc_cidr, 0))
error_message = "Must be a valid CIDR block."
}
}
variable "public_subnet_cidrs" {
description = "CIDR blocks for public subnets"
type = list(string)
default = ["10.0.1.0/24", "10.0.2.0/24"]
}
variable "availability_zones" {
description = "AZs for subnet placement"
type = list(string)
default = ["us-east-1a", "us-east-1b"]
}
# outputs.tf
output "vpc_id" {
description = "ID of the created VPC"
value = aws_vpc.main.id
}
output "public_subnet_ids" {
description = "IDs of public subnets"
value = aws_subnet.public[*].id
}
```
---
## Pattern 2: Nested Modules (Composition)
Best for: Multiple environments, shared patterns, team collaboration.
```
infrastructure/
├── environments/
│ ├── dev/
│ │ ├── main.tf
│ │ ├── backend.tf
│ │ └── terraform.tfvars
│ ├── staging/
│ │ └── ...
│ └── prod/
│ └── ...
└── modules/
├── networking/
│ ├── main.tf
│ ├── variables.tf
│ └── outputs.tf
├── compute/
│ └── ...
└── database/
└── ...
```
### Root Module (environments/dev/main.tf)
```hcl
module "networking" {
source = "../../modules/networking"
project = var.project
environment = "dev"
vpc_cidr = "10.0.0.0/16"
public_subnet_cidrs = ["10.0.1.0/24", "10.0.2.0/24"]
private_subnet_cidrs = ["10.0.10.0/24", "10.0.11.0/24"]
}
module "compute" {
source = "../../modules/compute"
project = var.project
environment = "dev"
vpc_id = module.networking.vpc_id
subnet_ids = module.networking.private_subnet_ids
instance_type = "t3.micro"
instance_count = 1
}
module "database" {
source = "../../modules/database"
project = var.project
environment = "dev"
vpc_id = module.networking.vpc_id
subnet_ids = module.networking.private_subnet_ids
instance_class = "db.t3.micro"
allocated_storage = 20
db_password = var.db_password
}
```
### Key Rules
- Child modules never call other child modules
- Pass values explicitly — no hidden data source lookups in children
- Provider configuration only in root module
- Each module has its own variables.tf, outputs.tf, main.tf
---
## Pattern 3: Registry Module Pattern
Best for: Reusable modules shared across teams or organizations.
```
terraform-aws-vpc/
├── main.tf
├── variables.tf
├── outputs.tf
├── versions.tf
├── README.md
├── examples/
│ ├── simple/
│ │ └── main.tf
│ └── complete/
│ └── main.tf
└── modules/
├── subnet/
│ ├── main.tf
│ ├── variables.tf
│ └── outputs.tf
└── nat-gateway/
└── ...
```
### Publishing Conventions
```hcl
# Consumer usage
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "~> 5.0"
name = "my-vpc"
cidr = "10.0.0.0/16"
azs = ["us-east-1a", "us-east-1b"]
private_subnets = ["10.0.1.0/24", "10.0.2.0/24"]
public_subnets = ["10.0.101.0/24", "10.0.102.0/24"]
enable_nat_gateway = true
single_nat_gateway = true
}
```
### Registry Module Requirements
- Repository named `terraform-<PROVIDER>-<NAME>`
- README.md with usage examples
- Semantic versioning via git tags
- examples/ directory with working configurations
- No provider configuration in the module itself
---
## Pattern 4: Mono-Repo with Workspaces
Best for: Teams that prefer single-repo with workspace-based isolation.
```hcl
# backend.tf
terraform {
backend "s3" {
bucket = "my-terraform-state"
key = "project/terraform.tfstate"
region = "us-east-1"
dynamodb_table = "terraform-locks"
encrypt = true
}
}
# main.tf
locals {
env_config = {
dev = {
instance_type = "t3.micro"
instance_count = 1
db_class = "db.t3.micro"
}
staging = {
instance_type = "t3.small"
instance_count = 2
db_class = "db.t3.small"
}
prod = {
instance_type = "t3.large"
instance_count = 3
db_class = "db.r5.large"
}
}
config = local.env_config[terraform.workspace]
}
```
### Usage
```bash
terraform workspace new dev
terraform workspace new staging
terraform workspace new prod
terraform workspace select dev
terraform apply
terraform workspace select prod
terraform apply
```
### Workspace Caveats
- All environments share the same backend — less isolation than separate directories
- A mistake in the code affects all environments
- Can't have different provider versions per workspace
- Recommended only for simple setups; prefer separate directories for production
---
## Pattern 5: for_each vs count
### Use `count` for identical resources
```hcl
resource "aws_subnet" "public" {
count = 3
vpc_id = aws_vpc.main.id
cidr_block = cidrsubnet(var.vpc_cidr, 8, count.index)
availability_zone = data.aws_availability_zones.available.names[count.index]
}
```
### Use `for_each` for distinct resources
```hcl
variable "buckets" {
type = map(object({
versioning = bool
lifecycle_days = number
}))
default = {
logs = { versioning = false, lifecycle_days = 30 }
backups = { versioning = true, lifecycle_days = 90 }
assets = { versioning = true, lifecycle_days = 0 }
}
}
resource "aws_s3_bucket" "this" {
for_each = var.buckets
bucket = "${var.project}-${each.key}"
}
resource "aws_s3_bucket_versioning" "this" {
for_each = { for k, v in var.buckets : k => v if v.versioning }
bucket = aws_s3_bucket.this[each.key].id
versioning_configuration {
status = "Enabled"
}
}
```
### Why `for_each` > `count`
- `count` uses index — removing item 0 shifts all others, causing destroy/recreate
- `for_each` uses keys — removing a key only affects that resource
- Use `count` only for identical resources where order doesn't matter
---
## Variable Design Patterns
### Object Variables for Related Settings
```hcl
variable "database" {
description = "Database configuration"
type = object({
engine = string
instance_class = string
storage_gb = number
multi_az = bool
backup_days = number
})
default = {
engine = "postgres"
instance_class = "db.t3.micro"
storage_gb = 20
multi_az = false
backup_days = 7
}
}
```
### Validation Blocks
```hcl
variable "instance_type" {
description = "EC2 instance type"
type = string
validation {
condition = can(regex("^t[23]\\.", var.instance_type))
error_message = "Only t2 or t3 instance types are allowed."
}
}
variable "cidr_block" {
description = "VPC CIDR block"
type = string
validation {
condition = can(cidrhost(var.cidr_block, 0))
error_message = "Must be a valid IPv4 CIDR block."
}
}
```
---
## Anti-Patterns to Avoid
| Anti-Pattern | Problem | Solution |
|-------------|---------|----------|
| God module (100+ resources) | Impossible to reason about, slow plan/apply | Split into focused child modules |
| Circular module dependencies | Terraform can't resolve dependency graph | Flatten or restructure module boundaries |
| Data sources in child modules | Hidden dependencies, hard to test | Pass values as variables from root module |
| Provider config in child modules | Can't reuse module across accounts/regions | Configure providers in root only |
| Hardcoded values | Not reusable across environments | Use variables with defaults and validation |
| No outputs | Consumer modules can't reference resources | Output IDs, ARNs, endpoints |
| No variable descriptions | Users don't know what to provide | Every variable gets a description |
| `terraform.tfvars` committed | Secrets leak to version control | Use `.gitignore`, env vars, or Vault |

View File

@@ -0,0 +1,472 @@
# Terraform State Management Reference
## Backend Configuration Patterns
### AWS: S3 + DynamoDB (Recommended)
```hcl
terraform {
backend "s3" {
bucket = "mycompany-terraform-state"
key = "project/env/terraform.tfstate"
region = "us-east-1"
encrypt = true
dynamodb_table = "terraform-locks"
# Optional: KMS key for encryption
# kms_key_id = "arn:aws:kms:us-east-1:ACCOUNT:key/KEY_ID"
}
}
```
**Prerequisites:**
```hcl
# Bootstrap these resources manually or with a separate Terraform config
resource "aws_s3_bucket" "state" {
bucket = "mycompany-terraform-state"
lifecycle {
prevent_destroy = true
}
}
resource "aws_s3_bucket_versioning" "state" {
bucket = aws_s3_bucket.state.id
versioning_configuration {
status = "Enabled"
}
}
resource "aws_s3_bucket_server_side_encryption_configuration" "state" {
bucket = aws_s3_bucket.state.id
rule {
apply_server_side_encryption_by_default {
sse_algorithm = "aws:kms"
}
}
}
resource "aws_s3_bucket_public_access_block" "state" {
bucket = aws_s3_bucket.state.id
block_public_acls = true
block_public_policy = true
ignore_public_acls = true
restrict_public_buckets = true
}
resource "aws_dynamodb_table" "locks" {
name = "terraform-locks"
billing_mode = "PAY_PER_REQUEST"
hash_key = "LockID"
attribute {
name = "LockID"
type = "S"
}
}
```
---
### GCP: Google Cloud Storage
```hcl
terraform {
backend "gcs" {
bucket = "mycompany-terraform-state"
prefix = "project/env"
}
}
```
**Key features:**
- Native locking (no separate lock table needed)
- Object versioning for state history
- IAM-based access control
- Encryption at rest by default
---
### Azure: Blob Storage
```hcl
terraform {
backend "azurerm" {
resource_group_name = "terraform-state-rg"
storage_account_name = "mycompanytfstate"
container_name = "tfstate"
key = "project/env/terraform.tfstate"
}
}
```
**Key features:**
- Native blob locking
- Encryption at rest with Microsoft-managed or customer-managed keys
- RBAC-based access control
---
### Terraform Cloud / Enterprise
```hcl
terraform {
cloud {
organization = "mycompany"
workspaces {
name = "project-dev"
}
}
}
```
**Key features:**
- Built-in state locking, encryption, and versioning
- RBAC and team-based access control
- Remote execution (plan/apply run in TF Cloud)
- Sentinel policy-as-code integration
- Cost estimation on plans
---
## Environment Isolation Strategies
### Strategy 1: Separate Directories (Recommended)
```
infrastructure/
├── environments/
│ ├── dev/
│ │ ├── main.tf
│ │ ├── backend.tf # key = "project/dev/terraform.tfstate"
│ │ └── terraform.tfvars
│ ├── staging/
│ │ ├── main.tf
│ │ ├── backend.tf # key = "project/staging/terraform.tfstate"
│ │ └── terraform.tfvars
│ └── prod/
│ ├── main.tf
│ ├── backend.tf # key = "project/prod/terraform.tfstate"
│ └── terraform.tfvars
└── modules/
└── ...
```
**Pros:**
- Complete isolation — a mistake in dev can't affect prod
- Different provider versions per environment
- Different module versions per environment (pin prod, iterate in dev)
- Clear audit trail — who changed what, where
**Cons:**
- Some duplication across environment directories
- Must update modules in each environment separately
### Strategy 2: Terraform Workspaces
```hcl
# Single directory, multiple workspaces
terraform {
backend "s3" {
bucket = "mycompany-terraform-state"
key = "project/terraform.tfstate"
region = "us-east-1"
dynamodb_table = "terraform-locks"
encrypt = true
}
}
# State files stored at:
# env:/dev/project/terraform.tfstate
# env:/staging/project/terraform.tfstate
# env:/prod/project/terraform.tfstate
```
```bash
terraform workspace new dev
terraform workspace select dev
terraform plan -var-file="env/dev.tfvars"
```
**Pros:**
- Less duplication — single set of .tf files
- Quick to switch between environments
- Built-in workspace support in backends
**Cons:**
- Shared code means a bug affects all environments simultaneously
- Can't have different provider versions per workspace
- Easy to accidentally apply to wrong workspace
- Less isolation than separate directories
### Strategy 3: Terragrunt (DRY Configuration)
```
infrastructure/
├── terragrunt.hcl # Root — defines remote state pattern
├── modules/
│ └── vpc/
│ ├── main.tf
│ ├── variables.tf
│ └── outputs.tf
├── dev/
│ ├── terragrunt.hcl # env = "dev"
│ └── vpc/
│ └── terragrunt.hcl # inputs for dev VPC
├── staging/
│ └── ...
└── prod/
└── ...
```
```hcl
# Root terragrunt.hcl
remote_state {
backend = "s3"
generate = {
path = "backend.tf"
if_exists = "overwrite_terragrunt"
}
config = {
bucket = "mycompany-terraform-state"
key = "${path_relative_to_include()}/terraform.tfstate"
region = "us-east-1"
encrypt = true
dynamodb_table = "terraform-locks"
}
}
# dev/vpc/terragrunt.hcl
terraform {
source = "../../modules/vpc"
}
inputs = {
environment = "dev"
vpc_cidr = "10.0.0.0/16"
}
```
**Pros:**
- Maximum DRY — define module once, parameterize per environment
- Automatic state key generation from directory structure
- Dependency management between modules (`dependency` blocks)
- `run-all` for applying multiple modules at once
**Cons:**
- Additional tool dependency (Terragrunt)
- Learning curve
- Debugging can be harder (generated files)
---
## State Migration Patterns
### Local to Remote (S3)
```bash
# 1. Add backend configuration to backend.tf
# 2. Run init with migration flag
terraform init -migrate-state
# Terraform will prompt:
# "Do you want to copy existing state to the new backend?"
# Answer: yes
```
### Between Remote Backends
```bash
# 1. Pull current state
terraform state pull > terraform.tfstate.backup
# 2. Update backend configuration in backend.tf
# 3. Reinitialize with migration
terraform init -migrate-state
# 4. Verify
terraform plan # Should show no changes
```
### State Import (Existing Resources)
```bash
# Import a single resource
terraform import aws_instance.web i-1234567890abcdef0
# Import with for_each key
terraform import 'aws_subnet.public["us-east-1a"]' subnet-0123456789abcdef0
# Bulk import (Terraform 1.5+ "import" blocks — this is HCL placed in a .tf file, not a shell command; run plan/apply afterwards)
import {
to = aws_instance.web
id = "i-1234567890abcdef0"
}
```
### State Move (Refactoring)
```bash
# Rename a resource (avoids destroy/recreate)
terraform state mv aws_instance.old_name aws_instance.new_name
# Move into a module
terraform state mv aws_instance.web module.compute.aws_instance.web
# Move between state files
terraform state mv -state-out=other.tfstate aws_instance.web aws_instance.web
```
---
## State Locking
### Why Locking Matters
Without locking, two concurrent `terraform apply` runs can corrupt state. The second apply reads stale state and may create duplicate resources or lose track of existing ones.
### Lock Behavior by Backend
| Backend | Lock Mechanism | Auto-Lock | Force Unlock |
|---------|---------------|-----------|--------------|
| S3 | DynamoDB table | Yes (if table configured) | `terraform force-unlock LOCK_ID` |
| GCS | Native blob locking | Yes | `terraform force-unlock LOCK_ID` |
| Azure Blob | Native blob lease | Yes | `terraform force-unlock LOCK_ID` |
| TF Cloud | Built-in | Always | Via UI or API |
| Consul | Key-value lock | Yes | `terraform force-unlock LOCK_ID` |
| Local | `.terraform.tfstate.lock.info` file | Yes (single user) | Delete the lock file (note: `.terraform.lock.hcl` is the provider dependency lock, not the state lock) |
### Force Unlock (Emergency Only)
```bash
# Only use when you're certain no other process is running
terraform force-unlock LOCK_ID
# The LOCK_ID is shown in the error message when lock fails:
# Error: Error locking state: Error acquiring the state lock
# Lock Info:
# ID: 12345678-abcd-1234-abcd-1234567890ab
```
---
## State Security Best Practices
### 1. Encrypt at Rest
```hcl
# S3 — server-side encryption
backend "s3" {
encrypt = true
kms_key_id = "arn:aws:kms:us-east-1:ACCOUNT:key/KEY_ID"
}
```
### 2. Restrict Access
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": "arn:aws:s3:::mycompany-terraform-state/project/*",
"Condition": {
"StringEquals": {
"aws:PrincipalTag/Team": "platform"
}
}
},
{
"Effect": "Allow",
"Action": [
"dynamodb:GetItem",
"dynamodb:PutItem",
"dynamodb:DeleteItem"
],
"Resource": "arn:aws:dynamodb:us-east-1:ACCOUNT:table/terraform-locks"
}
]
}
```
### 3. Enable Versioning (State History)
```hcl
resource "aws_s3_bucket_versioning" "state" {
bucket = aws_s3_bucket.state.id
versioning_configuration {
status = "Enabled"
}
}
```
Versioning lets you recover from state corruption by restoring a previous version.
### 4. Audit Access
- Enable S3 access logging or CloudTrail data events
- Monitor for unexpected state reads (potential secret extraction)
- State files contain sensitive values — treat them like credentials
### 5. Sensitive Values in State
Terraform stores all resource attributes in state, including passwords, private keys, and tokens. This is unavoidable. Mitigate by:
- Encrypting state at rest (KMS)
- Restricting state file access (IAM)
- Using `sensitive = true` on variables and outputs (prevents display, not storage)
- Rotating secrets regularly (state contains the value at apply time)
---
## Drift Detection and Reconciliation
### Detect Drift
```bash
# Plan with detailed exit code
terraform plan -detailed-exitcode
# Exit 0 = no changes
# Exit 1 = error
# Exit 2 = changes detected (drift)
```
### Common Drift Sources
| Source | Example | Prevention |
|--------|---------|------------|
| Console changes | Someone edits SG rules in AWS Console | SCPs to restrict console access, or accept and reconcile |
| Auto-scaling | ASG launches instances not in state | Don't manage individual instances; manage ASG |
| External tools | Ansible modifies EC2 tags | Agree on ownership boundaries |
| Dependent resource changes | AMI deregistered | Use data sources to detect, lifecycle ignore_changes |
### Reconciliation Options
```
# Option 1: Apply to restore desired state
terraform apply
# Option 2: Refresh state to match reality
terraform apply -refresh-only
# Option 3: Ignore specific attribute drift
resource "aws_instance" "web" {
lifecycle {
ignore_changes = [tags["LastModifiedBy"], ami]
}
}
# Option 4: Import the manually-created resource
terraform import aws_security_group_rule.new sg-12345_ingress_tcp_443_443_0.0.0.0/0
```
---
## Troubleshooting Checklist
| Symptom | Likely Cause | Fix |
|---------|-------------|-----|
| "Error acquiring state lock" | Concurrent run or crashed process | Wait for other run to finish, or `force-unlock` |
| "Backend configuration changed" | Backend config modified | Run `terraform init -reconfigure` or `-migrate-state` |
| "Resource already exists" | Resource created outside Terraform | `terraform import` the resource |
| "No matching resource found" | Resource deleted outside Terraform | `terraform state rm` the resource |
| State file growing very large | Too many resources in one state | Split into smaller state files using modules |
| Slow plan/apply | Large state file, many resources | Split state, use `-target` for urgent changes |
| "Provider produced inconsistent result" | Provider bug or API race condition | Retry, or pin provider version |
| Workspace confusion | Applied to wrong workspace | Always check `terraform workspace show` before apply |

View File

@@ -0,0 +1,461 @@
#!/usr/bin/env python3
"""
terraform-patterns: Terraform Module Analyzer
Analyze a Terraform directory structure for module quality, resource counts,
naming conventions, and structural best practices. Reports variable/output
coverage, file organization, and actionable recommendations.
Usage:
python scripts/tf_module_analyzer.py ./terraform
python scripts/tf_module_analyzer.py ./terraform --output json
python scripts/tf_module_analyzer.py ./modules/vpc
"""
import argparse
import json
import os
import re
import sys
from pathlib import Path
# --- Demo Terraform Files ---
DEMO_FILES = {
"main.tf": """
resource "aws_instance" "web_server" {
ami = var.ami_id
instance_type = var.instance_type
tags = {
Name = "web-server"
}
}
resource "aws_s3_bucket" "data" {
bucket = "my-data-bucket-12345"
}
resource "aws_security_group" "web" {
name = "web-sg"
ingress {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 22
to_port = 22
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
}
data "aws_ami" "ubuntu" {
most_recent = true
owners = ["099720109477"]
}
module "vpc" {
source = "./modules/vpc"
cidr = var.vpc_cidr
}
""",
"variables.tf": """
variable "ami_id" {
type = string
}
variable "instance_type" {
default = "t3.micro"
}
variable "vpc_cidr" {
description = "CIDR block for the VPC"
type = string
default = "10.0.0.0/16"
}
variable "environment" {
description = "Deployment environment"
type = string
validation {
condition = contains(["dev", "staging", "prod"], var.environment)
error_message = "Environment must be dev, staging, or prod."
}
}
""",
"outputs.tf": """
output "instance_id" {
value = aws_instance.web_server.id
}
output "bucket_arn" {
value = aws_s3_bucket.data.arn
description = "ARN of the data S3 bucket"
}
""",
}
# --- Naming convention patterns ---
# Terraform resource naming: lowercase, underscores, alphanumeric
VALID_RESOURCE_NAME = re.compile(r'^[a-z][a-z0-9_]*$')
# Expected files in a well-structured module
EXPECTED_FILES = {
"main.tf": "Primary resources",
"variables.tf": "Input variables",
"outputs.tf": "Output values",
"versions.tf": "Provider and Terraform version requirements",
}
OPTIONAL_FILES = {
"locals.tf": "Computed local values",
"data.tf": "Data sources",
"backend.tf": "Remote state backend configuration",
"providers.tf": "Provider configuration",
"README.md": "Module documentation",
}
def find_tf_files(directory):
    """Collect the text of every top-level .tf file in *directory*.

    Only the directory itself is scanned (no recursion). Returns a mapping
    of filename -> file contents, with filenames visited in sorted order.
    """
    contents = {}
    for name in sorted(os.listdir(directory)):
        if not name.endswith(".tf"):
            continue
        path = os.path.join(directory, name)
        with open(path, encoding="utf-8") as handle:
            contents[name] = handle.read()
    return contents
def parse_resources(content):
    """Return resource declarations found in HCL *content*.

    Each entry records the resource type, its local name, and the provider
    prefix (segment of the type before the first underscore).
    """
    pattern = re.compile(r'^resource\s+"([^"]+)"\s+"([^"]+)"', re.MULTILINE)
    found = []
    for rtype, rname in pattern.findall(content):
        found.append(
            {"type": rtype, "name": rname, "provider": rtype.split("_")[0]}
        )
    return found
def parse_data_sources(content):
    """Return data source declarations (type + local name) from HCL *content*."""
    pattern = re.compile(r'^data\s+"([^"]+)"\s+"([^"]+)"', re.MULTILINE)
    return [{"type": dtype, "name": dname} for dtype, dname in pattern.findall(content)]
def parse_variables(content):
    """Extract variable blocks with quality metadata from HCL *content*.

    For each top-level ``variable "<name>" { ... }`` block (the closing
    brace must be at the start of a line), records whether it declares a
    description, a type constraint, a default, a validation block, and
    ``sensitive = true``.
    """
    block_re = re.compile(
        r'^variable\s+"([^"]+)"\s*\{(.*?)\n\}', re.MULTILINE | re.DOTALL
    )
    results = []
    for name, body in block_re.findall(content):
        results.append({
            "name": name,
            "has_description": "description" in body,
            "has_type": re.search(r'\btype\s*=', body) is not None,
            "has_default": re.search(r'\bdefault\s*=', body) is not None,
            "has_validation": "validation" in body,
            "is_sensitive": re.search(r'\bsensitive\s*=\s*true', body) is not None,
        })
    return results
def parse_outputs(content):
    """Extract output blocks with description/sensitivity flags from *content*."""
    block_re = re.compile(
        r'^output\s+"([^"]+)"\s*\{(.*?)\n\}', re.MULTILINE | re.DOTALL
    )
    outputs = []
    for name, body in block_re.findall(content):
        outputs.append({
            "name": name,
            "has_description": "description" in body,
            "is_sensitive": re.search(r'\bsensitive\s*=\s*true', body) is not None,
        })
    return outputs
def parse_modules(content):
    """Extract module calls and their source addresses from HCL *content*.

    Modules missing a ``source`` attribute are reported with source
    "unknown" rather than dropped.
    """
    block_re = re.compile(
        r'^module\s+"([^"]+)"\s*\{(.*?)\n\}', re.MULTILINE | re.DOTALL
    )
    calls = []
    for name, body in block_re.findall(content):
        src = re.search(r'source\s*=\s*"([^"]+)"', body)
        calls.append({"name": name, "source": src.group(1) if src else "unknown"})
    return calls
def check_naming(resources, data_sources):
    """Validate local names of resources and data sources.

    Flags names that are not lowercase_with_underscores (medium) and
    resource names that redundantly repeat their provider prefix (low).
    """
    problems = []
    for res in resources:
        label = f"{res['type']}.{res['name']}"
        if not VALID_RESOURCE_NAME.match(res["name"]):
            problems.append({
                "severity": "medium",
                "message": f"Resource '{label}' uses non-standard naming — use lowercase with underscores",
            })
        if res["name"].startswith(res["provider"] + "_"):
            problems.append({
                "severity": "low",
                "message": f"Resource '{label}' name repeats the provider prefix — redundant",
            })
    for src in data_sources:
        if not VALID_RESOURCE_NAME.match(src["name"]):
            problems.append({
                "severity": "medium",
                "message": f"Data source '{src['type']}.{src['name']}' uses non-standard naming",
            })
    return problems
def check_variables(variables):
    """Evaluate input-variable hygiene.

    Flags missing descriptions (medium), missing type constraints (high),
    and secret-looking names that are not marked sensitive (high).
    """
    # Heuristic substrings suggesting a variable holds a secret value.
    secret_hints = ["password", "secret", "token", "key", "api_key", "credentials"]
    issues = []
    for var in variables:
        if not var["has_description"]:
            issues.append({
                "severity": "medium",
                "message": f"Variable '{var['name']}' missing description — consumers won't know what to provide",
            })
        if not var["has_type"]:
            issues.append({
                "severity": "high",
                "message": f"Variable '{var['name']}' missing type constraint — accepts any value",
            })
        lowered = var["name"].lower()
        if any(hint in lowered for hint in secret_hints) and not var["is_sensitive"]:
            issues.append({
                "severity": "high",
                "message": f"Variable '{var['name']}' looks like a secret but is not marked sensitive = true",
            })
    return issues
def check_outputs(outputs):
    """Flag outputs lacking a description (low severity)."""
    return [
        {
            "severity": "low",
            "message": f"Output '{out['name']}' missing description",
        }
        for out in outputs
        if not out["has_description"]
    ]
def check_file_structure(tf_files):
    """Verify the module contains the conventional file layout.

    Reports one finding per EXPECTED_FILES entry that is absent. A missing
    'versions.tf' is high severity (it pins Terraform/provider versions);
    the rest are medium.
    """
    issues = []
    filenames = set(tf_files.keys())
    for expected, purpose in EXPECTED_FILES.items():
        if expected not in filenames:
            issues.append({
                "severity": "high" if expected == "versions.tf" else "medium",
                # Bug fix: a separator was missing between the filename and
                # its purpose, producing "Missing 'main.tf'Primary resources".
                "message": f"Missing '{expected}' — {purpose}",
            })
    return issues
def analyze_directory(tf_files):
    """Run the full structural analysis over a set of .tf files.

    Args:
        tf_files: Mapping of filename -> HCL file contents.

    Returns:
        A dict with the parsed inventories (resources, data sources,
        variables, outputs, module calls, provider prefixes) and a
        severity-sorted list of findings.
    """
    all_content = "\n".join(tf_files.values())
    resources = parse_resources(all_content)
    data_sources = parse_data_sources(all_content)
    variables = parse_variables(all_content)
    outputs = parse_outputs(all_content)
    modules = parse_modules(all_content)
    # Collect findings from each structural check.
    findings = []
    findings.extend(check_file_structure(tf_files))
    findings.extend(check_naming(resources, data_sources))
    findings.extend(check_variables(variables))
    findings.extend(check_outputs(outputs))
    # Is a state backend configured anywhere in the module?
    has_backend = any(
        re.search(r'\bbackend\s+"', content)
        for content in tf_files.values()
    )
    if not has_backend:
        findings.append({
            "severity": "high",
            "message": "No remote backend configured — state is stored locally",
        })
    # required_version pins which Terraform releases may run this config.
    has_tf_version = any(
        re.search(r'required_version\s*=', content)
        for content in tf_files.values()
    )
    if not has_tf_version:
        findings.append({
            "severity": "medium",
            "message": "No required_version constraint — any Terraform version can be used",
        })
    # Provider blocks belong in the root module; flag them anywhere else.
    for filename, content in tf_files.items():
        if filename not in ("providers.tf", "versions.tf", "backend.tf"):
            if re.search(r'^provider\s+"', content, re.MULTILINE):
                findings.append({
                    "severity": "medium",
                    # Bug fix: the message previously contained the literal
                    # text '(unknown)' — the filename was never interpolated.
                    "message": f"Provider configuration found in '{filename}' — keep providers in root module only",
                })
    # Most severe findings first.
    severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    findings.sort(key=lambda f: severity_order.get(f["severity"], 4))
    # Unique provider prefixes across all resources.
    providers = sorted(set(r["provider"] for r in resources))
    return {
        "files": sorted(tf_files.keys()),
        "file_count": len(tf_files),
        "resources": resources,
        "resource_count": len(resources),
        "data_sources": data_sources,
        "data_source_count": len(data_sources),
        "variables": variables,
        "variable_count": len(variables),
        "outputs": outputs,
        "output_count": len(outputs),
        "modules": modules,
        "module_count": len(modules),
        "providers": providers,
        "findings": findings,
    }
def generate_report(analysis, output_format="text"):
    """Render the analysis as text or JSON and return the summary dict.

    Scoring starts at 100 and deducts per finding by severity
    (critical 25, high 15, medium 5, low 2), floored at 0.

    Args:
        analysis: Result of analyze_directory().
        output_format: "json" to print the result dict as JSON, anything
            else for the human-readable text report.

    Returns:
        The summary dict (score, inventory counts, findings, per-severity
        finding counts).
    """
    findings = analysis["findings"]
    # Score
    deductions = {"critical": 25, "high": 15, "medium": 5, "low": 2}
    score = max(0, 100 - sum(deductions.get(f["severity"], 0) for f in findings))
    counts = {
        "critical": sum(1 for f in findings if f["severity"] == "critical"),
        "high": sum(1 for f in findings if f["severity"] == "high"),
        "medium": sum(1 for f in findings if f["severity"] == "medium"),
        "low": sum(1 for f in findings if f["severity"] == "low"),
    }
    result = {
        "score": score,
        "files": analysis["files"],
        "resource_count": analysis["resource_count"],
        "data_source_count": analysis["data_source_count"],
        "variable_count": analysis["variable_count"],
        "output_count": analysis["output_count"],
        "module_count": analysis["module_count"],
        "providers": analysis["providers"],
        "findings": findings,
        "finding_counts": counts,
    }
    if output_format == "json":
        print(json.dumps(result, indent=2))
        return result
    # Text output
    print(f"\n{'=' * 60}")
    print(f" Terraform Module Analysis Report")
    print(f"{'=' * 60}")
    print(f" Score: {score}/100")
    print(f" Files: {', '.join(analysis['files'])}")
    print(f" Providers: {', '.join(analysis['providers']) if analysis['providers'] else 'none detected'}")
    print()
    print(f" Resources: {analysis['resource_count']} | Data Sources: {analysis['data_source_count']}")
    print(f" Variables: {analysis['variable_count']} | Outputs: {analysis['output_count']} | Modules: {analysis['module_count']}")
    print()
    print(f" Findings: {counts['critical']} critical | {counts['high']} high | {counts['medium']} medium | {counts['low']} low")
    # Bug fix: this separator printed '' * 60 — an empty string — so no
    # divider appeared; the dash character was evidently lost.
    print(f"{'-' * 60}")
    for f in findings:
        icon = {"critical": "!!!", "high": "!!", "medium": "!", "low": "~"}.get(f["severity"], "?")
        print(f"\n {icon} {f['severity'].upper()}")
        print(f" {f['message']}")
    if not findings:
        print("\n No issues found. Module structure looks good.")
    print(f"\n{'=' * 60}\n")
    return result
def main():
    """CLI entry point: parse arguments, load .tf files (or the built-in
    demo set), run the analysis, and print the report."""
    arg_parser = argparse.ArgumentParser(
        description="terraform-patterns: Terraform module analyzer"
    )
    arg_parser.add_argument(
        "directory", nargs="?",
        help="Path to Terraform directory (omit for demo)",
    )
    arg_parser.add_argument(
        "--output", "-o",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)",
    )
    opts = arg_parser.parse_args()
    if not opts.directory:
        # No path given: fall back to the bundled demo module.
        print("No directory provided. Running demo analysis...\n")
        tf_sources = DEMO_FILES
    else:
        dirpath = Path(opts.directory)
        if not dirpath.is_dir():
            print(f"Error: Not a directory: {opts.directory}", file=sys.stderr)
            sys.exit(1)
        tf_sources = find_tf_files(str(dirpath))
        if not tf_sources:
            print(f"Error: No .tf files found in {opts.directory}", file=sys.stderr)
            sys.exit(1)
    generate_report(analyze_directory(tf_sources), opts.output)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,577 @@
#!/usr/bin/env python3
"""
terraform-patterns: Terraform Security Scanner
Scan .tf files for common security issues including hardcoded secrets,
overly permissive IAM policies, open security groups, missing encryption,
and sensitive variable misuse.
Usage:
python scripts/tf_security_scanner.py ./terraform
python scripts/tf_security_scanner.py ./terraform --output json
python scripts/tf_security_scanner.py ./terraform --strict
"""
import argparse
import json
import os
import re
import sys
from pathlib import Path
# --- Demo Terraform File ---
DEMO_TF = """
provider "aws" {
region = "us-east-1"
access_key = "AKIAIOSFODNN7EXAMPLE"
secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
}
variable "db_password" {
type = string
default = "supersecret123"
}
resource "aws_instance" "web" {
ami = "ami-12345678"
instance_type = "t3.micro"
tags = {
Name = "web-server"
}
}
resource "aws_security_group" "web" {
name = "web-sg"
ingress {
from_port = 22
to_port = 22
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 0
to_port = 65535
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
}
resource "aws_iam_policy" "admin" {
name = "admin-policy"
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Action = "*"
Resource = "*"
}
]
})
}
resource "aws_s3_bucket" "data" {
bucket = "my-data-bucket"
}
resource "aws_db_instance" "main" {
engine = "mysql"
instance_class = "db.t3.micro"
password = "hardcoded-password"
publicly_accessible = true
skip_final_snapshot = true
}
"""
# --- Security Rules ---
# Each rule is a dict with a stable id (SECnnn), a severity, an optional
# regex pattern, a human-readable message, and a suggested fix. Rules whose
# "pattern" is None are implemented by dedicated checker functions below
# (check_security_groups, check_encryption) rather than by regex matching
# in check_regex_rules.

# Hardcoded credentials and key material (all critical).
SECRET_PATTERNS = [
    {
        "id": "SEC001",
        "name": "aws_access_key",
        "severity": "critical",
        "pattern": r'(?:access_key|aws_access_key_id)\s*=\s*"(AKIA[A-Z0-9]{16})"',
        "message": "AWS access key hardcoded in configuration",
        "fix": "Use environment variables, AWS profiles, or IAM roles instead",
    },
    {
        "id": "SEC002",
        "name": "aws_secret_key",
        "severity": "critical",
        "pattern": r'(?:secret_key|aws_secret_access_key)\s*=\s*"[A-Za-z0-9/+=]{40}"',
        "message": "AWS secret key hardcoded in configuration",
        "fix": "Use environment variables, AWS profiles, or IAM roles instead",
    },
    {
        "id": "SEC003",
        "name": "generic_password",
        "severity": "critical",
        "pattern": r'(?:password|passwd)\s*=\s*"[^"]{4,}"',
        "message": "Password hardcoded in resource or provider configuration",
        "fix": "Use a variable with sensitive = true, or fetch from Vault/SSM/Secrets Manager",
    },
    {
        "id": "SEC004",
        "name": "generic_secret",
        "severity": "critical",
        "pattern": r'(?:secret|token|api_key)\s*=\s*"[^"]{8,}"',
        "message": "Secret or token hardcoded in configuration",
        "fix": "Use a sensitive variable or secrets manager",
    },
    {
        "id": "SEC005",
        "name": "private_key",
        "severity": "critical",
        "pattern": r'-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----',
        "message": "Private key embedded in Terraform configuration",
        "fix": "Reference key file with file() function or use secrets manager",
    },
]
# Overly broad IAM policy statements.
IAM_PATTERNS = [
    {
        "id": "SEC010",
        "name": "iam_wildcard_action",
        "severity": "critical",
        "pattern": r'Action\s*=\s*"\*"',
        "message": "IAM policy with wildcard Action = \"*\" — grants all permissions",
        "fix": "Scope Action to specific services and operations",
    },
    {
        "id": "SEC011",
        "name": "iam_wildcard_resource",
        "severity": "high",
        "pattern": r'Resource\s*=\s*"\*"',
        "message": "IAM policy with wildcard Resource = \"*\" — applies to all resources",
        "fix": "Scope Resource to specific ARN patterns",
    },
    {
        "id": "SEC012",
        "name": "iam_star_star",
        "severity": "critical",
        "pattern": r'Action\s*=\s*"\*"[^}]*Resource\s*=\s*"\*"',
        "message": "IAM policy with Action=* AND Resource=* — effectively admin access",
        "fix": "Follow least-privilege: grant only the specific actions and resources needed",
    },
]
# World-open security group rules — detected by check_security_groups,
# which parses ingress blocks and port ranges (hence pattern = None).
NETWORK_PATTERNS = [
    {
        "id": "SEC020",
        "name": "sg_ssh_open",
        "severity": "critical",
        "pattern": None,  # Custom check
        "message": "Security group allows SSH (port 22) from 0.0.0.0/0",
        "fix": "Restrict to known CIDR blocks, or use SSM Session Manager instead",
    },
    {
        "id": "SEC021",
        "name": "sg_rdp_open",
        "severity": "critical",
        "pattern": None,  # Custom check
        "message": "Security group allows RDP (port 3389) from 0.0.0.0/0",
        "fix": "Restrict to known CIDR blocks, or use a bastion host",
    },
    {
        "id": "SEC022",
        "name": "sg_all_ports",
        "severity": "critical",
        "pattern": None,  # Custom check
        "message": "Security group allows all ports (0-65535) from 0.0.0.0/0",
        "fix": "Open only the specific ports your application needs",
    },
]
# Missing encryption at rest — detected by check_encryption (pattern = None).
ENCRYPTION_PATTERNS = [
    {
        "id": "SEC030",
        "name": "s3_no_encryption",
        "severity": "high",
        "pattern": None,  # Custom check
        "message": "S3 bucket without server-side encryption configuration",
        "fix": "Add aws_s3_bucket_server_side_encryption_configuration resource",
    },
    {
        "id": "SEC031",
        "name": "rds_no_encryption",
        "severity": "high",
        "pattern": None,  # Custom check
        "message": "RDS instance without storage encryption",
        "fix": "Set storage_encrypted = true on aws_db_instance",
    },
    {
        "id": "SEC032",
        "name": "ebs_no_encryption",
        "severity": "medium",
        "pattern": None,  # Custom check
        "message": "EBS volume without encryption",
        "fix": "Set encrypted = true on aws_ebs_volume or enable account-level default encryption",
    },
]
# Resources exposed to the public internet via configuration flags.
ACCESS_PATTERNS = [
    {
        "id": "SEC040",
        "name": "rds_public",
        "severity": "high",
        "pattern": r'publicly_accessible\s*=\s*true',
        "message": "RDS instance is publicly accessible",
        "fix": "Set publicly_accessible = false and access via VPC/bastion",
    },
    {
        "id": "SEC041",
        "name": "s3_public_acl",
        "severity": "high",
        "pattern": r'acl\s*=\s*"public-read(?:-write)?"',
        "message": "S3 bucket with public ACL",
        "fix": "Remove public ACL and add aws_s3_bucket_public_access_block",
    },
]
def find_tf_files(directory):
    """Read every top-level .tf file in *directory* (no recursion).

    Returns a mapping of filename -> file contents, with filenames
    visited in sorted order.
    """
    names = [n for n in sorted(os.listdir(directory)) if n.endswith(".tf")]
    tf_sources = {}
    for name in names:
        with open(os.path.join(directory, name), encoding="utf-8") as fh:
            tf_sources[name] = fh.read()
    return tf_sources
def check_regex_rules(content, rules):
    """Apply pattern-based security rules to *content* and collect findings.

    Rules with pattern=None are handled by dedicated checkers and skipped
    here. Matching is case-insensitive and multiline; the matched text
    (stripped, truncated to 80 chars) is attached as evidence.
    """
    hits = []
    flags = re.MULTILINE | re.IGNORECASE
    for rule in rules:
        pattern = rule["pattern"]
        if pattern is None:
            continue
        for match in re.finditer(pattern, content, flags):
            hits.append({
                "id": rule["id"],
                "severity": rule["severity"],
                "message": rule["message"],
                "fix": rule["fix"],
                "line": match.group(0).strip()[:80],
            })
    return hits
def check_security_groups(content):
    """Flag security-group ingress rules open to the world.

    Scans each aws_security_group resource body (closing brace at line
    start) for ingress blocks whose CIDR list contains 0.0.0.0/0 or ::/0,
    then reports SSH (22), RDP (3389), and all-ports (0-65535) exposure
    using the matching NETWORK_PATTERNS rule.
    """
    def finding_for(rule_id, evidence):
        # Look up the static rule metadata by id and attach the evidence.
        rule = next(r for r in NETWORK_PATTERNS if r["id"] == rule_id)
        return {
            "id": rule["id"],
            "severity": rule["severity"],
            "message": rule["message"],
            "fix": rule["fix"],
            "line": evidence,
        }

    results = []
    for sg in re.finditer(
        r'resource\s+"aws_security_group"[^{]*\{(.*?)\n\}', content, re.DOTALL
    ):
        for ingress in re.finditer(r'ingress\s*\{(.*?)\}', sg.group(1), re.DOTALL):
            body = ingress.group(1)
            if '0.0.0.0/0' not in body and '::/0' not in body:
                continue
            lo = re.search(r'from_port\s*=\s*(\d+)', body)
            hi = re.search(r'to_port\s*=\s*(\d+)', body)
            if not (lo and hi):
                continue
            lo_port = int(lo.group(1))
            hi_port = int(hi.group(1))
            if lo_port <= 22 <= hi_port:
                results.append(finding_for("SEC020", "ingress port 22, cidr 0.0.0.0/0"))
            if lo_port <= 3389 <= hi_port:
                results.append(finding_for("SEC021", "ingress port 3389, cidr 0.0.0.0/0"))
            if lo_port == 0 and hi_port >= 65535:
                results.append(finding_for("SEC022", "ingress ports 0-65535, cidr 0.0.0.0/0"))
    return results
def check_encryption(content):
    """Custom check for missing encryption on storage resources.

    Covers three resource families:
      * S3 (SEC030): if any aws_s3_bucket exists but no server-side
        encryption configuration appears ANYWHERE in the content, every
        bucket is flagged. NOTE(review): this is content-global — a single
        encryption config suppresses findings for all buckets, even ones
        it does not apply to.
      * RDS (SEC031): aws_db_instance blocks missing storage_encrypted,
        or with storage_encrypted = false.
      * EBS (SEC032): aws_ebs_volume blocks missing encrypted, or with
        encrypted = false.

    The block regexes require the resource's closing brace at the start of
    a line, so oddly formatted resources may be missed.
    """
    findings = []
    # S3 buckets without encryption
    s3_buckets = re.findall(
        r'resource\s+"aws_s3_bucket"\s+"([^"]+)"', content
    )
    s3_encryption = re.findall(
        r'resource\s+"aws_s3_bucket_server_side_encryption_configuration"', content
    )
    # Also check inline encryption (older format)
    inline_encryption = re.findall(
        r'server_side_encryption_configuration', content
    )
    if s3_buckets and not s3_encryption and not inline_encryption:
        rule = next(r for r in ENCRYPTION_PATTERNS if r["id"] == "SEC030")
        # One finding per bucket so each appears in the report.
        for bucket in s3_buckets:
            findings.append({
                "id": rule["id"],
                "severity": rule["severity"],
                "message": f"{rule['message']} (bucket: {bucket})",
                "fix": rule["fix"],
                "line": f'aws_s3_bucket.{bucket}',
            })
    # RDS without encryption
    rds_blocks = re.finditer(
        r'resource\s+"aws_db_instance"\s+"([^"]+)"\s*\{(.*?)\n\}',
        content,
        re.DOTALL,
    )
    for rds_match in rds_blocks:
        name = rds_match.group(1)
        body = rds_match.group(2)
        # Flag when the attribute is absent entirely or explicitly false.
        if 'storage_encrypted' not in body or re.search(
            r'storage_encrypted\s*=\s*false', body
        ):
            rule = next(r for r in ENCRYPTION_PATTERNS if r["id"] == "SEC031")
            findings.append({
                "id": rule["id"],
                "severity": rule["severity"],
                "message": f"{rule['message']} (instance: {name})",
                "fix": rule["fix"],
                "line": f'aws_db_instance.{name}',
            })
    # EBS volumes without encryption
    ebs_blocks = re.finditer(
        r'resource\s+"aws_ebs_volume"\s+"([^"]+)"\s*\{(.*?)\n\}',
        content,
        re.DOTALL,
    )
    for ebs_match in ebs_blocks:
        name = ebs_match.group(1)
        body = ebs_match.group(2)
        # Substring test: any occurrence of 'encrypted' counts as present
        # (including 'kms_key_id' would not, but 'storage_encrypted' would).
        if 'encrypted' not in body or re.search(
            r'encrypted\s*=\s*false', body
        ):
            rule = next(r for r in ENCRYPTION_PATTERNS if r["id"] == "SEC032")
            findings.append({
                "id": rule["id"],
                "severity": rule["severity"],
                "message": f"{rule['message']} (volume: {name})",
                "fix": rule["fix"],
                "line": f'aws_ebs_volume.{name}',
            })
    return findings
def check_sensitive_variables(content):
    """Audit variables whose names suggest they hold secrets.

    For each such variable, flags a missing ``sensitive = true`` (SEC050,
    medium) and any hardcoded string default (SEC051, critical).
    """
    secret_hints = ["password", "secret", "token", "api_key", "private_key", "credentials"]
    findings = []
    for var_match in re.finditer(
        r'variable\s+"([^"]+)"\s*\{(.*?)\n\}', content, re.DOTALL
    ):
        name = var_match.group(1)
        body = var_match.group(2)
        if not any(hint in name.lower() for hint in secret_hints):
            continue
        if not re.search(r'sensitive\s*=\s*true', body):
            findings.append({
                "id": "SEC050",
                "severity": "medium",
                "message": f"Variable '{name}' appears to be a secret but is not marked sensitive = true",
                "fix": "Add sensitive = true to prevent the value from appearing in logs and plan output",
                "line": f'variable "{name}"',
            })
        default_match = re.search(r'default\s*=\s*"([^"]+)"', body)
        if default_match and len(default_match.group(1)) > 0:
            findings.append({
                "id": "SEC051",
                "severity": "critical",
                "message": f"Variable '{name}' has a hardcoded default value for a secret",
                "fix": "Remove the default value — require it to be passed at runtime via tfvars or env",
                "line": f'variable "{name}" default = "{default_match.group(1)[:20]}..."',
            })
    return findings
def scan_content(content, strict=False):
    """Run every security check on `content` and return sorted, deduped findings.

    Args:
        content: Terraform source text to scan.
        strict: When True, escalate medium findings to high and low to medium.

    Returns:
        Findings sorted critical -> high -> medium -> low, unique per (id, line).
    """
    findings = []
    checks = (
        lambda text: check_regex_rules(text, SECRET_PATTERNS),
        lambda text: check_regex_rules(text, IAM_PATTERNS),
        lambda text: check_regex_rules(text, ACCESS_PATTERNS),
        check_security_groups,
        check_encryption,
        check_sensitive_variables,
    )
    for check in checks:
        findings.extend(check(content))

    if strict:
        # Bump warning-class severities one level up.
        escalate = {"medium": "high", "low": "medium"}
        for finding in findings:
            finding["severity"] = escalate.get(finding["severity"], finding["severity"])

    # Drop duplicates, keeping the first occurrence per (id, line) pair.
    seen = set()
    deduped = []
    for finding in findings:
        marker = (finding["id"], finding.get("line", ""))
        if marker not in seen:
            seen.add(marker)
            deduped.append(finding)

    # Stable sort by severity rank; unknown severities sink to the end.
    rank = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    deduped.sort(key=lambda f: rank.get(f["severity"], 4))
    return deduped
def generate_report(content, output_format="text", strict=False):
    """Generate a security scan report for Terraform source text.

    Runs scan_content over `content`, derives a 0-100 score by deducting
    points per finding severity, and prints either a JSON document or a
    human-readable text report to stdout.

    Args:
        content: Terraform source text to scan.
        output_format: "json" for machine-readable output; anything else
            produces the text report.
        strict: Forwarded to scan_content (escalates warning severities).

    Returns:
        Result dict with score, findings, finding_counts, and total_findings.
    """
    findings = scan_content(content, strict)
    # Score: start at 100, deduct per finding by severity, floor at 0.
    deductions = {"critical": 25, "high": 15, "medium": 5, "low": 2}
    score = max(0, 100 - sum(deductions.get(f["severity"], 0) for f in findings))
    counts = {
        "critical": sum(1 for f in findings if f["severity"] == "critical"),
        "high": sum(1 for f in findings if f["severity"] == "high"),
        "medium": sum(1 for f in findings if f["severity"] == "medium"),
        "low": sum(1 for f in findings if f["severity"] == "low"),
    }
    result = {
        "score": score,
        "findings": findings,
        "finding_counts": counts,
        "total_findings": len(findings),
    }
    if output_format == "json":
        print(json.dumps(result, indent=2))
        return result
    # Text output
    print(f"\n{'=' * 60}")
    print(f" Terraform Security Scan Report")
    print(f"{'=' * 60}")
    print(f" Score: {score}/100")
    print()
    print(f" Findings: {counts['critical']} critical | {counts['high']} high | {counts['medium']} medium | {counts['low']} low")
    # BUG FIX: this separator was f"{'' * 60}" — an empty string repeated 60
    # times, which printed a blank line (likely a mis-encoded dash character).
    # Use a visible horizontal rule instead.
    print(f"{'-' * 60}")
    for f in findings:
        icon = {"critical": "!!!", "high": "!!", "medium": "!", "low": "~"}.get(f["severity"], "?")
        print(f"\n [{f['id']}] {icon} {f['severity'].upper()}")
        print(f" {f['message']}")
        if f.get("line"):
            print(f" Match: {f['line']}")
        print(f" Fix: {f['fix']}")
    if not findings:
        print("\n No security issues found. Configuration looks clean.")
    print(f"\n{'=' * 60}\n")
    return result
def main():
    """CLI entry point for the Terraform security scanner.

    Accepts an optional path (directory or single .tf file). With no target,
    scans the built-in DEMO_TF sample. Exits with status 1 when the target
    yields no usable Terraform content.
    """
    parser = argparse.ArgumentParser(
        description="terraform-patterns: Terraform security scanner"
    )
    parser.add_argument(
        "target", nargs="?",
        help="Path to Terraform directory or .tf file (omit for demo)",
    )
    parser.add_argument(
        "--output", "-o",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Strict mode — elevate warnings to higher severity",
    )
    args = parser.parse_args()

    if not args.target:
        # Demo mode: no path given, scan the bundled sample configuration.
        print("No target provided. Running demo scan...\n")
        content = DEMO_TF
    else:
        target = Path(args.target)
        if target.is_dir():
            tf_files = find_tf_files(str(target))
            if not tf_files:
                print(f"Error: No .tf files found in {args.target}", file=sys.stderr)
                sys.exit(1)
            # Concatenate all discovered files into one scannable document.
            content = "\n".join(tf_files.values())
        elif target.is_file() and target.suffix == ".tf":
            content = target.read_text(encoding="utf-8")
        else:
            print(f"Error: {args.target} is not a directory or .tf file", file=sys.stderr)
            sys.exit(1)

    generate_report(content, args.output, args.strict)
if __name__ == "__main__":
    main()

View File

@@ -1,6 +1,6 @@
{
"name": "finance-skills",
"description": "2 finance skills: financial analyst (ratio analysis, DCF valuation, budgeting, forecasting) and SaaS metrics coach (ARR, MRR, churn, CAC, LTV, NRR, Quick Ratio, 12-month projections). 7 Python automation tools",
"description": "2 finance skills: financial analyst (ratio analysis, DCF valuation, budgeting, forecasting) and SaaS metrics coach (ARR, MRR, churn, CAC, LTV, NRR, Quick Ratio, 12-month projections). 7 Python automation tools. Agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw.",
"version": "2.1.2",
"author": {
"name": "Alireza Rezvani",
@@ -10,4 +10,4 @@
"repository": "https://github.com/alirezarezvani/claude-skills",
"license": "MIT",
"skills": "./"
}
}

View File

@@ -1,6 +1,6 @@
---
name: "finance-skills"
description: "Production-ready financial analyst skill with ratio analysis, DCF valuation, budget variance analysis, and rolling forecast construction. 4 Python tools (all stdlib-only). Works with Claude Code, Codex CLI, and OpenClaw."
description: "Financial analyst agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw. Ratio analysis, DCF valuation, budget variance, rolling forecasts. 4 Python tools (stdlib-only)."
version: 1.0.0
author: Alireza Rezvani
license: MIT

View File

@@ -1,6 +1,6 @@
{
"name": "marketing-skills",
"description": "43 production-ready marketing skills across 7 pods: Content (copywriting, content strategy, content production), SEO (audits, schema markup, programmatic SEO, site architecture), CRO (A/B testing, forms, popups, signup flows, pricing, onboarding), Channels (email sequences, social media, paid ads, cold email, X/Twitter growth), Growth (launch strategy, referral programs, free tools), Intelligence (competitor analysis, marketing psychology, analytics tracking), and Sales enablement",
"description": "43 production-ready marketing skills across 7 pods: Content (copywriting, content strategy, content production), SEO (audits, schema markup, programmatic SEO, site architecture), CRO (A/B testing, forms, popups, signup flows, pricing, onboarding), Channels (email sequences, social media, paid ads, cold email, X/Twitter growth), Growth (launch strategy, referral programs, free tools), Intelligence (competitor analysis, marketing psychology, analytics tracking), and Sales enablement. Agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw.",
"version": "2.1.2",
"author": {
"name": "Alireza Rezvani",
@@ -10,4 +10,4 @@
"repository": "https://github.com/alirezarezvani/claude-skills",
"license": "MIT",
"skills": "./"
}
}

View File

@@ -1,6 +1,6 @@
---
name: "marketing-skills"
description: "42-skill marketing division for AI coding agents. 7 specialist pods covering content, SEO, CRO, channels, growth, intelligence, and sales. Foundation context system + orchestration router. 27 Python tools (all stdlib-only). Works with Claude Code, Codex CLI, and OpenClaw."
description: "42 marketing agent skills and plugins for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw, and 6 more coding agents. 7 pods: content, SEO, CRO, channels, growth, intelligence, sales. Foundation context + orchestration router. 27 Python tools (stdlib-only)."
version: 2.0.0
author: Alireza Rezvani
license: MIT

View File

@@ -1,6 +1,6 @@
{
"name": "product-skills",
"description": "12 production-ready product skills: product manager toolkit (RICE, PRDs), agile product owner, product strategist, UX researcher, UI design system, competitive teardown, landing page generator, SaaS scaffolder, product analytics, experiment designer, product discovery, and roadmap communicator",
"description": "12 production-ready product skills: product manager toolkit (RICE, PRDs), agile product owner, product strategist, UX researcher, UI design system, competitive teardown, landing page generator, SaaS scaffolder, product analytics, experiment designer, product discovery, and roadmap communicator. Agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw.",
"version": "2.1.2",
"author": {
"name": "Alireza Rezvani",
@@ -10,4 +10,4 @@
"repository": "https://github.com/alirezarezvani/claude-skills",
"license": "MIT",
"skills": "./"
}
}

View File

@@ -1,6 +1,6 @@
---
name: "product-skills"
description: "8 production-ready product skills: product manager toolkit with RICE prioritization, agile product owner, product strategist with OKR cascades, UX researcher, UI design system, competitive teardown, landing page generator, and SaaS scaffolder. Python tools included (all stdlib-only). Works with Claude Code, Codex CLI, and OpenClaw."
description: "10 product agent skills and plugins for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw. PM toolkit (RICE), agile PO, product strategist (OKR), UX researcher, UI design system, competitive teardown, landing page generator, SaaS scaffolder, research summarizer. Python tools (stdlib-only)."
version: 1.1.0
author: Alireza Rezvani
license: MIT

View File

@@ -1,6 +1,6 @@
{
"name": "pm-skills",
"description": "6 project management skills: senior PM, scrum master, Jira expert, Confluence expert, Atlassian admin, and template creator for Atlassian users",
"description": "6 project management skills: senior PM, scrum master, Jira expert, Confluence expert, Atlassian admin, and template creator for Atlassian users. Agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw.",
"version": "2.1.2",
"author": {
"name": "Alireza Rezvani",
@@ -10,4 +10,4 @@
"repository": "https://github.com/alirezarezvani/claude-skills",
"license": "MIT",
"skills": "./"
}
}

View File

@@ -1,6 +1,6 @@
---
name: "pm-skills"
description: "6 production-ready project management skills for Atlassian users: senior PM with portfolio management, scrum master with velocity forecasting, Jira expert with JQL mastery, Confluence expert, Atlassian admin, and template creator. MCP integration for live Jira/Confluence automation. Works with Claude Code, Codex CLI, and OpenClaw."
description: "6 project management agent skills and plugins for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw. Senior PM, scrum master, Jira expert (JQL), Confluence expert, Atlassian admin, template creator. MCP integration for live Jira/Confluence automation."
version: 1.0.0
author: Alireza Rezvani
license: MIT

View File

@@ -1,6 +1,6 @@
{
"name": "ra-qm-skills",
"description": "12 regulatory affairs & quality management skills for HealthTech/MedTech: ISO 13485 QMS, MDR 2017/745, FDA 510(k)/PMA, GDPR/DSGVO, ISO 27001 ISMS, CAPA management, risk management, clinical evaluation, and more",
"description": "12 regulatory affairs & quality management skills for HealthTech/MedTech: ISO 13485 QMS, MDR 2017/745, FDA 510(k)/PMA, GDPR/DSGVO, ISO 27001 ISMS, CAPA management, risk management, clinical evaluation, and more. Agent skill and plugin for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw.",
"version": "2.1.2",
"author": {
"name": "Alireza Rezvani",
@@ -10,4 +10,4 @@
"repository": "https://github.com/alirezarezvani/claude-skills",
"license": "MIT",
"skills": "./"
}
}

View File

@@ -1,6 +1,6 @@
---
name: "ra-qm-skills"
description: "12 production-ready regulatory affairs and quality management skills for HealthTech/MedTech: ISO 13485 QMS, MDR 2017/745, FDA 510(k)/PMA, ISO 27001 ISMS, GDPR/DSGVO compliance, risk management (ISO 14971), CAPA, document control, and internal auditing. Python tools included (all stdlib-only). Works with Claude Code, Codex CLI, and OpenClaw."
description: "12 regulatory & QM agent skills and plugins for Claude Code, Codex, Gemini CLI, Cursor, OpenClaw. ISO 13485 QMS, MDR 2017/745, FDA 510(k)/PMA, ISO 27001 ISMS, GDPR/DSGVO, risk management (ISO 14971), CAPA, document control, auditing. Python tools (stdlib-only)."
version: 1.0.0
author: Alireza Rezvani
license: MIT