fix(engineering): improve env-secrets-manager - add scripts + extract references

2026-03-11 20:23:50 +01:00
parent bafb155334
commit 6f55bc4fd6
3 changed files with 218 additions and 291 deletions
--- a/engineering/env-secrets-manager/SKILL.md
+++ b/engineering/env-secrets-manager/SKILL.md
@@ -13,325 +13,66 @@ description: "Env & Secrets Manager"

 ## Overview

-Complete environment and secrets management workflow: .env file lifecycle across dev/staging/prod,
-.env.example auto-generation, required-var validation, secret leak detection in git history, and
-credential rotation playbook. Integrates with HashiCorp Vault, AWS SSM, 1Password CLI, and Doppler.
-
---
+Manage environment-variable hygiene and secrets safety across local development and production workflows. This skill focuses on practical auditing, drift awareness, and rotation readiness.

 ## Core Capabilities

- **.env lifecycle** — create, validate, sync across environments
- **.env.example generation** — strip values, preserve keys and comments
- **Validation script** — fail-fast on missing required vars at startup
- **Secret leak detection** — regex scan of git history and working tree
- **Rotation workflow** — detect → scope → rotate → deploy → verify
- **Secret manager integrations** — Vault KV v2, AWS SSM, 1Password, Doppler
+- `.env` and `.env.example` lifecycle guidance
+- Secret leak detection for repository working trees
+- Severity-based findings for likely credentials
+- Operational pointers for rotation and containment
+- JSON output (`--json`) suitable for consumption by CI checks

 ---

 ## When to Use

- Setting up a new project — scaffold .env.example and validation
- Before every commit — scan for accidentally staged secrets
- Post-incident response — leaked credential rotation procedure
- Onboarding new developers — they need all vars, not just some
- Environment drift investigation — prod behaving differently from staging
+- Before pushing commits that touched env/config files
+- During security audits and incident triage
+- When onboarding contributors who need safe env conventions
+- When validating that no obvious secrets are hardcoded

 ---

-## .env File Structure
+## Quick Start

-### Canonical Layout
 ```bash
-# .env.example — committed to git (no values)
-# .env.local   — developer machine (gitignored)
-# .env.staging — CI/CD or secret manager reference
-# .env.prod    — never on disk; pulled from secret manager at runtime
+# Scan a repository for likely secret leaks
+python3 scripts/env_auditor.py /path/to/repo

-# Application
-APP_NAME=
-APP_ENV=                    # dev | staging | prod
-APP_PORT=3000               # default port if not set
-APP_SECRET=                 # REQUIRED: JWT signing secret (min 32 chars)
-APP_URL=                    # REQUIRED: public base URL
-
-# Database
-DATABASE_URL=               # REQUIRED: full connection string
-DATABASE_POOL_MIN=2
-DATABASE_POOL_MAX=10
-
-# Auth
-AUTH_JWT_SECRET=            # REQUIRED
-AUTH_JWT_EXPIRY=3600        # seconds
-AUTH_REFRESH_SECRET=        # REQUIRED
-
-# Third-party APIs
-STRIPE_SECRET_KEY=          # REQUIRED in prod
-STRIPE_WEBHOOK_SECRET=      # REQUIRED in prod
-SENDGRID_API_KEY=
-
-# Storage
-AWS_ACCESS_KEY_ID=
-AWS_SECRET_ACCESS_KEY=
-AWS_REGION=eu-central-1
-AWS_S3_BUCKET=
-
-# Monitoring
-SENTRY_DSN=
-DD_API_KEY=
+# JSON output for CI pipelines
+python3 scripts/env_auditor.py /path/to/repo --json
 ```

 ---

-## .gitignore Patterns
+## Recommended Workflow

-Add to your project's `.gitignore`:
-
-```gitignore
-# Environment files — NEVER commit these
-.env
-.env.local
-.env.development
-.env.development.local
-.env.test.local
-.env.staging
-.env.staging.local
-.env.production
-.env.production.local
-.env.prod
-.env.*.local
-
-# Secret files
-*.pem
-*.key
-*.p12
-*.pfx
-secrets.json
-secrets.yaml
-secrets.yml
-credentials.json
-service-account.json
-
-# AWS
-.aws/credentials
-
-# Terraform state (may contain secrets)
-*.tfstate
-*.tfstate.backup
-.terraform/
-
-# Kubernetes secrets
-*-secret.yaml
-*-secrets.yaml
-```
+1. Run `scripts/env_auditor.py` on the repository root.
+2. Prioritize `critical` and `high` findings first.
+3. Rotate real credentials and remove exposed values.
+4. Update `.env.example` and `.gitignore` as needed.
+5. Add or tighten pre-commit/CI secret scanning gates.

 ---

-## .env.example Auto-Generation
+## Reference Docs

-```bash
-#!/bin/bash
-# scripts/gen-env-example.sh
-# Strips values from .env, preserves keys, defaults, and comments
-
-INPUT="${1:-.env}"
-OUTPUT="${2:-.env.example}"
-
-if [ ! -f "$INPUT" ]; then
-  echo "ERROR: $INPUT not found"
-  exit 1
-fi
-
-python3 - "$INPUT" "$OUTPUT" << 'PYEOF'
-import sys, re
-
-input_file = sys.argv[1]
-output_file = sys.argv[2]
-lines = []
-
-with open(input_file) as f:
-    for line in f:
-        stripped = line.rstrip('\n')
-        # Keep blank lines and comments as-is
-        if stripped == '' or stripped.startswith('#'):
-            lines.append(stripped)
-            continue
-        # Match KEY=VALUE or KEY="VALUE"
-        m = re.match(r'^([A-Z_][A-Z0-9_]*)=(.*)$', stripped)
-        if m:
-            key = m.group(1)
-            value = m.group(2).strip('"\'')
-            # Keep non-sensitive defaults (ports, regions, feature flags)
-            safe_defaults = re.compile(
-                r'^(APP_PORT|APP_ENV|APP_NAME|AWS_REGION|DATABASE_POOL_|LOG_LEVEL|'
-                r'FEATURE_|CACHE_TTL|RATE_LIMIT_|PAGINATION_|TIMEOUT_)',
-                re.I
-            )
-            sensitive = re.compile(
-                r'(SECRET|KEY|TOKEN|PASSWORD|PASS|CREDENTIAL|DSN|AUTH|PRIVATE|CERT)',
-                re.I
-            )
-            if safe_defaults.match(key) and value:
-                lines.append(f"{key}={value}  # default")
-            else:
-                lines.append(f"{key}=")
-        else:
-            lines.append(stripped)
-
-with open(output_file, 'w') as f:
-    f.write('\n'.join(lines) + '\n')
-
-print(f"Generated {output_file} from {input_file}")
-PYEOF
-```
-
-Usage:
-```bash
-bash scripts/gen-env-example.sh .env .env.example
-# Commit .env.example, never .env
-git add .env.example
-```
-
---
-
-## Required Variable Validation Script
-→ See references/validation-detection-rotation.md for details
-
-## Secret Manager Integrations
-
-### HashiCorp Vault KV v2
-```bash
-# Setup
-export VAULT_ADDR="https://vault.internal.company.com"
-export VAULT_TOKEN="$(vault login -method=oidc -format=json | jq -r '.auth.client_token')"
-
-# Write secrets
-vault kv put secret/myapp/prod \
-  DATABASE_URL="postgres://user:pass@host/db" \
-  APP_SECRET="$(openssl rand -base64 32)"
-
-# Read secrets into env
-eval $(vault kv get -format=json secret/myapp/prod | \
-  jq -r '.data.data | to_entries[] | "export \(.key)=\(.value)"')
-
-# In CI/CD (GitHub Actions)
-# Use vault-action: hashicorp/vault-action@v2
-```
-
-### AWS SSM Parameter Store
-```bash
-# Write (SecureString = encrypted with KMS)
-aws ssm put-parameter \
-  --name "/myapp/prod/DATABASE_URL" \
-  --value "postgres://..." \
-  --type "SecureString" \
-  --key-id "alias/myapp-secrets"
-
-# Read all params for an app/env into shell
-eval $(aws ssm get-parameters-by-path \
-  --path "/myapp/prod/" \
-  --with-decryption \
-  --query "Parameters[*].[Name,Value]" \
-  --output text | \
-  awk '{split($1,a,"/"); print "export " a[length(a)] "=\"" $2 "\""}')
-
-# In Node.js at startup
-# Use @aws-sdk/client-ssm to pull params before server starts
-```
-
-### 1Password CLI
-```bash
-# Authenticate
-eval $(op signin)
-
-# Get a specific field
-op read "op://MyVault/MyApp Prod/STRIPE_SECRET_KEY"
-
-# Export all fields from an item as env vars
-op item get "MyApp Prod" --format json | \
-  jq -r '.fields[] | select(.value != null) | "export \(.label)=\"\(.value)\""' | \
-  grep -E "^export [A-Z_]+" | source /dev/stdin
-
-# .env injection
-op inject -i .env.tpl -o .env
-# .env.tpl uses {{ op://Vault/Item/field }} syntax
-```
-
-### Doppler
-```bash
-# Setup
-doppler setup  # interactive: select project + config
-
-# Run any command with secrets injected
-doppler run -- node server.js
-doppler run -- npm run dev
-
-# Export to .env (local dev only — never commit output)
-doppler secrets download --no-file --format env > .env.local
-
-# Pull specific secret
-doppler secrets get DATABASE_URL --plain
-
-# Sync to another environment
-doppler secrets upload --project myapp --config staging < .env.staging.example
-```
-
---
-
-## Environment Drift Detection
-
-Check if staging and prod have the same set of keys (values may differ):
-
-```bash
-#!/bin/bash
-# scripts/check-env-drift.sh
-
-# Pull key names from both environments (not values)
-STAGING_KEYS=$(doppler secrets --project myapp --config staging --format json 2>/dev/null | \
-  jq -r 'keys[]' | sort)
-PROD_KEYS=$(doppler secrets --project myapp --config prod --format json 2>/dev/null | \
-  jq -r 'keys[]' | sort)
-
-ONLY_IN_STAGING=$(comm -23 <(echo "$STAGING_KEYS") <(echo "$PROD_KEYS"))
-ONLY_IN_PROD=$(comm -13 <(echo "$STAGING_KEYS") <(echo "$PROD_KEYS"))
-
-if [ -n "$ONLY_IN_STAGING" ]; then
-  echo "Keys in STAGING but NOT in PROD:"
-  echo "$ONLY_IN_STAGING" | sed 's/^/  /'
-fi
-
-if [ -n "$ONLY_IN_PROD" ]; then
-  echo "Keys in PROD but NOT in STAGING:"
-  echo "$ONLY_IN_PROD" | sed 's/^/  /'
-fi
-
-if [ -z "$ONLY_IN_STAGING" ] && [ -z "$ONLY_IN_PROD" ]; then
-  echo "✅ No env drift detected — staging and prod have identical key sets"
-fi
-```
+- `references/validation-detection-rotation.md`
+- `references/secret-patterns.md`

 ---

 ## Common Pitfalls

- **Committing .env instead of .env.example** — add `.env` to .gitignore on day 1; use pre-commit hooks
- **Storing secrets in CI/CD logs** — never `echo $SECRET`; mask vars in CI settings
- **Rotating only one place** — secrets often appear in Heroku, Vercel, Docker, K8s, CI — update ALL
- **Forgetting to invalidate sessions after JWT secret rotation** — all users will be logged out; communicate this
- **Using .env.example with real values** — example files are public; strip everything sensitive
- **Not monitoring after rotation** — watch audit logs for 24h after rotation to catch unauthorized old-credential use
- **Weak secrets** — `APP_SECRET=mysecret` is not a secret. Use `openssl rand -base64 32`
-
---
+- Committing real values in `.env.example`
+- Rotating one system but missing downstream consumers
+- Logging secrets during debugging or incident response
+- Treating suspected leaks as low urgency without validation

 ## Best Practices

-1. **Secret manager is source of truth** — .env files are for local dev only; never in prod
-2. **Rotate on a schedule**, not just after incidents — quarterly minimum for long-lived keys
-3. **Principle of least privilege** — each service gets its own API key with minimal permissions
-4. **Audit access** — log every secret read in Vault/SSM; alert on anomalous access
-5. **Never log secrets** — add log scrubbing middleware that redacts known secret patterns
-6. **Use short-lived credentials** — prefer OIDC/instance roles over long-lived access keys
-7. **Separate secrets per environment** — never share a key between dev and prod
-8. **Document rotation runbooks** — before an incident, not during one
+1. Use a secret manager as the production source of truth.
+2. Keep dev env files local and gitignored.
+3. Enforce detection in CI before merge.
+4. Re-test application paths immediately after credential rotation.
--- a/engineering/env-secrets-manager/references/secret-patterns.md
+++ b/engineering/env-secrets-manager/references/secret-patterns.md
@@ -0,0 +1,41 @@
+# Secret Pattern Reference
+
+## Detection Categories
+
+### Critical
+
+- OpenAI-like keys (`sk-...`)
+- GitHub personal access tokens (`ghp_...`)
+- AWS access key IDs (`AKIA...`)
+
+### High
+
+- Slack tokens (`xox...`)
+- Private key PEM blocks
+- Hardcoded assignments to `secret`, `token`, `password`, `api_key`
+
+### Medium
+
+- JWT-like tokens in plaintext
+- Suspected credentials in docs/scripts that should be redacted
+
+## Severity Guidance
+
+- `critical`: immediate rotation required; treat as active incident
+- `high`: likely sensitive; investigate, and rotate if it turns out to be a real credential
+- `medium`: possible exposure; verify context and sanitize where needed
+
+## Response Playbook
+
+1. Revoke or rotate exposed credential.
+2. Identify blast radius (services, environments, users).
+3. Remove from code/history where possible.
+4. Add preventive controls (pre-commit hooks, CI secret scans).
+5. Review monitoring data and access logs for signs of abuse.
+
+## Preventive Baseline
+
+- Commit only `.env.example`, never `.env`.
+- Keep `.gitignore` patterns for env and key material.
+- Use secret managers for staging/prod.
+- Redact sensitive values from logs and debug output.
--- a/engineering/env-secrets-manager/scripts/env_auditor.py
+++ b/engineering/env-secrets-manager/scripts/env_auditor.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+"""Scan env files and source code for likely secret exposure patterns."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+from pathlib import Path
+from typing import Dict, Iterable, List
+
+# Directory names that iter_files removes from os.walk's descent. Matching is
+# by exact directory name, at any depth below the scan root.
+IGNORED_DIRS = {
+    ".git",
+    "node_modules",
+    ".next",
+    "dist",
+    "build",
+    "coverage",
+    "venv",
+    ".venv",
+    "__pycache__",
+}
+
+# File suffixes (compared lower-cased) that is_candidate accepts for scanning.
+# Files whose *name* starts with ".env" are accepted by a separate check in
+# is_candidate, so the ".env" entry here is what admits names such as
+# "prod.env", where ".env" is the suffix rather than the name prefix.
+SOURCE_EXTS = {
+    ".env",
+    ".py",
+    ".ts",
+    ".tsx",
+    ".js",
+    ".jsx",
+    ".json",
+    ".yaml",
+    ".yml",
+    ".toml",
+    ".ini",
+    ".sh",
+    ".md",
+}
+
+PATTERNS = [
+    ("critical", "openai_key", re.compile(r"\bsk-[A-Za-z0-9]{20,}\b")),
+    ("critical", "github_pat", re.compile(r"\bghp_[A-Za-z0-9]{20,}\b")),
+    ("critical", "aws_access_key_id", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
+    ("high", "slack_token", re.compile(r"\bxox[baprs]-[A-Za-z0-9-]{10,}\b")),
+    ("high", "private_key_block", re.compile(r"-----BEGIN (RSA |EC |OPENSSH )?PRIVATE KEY-----")),
+    ("high", "generic_secret_assignment", re.compile(r"(?i)\b(secret|token|password|passwd|api[_-]?key)\b\s*[:=]\s*['\"]?[A-Za-z0-9_\-\/.+=]{8,}")),
+    ("medium", "jwt_like", re.compile(r"\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b")),
+]
+
+
+def iter_files(root: Path) -> Iterable[Path]:
+    for dirpath, dirnames, filenames in os.walk(root):
+        dirnames[:] = [d for d in dirnames if d not in IGNORED_DIRS]
+        for name in filenames:
+            p = Path(dirpath) / name
+            if p.is_file():
+                yield p
+
+
+def is_candidate(path: Path) -> bool:
+    if path.name.startswith(".env"):
+        return True
+    return path.suffix.lower() in SOURCE_EXTS
+
+
+def scan_file(path: Path, max_bytes: int, root: Path) -> List[Dict[str, object]]:
+    findings: List[Dict[str, object]] = []
+    try:
+        if path.stat().st_size > max_bytes:
+            return findings
+        text = path.read_text(encoding="utf-8", errors="ignore")
+    except Exception:
+        return findings
+
+    for lineno, line in enumerate(text.splitlines(), start=1):
+        for severity, kind, pattern in PATTERNS:
+            if pattern.search(line):
+                findings.append(
+                    {
+                        "severity": severity,
+                        "pattern": kind,
+                        "file": str(path.relative_to(root)),
+                        "line": lineno,
+                        "snippet": line.strip()[:180],
+                    }
+                )
+    return findings
+
+
+def severity_counts(findings: List[Dict[str, object]]) -> Dict[str, int]:
+    counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
+    for item in findings:
+        sev = str(item.get("severity", "low"))
+        counts[sev] = counts.get(sev, 0) + 1
+    return counts
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Audit a repository for likely secret leaks in env files and source.")
+    parser.add_argument("path", help="Path to repository root")
+    parser.add_argument("--max-file-size-kb", type=int, default=512, help="Skip files larger than this size (default: 512)")
+    parser.add_argument("--json", action="store_true", help="Output JSON")
+    return parser.parse_args()
+
+
+def main() -> int:
+    args = parse_args()
+    root = Path(args.path).expanduser().resolve()
+    if not root.exists() or not root.is_dir():
+        raise SystemExit(f"Path is not a directory: {root}")
+
+    max_bytes = max(1, args.max_file_size_kb) * 1024
+    findings: List[Dict[str, object]] = []
+
+    for file_path in iter_files(root):
+        if is_candidate(file_path):
+            findings.extend(scan_file(file_path, max_bytes=max_bytes, root=root))
+
+    report = {
+        "root": str(root),
+        "total_findings": len(findings),
+        "severity_counts": severity_counts(findings),
+        "findings": findings,
+    }
+
+    if args.json:
+        print(json.dumps(report, indent=2))
+    else:
+        print("Env/Secrets Audit Report")
+        print(f"Root: {report['root']}")
+        print(f"Total findings: {report['total_findings']}")
+        print("Severity:")
+        for sev, count in report["severity_counts"].items():
+            print(f"- {sev}: {count}")
+        print("")
+        for item in findings[:200]:
+            print(f"[{item['severity'].upper()}] {item['file']}:{item['line']} ({item['pattern']})")
+            print(f"  {item['snippet']}")
+
+    return 0
+
+
+# Run the CLI when executed as a script; main()'s return value becomes the
+# process exit status via SystemExit.
+if __name__ == "__main__":
+    raise SystemExit(main())