feat(engineering,ra-qm): add secrets-vault-manager, sql-database-assistant, gcp-cloud-architect, soc2-compliance

secrets-vault-manager (403-line SKILL.md, 3 scripts, 3 references):
- HashiCorp Vault, AWS SM, Azure KV, GCP SM integration
- Secret rotation, dynamic secrets, audit logging, emergency procedures

sql-database-assistant (457-line SKILL.md, 3 scripts, 3 references):
- Query optimization, migration generation, schema exploration
- Multi-DB support (PostgreSQL, MySQL, SQLite, SQL Server)
- ORM patterns (Prisma, Drizzle, TypeORM, SQLAlchemy)

gcp-cloud-architect (418-line SKILL.md, 3 scripts, 3 references):
- 6-step workflow mirroring aws-solution-architect for GCP
- Cloud Run, GKE, BigQuery, Cloud Functions, cost optimization
- Completes cloud trifecta (AWS + Azure + GCP)

soc2-compliance (417-line SKILL.md, 3 scripts, 3 references):
- SOC 2 Type I & II preparation, Trust Service Criteria mapping
- Control matrix generation, evidence tracking, gap analysis
- First SOC 2 skill in ra-qm-team (joins GDPR, ISO 27001, ISO 13485)

All 12 scripts pass --help. Docs generated, mkdocs.yml nav updated.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Author: Reza Rezvani
Date: 2026-03-25 14:05:11 +01:00
Commit: 87f3a007c9 (parent: 7a2189fa21)
36 changed files with 13450 additions and 6 deletions

migration_generator.py

@@ -0,0 +1,442 @@
#!/usr/bin/env python3
"""
Migration Generator
Generates database migration file templates (up/down) from natural-language
schema change descriptions.
Supported operations:
- Add column, drop column, rename column
- Add table, drop table, rename table
- Add index, drop index
- Add constraint, drop constraint
- Change column type
Usage:
python migration_generator.py --change "add email_verified boolean to users" --dialect postgres
python migration_generator.py --change "rename column name to full_name in customers" --format alembic
python migration_generator.py --change "add index on orders(status, created_at)" --output 001_add_index.sql
python migration_generator.py --change "create table reviews with id, user_id, rating, body" --json
"""
import argparse
import json
import os
import re
import sys
import textwrap
from dataclasses import dataclass, asdict
from datetime import datetime
from typing import List, Optional, Tuple
@dataclass
class Migration:
"""A generated migration with up and down scripts."""
description: str
dialect: str
format: str
up: str
down: str
warnings: List[str]
def to_dict(self):
return asdict(self)
# ---------------------------------------------------------------------------
# Change parsers — extract structured intent from natural language
# ---------------------------------------------------------------------------
def parse_add_column(desc: str) -> Optional[dict]:
"""Parse: add <column> <type> to <table>"""
m = re.match(
r'add\s+(?:column\s+)?(\w+)\s+(\w[\w(),.]*)\s+(?:to|on)\s+(\w+)',
desc, re.IGNORECASE,
)
if m:
return {"op": "add_column", "column": m.group(1), "type": m.group(2), "table": m.group(3)}
return None
def parse_drop_column(desc: str) -> Optional[dict]:
"""Parse: drop/remove <column> from <table>"""
m = re.match(
r'(?:drop|remove)\s+(?:column\s+)?(\w+)\s+from\s+(\w+)',
desc, re.IGNORECASE,
)
if m:
return {"op": "drop_column", "column": m.group(1), "table": m.group(2)}
return None
def parse_rename_column(desc: str) -> Optional[dict]:
"""Parse: rename column <old> to <new> in <table>"""
m = re.match(
r'rename\s+column\s+(\w+)\s+to\s+(\w+)\s+in\s+(\w+)',
desc, re.IGNORECASE,
)
if m:
return {"op": "rename_column", "old": m.group(1), "new": m.group(2), "table": m.group(3)}
return None
def parse_add_table(desc: str) -> Optional[dict]:
"""Parse: create table <name> with <col1>, <col2>, ..."""
m = re.match(
r'create\s+table\s+(\w+)\s+with\s+(.+)',
desc, re.IGNORECASE,
)
if m:
cols = [c.strip() for c in m.group(2).split(",")]
return {"op": "add_table", "table": m.group(1), "columns": cols}
return None
def parse_drop_table(desc: str) -> Optional[dict]:
"""Parse: drop table <name>"""
m = re.match(r'drop\s+table\s+(\w+)', desc, re.IGNORECASE)
if m:
return {"op": "drop_table", "table": m.group(1)}
return None
def parse_add_index(desc: str) -> Optional[dict]:
"""Parse: add index on <table>(<col1>, <col2>)"""
m = re.match(
r'add\s+(?:unique\s+)?index\s+(?:on\s+)?(\w+)\s*\(([^)]+)\)',
desc, re.IGNORECASE,
)
if m:
unique = "unique" in desc.lower()
cols = [c.strip() for c in m.group(2).split(",")]
return {"op": "add_index", "table": m.group(1), "columns": cols, "unique": unique}
return None
def parse_change_type(desc: str) -> Optional[dict]:
"""Parse: change <column> type to <type> in <table>"""
m = re.match(
r'change\s+(?:column\s+)?(\w+)\s+type\s+to\s+(\w[\w(),.]*)\s+in\s+(\w+)',
desc, re.IGNORECASE,
)
if m:
return {"op": "change_type", "column": m.group(1), "new_type": m.group(2), "table": m.group(3)}
return None
PARSERS = [
parse_add_column,
parse_drop_column,
parse_rename_column,
parse_add_table,
parse_drop_table,
parse_add_index,
parse_change_type,
]
def parse_change(desc: str) -> Optional[dict]:
for parser in PARSERS:
result = parser(desc)
if result:
return result
return None
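# For example (given the parsers above), parse_change maps:
#   "add email_verified boolean to users"
#     -> {"op": "add_column", "column": "email_verified", "type": "boolean", "table": "users"}
#   "add index on orders(status, created_at)"
#     -> {"op": "add_index", "table": "orders", "columns": ["status", "created_at"], "unique": False}
# Order matters: parsers run top-down and the first match wins.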
# ---------------------------------------------------------------------------
# SQL generators per dialect
# ---------------------------------------------------------------------------
TYPE_MAP = {
"boolean": {"postgres": "BOOLEAN", "mysql": "TINYINT(1)", "sqlite": "INTEGER", "sqlserver": "BIT"},
"text": {"postgres": "TEXT", "mysql": "TEXT", "sqlite": "TEXT", "sqlserver": "NVARCHAR(MAX)"},
"integer": {"postgres": "INTEGER", "mysql": "INT", "sqlite": "INTEGER", "sqlserver": "INT"},
"int": {"postgres": "INTEGER", "mysql": "INT", "sqlite": "INTEGER", "sqlserver": "INT"},
"serial": {"postgres": "SERIAL", "mysql": "INT AUTO_INCREMENT", "sqlite": "INTEGER", "sqlserver": "INT IDENTITY(1,1)"},
"varchar": {"postgres": "VARCHAR(255)", "mysql": "VARCHAR(255)", "sqlite": "TEXT", "sqlserver": "NVARCHAR(255)"},
"timestamp": {"postgres": "TIMESTAMP", "mysql": "DATETIME", "sqlite": "TEXT", "sqlserver": "DATETIME2"},
"uuid": {"postgres": "UUID", "mysql": "CHAR(36)", "sqlite": "TEXT", "sqlserver": "UNIQUEIDENTIFIER"},
"json": {"postgres": "JSONB", "mysql": "JSON", "sqlite": "TEXT", "sqlserver": "NVARCHAR(MAX)"},
"decimal": {"postgres": "DECIMAL(19,4)", "mysql": "DECIMAL(19,4)", "sqlite": "REAL", "sqlserver": "DECIMAL(19,4)"},
"float": {"postgres": "DOUBLE PRECISION", "mysql": "DOUBLE", "sqlite": "REAL", "sqlserver": "FLOAT"},
}
def map_type(type_name: str, dialect: str) -> str:
    """Map a generic type name to a dialect-specific type."""
    # Strip any size/precision suffix ("varchar(100)" -> "varchar") before lookup;
    # rstrip("()") only removes trailing characters and would leave "varchar(100".
    key = re.sub(r'\(.*\)$', '', type_name).strip().lower()
    if key in TYPE_MAP and dialect in TYPE_MAP[key]:
        return TYPE_MAP[key][dialect]
    return type_name.upper()
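# For example, with the type map above:
#   map_type("boolean", "mysql")         -> "TINYINT(1)"
#   map_type("varchar(100)", "postgres") -> "VARCHAR(255)"  # size suffix is dropped;
#                                                           # the dialect default applies
#   map_type("citext", "postgres")       -> "CITEXT"        # unknown types pass through uppercased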
def gen_add_column(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
col_type = map_type(change["type"], dialect)
table = change["table"]
col = change["column"]
up = f"ALTER TABLE {table} ADD COLUMN {col} {col_type};"
down = f"ALTER TABLE {table} DROP COLUMN {col};"
return up, down, []
def gen_drop_column(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
table = change["table"]
col = change["column"]
up = f"ALTER TABLE {table} DROP COLUMN {col};"
down = f"-- WARNING: Cannot fully reverse DROP COLUMN. Provide the original type.\nALTER TABLE {table} ADD COLUMN {col} TEXT;"
return up, down, ["Down migration uses TEXT as placeholder. Replace with the original column type."]
def gen_rename_column(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
    table = change["table"]
    old, new = change["old"], change["new"]
    warnings = []
    if dialect == "sqlserver":
        up = f"EXEC sp_rename '{table}.{old}', '{new}', 'COLUMN';"
        down = f"EXEC sp_rename '{table}.{new}', '{old}', 'COLUMN';"
    else:
        # postgres, mysql, sqlite, and the generic fallback share this syntax
        up = f"ALTER TABLE {table} RENAME COLUMN {old} TO {new};"
        down = f"ALTER TABLE {table} RENAME COLUMN {new} TO {old};"
        if dialect == "sqlite":
            warnings.append("SQLite RENAME COLUMN requires version 3.25.0+.")
        elif dialect == "mysql":
            warnings.append("MySQL RENAME COLUMN requires 8.0+; use CHANGE COLUMN on 5.7 and earlier.")
    return up, down, warnings
def gen_add_table(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
table = change["table"]
cols = change["columns"]
col_defs = []
has_id = False
for col in cols:
col = col.strip()
if col.lower() == "id":
has_id = True
if dialect == "postgres":
col_defs.append(" id SERIAL PRIMARY KEY")
elif dialect == "mysql":
col_defs.append(" id INT AUTO_INCREMENT PRIMARY KEY")
elif dialect == "sqlite":
col_defs.append(" id INTEGER PRIMARY KEY AUTOINCREMENT")
elif dialect == "sqlserver":
col_defs.append(" id INT IDENTITY(1,1) PRIMARY KEY")
else:
# Check if type is specified (e.g., "rating int")
parts = col.split()
if len(parts) >= 2:
col_defs.append(f" {parts[0]} {map_type(parts[1], dialect)}")
else:
col_defs.append(f" {col} TEXT")
cols_sql = ",\n".join(col_defs)
up = f"CREATE TABLE {table} (\n{cols_sql}\n);"
down = f"DROP TABLE {table};"
warnings = []
if not has_id:
warnings.append("Table has no explicit primary key. Consider adding an 'id' column.")
return up, down, warnings
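# For example, "create table reviews with id, user_id, rating int, body text"
# on postgres yields:
#   CREATE TABLE reviews (
#     id SERIAL PRIMARY KEY,
#     user_id TEXT,
#     rating INTEGER,
#     body TEXT
#   );
# Untyped columns (like user_id here) default to TEXT; add a type hint to override.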
def gen_drop_table(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
table = change["table"]
up = f"DROP TABLE {table};"
down = f"-- WARNING: Cannot reverse DROP TABLE without original DDL.\nCREATE TABLE {table} (id INTEGER PRIMARY KEY);"
return up, down, ["Down migration is a placeholder. Replace with the original CREATE TABLE statement."]
def gen_add_index(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
table = change["table"]
cols = change["columns"]
unique = "UNIQUE " if change.get("unique") else ""
idx_name = f"idx_{table}_{'_'.join(cols)}"
if dialect == "postgres":
up = f"CREATE {unique}INDEX CONCURRENTLY {idx_name} ON {table} ({', '.join(cols)});"
else:
up = f"CREATE {unique}INDEX {idx_name} ON {table} ({', '.join(cols)});"
down = f"DROP INDEX {idx_name};" if dialect != "mysql" else f"DROP INDEX {idx_name} ON {table};"
warnings = []
if dialect == "postgres":
warnings.append("CONCURRENTLY cannot run inside a transaction. Run outside migration transaction.")
return up, down, warnings
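# For example, "add index on orders(status, created_at)" on postgres yields:
#   up:   CREATE INDEX CONCURRENTLY idx_orders_status_created_at ON orders (status, created_at);
#   down: DROP INDEX idx_orders_status_created_at;
# plus the warning that CONCURRENTLY must run outside a transaction.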
def gen_change_type(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
table = change["table"]
col = change["column"]
new_type = map_type(change["new_type"], dialect)
warnings = ["Down migration uses TEXT as placeholder. Replace with the original column type."]
if dialect == "postgres":
up = f"ALTER TABLE {table} ALTER COLUMN {col} TYPE {new_type};"
down = f"ALTER TABLE {table} ALTER COLUMN {col} TYPE TEXT;"
elif dialect == "mysql":
up = f"ALTER TABLE {table} MODIFY COLUMN {col} {new_type};"
down = f"ALTER TABLE {table} MODIFY COLUMN {col} TEXT;"
elif dialect == "sqlserver":
up = f"ALTER TABLE {table} ALTER COLUMN {col} {new_type};"
down = f"ALTER TABLE {table} ALTER COLUMN {col} NVARCHAR(MAX);"
else:
up = f"-- SQLite does not support ALTER COLUMN. Recreate the table."
down = f"-- SQLite does not support ALTER COLUMN. Recreate the table."
warnings.append("SQLite requires table recreation for type changes.")
return up, down, warnings
GENERATORS = {
"add_column": gen_add_column,
"drop_column": gen_drop_column,
"rename_column": gen_rename_column,
"add_table": gen_add_table,
"drop_table": gen_drop_table,
"add_index": gen_add_index,
"change_type": gen_change_type,
}
# ---------------------------------------------------------------------------
# Format wrappers
# ---------------------------------------------------------------------------
def wrap_sql(up: str, down: str, description: str) -> Tuple[str, str]:
    """Wrap as plain SQL migration files."""
    header = f"-- Migration: {description}\n-- Generated: {datetime.now().isoformat()}\n\n"
    return header + "-- Up\n" + up, header + "-- Down\n" + down
def wrap_prisma(up: str, down: str, description: str) -> Tuple[str, str]:
"""Format as Prisma migration SQL (Prisma uses raw SQL in migration.sql)."""
header = f"-- Migration: {description}\n-- Format: Prisma (migration.sql)\n\n"
return header + up, header + "-- Rollback\n" + down
def wrap_alembic(up: str, down: str, description: str) -> Tuple[str, str]:
    """Format as an Alembic Python migration (upgrade/downgrade in one file)."""
    slug = re.sub(r'\W+', '_', description.lower()).strip('_')[:40]
    revision = datetime.now().strftime("%Y%m%d%H%M")
    template = textwrap.dedent('''\
        """{description}

        Revision ID: {revision}
        Suggested filename: {revision}_{slug}.py
        """
        from alembic import op
        import sqlalchemy as sa

        revision = {revision!r}
        down_revision = None  # Set to the previous revision


        def upgrade():
            op.execute("""
        {up}
            """)


        def downgrade():
            op.execute("""
        {down}
            """)
        ''').format(
        description=description,
        revision=revision,
        slug=slug,
        up=textwrap.indent(up.rstrip(), "    "),
        down=textwrap.indent(down.rstrip(), "    "),
    )
    # Alembic keeps upgrade and downgrade in one file, so the "down" half is empty.
    return template, ""
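# A sketch of the alembic output for "add phone varchar to users" (the
# revision id is timestamp-dependent):
#
#   """add phone varchar to users
#
#   Revision ID: 202603251405
#   Suggested filename: 202603251405_add_phone_varchar_to_users.py
#   """
#   ...
#   def upgrade():
#       op.execute("""
#       ALTER TABLE users ADD COLUMN phone VARCHAR(255);
#       """)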
FORMATTERS = {
"sql": wrap_sql,
"prisma": wrap_prisma,
"alembic": wrap_alembic,
}
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
parser = argparse.ArgumentParser(
description="Generate database migration templates from change descriptions.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Supported change descriptions:
"add email_verified boolean to users"
"drop column legacy_flag from accounts"
"rename column name to full_name in customers"
"create table reviews with id, user_id, rating int, body text"
"drop table temp_imports"
"add index on orders(status, created_at)"
"add unique index on users(email)"
"change email type to varchar in users"
Examples:
%(prog)s --change "add phone varchar to users" --dialect postgres
%(prog)s --change "create table reviews with id, user_id, rating int, body" --format prisma
%(prog)s --change "add index on orders(status)" --output migrations/001.sql --json
""",
)
parser.add_argument("--change", required=True, help="Natural-language description of the schema change")
parser.add_argument("--dialect", choices=["postgres", "mysql", "sqlite", "sqlserver"],
default="postgres", help="Target database dialect (default: postgres)")
parser.add_argument("--format", choices=["sql", "prisma", "alembic"], default="sql",
dest="fmt", help="Output format (default: sql)")
parser.add_argument("--output", help="Write migration to file instead of stdout")
parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON")
args = parser.parse_args()
change = parse_change(args.change)
if not change:
print(f"Error: Could not parse change description: '{args.change}'", file=sys.stderr)
print("Run with --help to see supported patterns.", file=sys.stderr)
sys.exit(1)
gen_fn = GENERATORS.get(change["op"])
if not gen_fn:
print(f"Error: No generator for operation '{change['op']}'", file=sys.stderr)
sys.exit(1)
up, down, warnings = gen_fn(change, args.dialect)
fmt_fn = FORMATTERS[args.fmt]
up_formatted, down_formatted = fmt_fn(up, down, args.change)
migration = Migration(
description=args.change,
dialect=args.dialect,
format=args.fmt,
up=up_formatted,
down=down_formatted,
warnings=warnings,
)
if args.json_output:
print(json.dumps(migration.to_dict(), indent=2))
else:
if args.output:
with open(args.output, "w") as f:
f.write(migration.up)
print(f"Migration written to {args.output}")
            if migration.down:
                # Derive the rollback path with splitext so an output name
                # without a .sql suffix cannot clobber the up file.
                root, ext = os.path.splitext(args.output)
                down_path = f"{root}_down{ext or '.sql'}"
                with open(down_path, "w") as f:
                    f.write(migration.down)
                print(f"Rollback written to {down_path}")
else:
print(migration.up)
if migration.down:
print("\n" + "=" * 40 + " ROLLBACK " + "=" * 40 + "\n")
print(migration.down)
if warnings:
print("\nWarnings:")
for w in warnings:
print(f" - {w}")
if __name__ == "__main__":
main()

query_optimizer.py

@@ -0,0 +1,348 @@
#!/usr/bin/env python3
"""
SQL Query Optimizer — Static Analysis
Analyzes SQL queries for common performance issues:
- SELECT * usage
- Missing WHERE clauses on UPDATE/DELETE
- Cartesian joins (missing JOIN conditions)
- Subqueries in SELECT list
- Missing LIMIT on unbounded SELECTs
- Function calls on indexed columns (non-sargable)
- LIKE with leading wildcard
- ORDER BY RAND()
- UNION instead of UNION ALL
- NOT IN with subquery (NULL-unsafe)
Usage:
python query_optimizer.py --query "SELECT * FROM users"
python query_optimizer.py --query queries.sql --dialect postgres
python query_optimizer.py --query "SELECT * FROM orders" --json
"""
import argparse
import json
import os
import re
import sys
from dataclasses import dataclass, asdict
from typing import List, Optional
@dataclass
class Issue:
"""A single optimization issue found in a query."""
severity: str # critical, warning, info
rule: str
message: str
suggestion: str
line: Optional[int] = None
@dataclass
class QueryAnalysis:
"""Analysis result for one SQL query."""
query: str
issues: List[Issue]
score: int # 0-100, higher is better
def to_dict(self):
return {
"query": self.query[:200] + ("..." if len(self.query) > 200 else ""),
"issues": [asdict(i) for i in self.issues],
"issue_count": len(self.issues),
"score": self.score,
}
# ---------------------------------------------------------------------------
# Rule checkers
# ---------------------------------------------------------------------------
def check_select_star(sql: str) -> Optional[Issue]:
"""Detect SELECT * usage."""
    if re.search(r'\bSELECT\s+\*', sql, re.IGNORECASE):
return Issue(
severity="warning",
rule="select-star",
message="SELECT * transfers unnecessary data and breaks on schema changes.",
suggestion="List only the columns you need: SELECT col1, col2, ...",
)
return None
def check_missing_where(sql: str) -> Optional[Issue]:
"""Detect UPDATE/DELETE without WHERE."""
upper = sql.upper().strip()
for keyword in ("UPDATE", "DELETE"):
if upper.startswith(keyword) and "WHERE" not in upper:
return Issue(
severity="critical",
rule="missing-where",
message=f"{keyword} without WHERE affects every row in the table.",
suggestion=f"Add a WHERE clause to restrict the {keyword} scope.",
)
return None
def check_cartesian_join(sql: str) -> Optional[Issue]:
"""Detect comma-separated tables without explicit JOIN or WHERE join condition."""
upper = sql.upper()
if "SELECT" not in upper:
return None
from_match = re.search(r'\bFROM\s+(.+?)(?:\bWHERE\b|\bGROUP\b|\bORDER\b|\bLIMIT\b|\bHAVING\b|;|$)',
sql, re.IGNORECASE | re.DOTALL)
if not from_match:
return None
from_clause = from_match.group(1)
# Skip if explicit JOINs are used
if re.search(r'\bJOIN\b', from_clause, re.IGNORECASE):
return None
# Count comma-separated tables
tables = [t.strip() for t in from_clause.split(",") if t.strip()]
if len(tables) > 1 and "WHERE" not in upper:
return Issue(
severity="critical",
rule="cartesian-join",
message="Multiple tables in FROM without JOIN or WHERE creates a cartesian product.",
suggestion="Use explicit JOIN syntax with ON conditions.",
)
return None
def check_subquery_in_select(sql: str) -> Optional[Issue]:
"""Detect correlated subqueries in SELECT list."""
select_match = re.search(r'\bSELECT\b(.+?)\bFROM\b', sql, re.IGNORECASE | re.DOTALL)
if select_match:
select_clause = select_match.group(1)
if re.search(r'\(\s*SELECT\b', select_clause, re.IGNORECASE):
return Issue(
severity="warning",
rule="subquery-in-select",
message="Subquery in SELECT list executes once per row (correlated subquery).",
suggestion="Rewrite as a LEFT JOIN with aggregation.",
)
return None
def check_missing_limit(sql: str) -> Optional[Issue]:
"""Detect unbounded SELECT without LIMIT."""
upper = sql.upper().strip()
if not upper.startswith("SELECT"):
return None
    # Skip aggregate-only queries (a bare COUNT(...) returns a single row)
    if re.search(r'\bCOUNT\s*\(', upper) and "GROUP BY" not in upper:
return None
if "LIMIT" not in upper and "FETCH" not in upper and "TOP " not in upper:
return Issue(
severity="info",
rule="missing-limit",
message="SELECT without LIMIT may return unbounded rows.",
suggestion="Add LIMIT to prevent returning excessive data.",
)
return None
def check_function_on_column(sql: str) -> Optional[Issue]:
"""Detect function calls on columns in WHERE (non-sargable)."""
where_match = re.search(r'\bWHERE\b(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|\bHAVING\b|;|$)',
sql, re.IGNORECASE | re.DOTALL)
if not where_match:
return None
where_clause = where_match.group(1)
non_sargable = re.search(
r'\b(YEAR|MONTH|DAY|DATE|UPPER|LOWER|TRIM|CAST|COALESCE|IFNULL|NVL)\s*\(',
where_clause, re.IGNORECASE
)
if non_sargable:
func = non_sargable.group(1).upper()
return Issue(
severity="warning",
rule="non-sargable",
message=f"Function {func}() on column in WHERE prevents index usage.",
suggestion="Rewrite to compare the raw column against transformed constants.",
)
return None
def check_leading_wildcard(sql: str) -> Optional[Issue]:
"""Detect LIKE '%...' patterns."""
if re.search(r"LIKE\s+'%", sql, re.IGNORECASE):
return Issue(
severity="warning",
rule="leading-wildcard",
message="LIKE with leading wildcard prevents index usage.",
suggestion="Use full-text search (GIN index, FULLTEXT, FTS5) for substring matching.",
)
return None
def check_order_by_rand(sql: str) -> Optional[Issue]:
"""Detect ORDER BY RAND() / RANDOM()."""
if re.search(r'ORDER\s+BY\s+(RAND|RANDOM)\s*\(\)', sql, re.IGNORECASE):
return Issue(
severity="warning",
rule="order-by-rand",
message="ORDER BY RAND() scans and sorts the entire table.",
suggestion="Use application-side random sampling or TABLESAMPLE.",
)
return None
def check_union_vs_union_all(sql: str) -> Optional[Issue]:
"""Detect UNION without ALL (unnecessary dedup)."""
if re.search(r'\bUNION\b(?!\s+ALL\b)', sql, re.IGNORECASE):
return Issue(
severity="info",
rule="union-without-all",
message="UNION performs deduplication sort; use UNION ALL if duplicates are acceptable.",
suggestion="Replace UNION with UNION ALL unless you specifically need deduplication.",
)
return None
def check_not_in_subquery(sql: str) -> Optional[Issue]:
"""Detect NOT IN (SELECT ...) which is NULL-unsafe."""
if re.search(r'\bNOT\s+IN\s*\(\s*SELECT\b', sql, re.IGNORECASE):
return Issue(
severity="warning",
rule="not-in-subquery",
message="NOT IN with subquery returns no rows if any subquery result is NULL.",
suggestion="Use NOT EXISTS (SELECT 1 ...) instead.",
)
return None
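# The NULL trap in concrete terms: if deleted_users.id contains a single NULL,
#   SELECT * FROM users WHERE id NOT IN (SELECT id FROM deleted_users)
# returns zero rows, because `id <> NULL` evaluates to UNKNOWN for every row.
# NOT EXISTS does not have this failure mode.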
ALL_CHECKS = [
check_select_star,
check_missing_where,
check_cartesian_join,
check_subquery_in_select,
check_missing_limit,
check_function_on_column,
check_leading_wildcard,
check_order_by_rand,
check_union_vs_union_all,
check_not_in_subquery,
]
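# Each check takes the raw SQL string and returns an Issue or None, so adding a
# rule is one function plus one ALL_CHECKS entry. A sketch (hypothetical rule,
# not part of the shipped checks):
#
# def check_large_offset(sql: str) -> Optional[Issue]:
#     if re.search(r'\bOFFSET\s+\d{4,}', sql, re.IGNORECASE):
#         return Issue(
#             severity="info",
#             rule="large-offset",
#             message="Large OFFSET scans and discards all skipped rows.",
#             suggestion="Use keyset (cursor) pagination on an indexed column.",
#         )
#     return None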
# ---------------------------------------------------------------------------
# Analysis engine
# ---------------------------------------------------------------------------
def analyze_query(sql: str, dialect: str = "postgres") -> QueryAnalysis:
"""Run all checks against a single SQL query."""
issues: List[Issue] = []
for check_fn in ALL_CHECKS:
issue = check_fn(sql)
if issue:
issues.append(issue)
# Score: start at 100, deduct per severity
score = 100
for issue in issues:
if issue.severity == "critical":
score -= 25
elif issue.severity == "warning":
score -= 10
else:
score -= 5
score = max(0, score)
return QueryAnalysis(query=sql.strip(), issues=issues, score=score)
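# Worked example: "DELETE FROM orders" trips missing-where (critical), scoring
# 100 - 25 = 75. "SELECT * FROM users" trips select-star (warning) and
# missing-limit (info), scoring 100 - 10 - 5 = 85.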
def split_queries(text: str) -> List[str]:
"""Split SQL text into individual statements."""
queries = []
for stmt in text.split(";"):
stmt = stmt.strip()
if stmt and len(stmt) > 5:
queries.append(stmt + ";")
return queries
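# Note: splitting on ';' is a heuristic. A semicolon inside a string literal
# (e.g., WHERE note = 'a;b') splits incorrectly; acceptable for a linter that
# only reads queries, but do not reuse this to execute statements.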
# ---------------------------------------------------------------------------
# Output formatting
# ---------------------------------------------------------------------------
SEVERITY_ICONS = {"critical": "[CRITICAL]", "warning": "[WARNING]", "info": "[INFO]"}
def format_text(analyses: List[QueryAnalysis]) -> str:
"""Format analysis results as human-readable text."""
lines = []
for i, analysis in enumerate(analyses, 1):
lines.append(f"{'='*60}")
lines.append(f"Query {i} (Score: {analysis.score}/100)")
lines.append(f" {analysis.query[:120]}{'...' if len(analysis.query) > 120 else ''}")
lines.append("")
if not analysis.issues:
lines.append(" No issues detected.")
for issue in analysis.issues:
icon = SEVERITY_ICONS.get(issue.severity, "")
lines.append(f" {icon} {issue.rule}: {issue.message}")
lines.append(f" -> {issue.suggestion}")
lines.append("")
return "\n".join(lines)
def format_json(analyses: List[QueryAnalysis]) -> str:
"""Format analysis results as JSON."""
return json.dumps(
{"analyses": [a.to_dict() for a in analyses], "total_queries": len(analyses)},
indent=2,
)
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
parser = argparse.ArgumentParser(
description="Analyze SQL queries for common performance issues.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s --query "SELECT * FROM users"
%(prog)s --query queries.sql --dialect mysql
%(prog)s --query "DELETE FROM orders" --json
""",
)
parser.add_argument(
"--query", required=True,
help="SQL query string or path to a .sql file",
)
parser.add_argument(
"--dialect", choices=["postgres", "mysql", "sqlite", "sqlserver"],
default="postgres", help="SQL dialect (default: postgres)",
)
parser.add_argument(
"--json", action="store_true", dest="json_output",
help="Output results as JSON",
)
args = parser.parse_args()
# Determine if query is a file path or inline SQL
sql_text = args.query
if os.path.isfile(args.query):
with open(args.query, "r") as f:
sql_text = f.read()
queries = split_queries(sql_text)
if not queries:
# Treat the whole input as a single query
queries = [sql_text.strip()]
analyses = [analyze_query(q, args.dialect) for q in queries]
if args.json_output:
print(format_json(analyses))
else:
print(format_text(analyses))
if __name__ == "__main__":
main()

schema_explorer.py

@@ -0,0 +1,315 @@
#!/usr/bin/env python3
"""
Schema Explorer
Generates schema documentation from database introspection queries.
Outputs the introspection SQL and sample documentation templates
for PostgreSQL, MySQL, SQLite, and SQL Server.
Since this tool runs without a live database connection, it generates:
1. The introspection queries you need to run
2. Documentation templates from the results
3. Sample schema docs for common table patterns
Usage:
python schema_explorer.py --dialect postgres --tables all --format md
python schema_explorer.py --dialect mysql --tables users,orders --format json
python schema_explorer.py --dialect sqlite --tables all --json
"""
import argparse
import json
import sys
import textwrap
from dataclasses import dataclass, asdict
from typing import List, Optional, Dict
# ---------------------------------------------------------------------------
# Introspection query templates per dialect
# ---------------------------------------------------------------------------
INTROSPECTION_QUERIES: Dict[str, Dict[str, str]] = {
"postgres": {
"tables": textwrap.dedent("""\
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
ORDER BY table_name;"""),
"columns": textwrap.dedent("""\
SELECT table_name, column_name, data_type, character_maximum_length,
is_nullable, column_default
FROM information_schema.columns
WHERE table_schema = 'public' {table_filter}
ORDER BY table_name, ordinal_position;"""),
"primary_keys": textwrap.dedent("""\
SELECT tc.table_name, kcu.column_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
WHERE tc.constraint_type = 'PRIMARY KEY' AND tc.table_schema = 'public'
ORDER BY tc.table_name;"""),
"foreign_keys": textwrap.dedent("""\
SELECT tc.table_name, kcu.column_name,
ccu.table_name AS foreign_table, ccu.column_name AS foreign_column
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
JOIN information_schema.constraint_column_usage ccu
ON tc.constraint_name = ccu.constraint_name
WHERE tc.constraint_type = 'FOREIGN KEY'
ORDER BY tc.table_name;"""),
"indexes": textwrap.dedent("""\
SELECT schemaname, tablename, indexname, indexdef
FROM pg_indexes
WHERE schemaname = 'public'
ORDER BY tablename, indexname;"""),
"table_sizes": textwrap.dedent("""\
SELECT relname AS table_name,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
pg_size_pretty(pg_relation_size(relid)) AS data_size,
pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size
FROM pg_catalog.pg_statio_user_tables
ORDER BY pg_total_relation_size(relid) DESC;"""),
},
"mysql": {
"tables": textwrap.dedent("""\
SELECT table_name
FROM information_schema.tables
WHERE table_schema = DATABASE() AND table_type = 'BASE TABLE'
ORDER BY table_name;"""),
"columns": textwrap.dedent("""\
SELECT table_name, column_name, column_type, is_nullable,
column_default, column_key, extra
FROM information_schema.columns
WHERE table_schema = DATABASE() {table_filter}
ORDER BY table_name, ordinal_position;"""),
"foreign_keys": textwrap.dedent("""\
SELECT table_name, column_name, referenced_table_name, referenced_column_name
FROM information_schema.key_column_usage
WHERE table_schema = DATABASE() AND referenced_table_name IS NOT NULL
ORDER BY table_name;"""),
"indexes": textwrap.dedent("""\
SELECT table_name, index_name, non_unique, column_name, seq_in_index
FROM information_schema.statistics
WHERE table_schema = DATABASE()
ORDER BY table_name, index_name, seq_in_index;"""),
"table_sizes": textwrap.dedent("""\
SELECT table_name, table_rows,
ROUND(data_length / 1024 / 1024, 2) AS data_mb,
ROUND(index_length / 1024 / 1024, 2) AS index_mb
FROM information_schema.tables
WHERE table_schema = DATABASE()
ORDER BY data_length DESC;"""),
},
"sqlite": {
"tables": textwrap.dedent("""\
SELECT name FROM sqlite_master
WHERE type = 'table' AND name NOT LIKE 'sqlite_%'
ORDER BY name;"""),
"columns": textwrap.dedent("""\
-- Run for each table:
PRAGMA table_info({table_name});"""),
"foreign_keys": textwrap.dedent("""\
-- Run for each table:
PRAGMA foreign_key_list({table_name});"""),
"indexes": textwrap.dedent("""\
SELECT name, tbl_name, sql FROM sqlite_master
WHERE type = 'index'
ORDER BY tbl_name, name;"""),
"schema_dump": textwrap.dedent("""\
SELECT name, sql FROM sqlite_master
WHERE type = 'table'
ORDER BY name;"""),
},
"sqlserver": {
"tables": textwrap.dedent("""\
SELECT TABLE_NAME
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_TYPE = 'BASE TABLE'
ORDER BY TABLE_NAME;"""),
"columns": textwrap.dedent("""\
SELECT t.name AS table_name, c.name AS column_name,
ty.name AS data_type, c.max_length, c.precision, c.scale,
c.is_nullable, dc.definition AS default_value
FROM sys.columns c
JOIN sys.tables t ON c.object_id = t.object_id
JOIN sys.types ty ON c.user_type_id = ty.user_type_id
LEFT JOIN sys.default_constraints dc ON c.default_object_id = dc.object_id
{table_filter}
ORDER BY t.name, c.column_id;"""),
"foreign_keys": textwrap.dedent("""\
SELECT fk.name AS fk_name,
tp.name AS parent_table, cp.name AS parent_column,
tr.name AS referenced_table, cr.name AS referenced_column
FROM sys.foreign_keys fk
JOIN sys.foreign_key_columns fkc ON fk.object_id = fkc.constraint_object_id
JOIN sys.tables tp ON fkc.parent_object_id = tp.object_id
JOIN sys.columns cp ON fkc.parent_object_id = cp.object_id AND fkc.parent_column_id = cp.column_id
JOIN sys.tables tr ON fkc.referenced_object_id = tr.object_id
JOIN sys.columns cr ON fkc.referenced_object_id = cr.object_id AND fkc.referenced_column_id = cr.column_id
ORDER BY tp.name;"""),
"indexes": textwrap.dedent("""\
SELECT t.name AS table_name, i.name AS index_name,
i.type_desc, i.is_unique, c.name AS column_name,
ic.key_ordinal
FROM sys.indexes i
JOIN sys.index_columns ic ON i.object_id = ic.object_id AND i.index_id = ic.index_id
JOIN sys.columns c ON ic.object_id = c.object_id AND ic.column_id = c.column_id
JOIN sys.tables t ON i.object_id = t.object_id
WHERE i.name IS NOT NULL
ORDER BY t.name, i.name, ic.key_ordinal;"""),
},
}
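# The {table_filter} and {table_name} placeholders are substituted at
# generation time. For --tables users,orders on postgres, the columns query
# becomes:
#   WHERE table_schema = 'public' AND table_name IN ('users', 'orders')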
# ---------------------------------------------------------------------------
# Documentation generators
# ---------------------------------------------------------------------------
SAMPLE_TABLES = {
"users": {
"columns": [
{"name": "id", "type": "SERIAL / INT", "nullable": "NO", "default": "auto", "notes": "Primary key"},
{"name": "email", "type": "VARCHAR(255)", "nullable": "NO", "default": "-", "notes": "Unique, indexed"},
{"name": "name", "type": "VARCHAR(255)", "nullable": "YES", "default": "NULL", "notes": "Display name"},
{"name": "password_hash", "type": "VARCHAR(255)", "nullable": "NO", "default": "-", "notes": "bcrypt hash"},
{"name": "created_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
{"name": "updated_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
],
"indexes": ["PRIMARY KEY (id)", "UNIQUE INDEX (email)"],
"foreign_keys": [],
},
"orders": {
"columns": [
{"name": "id", "type": "SERIAL / INT", "nullable": "NO", "default": "auto", "notes": "Primary key"},
{"name": "user_id", "type": "INTEGER", "nullable": "NO", "default": "-", "notes": "FK -> users.id"},
{"name": "status", "type": "VARCHAR(50)", "nullable": "NO", "default": "'pending'", "notes": "pending/paid/shipped/cancelled"},
{"name": "total", "type": "DECIMAL(19,4)", "nullable": "NO", "default": "0", "notes": "Order total in cents"},
{"name": "created_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
],
"indexes": ["PRIMARY KEY (id)", "INDEX (user_id)", "INDEX (status, created_at)"],
"foreign_keys": ["user_id -> users.id ON DELETE CASCADE"],
},
}
def generate_md(dialect: str, tables: List[str]) -> str:
"""Generate markdown schema documentation."""
lines = [f"# Database Schema Documentation ({dialect.upper()})\n"]
lines.append(f"Generated by sql-database-assistant schema_explorer.\n")
# Introspection queries section
lines.append("## Introspection Queries\n")
lines.append("Run these queries against your database to extract schema information:\n")
    queries = INTROSPECTION_QUERIES.get(dialect, {})
    # sys.* catalog views expose the table name as t.name rather than table_name
    filter_col = "t.name" if dialect == "sqlserver" else "table_name"
    for qname, qsql in queries.items():
        table_filter = ""
        if "all" not in tables:
            tlist = ", ".join(f"'{t}'" for t in tables)
            table_filter = f"AND {filter_col} IN ({tlist})"
        qsql = qsql.replace("{table_filter}", table_filter)
        qsql = qsql.replace("{table_name}", tables[0] if tables and tables[0] != "all" else "TABLE_NAME")
lines.append(f"### {qname.replace('_', ' ').title()}\n")
lines.append(f"```sql\n{qsql}\n```\n")
# Sample documentation
lines.append("## Sample Table Documentation\n")
lines.append("Below is an example of the documentation format produced from query results:\n")
show_tables = tables if "all" not in tables else list(SAMPLE_TABLES.keys())
for tname in show_tables:
sample = SAMPLE_TABLES.get(tname)
if not sample:
lines.append(f"### {tname}\n")
lines.append("_No sample data available. Run introspection queries above._\n")
continue
lines.append(f"### {tname}\n")
lines.append("| Column | Type | Nullable | Default | Notes |")
lines.append("|--------|------|----------|---------|-------|")
for col in sample["columns"]:
lines.append(f"| {col['name']} | {col['type']} | {col['nullable']} | {col['default']} | {col['notes']} |")
lines.append("")
if sample["indexes"]:
lines.append("**Indexes:** " + ", ".join(sample["indexes"]))
if sample["foreign_keys"]:
lines.append("**Foreign Keys:** " + ", ".join(sample["foreign_keys"]))
lines.append("")
return "\n".join(lines)
def generate_json_output(dialect: str, tables: List[str]) -> dict:
"""Generate JSON schema documentation."""
    queries = INTROSPECTION_QUERIES.get(dialect, {})
    # sys.* catalog views expose the table name as t.name rather than table_name
    filter_col = "t.name" if dialect == "sqlserver" else "table_name"
    processed = {}
    for qname, qsql in queries.items():
        table_filter = ""
        if "all" not in tables:
            tlist = ", ".join(f"'{t}'" for t in tables)
            table_filter = f"AND {filter_col} IN ({tlist})"
        processed[qname] = qsql.replace("{table_filter}", table_filter).replace(
            "{table_name}", tables[0] if tables and tables[0] != "all" else "TABLE_NAME"
        )
show_tables = tables if "all" not in tables else list(SAMPLE_TABLES.keys())
sample_docs = {}
for tname in show_tables:
sample = SAMPLE_TABLES.get(tname)
if sample:
sample_docs[tname] = sample
return {
"dialect": dialect,
"requested_tables": tables,
"introspection_queries": processed,
"sample_documentation": sample_docs,
"instructions": "Run the introspection queries against your database, then use the results to populate documentation in the sample format shown.",
}
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
parser = argparse.ArgumentParser(
description="Generate schema documentation from database introspection.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s --dialect postgres --tables all --format md
%(prog)s --dialect mysql --tables users,orders --format json
%(prog)s --dialect sqlite --tables all --json
""",
)
parser.add_argument(
"--dialect", required=True, choices=["postgres", "mysql", "sqlite", "sqlserver"],
help="Target database dialect",
)
parser.add_argument(
"--tables", default="all",
help="Comma-separated table names or 'all' (default: all)",
)
parser.add_argument(
"--format", choices=["md", "json"], default="md", dest="fmt",
help="Output format (default: md)",
)
parser.add_argument(
"--json", action="store_true", dest="json_output",
help="Output as JSON (overrides --format)",
)
args = parser.parse_args()
tables = [t.strip() for t in args.tables.split(",")]
if args.json_output or args.fmt == "json":
result = generate_json_output(args.dialect, tables)
print(json.dumps(result, indent=2))
else:
print(generate_md(args.dialect, tables))
if __name__ == "__main__":
main()