feat(engineering,ra-qm): add secrets-vault-manager, sql-database-assistant, gcp-cloud-architect, soc2-compliance

secrets-vault-manager (403-line SKILL.md, 3 scripts, 3 references):
- HashiCorp Vault, AWS SM, Azure KV, GCP SM integration
- Secret rotation, dynamic secrets, audit logging, emergency procedures

sql-database-assistant (457-line SKILL.md, 3 scripts, 3 references):
- Query optimization, migration generation, schema exploration
- Multi-DB support (PostgreSQL, MySQL, SQLite, SQL Server)
- ORM patterns (Prisma, Drizzle, TypeORM, SQLAlchemy)

gcp-cloud-architect (418-line SKILL.md, 3 scripts, 3 references):
- 6-step workflow mirroring aws-solution-architect for GCP
- Cloud Run, GKE, BigQuery, Cloud Functions, cost optimization
- Completes cloud trifecta (AWS + Azure + GCP)

soc2-compliance (417-line SKILL.md, 3 scripts, 3 references):
- SOC 2 Type I & II preparation, Trust Service Criteria mapping
- Control matrix generation, evidence tracking, gap analysis
- First SOC 2 skill in ra-qm-team (joins GDPR, ISO 27001, ISO 13485)

All 12 scripts pass --help. Docs generated, mkdocs.yml nav updated.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Author: Reza Rezvani
Date: 2026-03-25 14:05:11 +01:00
Commit: 87f3a007c9 (parent: 7a2189fa21)
36 changed files with 13450 additions and 6 deletions

migration_generator.py

@@ -0,0 +1,442 @@
#!/usr/bin/env python3
"""
Migration Generator
Generates database migration file templates (up/down) from natural-language
schema change descriptions.
Supported operations:
- Add column, drop column, rename column
- Add table, drop table, rename table
- Add index, drop index
- Add constraint, drop constraint
- Change column type
Usage:
python migration_generator.py --change "add email_verified boolean to users" --dialect postgres
python migration_generator.py --change "rename column name to full_name in customers" --format alembic
python migration_generator.py --change "add index on orders(status, created_at)" --output 001_add_index.sql
python migration_generator.py --change "create table reviews with id, user_id, rating, body" --json
"""
import argparse
import json
import os
import re
import sys
import textwrap
from dataclasses import dataclass, asdict
from datetime import datetime
from typing import List, Optional, Tuple
@dataclass
class Migration:
"""A generated migration with up and down scripts."""
description: str
dialect: str
format: str
up: str
down: str
warnings: List[str]
def to_dict(self):
return asdict(self)
# ---------------------------------------------------------------------------
# Change parsers — extract structured intent from natural language
# ---------------------------------------------------------------------------
def parse_add_column(desc: str) -> Optional[dict]:
"""Parse: add <column> <type> to <table>"""
m = re.match(
r'add\s+(?:column\s+)?(\w+)\s+(\w[\w(),.]*)\s+(?:to|on)\s+(\w+)',
desc, re.IGNORECASE,
)
if m:
return {"op": "add_column", "column": m.group(1), "type": m.group(2), "table": m.group(3)}
return None
def parse_drop_column(desc: str) -> Optional[dict]:
"""Parse: drop/remove <column> from <table>"""
m = re.match(
r'(?:drop|remove)\s+(?:column\s+)?(\w+)\s+from\s+(\w+)',
desc, re.IGNORECASE,
)
if m:
return {"op": "drop_column", "column": m.group(1), "table": m.group(2)}
return None
def parse_rename_column(desc: str) -> Optional[dict]:
"""Parse: rename column <old> to <new> in <table>"""
m = re.match(
r'rename\s+column\s+(\w+)\s+to\s+(\w+)\s+in\s+(\w+)',
desc, re.IGNORECASE,
)
if m:
return {"op": "rename_column", "old": m.group(1), "new": m.group(2), "table": m.group(3)}
return None
def parse_add_table(desc: str) -> Optional[dict]:
"""Parse: create table <name> with <col1>, <col2>, ..."""
m = re.match(
r'create\s+table\s+(\w+)\s+with\s+(.+)',
desc, re.IGNORECASE,
)
if m:
cols = [c.strip() for c in m.group(2).split(",")]
return {"op": "add_table", "table": m.group(1), "columns": cols}
return None
def parse_drop_table(desc: str) -> Optional[dict]:
"""Parse: drop table <name>"""
m = re.match(r'drop\s+table\s+(\w+)', desc, re.IGNORECASE)
if m:
return {"op": "drop_table", "table": m.group(1)}
return None
def parse_add_index(desc: str) -> Optional[dict]:
"""Parse: add index on <table>(<col1>, <col2>)"""
m = re.match(
r'add\s+(?:unique\s+)?index\s+(?:on\s+)?(\w+)\s*\(([^)]+)\)',
desc, re.IGNORECASE,
)
if m:
unique = "unique" in desc.lower()
cols = [c.strip() for c in m.group(2).split(",")]
return {"op": "add_index", "table": m.group(1), "columns": cols, "unique": unique}
return None
def parse_change_type(desc: str) -> Optional[dict]:
"""Parse: change <column> type to <type> in <table>"""
m = re.match(
r'change\s+(?:column\s+)?(\w+)\s+type\s+to\s+(\w[\w(),.]*)\s+in\s+(\w+)',
desc, re.IGNORECASE,
)
if m:
return {"op": "change_type", "column": m.group(1), "new_type": m.group(2), "table": m.group(3)}
return None
PARSERS = [
parse_add_column,
parse_drop_column,
parse_rename_column,
parse_add_table,
parse_drop_table,
parse_add_index,
parse_change_type,
]
def parse_change(desc: str) -> Optional[dict]:
for parser in PARSERS:
result = parser(desc)
if result:
return result
return None
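# For example (given the parsers above), parse_change maps:
#   "add email_verified boolean to users"
#     -> {"op": "add_column", "column": "email_verified", "type": "boolean", "table": "users"}
#   "add index on orders(status, created_at)"
#     -> {"op": "add_index", "table": "orders", "columns": ["status", "created_at"], "unique": False}
# Order matters: parsers run top-down and the first match wins.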
# ---------------------------------------------------------------------------
# SQL generators per dialect
# ---------------------------------------------------------------------------
TYPE_MAP = {
"boolean": {"postgres": "BOOLEAN", "mysql": "TINYINT(1)", "sqlite": "INTEGER", "sqlserver": "BIT"},
"text": {"postgres": "TEXT", "mysql": "TEXT", "sqlite": "TEXT", "sqlserver": "NVARCHAR(MAX)"},
"integer": {"postgres": "INTEGER", "mysql": "INT", "sqlite": "INTEGER", "sqlserver": "INT"},
"int": {"postgres": "INTEGER", "mysql": "INT", "sqlite": "INTEGER", "sqlserver": "INT"},
"serial": {"postgres": "SERIAL", "mysql": "INT AUTO_INCREMENT", "sqlite": "INTEGER", "sqlserver": "INT IDENTITY(1,1)"},
"varchar": {"postgres": "VARCHAR(255)", "mysql": "VARCHAR(255)", "sqlite": "TEXT", "sqlserver": "NVARCHAR(255)"},
"timestamp": {"postgres": "TIMESTAMP", "mysql": "DATETIME", "sqlite": "TEXT", "sqlserver": "DATETIME2"},
"uuid": {"postgres": "UUID", "mysql": "CHAR(36)", "sqlite": "TEXT", "sqlserver": "UNIQUEIDENTIFIER"},
"json": {"postgres": "JSONB", "mysql": "JSON", "sqlite": "TEXT", "sqlserver": "NVARCHAR(MAX)"},
"decimal": {"postgres": "DECIMAL(19,4)", "mysql": "DECIMAL(19,4)", "sqlite": "REAL", "sqlserver": "DECIMAL(19,4)"},
"float": {"postgres": "DOUBLE PRECISION", "mysql": "DOUBLE", "sqlite": "REAL", "sqlserver": "FLOAT"},
}
def map_type(type_name: str, dialect: str) -> str:
    """Map a generic type name to a dialect-specific type."""
    # Strip any size/precision suffix ("varchar(100)" -> "varchar") before lookup;
    # rstrip("()") only removes trailing characters and would leave "varchar(100".
    key = re.sub(r'\(.*\)$', '', type_name).strip().lower()
    if key in TYPE_MAP and dialect in TYPE_MAP[key]:
        return TYPE_MAP[key][dialect]
    return type_name.upper()
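# For example, with the type map above:
#   map_type("boolean", "mysql")         -> "TINYINT(1)"
#   map_type("varchar(100)", "postgres") -> "VARCHAR(255)"  # size suffix is dropped;
#                                                           # the dialect default applies
#   map_type("citext", "postgres")       -> "CITEXT"        # unknown types pass through uppercased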
def gen_add_column(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
col_type = map_type(change["type"], dialect)
table = change["table"]
col = change["column"]
up = f"ALTER TABLE {table} ADD COLUMN {col} {col_type};"
down = f"ALTER TABLE {table} DROP COLUMN {col};"
return up, down, []
def gen_drop_column(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
table = change["table"]
col = change["column"]
up = f"ALTER TABLE {table} DROP COLUMN {col};"
down = f"-- WARNING: Cannot fully reverse DROP COLUMN. Provide the original type.\nALTER TABLE {table} ADD COLUMN {col} TEXT;"
return up, down, ["Down migration uses TEXT as placeholder. Replace with the original column type."]
def gen_rename_column(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
    table = change["table"]
    old, new = change["old"], change["new"]
    warnings = []
    if dialect == "sqlserver":
        up = f"EXEC sp_rename '{table}.{old}', '{new}', 'COLUMN';"
        down = f"EXEC sp_rename '{table}.{new}', '{old}', 'COLUMN';"
    else:
        # postgres, mysql, sqlite, and the generic fallback share this syntax
        up = f"ALTER TABLE {table} RENAME COLUMN {old} TO {new};"
        down = f"ALTER TABLE {table} RENAME COLUMN {new} TO {old};"
        if dialect == "sqlite":
            warnings.append("SQLite RENAME COLUMN requires version 3.25.0+.")
        elif dialect == "mysql":
            warnings.append("MySQL RENAME COLUMN requires 8.0+; use CHANGE COLUMN on 5.7 and earlier.")
    return up, down, warnings
def gen_add_table(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
table = change["table"]
cols = change["columns"]
col_defs = []
has_id = False
for col in cols:
col = col.strip()
if col.lower() == "id":
has_id = True
if dialect == "postgres":
col_defs.append(" id SERIAL PRIMARY KEY")
elif dialect == "mysql":
col_defs.append(" id INT AUTO_INCREMENT PRIMARY KEY")
elif dialect == "sqlite":
col_defs.append(" id INTEGER PRIMARY KEY AUTOINCREMENT")
elif dialect == "sqlserver":
col_defs.append(" id INT IDENTITY(1,1) PRIMARY KEY")
else:
# Check if type is specified (e.g., "rating int")
parts = col.split()
if len(parts) >= 2:
col_defs.append(f" {parts[0]} {map_type(parts[1], dialect)}")
else:
col_defs.append(f" {col} TEXT")
cols_sql = ",\n".join(col_defs)
up = f"CREATE TABLE {table} (\n{cols_sql}\n);"
down = f"DROP TABLE {table};"
warnings = []
if not has_id:
warnings.append("Table has no explicit primary key. Consider adding an 'id' column.")
return up, down, warnings
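# For example, "create table reviews with id, user_id, rating int, body text"
# on postgres yields:
#   CREATE TABLE reviews (
#     id SERIAL PRIMARY KEY,
#     user_id TEXT,
#     rating INTEGER,
#     body TEXT
#   );
# Untyped columns (like user_id here) default to TEXT; add a type hint to override.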
def gen_drop_table(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
table = change["table"]
up = f"DROP TABLE {table};"
down = f"-- WARNING: Cannot reverse DROP TABLE without original DDL.\nCREATE TABLE {table} (id INTEGER PRIMARY KEY);"
return up, down, ["Down migration is a placeholder. Replace with the original CREATE TABLE statement."]
def gen_add_index(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
table = change["table"]
cols = change["columns"]
unique = "UNIQUE " if change.get("unique") else ""
idx_name = f"idx_{table}_{'_'.join(cols)}"
if dialect == "postgres":
up = f"CREATE {unique}INDEX CONCURRENTLY {idx_name} ON {table} ({', '.join(cols)});"
else:
up = f"CREATE {unique}INDEX {idx_name} ON {table} ({', '.join(cols)});"
down = f"DROP INDEX {idx_name};" if dialect != "mysql" else f"DROP INDEX {idx_name} ON {table};"
warnings = []
if dialect == "postgres":
warnings.append("CONCURRENTLY cannot run inside a transaction. Run outside migration transaction.")
return up, down, warnings
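# For example, "add index on orders(status, created_at)" on postgres yields:
#   up:   CREATE INDEX CONCURRENTLY idx_orders_status_created_at ON orders (status, created_at);
#   down: DROP INDEX idx_orders_status_created_at;
# plus the warning that CONCURRENTLY must run outside a transaction.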
def gen_change_type(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
table = change["table"]
col = change["column"]
new_type = map_type(change["new_type"], dialect)
warnings = ["Down migration uses TEXT as placeholder. Replace with the original column type."]
if dialect == "postgres":
up = f"ALTER TABLE {table} ALTER COLUMN {col} TYPE {new_type};"
down = f"ALTER TABLE {table} ALTER COLUMN {col} TYPE TEXT;"
elif dialect == "mysql":
up = f"ALTER TABLE {table} MODIFY COLUMN {col} {new_type};"
down = f"ALTER TABLE {table} MODIFY COLUMN {col} TEXT;"
elif dialect == "sqlserver":
up = f"ALTER TABLE {table} ALTER COLUMN {col} {new_type};"
down = f"ALTER TABLE {table} ALTER COLUMN {col} NVARCHAR(MAX);"
else:
up = f"-- SQLite does not support ALTER COLUMN. Recreate the table."
down = f"-- SQLite does not support ALTER COLUMN. Recreate the table."
warnings.append("SQLite requires table recreation for type changes.")
return up, down, warnings
GENERATORS = {
"add_column": gen_add_column,
"drop_column": gen_drop_column,
"rename_column": gen_rename_column,
"add_table": gen_add_table,
"drop_table": gen_drop_table,
"add_index": gen_add_index,
"change_type": gen_change_type,
}
# ---------------------------------------------------------------------------
# Format wrappers
# ---------------------------------------------------------------------------
def wrap_sql(up: str, down: str, description: str) -> Tuple[str, str]:
    """Wrap as plain SQL migration files."""
    header = f"-- Migration: {description}\n-- Generated: {datetime.now().isoformat()}\n\n"
    return header + "-- Up\n" + up, header + "-- Down\n" + down
def wrap_prisma(up: str, down: str, description: str) -> Tuple[str, str]:
"""Format as Prisma migration SQL (Prisma uses raw SQL in migration.sql)."""
header = f"-- Migration: {description}\n-- Format: Prisma (migration.sql)\n\n"
return header + up, header + "-- Rollback\n" + down
def wrap_alembic(up: str, down: str, description: str) -> Tuple[str, str]:
    """Format as an Alembic Python migration (upgrade/downgrade in one file)."""
    slug = re.sub(r'\W+', '_', description.lower()).strip('_')[:40]
    revision = datetime.now().strftime("%Y%m%d%H%M")
    template = textwrap.dedent('''\
        """{description}

        Revision ID: {revision}
        Suggested filename: {revision}_{slug}.py
        """
        from alembic import op
        import sqlalchemy as sa

        revision = {revision!r}
        down_revision = None  # Set to the previous revision


        def upgrade():
            op.execute("""
        {up}
            """)


        def downgrade():
            op.execute("""
        {down}
            """)
        ''').format(
        description=description,
        revision=revision,
        slug=slug,
        up=textwrap.indent(up.rstrip(), "    "),
        down=textwrap.indent(down.rstrip(), "    "),
    )
    # Alembic keeps upgrade and downgrade in one file, so the "down" half is empty.
    return template, ""
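# A sketch of the alembic output for "add phone varchar to users" (the
# revision id is timestamp-dependent):
#
#   """add phone varchar to users
#
#   Revision ID: 202603251405
#   Suggested filename: 202603251405_add_phone_varchar_to_users.py
#   """
#   ...
#   def upgrade():
#       op.execute("""
#       ALTER TABLE users ADD COLUMN phone VARCHAR(255);
#       """)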
FORMATTERS = {
"sql": wrap_sql,
"prisma": wrap_prisma,
"alembic": wrap_alembic,
}
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
parser = argparse.ArgumentParser(
description="Generate database migration templates from change descriptions.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Supported change descriptions:
"add email_verified boolean to users"
"drop column legacy_flag from accounts"
"rename column name to full_name in customers"
"create table reviews with id, user_id, rating int, body text"
"drop table temp_imports"
"add index on orders(status, created_at)"
"add unique index on users(email)"
"change email type to varchar in users"
Examples:
%(prog)s --change "add phone varchar to users" --dialect postgres
%(prog)s --change "create table reviews with id, user_id, rating int, body" --format prisma
%(prog)s --change "add index on orders(status)" --output migrations/001.sql --json
""",
)
parser.add_argument("--change", required=True, help="Natural-language description of the schema change")
parser.add_argument("--dialect", choices=["postgres", "mysql", "sqlite", "sqlserver"],
default="postgres", help="Target database dialect (default: postgres)")
parser.add_argument("--format", choices=["sql", "prisma", "alembic"], default="sql",
dest="fmt", help="Output format (default: sql)")
parser.add_argument("--output", help="Write migration to file instead of stdout")
parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON")
args = parser.parse_args()
change = parse_change(args.change)
if not change:
print(f"Error: Could not parse change description: '{args.change}'", file=sys.stderr)
print("Run with --help to see supported patterns.", file=sys.stderr)
sys.exit(1)
gen_fn = GENERATORS.get(change["op"])
if not gen_fn:
print(f"Error: No generator for operation '{change['op']}'", file=sys.stderr)
sys.exit(1)
up, down, warnings = gen_fn(change, args.dialect)
fmt_fn = FORMATTERS[args.fmt]
up_formatted, down_formatted = fmt_fn(up, down, args.change)
migration = Migration(
description=args.change,
dialect=args.dialect,
format=args.fmt,
up=up_formatted,
down=down_formatted,
warnings=warnings,
)
if args.json_output:
print(json.dumps(migration.to_dict(), indent=2))
else:
if args.output:
with open(args.output, "w") as f:
f.write(migration.up)
print(f"Migration written to {args.output}")
            if migration.down:
                # Derive the rollback path with splitext so an output name
                # without a .sql suffix cannot clobber the up file.
                root, ext = os.path.splitext(args.output)
                down_path = f"{root}_down{ext or '.sql'}"
                with open(down_path, "w") as f:
                    f.write(migration.down)
                print(f"Rollback written to {down_path}")
else:
print(migration.up)
if migration.down:
print("\n" + "=" * 40 + " ROLLBACK " + "=" * 40 + "\n")
print(migration.down)
if warnings:
print("\nWarnings:")
for w in warnings:
print(f" - {w}")
if __name__ == "__main__":
main()

query_optimizer.py

@@ -0,0 +1,348 @@
#!/usr/bin/env python3
"""
SQL Query Optimizer — Static Analysis
Analyzes SQL queries for common performance issues:
- SELECT * usage
- Missing WHERE clauses on UPDATE/DELETE
- Cartesian joins (missing JOIN conditions)
- Subqueries in SELECT list
- Missing LIMIT on unbounded SELECTs
- Function calls on indexed columns (non-sargable)
- LIKE with leading wildcard
- ORDER BY RAND()
- UNION instead of UNION ALL
- NOT IN with subquery (NULL-unsafe)
Usage:
python query_optimizer.py --query "SELECT * FROM users"
python query_optimizer.py --query queries.sql --dialect postgres
python query_optimizer.py --query "SELECT * FROM orders" --json
"""
import argparse
import json
import os
import re
import sys
from dataclasses import dataclass, asdict
from typing import List, Optional
@dataclass
class Issue:
"""A single optimization issue found in a query."""
severity: str # critical, warning, info
rule: str
message: str
suggestion: str
line: Optional[int] = None
@dataclass
class QueryAnalysis:
"""Analysis result for one SQL query."""
query: str
issues: List[Issue]
score: int # 0-100, higher is better
def to_dict(self):
return {
"query": self.query[:200] + ("..." if len(self.query) > 200 else ""),
"issues": [asdict(i) for i in self.issues],
"issue_count": len(self.issues),
"score": self.score,
}
# ---------------------------------------------------------------------------
# Rule checkers
# ---------------------------------------------------------------------------
def check_select_star(sql: str) -> Optional[Issue]:
"""Detect SELECT * usage."""
    if re.search(r'\bSELECT\s+\*', sql, re.IGNORECASE):
return Issue(
severity="warning",
rule="select-star",
message="SELECT * transfers unnecessary data and breaks on schema changes.",
suggestion="List only the columns you need: SELECT col1, col2, ...",
)
return None
def check_missing_where(sql: str) -> Optional[Issue]:
"""Detect UPDATE/DELETE without WHERE."""
upper = sql.upper().strip()
for keyword in ("UPDATE", "DELETE"):
if upper.startswith(keyword) and "WHERE" not in upper:
return Issue(
severity="critical",
rule="missing-where",
message=f"{keyword} without WHERE affects every row in the table.",
suggestion=f"Add a WHERE clause to restrict the {keyword} scope.",
)
return None
def check_cartesian_join(sql: str) -> Optional[Issue]:
"""Detect comma-separated tables without explicit JOIN or WHERE join condition."""
upper = sql.upper()
if "SELECT" not in upper:
return None
from_match = re.search(r'\bFROM\s+(.+?)(?:\bWHERE\b|\bGROUP\b|\bORDER\b|\bLIMIT\b|\bHAVING\b|;|$)',
sql, re.IGNORECASE | re.DOTALL)
if not from_match:
return None
from_clause = from_match.group(1)
# Skip if explicit JOINs are used
if re.search(r'\bJOIN\b', from_clause, re.IGNORECASE):
return None
# Count comma-separated tables
tables = [t.strip() for t in from_clause.split(",") if t.strip()]
if len(tables) > 1 and "WHERE" not in upper:
return Issue(
severity="critical",
rule="cartesian-join",
message="Multiple tables in FROM without JOIN or WHERE creates a cartesian product.",
suggestion="Use explicit JOIN syntax with ON conditions.",
)
return None
def check_subquery_in_select(sql: str) -> Optional[Issue]:
"""Detect correlated subqueries in SELECT list."""
select_match = re.search(r'\bSELECT\b(.+?)\bFROM\b', sql, re.IGNORECASE | re.DOTALL)
if select_match:
select_clause = select_match.group(1)
if re.search(r'\(\s*SELECT\b', select_clause, re.IGNORECASE):
return Issue(
severity="warning",
rule="subquery-in-select",
message="Subquery in SELECT list executes once per row (correlated subquery).",
suggestion="Rewrite as a LEFT JOIN with aggregation.",
)
return None
def check_missing_limit(sql: str) -> Optional[Issue]:
"""Detect unbounded SELECT without LIMIT."""
upper = sql.upper().strip()
if not upper.startswith("SELECT"):
return None
    # Skip aggregate-only queries (a bare COUNT(...) returns a single row)
    if re.search(r'\bCOUNT\s*\(', upper) and "GROUP BY" not in upper:
return None
if "LIMIT" not in upper and "FETCH" not in upper and "TOP " not in upper:
return Issue(
severity="info",
rule="missing-limit",
message="SELECT without LIMIT may return unbounded rows.",
suggestion="Add LIMIT to prevent returning excessive data.",
)
return None
def check_function_on_column(sql: str) -> Optional[Issue]:
"""Detect function calls on columns in WHERE (non-sargable)."""
where_match = re.search(r'\bWHERE\b(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|\bHAVING\b|;|$)',
sql, re.IGNORECASE | re.DOTALL)
if not where_match:
return None
where_clause = where_match.group(1)
non_sargable = re.search(
r'\b(YEAR|MONTH|DAY|DATE|UPPER|LOWER|TRIM|CAST|COALESCE|IFNULL|NVL)\s*\(',
where_clause, re.IGNORECASE
)
if non_sargable:
func = non_sargable.group(1).upper()
return Issue(
severity="warning",
rule="non-sargable",
message=f"Function {func}() on column in WHERE prevents index usage.",
suggestion="Rewrite to compare the raw column against transformed constants.",
)
return None
def check_leading_wildcard(sql: str) -> Optional[Issue]:
"""Detect LIKE '%...' patterns."""
if re.search(r"LIKE\s+'%", sql, re.IGNORECASE):
return Issue(
severity="warning",
rule="leading-wildcard",
message="LIKE with leading wildcard prevents index usage.",
suggestion="Use full-text search (GIN index, FULLTEXT, FTS5) for substring matching.",
)
return None
def check_order_by_rand(sql: str) -> Optional[Issue]:
"""Detect ORDER BY RAND() / RANDOM()."""
if re.search(r'ORDER\s+BY\s+(RAND|RANDOM)\s*\(\)', sql, re.IGNORECASE):
return Issue(
severity="warning",
rule="order-by-rand",
message="ORDER BY RAND() scans and sorts the entire table.",
suggestion="Use application-side random sampling or TABLESAMPLE.",
)
return None
def check_union_vs_union_all(sql: str) -> Optional[Issue]:
"""Detect UNION without ALL (unnecessary dedup)."""
if re.search(r'\bUNION\b(?!\s+ALL\b)', sql, re.IGNORECASE):
return Issue(
severity="info",
rule="union-without-all",
message="UNION performs deduplication sort; use UNION ALL if duplicates are acceptable.",
suggestion="Replace UNION with UNION ALL unless you specifically need deduplication.",
)
return None
def check_not_in_subquery(sql: str) -> Optional[Issue]:
"""Detect NOT IN (SELECT ...) which is NULL-unsafe."""
if re.search(r'\bNOT\s+IN\s*\(\s*SELECT\b', sql, re.IGNORECASE):
return Issue(
severity="warning",
rule="not-in-subquery",
message="NOT IN with subquery returns no rows if any subquery result is NULL.",
suggestion="Use NOT EXISTS (SELECT 1 ...) instead.",
)
return None
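# The NULL trap in concrete terms: if deleted_users.id contains a single NULL,
#   SELECT * FROM users WHERE id NOT IN (SELECT id FROM deleted_users)
# returns zero rows, because `id <> NULL` evaluates to UNKNOWN for every row.
# NOT EXISTS does not have this failure mode.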
ALL_CHECKS = [
check_select_star,
check_missing_where,
check_cartesian_join,
check_subquery_in_select,
check_missing_limit,
check_function_on_column,
check_leading_wildcard,
check_order_by_rand,
check_union_vs_union_all,
check_not_in_subquery,
]
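# Each check takes the raw SQL string and returns an Issue or None, so adding a
# rule is one function plus one ALL_CHECKS entry. A sketch (hypothetical rule,
# not part of the shipped checks):
#
# def check_large_offset(sql: str) -> Optional[Issue]:
#     if re.search(r'\bOFFSET\s+\d{4,}', sql, re.IGNORECASE):
#         return Issue(
#             severity="info",
#             rule="large-offset",
#             message="Large OFFSET scans and discards all skipped rows.",
#             suggestion="Use keyset (cursor) pagination on an indexed column.",
#         )
#     return None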
# ---------------------------------------------------------------------------
# Analysis engine
# ---------------------------------------------------------------------------
def analyze_query(sql: str, dialect: str = "postgres") -> QueryAnalysis:
"""Run all checks against a single SQL query."""
issues: List[Issue] = []
for check_fn in ALL_CHECKS:
issue = check_fn(sql)
if issue:
issues.append(issue)
# Score: start at 100, deduct per severity
score = 100
for issue in issues:
if issue.severity == "critical":
score -= 25
elif issue.severity == "warning":
score -= 10
else:
score -= 5
score = max(0, score)
return QueryAnalysis(query=sql.strip(), issues=issues, score=score)
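# Worked example: "DELETE FROM orders" trips missing-where (critical), scoring
# 100 - 25 = 75. "SELECT * FROM users" trips select-star (warning) and
# missing-limit (info), scoring 100 - 10 - 5 = 85.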
def split_queries(text: str) -> List[str]:
"""Split SQL text into individual statements."""
queries = []
for stmt in text.split(";"):
stmt = stmt.strip()
if stmt and len(stmt) > 5:
queries.append(stmt + ";")
return queries
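# Note: splitting on ';' is a heuristic. A semicolon inside a string literal
# (e.g., WHERE note = 'a;b') splits incorrectly; acceptable for a linter that
# only reads queries, but do not reuse this to execute statements.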
# ---------------------------------------------------------------------------
# Output formatting
# ---------------------------------------------------------------------------
SEVERITY_ICONS = {"critical": "[CRITICAL]", "warning": "[WARNING]", "info": "[INFO]"}
def format_text(analyses: List[QueryAnalysis]) -> str:
"""Format analysis results as human-readable text."""
lines = []
for i, analysis in enumerate(analyses, 1):
lines.append(f"{'='*60}")
lines.append(f"Query {i} (Score: {analysis.score}/100)")
lines.append(f" {analysis.query[:120]}{'...' if len(analysis.query) > 120 else ''}")
lines.append("")
if not analysis.issues:
lines.append(" No issues detected.")
for issue in analysis.issues:
icon = SEVERITY_ICONS.get(issue.severity, "")
lines.append(f" {icon} {issue.rule}: {issue.message}")
lines.append(f" -> {issue.suggestion}")
lines.append("")
return "\n".join(lines)
def format_json(analyses: List[QueryAnalysis]) -> str:
"""Format analysis results as JSON."""
return json.dumps(
{"analyses": [a.to_dict() for a in analyses], "total_queries": len(analyses)},
indent=2,
)
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
parser = argparse.ArgumentParser(
description="Analyze SQL queries for common performance issues.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s --query "SELECT * FROM users"
%(prog)s --query queries.sql --dialect mysql
%(prog)s --query "DELETE FROM orders" --json
""",
)
parser.add_argument(
"--query", required=True,
help="SQL query string or path to a .sql file",
)
parser.add_argument(
"--dialect", choices=["postgres", "mysql", "sqlite", "sqlserver"],
default="postgres", help="SQL dialect (default: postgres)",
)
parser.add_argument(
"--json", action="store_true", dest="json_output",
help="Output results as JSON",
)
args = parser.parse_args()
# Determine if query is a file path or inline SQL
sql_text = args.query
if os.path.isfile(args.query):
with open(args.query, "r") as f:
sql_text = f.read()
queries = split_queries(sql_text)
if not queries:
# Treat the whole input as a single query
queries = [sql_text.strip()]
analyses = [analyze_query(q, args.dialect) for q in queries]
if args.json_output:
print(format_json(analyses))
else:
print(format_text(analyses))
if __name__ == "__main__":
main()

schema_explorer.py

@@ -0,0 +1,315 @@
#!/usr/bin/env python3
"""
Schema Explorer
Generates schema documentation from database introspection queries.
Outputs the introspection SQL and sample documentation templates
for PostgreSQL, MySQL, SQLite, and SQL Server.
Since this tool runs without a live database connection, it generates:
1. The introspection queries you need to run
2. Documentation templates from the results
3. Sample schema docs for common table patterns
Usage:
python schema_explorer.py --dialect postgres --tables all --format md
python schema_explorer.py --dialect mysql --tables users,orders --format json
python schema_explorer.py --dialect sqlite --tables all --json
"""
import argparse
import json
import sys
import textwrap
from dataclasses import dataclass, asdict
from typing import List, Optional, Dict
# ---------------------------------------------------------------------------
# Introspection query templates per dialect
# ---------------------------------------------------------------------------
INTROSPECTION_QUERIES: Dict[str, Dict[str, str]] = {
"postgres": {
"tables": textwrap.dedent("""\
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
ORDER BY table_name;"""),
"columns": textwrap.dedent("""\
SELECT table_name, column_name, data_type, character_maximum_length,
is_nullable, column_default
FROM information_schema.columns
WHERE table_schema = 'public' {table_filter}
ORDER BY table_name, ordinal_position;"""),
"primary_keys": textwrap.dedent("""\
SELECT tc.table_name, kcu.column_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
WHERE tc.constraint_type = 'PRIMARY KEY' AND tc.table_schema = 'public'
ORDER BY tc.table_name;"""),
"foreign_keys": textwrap.dedent("""\
SELECT tc.table_name, kcu.column_name,
ccu.table_name AS foreign_table, ccu.column_name AS foreign_column
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
JOIN information_schema.constraint_column_usage ccu
ON tc.constraint_name = ccu.constraint_name
WHERE tc.constraint_type = 'FOREIGN KEY'
ORDER BY tc.table_name;"""),
"indexes": textwrap.dedent("""\
SELECT schemaname, tablename, indexname, indexdef
FROM pg_indexes
WHERE schemaname = 'public'
ORDER BY tablename, indexname;"""),
"table_sizes": textwrap.dedent("""\
SELECT relname AS table_name,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
pg_size_pretty(pg_relation_size(relid)) AS data_size,
pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size
FROM pg_catalog.pg_statio_user_tables
ORDER BY pg_total_relation_size(relid) DESC;"""),
},
"mysql": {
"tables": textwrap.dedent("""\
SELECT table_name
FROM information_schema.tables
WHERE table_schema = DATABASE() AND table_type = 'BASE TABLE'
ORDER BY table_name;"""),
"columns": textwrap.dedent("""\
SELECT table_name, column_name, column_type, is_nullable,
column_default, column_key, extra
FROM information_schema.columns
WHERE table_schema = DATABASE() {table_filter}
ORDER BY table_name, ordinal_position;"""),
"foreign_keys": textwrap.dedent("""\
SELECT table_name, column_name, referenced_table_name, referenced_column_name
FROM information_schema.key_column_usage
WHERE table_schema = DATABASE() AND referenced_table_name IS NOT NULL
ORDER BY table_name;"""),
"indexes": textwrap.dedent("""\
SELECT table_name, index_name, non_unique, column_name, seq_in_index
FROM information_schema.statistics
WHERE table_schema = DATABASE()
ORDER BY table_name, index_name, seq_in_index;"""),
"table_sizes": textwrap.dedent("""\
SELECT table_name, table_rows,
ROUND(data_length / 1024 / 1024, 2) AS data_mb,
ROUND(index_length / 1024 / 1024, 2) AS index_mb
FROM information_schema.tables
WHERE table_schema = DATABASE()
ORDER BY data_length DESC;"""),
},
"sqlite": {
"tables": textwrap.dedent("""\
SELECT name FROM sqlite_master
WHERE type = 'table' AND name NOT LIKE 'sqlite_%'
ORDER BY name;"""),
"columns": textwrap.dedent("""\
-- Run for each table:
PRAGMA table_info({table_name});"""),
"foreign_keys": textwrap.dedent("""\
-- Run for each table:
PRAGMA foreign_key_list({table_name});"""),
"indexes": textwrap.dedent("""\
SELECT name, tbl_name, sql FROM sqlite_master
WHERE type = 'index'
ORDER BY tbl_name, name;"""),
"schema_dump": textwrap.dedent("""\
SELECT name, sql FROM sqlite_master
WHERE type = 'table'
ORDER BY name;"""),
},
"sqlserver": {
"tables": textwrap.dedent("""\
SELECT TABLE_NAME
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_TYPE = 'BASE TABLE'
ORDER BY TABLE_NAME;"""),
"columns": textwrap.dedent("""\
SELECT t.name AS table_name, c.name AS column_name,
ty.name AS data_type, c.max_length, c.precision, c.scale,
c.is_nullable, dc.definition AS default_value
FROM sys.columns c
JOIN sys.tables t ON c.object_id = t.object_id
JOIN sys.types ty ON c.user_type_id = ty.user_type_id
LEFT JOIN sys.default_constraints dc ON c.default_object_id = dc.object_id
{table_filter}
ORDER BY t.name, c.column_id;"""),
"foreign_keys": textwrap.dedent("""\
SELECT fk.name AS fk_name,
tp.name AS parent_table, cp.name AS parent_column,
tr.name AS referenced_table, cr.name AS referenced_column
FROM sys.foreign_keys fk
JOIN sys.foreign_key_columns fkc ON fk.object_id = fkc.constraint_object_id
JOIN sys.tables tp ON fkc.parent_object_id = tp.object_id
JOIN sys.columns cp ON fkc.parent_object_id = cp.object_id AND fkc.parent_column_id = cp.column_id
JOIN sys.tables tr ON fkc.referenced_object_id = tr.object_id
JOIN sys.columns cr ON fkc.referenced_object_id = cr.object_id AND fkc.referenced_column_id = cr.column_id
ORDER BY tp.name;"""),
"indexes": textwrap.dedent("""\
SELECT t.name AS table_name, i.name AS index_name,
i.type_desc, i.is_unique, c.name AS column_name,
ic.key_ordinal
FROM sys.indexes i
JOIN sys.index_columns ic ON i.object_id = ic.object_id AND i.index_id = ic.index_id
JOIN sys.columns c ON ic.object_id = c.object_id AND ic.column_id = c.column_id
JOIN sys.tables t ON i.object_id = t.object_id
WHERE i.name IS NOT NULL
ORDER BY t.name, i.name, ic.key_ordinal;"""),
},
}
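# The {table_filter} and {table_name} placeholders are substituted at
# generation time. For --tables users,orders on postgres, the columns query
# becomes:
#   WHERE table_schema = 'public' AND table_name IN ('users', 'orders')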
# ---------------------------------------------------------------------------
# Documentation generators
# ---------------------------------------------------------------------------
SAMPLE_TABLES = {
"users": {
"columns": [
{"name": "id", "type": "SERIAL / INT", "nullable": "NO", "default": "auto", "notes": "Primary key"},
{"name": "email", "type": "VARCHAR(255)", "nullable": "NO", "default": "-", "notes": "Unique, indexed"},
{"name": "name", "type": "VARCHAR(255)", "nullable": "YES", "default": "NULL", "notes": "Display name"},
{"name": "password_hash", "type": "VARCHAR(255)", "nullable": "NO", "default": "-", "notes": "bcrypt hash"},
{"name": "created_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
{"name": "updated_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
],
"indexes": ["PRIMARY KEY (id)", "UNIQUE INDEX (email)"],
"foreign_keys": [],
},
"orders": {
"columns": [
{"name": "id", "type": "SERIAL / INT", "nullable": "NO", "default": "auto", "notes": "Primary key"},
{"name": "user_id", "type": "INTEGER", "nullable": "NO", "default": "-", "notes": "FK -> users.id"},
{"name": "status", "type": "VARCHAR(50)", "nullable": "NO", "default": "'pending'", "notes": "pending/paid/shipped/cancelled"},
{"name": "total", "type": "DECIMAL(19,4)", "nullable": "NO", "default": "0", "notes": "Order total in cents"},
{"name": "created_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
],
"indexes": ["PRIMARY KEY (id)", "INDEX (user_id)", "INDEX (status, created_at)"],
"foreign_keys": ["user_id -> users.id ON DELETE CASCADE"],
},
}
def generate_md(dialect: str, tables: List[str]) -> str:
"""Generate markdown schema documentation."""
lines = [f"# Database Schema Documentation ({dialect.upper()})\n"]
lines.append(f"Generated by sql-database-assistant schema_explorer.\n")
# Introspection queries section
lines.append("## Introspection Queries\n")
lines.append("Run these queries against your database to extract schema information:\n")
    queries = INTROSPECTION_QUERIES.get(dialect, {})
    # sys.* catalog views expose the table name as t.name rather than table_name
    filter_col = "t.name" if dialect == "sqlserver" else "table_name"
    for qname, qsql in queries.items():
        table_filter = ""
        if "all" not in tables:
            tlist = ", ".join(f"'{t}'" for t in tables)
            table_filter = f"AND {filter_col} IN ({tlist})"
        qsql = qsql.replace("{table_filter}", table_filter)
        qsql = qsql.replace("{table_name}", tables[0] if tables and tables[0] != "all" else "TABLE_NAME")
lines.append(f"### {qname.replace('_', ' ').title()}\n")
lines.append(f"```sql\n{qsql}\n```\n")
# Sample documentation
lines.append("## Sample Table Documentation\n")
lines.append("Below is an example of the documentation format produced from query results:\n")
show_tables = tables if "all" not in tables else list(SAMPLE_TABLES.keys())
for tname in show_tables:
sample = SAMPLE_TABLES.get(tname)
if not sample:
lines.append(f"### {tname}\n")
lines.append("_No sample data available. Run introspection queries above._\n")
continue
lines.append(f"### {tname}\n")
lines.append("| Column | Type | Nullable | Default | Notes |")
lines.append("|--------|------|----------|---------|-------|")
for col in sample["columns"]:
lines.append(f"| {col['name']} | {col['type']} | {col['nullable']} | {col['default']} | {col['notes']} |")
lines.append("")
if sample["indexes"]:
lines.append("**Indexes:** " + ", ".join(sample["indexes"]))
if sample["foreign_keys"]:
lines.append("**Foreign Keys:** " + ", ".join(sample["foreign_keys"]))
lines.append("")
return "\n".join(lines)
def generate_json_output(dialect: str, tables: List[str]) -> dict:
"""Generate JSON schema documentation."""
    queries = INTROSPECTION_QUERIES.get(dialect, {})
    # sys.* catalog views expose the table name as t.name rather than table_name
    filter_col = "t.name" if dialect == "sqlserver" else "table_name"
    processed = {}
    for qname, qsql in queries.items():
        table_filter = ""
        if "all" not in tables:
            tlist = ", ".join(f"'{t}'" for t in tables)
            table_filter = f"AND {filter_col} IN ({tlist})"
        processed[qname] = qsql.replace("{table_filter}", table_filter).replace(
            "{table_name}", tables[0] if tables and tables[0] != "all" else "TABLE_NAME"
        )
show_tables = tables if "all" not in tables else list(SAMPLE_TABLES.keys())
sample_docs = {}
for tname in show_tables:
sample = SAMPLE_TABLES.get(tname)
if sample:
sample_docs[tname] = sample
return {
"dialect": dialect,
"requested_tables": tables,
"introspection_queries": processed,
"sample_documentation": sample_docs,
"instructions": "Run the introspection queries against your database, then use the results to populate documentation in the sample format shown.",
}
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
parser = argparse.ArgumentParser(
description="Generate schema documentation from database introspection.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s --dialect postgres --tables all --format md
%(prog)s --dialect mysql --tables users,orders --format json
%(prog)s --dialect sqlite --tables all --json
""",
)
parser.add_argument(
"--dialect", required=True, choices=["postgres", "mysql", "sqlite", "sqlserver"],
help="Target database dialect",
)
parser.add_argument(
"--tables", default="all",
help="Comma-separated table names or 'all' (default: all)",
)
parser.add_argument(
"--format", choices=["md", "json"], default="md", dest="fmt",
help="Output format (default: md)",
)
parser.add_argument(
"--json", action="store_true", dest="json_output",
help="Output as JSON (overrides --format)",
)
args = parser.parse_args()
tables = [t.strip() for t in args.tables.split(",")]
if args.json_output or args.fmt == "json":
result = generate_json_output(args.dialect, tables)
print(json.dumps(result, indent=2))
else:
print(generate_md(args.dialect, tables))
if __name__ == "__main__":
main()