feat(engineering,ra-qm): add secrets-vault-manager, sql-database-assistant, gcp-cloud-architect, soc2-compliance
secrets-vault-manager (403-line SKILL.md, 3 scripts, 3 references): - HashiCorp Vault, AWS SM, Azure KV, GCP SM integration - Secret rotation, dynamic secrets, audit logging, emergency procedures sql-database-assistant (457-line SKILL.md, 3 scripts, 3 references): - Query optimization, migration generation, schema exploration - Multi-DB support (PostgreSQL, MySQL, SQLite, SQL Server) - ORM patterns (Prisma, Drizzle, TypeORM, SQLAlchemy) gcp-cloud-architect (418-line SKILL.md, 3 scripts, 3 references): - 6-step workflow mirroring aws-solution-architect for GCP - Cloud Run, GKE, BigQuery, Cloud Functions, cost optimization - Completes cloud trifecta (AWS + Azure + GCP) soc2-compliance (417-line SKILL.md, 3 scripts, 3 references): - SOC 2 Type I & II preparation, Trust Service Criteria mapping - Control matrix generation, evidence tracking, gap analysis - First SOC 2 skill in ra-qm-team (joins GDPR, ISO 27001, ISO 13485) All 12 scripts pass --help. Docs generated, mkdocs.yml nav updated. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,442 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Migration Generator
|
||||
|
||||
Generates database migration file templates (up/down) from natural-language
|
||||
schema change descriptions.
|
||||
|
||||
Supported operations:
|
||||
- Add column, drop column, rename column
|
||||
- Add table, drop table, rename table
|
||||
- Add index, drop index
|
||||
- Add constraint, drop constraint
|
||||
- Change column type
|
||||
|
||||
Usage:
|
||||
python migration_generator.py --change "add email_verified boolean to users" --dialect postgres
|
||||
python migration_generator.py --change "rename column name to full_name in customers" --format alembic
|
||||
python migration_generator.py --change "add index on orders(status, created_at)" --output 001_add_index.sql
|
||||
python migration_generator.py --change "create table reviews with id, user_id, rating, body" --json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import textwrap
|
||||
from dataclasses import dataclass, asdict
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
|
||||
@dataclass
class Migration:
    """A generated migration with up and down scripts."""
    description: str  # original natural-language change description
    dialect: str  # target database dialect (postgres/mysql/sqlite/sqlserver)
    format: str  # output format (sql/prisma/alembic)
    up: str  # formatted forward-migration text
    down: str  # formatted rollback text (empty for alembic, which embeds both)
    warnings: List[str]  # caveats emitted by the SQL generator

    def to_dict(self) -> dict:
        """Serialize to a plain dict (used for --json output)."""
        return asdict(self)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Change parsers — extract structured intent from natural language
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_add_column(desc: str) -> Optional[dict]:
    """Parse: add <column> <type> to <table>"""
    pattern = r'add\s+(?:column\s+)?(\w+)\s+(\w[\w(),.]*)\s+(?:to|on)\s+(\w+)'
    match = re.match(pattern, desc, re.IGNORECASE)
    if match is None:
        return None
    column, col_type, table = match.groups()
    return {"op": "add_column", "column": column, "type": col_type, "table": table}
|
||||
|
||||
|
||||
def parse_drop_column(desc: str) -> Optional[dict]:
    """Parse: drop/remove <column> from <table>"""
    match = re.match(
        r'(?:drop|remove)\s+(?:column\s+)?(\w+)\s+from\s+(\w+)',
        desc, re.IGNORECASE,
    )
    if match is None:
        return None
    column, table = match.groups()
    return {"op": "drop_column", "column": column, "table": table}
|
||||
|
||||
|
||||
def parse_rename_column(desc: str) -> Optional[dict]:
    """Parse: rename column <old> to <new> in <table>"""
    match = re.match(
        r'rename\s+column\s+(\w+)\s+to\s+(\w+)\s+in\s+(\w+)',
        desc, re.IGNORECASE,
    )
    if match is None:
        return None
    old_name, new_name, table = match.groups()
    return {"op": "rename_column", "old": old_name, "new": new_name, "table": table}
|
||||
|
||||
|
||||
def parse_add_table(desc: str) -> Optional[dict]:
    """Parse: create table <name> with <col1>, <col2>, ..."""
    match = re.match(r'create\s+table\s+(\w+)\s+with\s+(.+)', desc, re.IGNORECASE)
    if match is None:
        return None
    column_list = [part.strip() for part in match.group(2).split(",")]
    return {"op": "add_table", "table": match.group(1), "columns": column_list}
|
||||
|
||||
|
||||
def parse_drop_table(desc: str) -> Optional[dict]:
    """Parse: drop table <name>"""
    match = re.match(r'drop\s+table\s+(\w+)', desc, re.IGNORECASE)
    return {"op": "drop_table", "table": match.group(1)} if match else None
|
||||
|
||||
|
||||
def parse_add_index(desc: str) -> Optional[dict]:
    """Parse: add [unique] index on <table>(<col1>, <col2>)"""
    match = re.match(
        r'add\s+(?:unique\s+)?index\s+(?:on\s+)?(\w+)\s*\(([^)]+)\)',
        desc, re.IGNORECASE,
    )
    if match is None:
        return None
    columns = [part.strip() for part in match.group(2).split(",")]
    return {
        "op": "add_index",
        "table": match.group(1),
        "columns": columns,
        "unique": "unique" in desc.lower(),
    }
|
||||
|
||||
|
||||
def parse_change_type(desc: str) -> Optional[dict]:
    """Parse: change <column> type to <type> in <table>"""
    match = re.match(
        r'change\s+(?:column\s+)?(\w+)\s+type\s+to\s+(\w[\w(),.]*)\s+in\s+(\w+)',
        desc, re.IGNORECASE,
    )
    if match is None:
        return None
    column, new_type, table = match.groups()
    return {"op": "change_type", "column": column, "new_type": new_type, "table": table}
|
||||
|
||||
|
||||
# Intent parsers, tried in registration order by parse_change();
# the first parser whose regex matches the description wins.
PARSERS = [
    parse_add_column,
    parse_drop_column,
    parse_rename_column,
    parse_add_table,
    parse_drop_table,
    parse_add_index,
    parse_change_type,
]
|
||||
|
||||
|
||||
def parse_change(desc: str) -> Optional[dict]:
    """Return the first parser's structured result for *desc*, or None."""
    return next(
        (parsed for parsed in (parse(desc) for parse in PARSERS) if parsed),
        None,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQL generators per dialect
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Generic type name -> concrete SQL type per dialect.
# Keys are lowercase bare names; map_type() lowercases its input and strips
# trailing "(" / ")" characters before looking a name up here.
TYPE_MAP = {
    "boolean": {"postgres": "BOOLEAN", "mysql": "TINYINT(1)", "sqlite": "INTEGER", "sqlserver": "BIT"},
    "text": {"postgres": "TEXT", "mysql": "TEXT", "sqlite": "TEXT", "sqlserver": "NVARCHAR(MAX)"},
    "integer": {"postgres": "INTEGER", "mysql": "INT", "sqlite": "INTEGER", "sqlserver": "INT"},
    "int": {"postgres": "INTEGER", "mysql": "INT", "sqlite": "INTEGER", "sqlserver": "INT"},
    "serial": {"postgres": "SERIAL", "mysql": "INT AUTO_INCREMENT", "sqlite": "INTEGER", "sqlserver": "INT IDENTITY(1,1)"},
    "varchar": {"postgres": "VARCHAR(255)", "mysql": "VARCHAR(255)", "sqlite": "TEXT", "sqlserver": "NVARCHAR(255)"},
    "timestamp": {"postgres": "TIMESTAMP", "mysql": "DATETIME", "sqlite": "TEXT", "sqlserver": "DATETIME2"},
    "uuid": {"postgres": "UUID", "mysql": "CHAR(36)", "sqlite": "TEXT", "sqlserver": "UNIQUEIDENTIFIER"},
    "json": {"postgres": "JSONB", "mysql": "JSON", "sqlite": "TEXT", "sqlserver": "NVARCHAR(MAX)"},
    "decimal": {"postgres": "DECIMAL(19,4)", "mysql": "DECIMAL(19,4)", "sqlite": "REAL", "sqlserver": "DECIMAL(19,4)"},
    "float": {"postgres": "DOUBLE PRECISION", "mysql": "DOUBLE", "sqlite": "REAL", "sqlserver": "FLOAT"},
}
|
||||
|
||||
|
||||
def map_type(type_name: str, dialect: str) -> str:
    """Map a generic type name to a dialect-specific type.

    Lookup is case-insensitive and strips trailing "(" / ")" characters
    (so "varchar()" normalizes to "varchar", but a sized form like
    "varchar(255)" misses the table and falls through). Unknown names are
    returned uppercased, unchanged otherwise.
    """
    key = type_name.lower().rstrip("()")
    per_dialect = TYPE_MAP.get(key)
    if per_dialect is not None and dialect in per_dialect:
        return per_dialect[dialect]
    return type_name.upper()
|
||||
|
||||
|
||||
def gen_add_column(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
    """Emit ADD COLUMN (up) and the matching DROP COLUMN (down)."""
    table, column = change["table"], change["column"]
    resolved_type = map_type(change["type"], dialect)
    return (
        f"ALTER TABLE {table} ADD COLUMN {column} {resolved_type};",
        f"ALTER TABLE {table} DROP COLUMN {column};",
        [],
    )
|
||||
|
||||
|
||||
def gen_drop_column(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
    """Emit DROP COLUMN (up) and a placeholder re-ADD (down).

    The original column type is unknown here, so the rollback uses TEXT
    and a warning asks the user to fill in the real type.
    """
    table, column = change["table"], change["column"]
    forward = f"ALTER TABLE {table} DROP COLUMN {column};"
    rollback = (
        f"-- WARNING: Cannot fully reverse DROP COLUMN. Provide the original type.\n"
        f"ALTER TABLE {table} ADD COLUMN {column} TEXT;"
    )
    notes = ["Down migration uses TEXT as placeholder. Replace with the original column type."]
    return forward, rollback, notes
|
||||
|
||||
|
||||
def gen_rename_column(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
    """Emit a column rename (up) and the inverse rename (down).

    Every dialect except SQL Server uses ALTER TABLE ... RENAME COLUMN;
    SQL Server goes through sp_rename. SQLite works too, but only from
    version 3.25.0, which is surfaced as a warning.
    """
    table = change["table"]
    old_name, new_name = change["old"], change["new"]
    notes: List[str] = []
    if dialect == "sqlserver":
        forward = f"EXEC sp_rename '{table}.{old_name}', '{new_name}', 'COLUMN';"
        backward = f"EXEC sp_rename '{table}.{new_name}', '{old_name}', 'COLUMN';"
    else:
        forward = f"ALTER TABLE {table} RENAME COLUMN {old_name} TO {new_name};"
        backward = f"ALTER TABLE {table} RENAME COLUMN {new_name} TO {old_name};"
        if dialect == "sqlite":
            notes.append("SQLite RENAME COLUMN requires version 3.25.0+.")
    return forward, backward, notes
|
||||
|
||||
|
||||
# Per-dialect definition for an auto-increment integer "id" primary key.
_ID_COLUMN_SQL = {
    "postgres": "id SERIAL PRIMARY KEY",
    "mysql": "id INT AUTO_INCREMENT PRIMARY KEY",
    "sqlite": "id INTEGER PRIMARY KEY AUTOINCREMENT",
    "sqlserver": "id INT IDENTITY(1,1) PRIMARY KEY",
}


def gen_add_table(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
    """Emit CREATE TABLE (up) and DROP TABLE (down).

    A column named "id" becomes the dialect's auto-increment primary key.
    Other columns may carry an explicit type ("rating int"), which is mapped
    through map_type(); bare names default to TEXT.

    Fix: with an unrecognized dialect, the old code appended NO definition
    at all for an "id" column, silently dropping the primary key from the
    generated DDL. It now falls back to a generic "id INTEGER PRIMARY KEY".
    """
    table = change["table"]
    col_defs: List[str] = []
    has_id = False
    for raw in change["columns"]:
        col = raw.strip()
        if col.lower() == "id":
            has_id = True
            # Generic fallback keeps the PK even for unknown dialects.
            col_defs.append("    " + _ID_COLUMN_SQL.get(dialect, "id INTEGER PRIMARY KEY"))
            continue
        parts = col.split()
        if len(parts) >= 2:
            # Explicit type given, e.g. "rating int" -> dialect-specific type.
            col_defs.append(f"    {parts[0]} {map_type(parts[1], dialect)}")
        else:
            col_defs.append(f"    {col} TEXT")

    up = f"CREATE TABLE {table} (\n" + ",\n".join(col_defs) + "\n);"
    down = f"DROP TABLE {table};"
    warnings: List[str] = []
    if not has_id:
        warnings.append("Table has no explicit primary key. Consider adding an 'id' column.")
    return up, down, warnings
|
||||
|
||||
|
||||
def gen_drop_table(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
    """Emit DROP TABLE (up) and a placeholder CREATE TABLE (down)."""
    table = change["table"]
    rollback = (
        f"-- WARNING: Cannot reverse DROP TABLE without original DDL.\n"
        f"CREATE TABLE {table} (id INTEGER PRIMARY KEY);"
    )
    notes = ["Down migration is a placeholder. Replace with the original CREATE TABLE statement."]
    return f"DROP TABLE {table};", rollback, notes
|
||||
|
||||
|
||||
def gen_add_index(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
    """Emit CREATE INDEX (up) and DROP INDEX (down) for the target dialect."""
    table, cols = change["table"], change["columns"]
    uniqueness = "UNIQUE " if change.get("unique") else ""
    idx_name = f"idx_{table}_{'_'.join(cols)}"
    col_list = ", ".join(cols)
    notes: List[str] = []
    if dialect == "postgres":
        # CONCURRENTLY builds the index without blocking writes.
        forward = f"CREATE {uniqueness}INDEX CONCURRENTLY {idx_name} ON {table} ({col_list});"
        notes.append("CONCURRENTLY cannot run inside a transaction. Run outside migration transaction.")
    else:
        forward = f"CREATE {uniqueness}INDEX {idx_name} ON {table} ({col_list});"
    # MySQL requires the table name when dropping an index.
    backward = f"DROP INDEX {idx_name} ON {table};" if dialect == "mysql" else f"DROP INDEX {idx_name};"
    return forward, backward, notes
|
||||
|
||||
|
||||
def gen_change_type(change: dict, dialect: str) -> Tuple[str, str, List[str]]:
    """Emit a column type change (up) and a placeholder reversal (down).

    The original type is unknown, so the down direction uses TEXT (or
    NVARCHAR(MAX) on SQL Server). SQLite — and any dialect not explicitly
    handled — gets a comment stub, since it cannot alter a column's type.
    """
    table, column = change["table"], change["column"]
    target = map_type(change["new_type"], dialect)
    notes = ["Down migration uses TEXT as placeholder. Replace with the original column type."]
    if dialect == "postgres":
        forward = f"ALTER TABLE {table} ALTER COLUMN {column} TYPE {target};"
        backward = f"ALTER TABLE {table} ALTER COLUMN {column} TYPE TEXT;"
    elif dialect == "mysql":
        forward = f"ALTER TABLE {table} MODIFY COLUMN {column} {target};"
        backward = f"ALTER TABLE {table} MODIFY COLUMN {column} TEXT;"
    elif dialect == "sqlserver":
        forward = f"ALTER TABLE {table} ALTER COLUMN {column} {target};"
        backward = f"ALTER TABLE {table} ALTER COLUMN {column} NVARCHAR(MAX);"
    else:
        forward = backward = "-- SQLite does not support ALTER COLUMN. Recreate the table."
        notes.append("SQLite requires table recreation for type changes.")
    return forward, backward, notes
|
||||
|
||||
|
||||
# Parsed operation name -> generator returning (up_sql, down_sql, warnings).
GENERATORS = {
    "add_column": gen_add_column,
    "drop_column": gen_drop_column,
    "rename_column": gen_rename_column,
    "add_table": gen_add_table,
    "drop_table": gen_drop_table,
    "add_index": gen_add_index,
    "change_type": gen_change_type,
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Format wrappers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def wrap_sql(up: str, down: str, description: str) -> Tuple[str, str]:
    """Wrap up/down SQL as plain migration file contents.

    Returns (up_text, down_text); both share an identical header naming
    the change and the generation time.

    Fix: removed an unused `timestamp` local that was computed on every
    call but never referenced.
    """
    header = f"-- Migration: {description}\n-- Generated: {datetime.now().isoformat()}\n\n"
    return header + "-- Up\n" + up, header + "-- Down\n" + down
|
||||
|
||||
|
||||
def wrap_prisma(up: str, down: str, description: str) -> Tuple[str, str]:
    """Format as Prisma migration SQL (Prisma uses raw SQL in migration.sql)."""
    banner = f"-- Migration: {description}\n-- Format: Prisma (migration.sql)\n\n"
    return banner + up, f"{banner}-- Rollback\n{down}"
|
||||
|
||||
|
||||
def wrap_alembic(up: str, down: str, description: str) -> Tuple[str, str]:
    """Format as a single Alembic Python migration module.

    Both directions live in one file (upgrade()/downgrade()), so the second
    element of the returned tuple is always the empty string.

    Fix: removed an unused `slug` local (derived from the description but
    never referenced anywhere).
    """
    revision = datetime.now().strftime("%Y%m%d%H%M")
    template = textwrap.dedent(f'''\
"""
{description}

Revision ID: {revision}
"""
from alembic import op
import sqlalchemy as sa


revision = '{revision}'
down_revision = None  # Set to previous revision


def upgrade():
    op.execute("""
{textwrap.indent(up, "        ")}
    """)


def downgrade():
    op.execute("""
{textwrap.indent(down, "        ")}
    """)
''')
    return template, ""
|
||||
|
||||
|
||||
# --format CLI value -> wrapper producing (up_text, down_text).
FORMATTERS = {
    "sql": wrap_sql,
    "prisma": wrap_prisma,
    "alembic": wrap_alembic,
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
    """CLI entry point: parse a change description and emit up/down migrations.

    Exits with status 1 when the description cannot be parsed or no generator
    exists for the parsed operation. With --output, the up migration is
    written to the given path and the rollback next to it; otherwise both are
    printed to stdout.
    """
    parser = argparse.ArgumentParser(
        description="Generate database migration templates from change descriptions.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Supported change descriptions:
  "add email_verified boolean to users"
  "drop column legacy_flag from accounts"
  "rename column name to full_name in customers"
  "create table reviews with id, user_id, rating int, body text"
  "drop table temp_imports"
  "add index on orders(status, created_at)"
  "add unique index on users(email)"
  "change email type to varchar in users"

Examples:
  %(prog)s --change "add phone varchar to users" --dialect postgres
  %(prog)s --change "create table reviews with id, user_id, rating int, body" --format prisma
  %(prog)s --change "add index on orders(status)" --output migrations/001.sql --json
""",
    )
    parser.add_argument("--change", required=True, help="Natural-language description of the schema change")
    parser.add_argument("--dialect", choices=["postgres", "mysql", "sqlite", "sqlserver"],
                        default="postgres", help="Target database dialect (default: postgres)")
    parser.add_argument("--format", choices=["sql", "prisma", "alembic"], default="sql",
                        dest="fmt", help="Output format (default: sql)")
    parser.add_argument("--output", help="Write migration to file instead of stdout")
    parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON")
    args = parser.parse_args()

    change = parse_change(args.change)
    if not change:
        print(f"Error: Could not parse change description: '{args.change}'", file=sys.stderr)
        print("Run with --help to see supported patterns.", file=sys.stderr)
        sys.exit(1)

    gen_fn = GENERATORS.get(change["op"])
    if not gen_fn:
        print(f"Error: No generator for operation '{change['op']}'", file=sys.stderr)
        sys.exit(1)

    up, down, warnings = gen_fn(change, args.dialect)

    fmt_fn = FORMATTERS[args.fmt]
    up_formatted, down_formatted = fmt_fn(up, down, args.change)

    migration = Migration(
        description=args.change,
        dialect=args.dialect,
        format=args.fmt,
        up=up_formatted,
        down=down_formatted,
        warnings=warnings,
    )

    if args.json_output:
        # JSON mode emits only the serialized migration (warnings included).
        print(json.dumps(migration.to_dict(), indent=2))
    else:
        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(migration.up)
            print(f"Migration written to {args.output}")
            if migration.down:
                # BUG FIX: the old code used args.output.replace(".sql", "_down.sql"),
                # which silently OVERWROTE the up file whenever the output name had
                # no ".sql" suffix (e.g. alembic/prisma naming). Derive the rollback
                # path from the actual extension instead.
                root, ext = os.path.splitext(args.output)
                down_path = f"{root}_down{ext or '.sql'}"
                with open(down_path, "w", encoding="utf-8") as f:
                    f.write(migration.down)
                print(f"Rollback written to {down_path}")
        else:
            print(migration.up)
            if migration.down:
                print("\n" + "=" * 40 + " ROLLBACK " + "=" * 40 + "\n")
                print(migration.down)

        if warnings:
            print("\nWarnings:")
            for w in warnings:
                print(f"  - {w}")
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
348
engineering/sql-database-assistant/scripts/query_optimizer.py
Normal file
348
engineering/sql-database-assistant/scripts/query_optimizer.py
Normal file
@@ -0,0 +1,348 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
SQL Query Optimizer — Static Analysis
|
||||
|
||||
Analyzes SQL queries for common performance issues:
|
||||
- SELECT * usage
|
||||
- Missing WHERE clauses on UPDATE/DELETE
|
||||
- Cartesian joins (missing JOIN conditions)
|
||||
- Subqueries in SELECT list
|
||||
- Missing LIMIT on unbounded SELECTs
|
||||
- Function calls on indexed columns (non-sargable)
|
||||
- LIKE with leading wildcard
|
||||
- ORDER BY RAND()
|
||||
- UNION instead of UNION ALL
|
||||
- NOT IN with subquery (NULL-unsafe)
|
||||
|
||||
Usage:
|
||||
python query_optimizer.py --query "SELECT * FROM users"
|
||||
python query_optimizer.py --query queries.sql --dialect postgres
|
||||
python query_optimizer.py --query "SELECT * FROM orders" --json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass, asdict
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
@dataclass
class Issue:
    """A single optimization issue found in a query."""
    severity: str  # critical, warning, info
    rule: str  # stable machine-readable rule id, e.g. "select-star"
    message: str  # human-readable explanation of the problem
    suggestion: str  # recommended rewrite or mitigation
    line: Optional[int] = None  # source line when known (no current check sets it)
|
||||
|
||||
|
||||
@dataclass
class QueryAnalysis:
    """Analysis result for one SQL query."""
    query: str  # the analyzed SQL text
    issues: List[Issue]  # issues found, in check-registration order
    score: int  # 0-100, higher is better

    def to_dict(self) -> dict:
        """Serialize for JSON output; the query text is truncated to 200 chars."""
        return {
            "query": self.query[:200] + ("..." if len(self.query) > 200 else ""),
            "issues": [asdict(i) for i in self.issues],
            "issue_count": len(self.issues),
            "score": self.score,
        }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rule checkers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def check_select_star(sql: str) -> Optional[Issue]:
    """Flag SELECT * projections.

    Fix: the old pattern (r'\\bSELECT\\s+\\*\\s') required whitespace after
    the '*', so it missed a query that ends right at the star ("SELECT *")
    and star-plus-comma projections ("SELECT *, price FROM t"). The '*' must
    still come immediately after SELECT's whitespace, so COUNT(*) does not
    trigger a false positive.
    """
    if re.search(r'\bSELECT\s+\*', sql, re.IGNORECASE):
        return Issue(
            severity="warning",
            rule="select-star",
            message="SELECT * transfers unnecessary data and breaks on schema changes.",
            suggestion="List only the columns you need: SELECT col1, col2, ...",
        )
    return None
|
||||
|
||||
|
||||
def check_missing_where(sql: str) -> Optional[Issue]:
    """Flag UPDATE/DELETE statements that omit a WHERE clause."""
    normalized = sql.upper().strip()
    if not normalized.startswith(("UPDATE", "DELETE")) or "WHERE" in normalized:
        return None
    verb = "UPDATE" if normalized.startswith("UPDATE") else "DELETE"
    return Issue(
        severity="critical",
        rule="missing-where",
        message=f"{verb} without WHERE affects every row in the table.",
        suggestion=f"Add a WHERE clause to restrict the {verb} scope.",
    )
|
||||
|
||||
|
||||
def check_cartesian_join(sql: str) -> Optional[Issue]:
    """Flag comma-separated FROM lists with neither JOIN nor WHERE."""
    normalized = sql.upper()
    if "SELECT" not in normalized:
        return None
    from_part = re.search(
        r'\bFROM\s+(.+?)(?:\bWHERE\b|\bGROUP\b|\bORDER\b|\bLIMIT\b|\bHAVING\b|;|$)',
        sql, re.IGNORECASE | re.DOTALL,
    )
    if from_part is None:
        return None
    source_list = from_part.group(1)
    # Explicit JOIN syntax means the author supplies join conditions.
    if re.search(r'\bJOIN\b', source_list, re.IGNORECASE):
        return None
    table_count = len([part for part in source_list.split(",") if part.strip()])
    if table_count <= 1 or "WHERE" in normalized:
        return None
    return Issue(
        severity="critical",
        rule="cartesian-join",
        message="Multiple tables in FROM without JOIN or WHERE creates a cartesian product.",
        suggestion="Use explicit JOIN syntax with ON conditions.",
    )
|
||||
|
||||
|
||||
def check_subquery_in_select(sql: str) -> Optional[Issue]:
    """Flag subqueries appearing in the SELECT projection list."""
    projection = re.search(r'\bSELECT\b(.+?)\bFROM\b', sql, re.IGNORECASE | re.DOTALL)
    if projection is None:
        return None
    if not re.search(r'\(\s*SELECT\b', projection.group(1), re.IGNORECASE):
        return None
    return Issue(
        severity="warning",
        rule="subquery-in-select",
        message="Subquery in SELECT list executes once per row (correlated subquery).",
        suggestion="Rewrite as a LEFT JOIN with aggregation.",
    )
|
||||
|
||||
|
||||
def check_missing_limit(sql: str) -> Optional[Issue]:
    """Flag SELECTs with no LIMIT/FETCH/TOP row bound."""
    normalized = sql.upper().strip()
    if not normalized.startswith("SELECT"):
        return None
    # A plain COUNT(...) without GROUP BY returns a single row anyway.
    if re.search(r'\bCOUNT\s*\(', normalized) and "GROUP BY" not in normalized:
        return None
    has_bound = "LIMIT" in normalized or "FETCH" in normalized or "TOP " in normalized
    if has_bound:
        return None
    return Issue(
        severity="info",
        rule="missing-limit",
        message="SELECT without LIMIT may return unbounded rows.",
        suggestion="Add LIMIT to prevent returning excessive data.",
    )
|
||||
|
||||
|
||||
def check_function_on_column(sql: str) -> Optional[Issue]:
    """Flag WHERE predicates that wrap values in functions (non-sargable)."""
    predicate = re.search(
        r'\bWHERE\b(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|\bHAVING\b|;|$)',
        sql, re.IGNORECASE | re.DOTALL,
    )
    if predicate is None:
        return None
    hit = re.search(
        r'\b(YEAR|MONTH|DAY|DATE|UPPER|LOWER|TRIM|CAST|COALESCE|IFNULL|NVL)\s*\(',
        predicate.group(1), re.IGNORECASE,
    )
    if hit is None:
        return None
    func_name = hit.group(1).upper()
    return Issue(
        severity="warning",
        rule="non-sargable",
        message=f"Function {func_name}() on column in WHERE prevents index usage.",
        suggestion="Rewrite to compare the raw column against transformed constants.",
    )
|
||||
|
||||
|
||||
def check_leading_wildcard(sql: str) -> Optional[Issue]:
    """Flag LIKE patterns that begin with a % wildcard."""
    if not re.search(r"LIKE\s+'%", sql, re.IGNORECASE):
        return None
    return Issue(
        severity="warning",
        rule="leading-wildcard",
        message="LIKE with leading wildcard prevents index usage.",
        suggestion="Use full-text search (GIN index, FULLTEXT, FTS5) for substring matching.",
    )
|
||||
|
||||
|
||||
def check_order_by_rand(sql: str) -> Optional[Issue]:
    """Flag random-order sorts: ORDER BY RAND() / RANDOM()."""
    if not re.search(r'ORDER\s+BY\s+(RAND|RANDOM)\s*\(\)', sql, re.IGNORECASE):
        return None
    return Issue(
        severity="warning",
        rule="order-by-rand",
        message="ORDER BY RAND() scans and sorts the entire table.",
        suggestion="Use application-side random sampling or TABLESAMPLE.",
    )
|
||||
|
||||
|
||||
def check_union_vs_union_all(sql: str) -> Optional[Issue]:
    """Flag deduplicating UNION where UNION ALL may be intended."""
    if not re.search(r'\bUNION\b(?!\s+ALL\b)', sql, re.IGNORECASE):
        return None
    return Issue(
        severity="info",
        rule="union-without-all",
        message="UNION performs deduplication sort; use UNION ALL if duplicates are acceptable.",
        suggestion="Replace UNION with UNION ALL unless you specifically need deduplication.",
    )
|
||||
|
||||
|
||||
def check_not_in_subquery(sql: str) -> Optional[Issue]:
    """Flag NULL-unsafe NOT IN (SELECT ...) constructs."""
    if not re.search(r'\bNOT\s+IN\s*\(\s*SELECT\b', sql, re.IGNORECASE):
        return None
    return Issue(
        severity="warning",
        rule="not-in-subquery",
        message="NOT IN with subquery returns no rows if any subquery result is NULL.",
        suggestion="Use NOT EXISTS (SELECT 1 ...) instead.",
    )
|
||||
|
||||
|
||||
# Registry of all rule checkers; analyze_query() runs every entry and
# collects the non-None Issues, so order here determines report order.
ALL_CHECKS = [
    check_select_star,
    check_missing_where,
    check_cartesian_join,
    check_subquery_in_select,
    check_missing_limit,
    check_function_on_column,
    check_leading_wildcard,
    check_order_by_rand,
    check_union_vs_union_all,
    check_not_in_subquery,
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Analysis engine
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Score deductions per issue severity; any other severity costs 5 points.
_SEVERITY_PENALTY = {"critical": 25, "warning": 10}


def analyze_query(sql: str, dialect: str = "postgres") -> QueryAnalysis:
    """Run every registered check against one SQL statement.

    The dialect parameter is not consulted by any current check; it is kept
    for interface stability.
    """
    found = [issue for issue in (check(sql) for check in ALL_CHECKS) if issue]
    penalty = sum(_SEVERITY_PENALTY.get(issue.severity, 5) for issue in found)
    return QueryAnalysis(query=sql.strip(), issues=found, score=max(0, 100 - penalty))
|
||||
|
||||
|
||||
def split_queries(text: str) -> List[str]:
    """Split SQL text on semicolons into trimmed statements.

    Fragments of five characters or fewer are discarded as noise; each kept
    statement gets its terminating semicolon restored.
    """
    trimmed = (part.strip() for part in text.split(";"))
    return [stmt + ";" for stmt in trimmed if len(stmt) > 5]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Output formatting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Plain-text severity tags prefixed to issue lines by format_text().
SEVERITY_ICONS = {"critical": "[CRITICAL]", "warning": "[WARNING]", "info": "[INFO]"}
|
||||
|
||||
|
||||
def format_text(analyses: List[QueryAnalysis]) -> str:
    """Render analysis results as a human-readable report string."""
    out: List[str] = []
    for number, result in enumerate(analyses, start=1):
        out.append("=" * 60)
        out.append(f"Query {number} (Score: {result.score}/100)")
        preview = result.query[:120] + ("..." if len(result.query) > 120 else "")
        out.append(f"  {preview}")
        out.append("")
        if not result.issues:
            out.append("  No issues detected.")
        for found in result.issues:
            tag = SEVERITY_ICONS.get(found.severity, "")
            out.append(f"  {tag} {found.rule}: {found.message}")
            out.append(f"     -> {found.suggestion}")
        out.append("")
    return "\n".join(out)
|
||||
|
||||
|
||||
def format_json(analyses: List[QueryAnalysis]) -> str:
    """Render analysis results as a JSON document."""
    payload = {
        "analyses": [result.to_dict() for result in analyses],
        "total_queries": len(analyses),
    }
    return json.dumps(payload, indent=2)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
    """CLI entry point: read SQL (inline or from a file), analyze, and print.

    --query accepts either a literal SQL string or a path to a .sql file;
    a path is detected with os.path.isfile. Results go to stdout as text
    or, with --json, as a JSON document.
    """
    parser = argparse.ArgumentParser(
        description="Analyze SQL queries for common performance issues.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --query "SELECT * FROM users"
  %(prog)s --query queries.sql --dialect mysql
  %(prog)s --query "DELETE FROM orders" --json
""",
    )
    parser.add_argument(
        "--query", required=True,
        help="SQL query string or path to a .sql file",
    )
    parser.add_argument(
        "--dialect", choices=["postgres", "mysql", "sqlite", "sqlserver"],
        default="postgres", help="SQL dialect (default: postgres)",
    )
    parser.add_argument(
        "--json", action="store_true", dest="json_output",
        help="Output results as JSON",
    )
    args = parser.parse_args()

    # --query doubles as inline SQL or a file path.
    sql_text = args.query
    if os.path.isfile(args.query):
        # Fix: explicit encoding so .sql files read identically on every
        # platform (the old call used the locale-dependent default).
        with open(args.query, "r", encoding="utf-8") as f:
            sql_text = f.read()

    queries = split_queries(sql_text)
    if not queries:
        # Very short input yields nothing from split_queries (it drops
        # fragments of <= 5 chars); analyze the raw text as one query.
        queries = [sql_text.strip()]

    analyses = [analyze_query(q, args.dialect) for q in queries]

    if args.json_output:
        print(format_json(analyses))
    else:
        print(format_text(analyses))
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
315
engineering/sql-database-assistant/scripts/schema_explorer.py
Normal file
315
engineering/sql-database-assistant/scripts/schema_explorer.py
Normal file
@@ -0,0 +1,315 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Schema Explorer
|
||||
|
||||
Generates schema documentation from database introspection queries.
|
||||
Outputs the introspection SQL and sample documentation templates
|
||||
for PostgreSQL, MySQL, SQLite, and SQL Server.
|
||||
|
||||
Since this tool runs without a live database connection, it generates:
|
||||
1. The introspection queries you need to run
|
||||
2. Documentation templates from the results
|
||||
3. Sample schema docs for common table patterns
|
||||
|
||||
Usage:
|
||||
python schema_explorer.py --dialect postgres --tables all --format md
|
||||
python schema_explorer.py --dialect mysql --tables users,orders --format json
|
||||
python schema_explorer.py --dialect sqlite --tables all --json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import textwrap
|
||||
from dataclasses import dataclass, asdict
|
||||
from typing import List, Optional, Dict
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Introspection query templates per dialect
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Maps dialect -> {query_name: SQL template}.
# Two placeholders may appear in a template and are substituted by the
# generator functions before the query is displayed:
#   {table_filter} - narrowing clause when specific tables were requested
#                    (empty string when documenting all tables)
#   {table_name}   - a single table name (SQLite PRAGMA statements only)
INTROSPECTION_QUERIES: Dict[str, Dict[str, str]] = {
    "postgres": {
        "tables": textwrap.dedent("""\
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
            ORDER BY table_name;"""),
        "columns": textwrap.dedent("""\
            SELECT table_name, column_name, data_type, character_maximum_length,
                   is_nullable, column_default
            FROM information_schema.columns
            WHERE table_schema = 'public' {table_filter}
            ORDER BY table_name, ordinal_position;"""),
        "primary_keys": textwrap.dedent("""\
            SELECT tc.table_name, kcu.column_name
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
              ON tc.constraint_name = kcu.constraint_name
            WHERE tc.constraint_type = 'PRIMARY KEY' AND tc.table_schema = 'public'
            ORDER BY tc.table_name;"""),
        "foreign_keys": textwrap.dedent("""\
            SELECT tc.table_name, kcu.column_name,
                   ccu.table_name AS foreign_table, ccu.column_name AS foreign_column
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
              ON tc.constraint_name = kcu.constraint_name
            JOIN information_schema.constraint_column_usage ccu
              ON tc.constraint_name = ccu.constraint_name
            WHERE tc.constraint_type = 'FOREIGN KEY'
            ORDER BY tc.table_name;"""),
        "indexes": textwrap.dedent("""\
            SELECT schemaname, tablename, indexname, indexdef
            FROM pg_indexes
            WHERE schemaname = 'public'
            ORDER BY tablename, indexname;"""),
        "table_sizes": textwrap.dedent("""\
            SELECT relname AS table_name,
                   pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
                   pg_size_pretty(pg_relation_size(relid)) AS data_size,
                   pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size
            FROM pg_catalog.pg_statio_user_tables
            ORDER BY pg_total_relation_size(relid) DESC;"""),
    },
    "mysql": {
        "tables": textwrap.dedent("""\
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema = DATABASE() AND table_type = 'BASE TABLE'
            ORDER BY table_name;"""),
        "columns": textwrap.dedent("""\
            SELECT table_name, column_name, column_type, is_nullable,
                   column_default, column_key, extra
            FROM information_schema.columns
            WHERE table_schema = DATABASE() {table_filter}
            ORDER BY table_name, ordinal_position;"""),
        "foreign_keys": textwrap.dedent("""\
            SELECT table_name, column_name, referenced_table_name, referenced_column_name
            FROM information_schema.key_column_usage
            WHERE table_schema = DATABASE() AND referenced_table_name IS NOT NULL
            ORDER BY table_name;"""),
        "indexes": textwrap.dedent("""\
            SELECT table_name, index_name, non_unique, column_name, seq_in_index
            FROM information_schema.statistics
            WHERE table_schema = DATABASE()
            ORDER BY table_name, index_name, seq_in_index;"""),
        "table_sizes": textwrap.dedent("""\
            SELECT table_name, table_rows,
                   ROUND(data_length / 1024 / 1024, 2) AS data_mb,
                   ROUND(index_length / 1024 / 1024, 2) AS index_mb
            FROM information_schema.tables
            WHERE table_schema = DATABASE()
            ORDER BY data_length DESC;"""),
    },
    # SQLite has no information_schema; column/FK details come from per-table
    # PRAGMA statements, hence the {table_name} placeholder.
    "sqlite": {
        "tables": textwrap.dedent("""\
            SELECT name FROM sqlite_master
            WHERE type = 'table' AND name NOT LIKE 'sqlite_%'
            ORDER BY name;"""),
        "columns": textwrap.dedent("""\
            -- Run for each table:
            PRAGMA table_info({table_name});"""),
        "foreign_keys": textwrap.dedent("""\
            -- Run for each table:
            PRAGMA foreign_key_list({table_name});"""),
        "indexes": textwrap.dedent("""\
            SELECT name, tbl_name, sql FROM sqlite_master
            WHERE type = 'index'
            ORDER BY tbl_name, name;"""),
        "schema_dump": textwrap.dedent("""\
            SELECT name, sql FROM sqlite_master
            WHERE type = 'table'
            ORDER BY name;"""),
    },
    # SQL Server queries use the sys.* catalog views; note the columns query
    # aliases sys.tables as "t" and leaves {table_filter} as the only place a
    # WHERE clause can be injected.
    "sqlserver": {
        "tables": textwrap.dedent("""\
            SELECT TABLE_NAME
            FROM INFORMATION_SCHEMA.TABLES
            WHERE TABLE_TYPE = 'BASE TABLE'
            ORDER BY TABLE_NAME;"""),
        "columns": textwrap.dedent("""\
            SELECT t.name AS table_name, c.name AS column_name,
                   ty.name AS data_type, c.max_length, c.precision, c.scale,
                   c.is_nullable, dc.definition AS default_value
            FROM sys.columns c
            JOIN sys.tables t ON c.object_id = t.object_id
            JOIN sys.types ty ON c.user_type_id = ty.user_type_id
            LEFT JOIN sys.default_constraints dc ON c.default_object_id = dc.object_id
            {table_filter}
            ORDER BY t.name, c.column_id;"""),
        "foreign_keys": textwrap.dedent("""\
            SELECT fk.name AS fk_name,
                   tp.name AS parent_table, cp.name AS parent_column,
                   tr.name AS referenced_table, cr.name AS referenced_column
            FROM sys.foreign_keys fk
            JOIN sys.foreign_key_columns fkc ON fk.object_id = fkc.constraint_object_id
            JOIN sys.tables tp ON fkc.parent_object_id = tp.object_id
            JOIN sys.columns cp ON fkc.parent_object_id = cp.object_id AND fkc.parent_column_id = cp.column_id
            JOIN sys.tables tr ON fkc.referenced_object_id = tr.object_id
            JOIN sys.columns cr ON fkc.referenced_object_id = cr.object_id AND fkc.referenced_column_id = cr.column_id
            ORDER BY tp.name;"""),
        "indexes": textwrap.dedent("""\
            SELECT t.name AS table_name, i.name AS index_name,
                   i.type_desc, i.is_unique, c.name AS column_name,
                   ic.key_ordinal
            FROM sys.indexes i
            JOIN sys.index_columns ic ON i.object_id = ic.object_id AND i.index_id = ic.index_id
            JOIN sys.columns c ON ic.object_id = c.object_id AND ic.column_id = c.column_id
            JOIN sys.tables t ON i.object_id = t.object_id
            WHERE i.name IS NOT NULL
            ORDER BY t.name, i.name, ic.key_ordinal;"""),
    },
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Documentation generators
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Canned example schemas used only to demonstrate the documentation format.
# When a requested table name matches one of these keys, its sample rows are
# rendered; real schemas must come from running the introspection queries.
SAMPLE_TABLES = {
    "users": {
        "columns": [
            {"name": "id", "type": "SERIAL / INT", "nullable": "NO", "default": "auto", "notes": "Primary key"},
            {"name": "email", "type": "VARCHAR(255)", "nullable": "NO", "default": "-", "notes": "Unique, indexed"},
            {"name": "name", "type": "VARCHAR(255)", "nullable": "YES", "default": "NULL", "notes": "Display name"},
            {"name": "password_hash", "type": "VARCHAR(255)", "nullable": "NO", "default": "-", "notes": "bcrypt hash"},
            {"name": "created_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
            {"name": "updated_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
        ],
        "indexes": ["PRIMARY KEY (id)", "UNIQUE INDEX (email)"],
        "foreign_keys": [],
    },
    "orders": {
        "columns": [
            {"name": "id", "type": "SERIAL / INT", "nullable": "NO", "default": "auto", "notes": "Primary key"},
            {"name": "user_id", "type": "INTEGER", "nullable": "NO", "default": "-", "notes": "FK -> users.id"},
            {"name": "status", "type": "VARCHAR(50)", "nullable": "NO", "default": "'pending'", "notes": "pending/paid/shipped/cancelled"},
            {"name": "total", "type": "DECIMAL(19,4)", "nullable": "NO", "default": "0", "notes": "Order total in cents"},
            {"name": "created_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
        ],
        "indexes": ["PRIMARY KEY (id)", "INDEX (user_id)", "INDEX (status, created_at)"],
        "foreign_keys": ["user_id -> users.id ON DELETE CASCADE"],
    },
}
|
||||
|
||||
|
||||
def generate_md(dialect: str, tables: List[str]) -> str:
    """Generate markdown schema documentation.

    Args:
        dialect: One of the INTROSPECTION_QUERIES keys
            (postgres / mysql / sqlite / sqlserver); unknown dialects
            produce a document with no queries.
        tables: Requested table names; the literal entry "all" (anywhere
            in the list) means document everything.

    Returns:
        A markdown document containing (1) the introspection queries to
        run, with table filters substituted in, and (2) sample table
        documentation for known table patterns.
    """
    lines = [f"# Database Schema Documentation ({dialect.upper()})\n"]
    lines.append("Generated by sql-database-assistant schema_explorer.\n")

    # Introspection queries section
    lines.append("## Introspection Queries\n")
    lines.append("Run these queries against your database to extract schema information:\n")
    queries = INTROSPECTION_QUERIES.get(dialect, {})
    for qname, qsql in queries.items():
        table_filter = ""
        if "all" not in tables:
            tlist = ", ".join(f"'{t}'" for t in tables)
            # The SQL Server templates filter against sys.tables (aliased
            # "t") and their {table_filter} slot is not preceded by a WHERE
            # clause, so the filter must start its own WHERE. The
            # information_schema-based templates already end in a WHERE, so
            # the filter is appended with AND.
            if dialect == "sqlserver":
                table_filter = f"WHERE t.name IN ({tlist})"
            else:
                table_filter = f"AND table_name IN ({tlist})"
        qsql = qsql.replace("{table_filter}", table_filter)
        # SQLite PRAGMA templates take a single table name; fall back to a
        # visible placeholder when documenting all tables.
        qsql = qsql.replace("{table_name}", tables[0] if tables and tables[0] != "all" else "TABLE_NAME")
        lines.append(f"### {qname.replace('_', ' ').title()}\n")
        lines.append(f"```sql\n{qsql}\n```\n")

    # Sample documentation section
    lines.append("## Sample Table Documentation\n")
    lines.append("Below is an example of the documentation format produced from query results:\n")

    show_tables = tables if "all" not in tables else list(SAMPLE_TABLES.keys())
    for tname in show_tables:
        sample = SAMPLE_TABLES.get(tname)
        if not sample:
            # No canned example for this table; point at the queries above.
            lines.append(f"### {tname}\n")
            lines.append("_No sample data available. Run introspection queries above._\n")
            continue

        lines.append(f"### {tname}\n")
        lines.append("| Column | Type | Nullable | Default | Notes |")
        lines.append("|--------|------|----------|---------|-------|")
        for col in sample["columns"]:
            lines.append(f"| {col['name']} | {col['type']} | {col['nullable']} | {col['default']} | {col['notes']} |")
        lines.append("")
        if sample["indexes"]:
            lines.append("**Indexes:** " + ", ".join(sample["indexes"]))
        if sample["foreign_keys"]:
            lines.append("**Foreign Keys:** " + ", ".join(sample["foreign_keys"]))
        lines.append("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_json_output(dialect: str, tables: List[str]) -> dict:
    """Generate JSON schema documentation.

    Args:
        dialect: One of the INTROSPECTION_QUERIES keys; unknown dialects
            produce an empty query set.
        tables: Requested table names; the literal entry "all" (anywhere
            in the list) means document everything.

    Returns:
        A dict with the dialect, the requested tables, the filter-substituted
        introspection queries, sample documentation for known tables, and
        usage instructions.
    """
    queries = INTROSPECTION_QUERIES.get(dialect, {})
    processed = {}
    for qname, qsql in queries.items():
        table_filter = ""
        if "all" not in tables:
            tlist = ", ".join(f"'{t}'" for t in tables)
            # SQL Server templates filter on sys.tables ("t") and have no
            # WHERE of their own at the {table_filter} slot, so the filter
            # must open its own WHERE clause; the information_schema-based
            # templates already contain a WHERE, so AND is appended.
            if dialect == "sqlserver":
                table_filter = f"WHERE t.name IN ({tlist})"
            else:
                table_filter = f"AND table_name IN ({tlist})"
        processed[qname] = qsql.replace("{table_filter}", table_filter).replace(
            "{table_name}", tables[0] if tables and tables[0] != "all" else "TABLE_NAME"
        )

    show_tables = tables if "all" not in tables else list(SAMPLE_TABLES.keys())
    # Only tables with a canned example appear in sample_documentation.
    sample_docs = {tname: SAMPLE_TABLES[tname] for tname in show_tables if tname in SAMPLE_TABLES}

    return {
        "dialect": dialect,
        "requested_tables": tables,
        "introspection_queries": processed,
        "sample_documentation": sample_docs,
        "instructions": "Run the introspection queries against your database, then use the results to populate documentation in the sample format shown.",
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
    """CLI entry point: parse arguments and print schema documentation."""
    parser = argparse.ArgumentParser(
        description="Generate schema documentation from database introspection.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --dialect postgres --tables all --format md
  %(prog)s --dialect mysql --tables users,orders --format json
  %(prog)s --dialect sqlite --tables all --json
""",
    )
    parser.add_argument(
        "--dialect", required=True, choices=["postgres", "mysql", "sqlite", "sqlserver"],
        help="Target database dialect",
    )
    parser.add_argument(
        "--tables", default="all",
        help="Comma-separated table names or 'all' (default: all)",
    )
    parser.add_argument(
        "--format", choices=["md", "json"], default="md", dest="fmt",
        help="Output format (default: md)",
    )
    parser.add_argument(
        "--json", action="store_true", dest="json_output",
        help="Output as JSON (overrides --format)",
    )
    args = parser.parse_args()

    # Drop empty entries so inputs like "users,,orders", a trailing comma, or
    # an empty string do not produce blank table names in the generated SQL.
    tables = [t.strip() for t in args.tables.split(",") if t.strip()]
    if not tables:
        tables = ["all"]

    if args.json_output or args.fmt == "json":
        result = generate_json_output(args.dialect, tables)
        print(json.dumps(result, indent=2))
    else:
        print(generate_md(args.dialect, tables))


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user