Files
claude-skills-reference/engineering/sql-database-assistant/scripts/schema_explorer.py
Reza Rezvani 87f3a007c9 feat(engineering,ra-qm): add secrets-vault-manager, sql-database-assistant, gcp-cloud-architect, soc2-compliance
secrets-vault-manager (403-line SKILL.md, 3 scripts, 3 references):
- HashiCorp Vault, AWS SM, Azure KV, GCP SM integration
- Secret rotation, dynamic secrets, audit logging, emergency procedures

sql-database-assistant (457-line SKILL.md, 3 scripts, 3 references):
- Query optimization, migration generation, schema exploration
- Multi-DB support (PostgreSQL, MySQL, SQLite, SQL Server)
- ORM patterns (Prisma, Drizzle, TypeORM, SQLAlchemy)

gcp-cloud-architect (418-line SKILL.md, 3 scripts, 3 references):
- 6-step workflow mirroring aws-solution-architect for GCP
- Cloud Run, GKE, BigQuery, Cloud Functions, cost optimization
- Completes cloud trifecta (AWS + Azure + GCP)

soc2-compliance (417-line SKILL.md, 3 scripts, 3 references):
- SOC 2 Type I & II preparation, Trust Service Criteria mapping
- Control matrix generation, evidence tracking, gap analysis
- First SOC 2 skill in ra-qm-team (joins GDPR, ISO 27001, ISO 13485)

All 12 scripts pass --help. Docs generated, mkdocs.yml nav updated.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-25 14:05:11 +01:00

316 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Schema Explorer
Generates schema documentation from database introspection queries.
Outputs the introspection SQL and sample documentation templates
for PostgreSQL, MySQL, SQLite, and SQL Server.
Since this tool runs without a live database connection, it generates:
1. The introspection queries you need to run
2. Documentation templates from the results
3. Sample schema docs for common table patterns
Usage:
python schema_explorer.py --dialect postgres --tables all --format md
python schema_explorer.py --dialect mysql --tables users,orders --format json
python schema_explorer.py --dialect sqlite --tables all --json
"""
import argparse
import json
import sys
import textwrap
from dataclasses import dataclass, asdict
from typing import List, Optional, Dict
# ---------------------------------------------------------------------------
# Introspection query templates per dialect
# ---------------------------------------------------------------------------
# Maps dialect -> query-name -> SQL template.
#
# Templates may contain two placeholders that the generator functions
# substitute before output:
#   {table_filter} - replaced with "AND table_name IN (...)" when specific
#                    tables were requested, or "" for all tables.  Every
#                    template that carries this placeholder must put it in a
#                    position where a leading "AND" is valid (i.e. after a
#                    WHERE clause).
#   {table_name}   - replaced with a single table name (SQLite PRAGMAs
#                    operate on one table at a time).
INTROSPECTION_QUERIES: Dict[str, Dict[str, str]] = {
    "postgres": {
        "tables": textwrap.dedent("""\
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
            ORDER BY table_name;"""),
        "columns": textwrap.dedent("""\
            SELECT table_name, column_name, data_type, character_maximum_length,
                   is_nullable, column_default
            FROM information_schema.columns
            WHERE table_schema = 'public' {table_filter}
            ORDER BY table_name, ordinal_position;"""),
        "primary_keys": textwrap.dedent("""\
            SELECT tc.table_name, kcu.column_name
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
              ON tc.constraint_name = kcu.constraint_name
            WHERE tc.constraint_type = 'PRIMARY KEY' AND tc.table_schema = 'public'
            ORDER BY tc.table_name;"""),
        # Schema filter added so foreign keys from other schemas (or
        # duplicate constraint names across schemas) don't leak in, matching
        # the 'public'-schema scope used by the other postgres queries.
        "foreign_keys": textwrap.dedent("""\
            SELECT tc.table_name, kcu.column_name,
                   ccu.table_name AS foreign_table, ccu.column_name AS foreign_column
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
              ON tc.constraint_name = kcu.constraint_name
            JOIN information_schema.constraint_column_usage ccu
              ON tc.constraint_name = ccu.constraint_name
            WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_schema = 'public'
            ORDER BY tc.table_name;"""),
        "indexes": textwrap.dedent("""\
            SELECT schemaname, tablename, indexname, indexdef
            FROM pg_indexes
            WHERE schemaname = 'public'
            ORDER BY tablename, indexname;"""),
        "table_sizes": textwrap.dedent("""\
            SELECT relname AS table_name,
                   pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
                   pg_size_pretty(pg_relation_size(relid)) AS data_size,
                   pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size
            FROM pg_catalog.pg_statio_user_tables
            ORDER BY pg_total_relation_size(relid) DESC;"""),
    },
    "mysql": {
        "tables": textwrap.dedent("""\
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema = DATABASE() AND table_type = 'BASE TABLE'
            ORDER BY table_name;"""),
        "columns": textwrap.dedent("""\
            SELECT table_name, column_name, column_type, is_nullable,
                   column_default, column_key, extra
            FROM information_schema.columns
            WHERE table_schema = DATABASE() {table_filter}
            ORDER BY table_name, ordinal_position;"""),
        "foreign_keys": textwrap.dedent("""\
            SELECT table_name, column_name, referenced_table_name, referenced_column_name
            FROM information_schema.key_column_usage
            WHERE table_schema = DATABASE() AND referenced_table_name IS NOT NULL
            ORDER BY table_name;"""),
        "indexes": textwrap.dedent("""\
            SELECT table_name, index_name, non_unique, column_name, seq_in_index
            FROM information_schema.statistics
            WHERE table_schema = DATABASE()
            ORDER BY table_name, index_name, seq_in_index;"""),
        "table_sizes": textwrap.dedent("""\
            SELECT table_name, table_rows,
                   ROUND(data_length / 1024 / 1024, 2) AS data_mb,
                   ROUND(index_length / 1024 / 1024, 2) AS index_mb
            FROM information_schema.tables
            WHERE table_schema = DATABASE()
            ORDER BY data_length DESC;"""),
    },
    "sqlite": {
        "tables": textwrap.dedent("""\
            SELECT name FROM sqlite_master
            WHERE type = 'table' AND name NOT LIKE 'sqlite_%'
            ORDER BY name;"""),
        # SQLite has no information_schema; PRAGMAs take one table each.
        "columns": textwrap.dedent("""\
            -- Run for each table:
            PRAGMA table_info({table_name});"""),
        "foreign_keys": textwrap.dedent("""\
            -- Run for each table:
            PRAGMA foreign_key_list({table_name});"""),
        "indexes": textwrap.dedent("""\
            SELECT name, tbl_name, sql FROM sqlite_master
            WHERE type = 'index'
            ORDER BY tbl_name, name;"""),
        "schema_dump": textwrap.dedent("""\
            SELECT name, sql FROM sqlite_master
            WHERE type = 'table'
            ORDER BY name;"""),
    },
    "sqlserver": {
        "tables": textwrap.dedent("""\
            SELECT TABLE_NAME
            FROM INFORMATION_SCHEMA.TABLES
            WHERE TABLE_TYPE = 'BASE TABLE'
            ORDER BY TABLE_NAME;"""),
        # The sys.* join is wrapped in a derived table so the generic
        # "AND table_name IN (...)" filter both (a) follows a WHERE clause
        # (previously the bare {table_filter} after the JOINs produced
        # invalid SQL whenever specific tables were requested) and
        # (b) resolves "table_name" via the derived table's column alias.
        "columns": textwrap.dedent("""\
            SELECT * FROM (
                SELECT t.name AS table_name, c.name AS column_name,
                       ty.name AS data_type, c.max_length, c.precision, c.scale,
                       c.is_nullable, dc.definition AS default_value, c.column_id
                FROM sys.columns c
                JOIN sys.tables t ON c.object_id = t.object_id
                JOIN sys.types ty ON c.user_type_id = ty.user_type_id
                LEFT JOIN sys.default_constraints dc ON c.default_object_id = dc.object_id
            ) AS cols
            WHERE 1 = 1 {table_filter}
            ORDER BY table_name, column_id;"""),
        "foreign_keys": textwrap.dedent("""\
            SELECT fk.name AS fk_name,
                   tp.name AS parent_table, cp.name AS parent_column,
                   tr.name AS referenced_table, cr.name AS referenced_column
            FROM sys.foreign_keys fk
            JOIN sys.foreign_key_columns fkc ON fk.object_id = fkc.constraint_object_id
            JOIN sys.tables tp ON fkc.parent_object_id = tp.object_id
            JOIN sys.columns cp ON fkc.parent_object_id = cp.object_id AND fkc.parent_column_id = cp.column_id
            JOIN sys.tables tr ON fkc.referenced_object_id = tr.object_id
            JOIN sys.columns cr ON fkc.referenced_object_id = cr.object_id AND fkc.referenced_column_id = cr.column_id
            ORDER BY tp.name;"""),
        "indexes": textwrap.dedent("""\
            SELECT t.name AS table_name, i.name AS index_name,
                   i.type_desc, i.is_unique, c.name AS column_name,
                   ic.key_ordinal
            FROM sys.indexes i
            JOIN sys.index_columns ic ON i.object_id = ic.object_id AND i.index_id = ic.index_id
            JOIN sys.columns c ON ic.object_id = c.object_id AND ic.column_id = c.column_id
            JOIN sys.tables t ON i.object_id = t.object_id
            WHERE i.name IS NOT NULL
            ORDER BY t.name, i.name, ic.key_ordinal;"""),
    },
}
# ---------------------------------------------------------------------------
# Documentation generators
# ---------------------------------------------------------------------------
# Canned example tables used to render a "sample documentation" section so
# users can see the target format before running the introspection queries.
# Each entry: column dicts (name/type/nullable/default/notes), plus display
# strings for indexes and foreign keys.  Types are shown generically
# ("SERIAL / INT") because the tool is dialect-agnostic at this point.
SAMPLE_TABLES = {
    "users": {
        "columns": [
            {"name": "id", "type": "SERIAL / INT", "nullable": "NO", "default": "auto", "notes": "Primary key"},
            {"name": "email", "type": "VARCHAR(255)", "nullable": "NO", "default": "-", "notes": "Unique, indexed"},
            {"name": "name", "type": "VARCHAR(255)", "nullable": "YES", "default": "NULL", "notes": "Display name"},
            {"name": "password_hash", "type": "VARCHAR(255)", "nullable": "NO", "default": "-", "notes": "bcrypt hash"},
            {"name": "created_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
            {"name": "updated_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
        ],
        "indexes": ["PRIMARY KEY (id)", "UNIQUE INDEX (email)"],
        "foreign_keys": [],
    },
    "orders": {
        "columns": [
            {"name": "id", "type": "SERIAL / INT", "nullable": "NO", "default": "auto", "notes": "Primary key"},
            {"name": "user_id", "type": "INTEGER", "nullable": "NO", "default": "-", "notes": "FK -> users.id"},
            {"name": "status", "type": "VARCHAR(50)", "nullable": "NO", "default": "'pending'", "notes": "pending/paid/shipped/cancelled"},
            {"name": "total", "type": "DECIMAL(19,4)", "nullable": "NO", "default": "0", "notes": "Order total in cents"},
            {"name": "created_at", "type": "TIMESTAMP", "nullable": "NO", "default": "NOW()", "notes": ""},
        ],
        "indexes": ["PRIMARY KEY (id)", "INDEX (user_id)", "INDEX (status, created_at)"],
        "foreign_keys": ["user_id -> users.id ON DELETE CASCADE"],
    },
}
def generate_md(dialect: str, tables: List[str]) -> str:
    """Generate markdown schema documentation.

    Args:
        dialect: One of the INTROSPECTION_QUERIES keys ("postgres", "mysql",
            "sqlite", "sqlserver").  Unknown dialects yield a document with
            no queries section content rather than an error.
        tables: Requested table names, or a list containing "all" to cover
            every table.

    Returns:
        A markdown document: the dialect's introspection queries (with the
        table-filter placeholders substituted) followed by sample table
        documentation for the requested tables.
    """
    lines = [f"# Database Schema Documentation ({dialect.upper()})\n"]
    lines.append("Generated by sql-database-assistant schema_explorer.\n")

    # The placeholder substitutions are identical for every query, so they
    # are computed once here instead of on every loop iteration.
    table_filter = ""
    if "all" not in tables:
        quoted = ", ".join(f"'{t}'" for t in tables)
        table_filter = f"AND table_name IN ({quoted})"
    # SQLite PRAGMAs take exactly one table; fall back to a placeholder
    # name when no specific table was requested.
    single_table = tables[0] if tables and tables[0] != "all" else "TABLE_NAME"

    lines.append("## Introspection Queries\n")
    lines.append("Run these queries against your database to extract schema information:\n")
    for qname, qsql in INTROSPECTION_QUERIES.get(dialect, {}).items():
        qsql = qsql.replace("{table_filter}", table_filter).replace("{table_name}", single_table)
        lines.append(f"### {qname.replace('_', ' ').title()}\n")
        lines.append(f"```sql\n{qsql}\n```\n")

    lines.append("## Sample Table Documentation\n")
    lines.append("Below is an example of the documentation format produced from query results:\n")
    show_tables = tables if "all" not in tables else list(SAMPLE_TABLES.keys())
    for tname in show_tables:
        sample = SAMPLE_TABLES.get(tname)
        lines.append(f"### {tname}\n")
        if not sample:
            # No canned example for this table; point at the queries above.
            lines.append("_No sample data available. Run introspection queries above._\n")
            continue
        lines.append("| Column | Type | Nullable | Default | Notes |")
        lines.append("|--------|------|----------|---------|-------|")
        for col in sample["columns"]:
            lines.append(f"| {col['name']} | {col['type']} | {col['nullable']} | {col['default']} | {col['notes']} |")
        lines.append("")
        if sample["indexes"]:
            lines.append("**Indexes:** " + ", ".join(sample["indexes"]))
        if sample["foreign_keys"]:
            lines.append("**Foreign Keys:** " + ", ".join(sample["foreign_keys"]))
        lines.append("")
    return "\n".join(lines)
def generate_json_output(dialect: str, tables: List[str]) -> dict:
    """Generate JSON schema documentation.

    Builds the same content as the markdown generator, but as a plain dict
    suitable for json.dumps: the substituted introspection queries, the
    sample docs for the requested tables, and usage instructions.
    """
    want_all = "all" in tables

    # Substitutions are shared by every query template.
    if want_all:
        table_filter = ""
    else:
        quoted_names = ", ".join(f"'{t}'" for t in tables)
        table_filter = f"AND table_name IN ({quoted_names})"
    if tables and tables[0] != "all":
        single_table = tables[0]
    else:
        single_table = "TABLE_NAME"

    processed = {
        name: sql.replace("{table_filter}", table_filter).replace("{table_name}", single_table)
        for name, sql in INTROSPECTION_QUERIES.get(dialect, {}).items()
    }

    requested = list(SAMPLE_TABLES) if want_all else tables
    sample_docs = {name: SAMPLE_TABLES[name] for name in requested if name in SAMPLE_TABLES}

    return {
        "dialect": dialect,
        "requested_tables": tables,
        "introspection_queries": processed,
        "sample_documentation": sample_docs,
        "instructions": "Run the introspection queries against your database, then use the results to populate documentation in the sample format shown.",
    }
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main(argv: Optional[List[str]] = None) -> None:
    """CLI entry point.

    Args:
        argv: Argument list to parse (for tests / embedding); defaults to
            sys.argv[1:] when None, matching argparse's own default.
    """
    parser = argparse.ArgumentParser(
        description="Generate schema documentation from database introspection.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --dialect postgres --tables all --format md
  %(prog)s --dialect mysql --tables users,orders --format json
  %(prog)s --dialect sqlite --tables all --json
""",
    )
    parser.add_argument(
        "--dialect", required=True, choices=["postgres", "mysql", "sqlite", "sqlserver"],
        help="Target database dialect",
    )
    parser.add_argument(
        "--tables", default="all",
        help="Comma-separated table names or 'all' (default: all)",
    )
    parser.add_argument(
        "--format", choices=["md", "json"], default="md", dest="fmt",
        help="Output format (default: md)",
    )
    parser.add_argument(
        "--json", action="store_true", dest="json_output",
        help="Output as JSON (overrides --format)",
    )
    args = parser.parse_args(argv)

    # Drop empty segments ("users,,orders" or a blank value); a fully blank
    # --tables falls back to 'all' instead of generating an "IN ('')" filter.
    tables = [t.strip() for t in args.tables.split(",") if t.strip()] or ["all"]

    if args.json_output or args.fmt == "json":
        result = generate_json_output(args.dialect, tables)
        print(json.dumps(result, indent=2))
    else:
        print(generate_md(args.dialect, tables))


if __name__ == "__main__":
    main()