From dd07924f41a2a0b4219786711b012e53c98df256 Mon Sep 17 00:00:00 2001 From: Leo Date: Mon, 16 Feb 2026 16:11:29 +0000 Subject: [PATCH] feat: add database-designer POWERFUL-tier skill --- engineering/database-designer/README.md | 388 ++++++ engineering/database-designer/SKILL.md | 533 ++++++++ .../assets/sample_query_patterns.json | 375 ++++++ .../assets/sample_schema.json | 372 +++++ .../assets/sample_schema.sql | 207 +++ .../index_optimization_sample.txt | 60 + .../expected_outputs/migration_sample.txt | 124 ++ .../schema_analysis_sample.txt | 222 +++ .../database-designer/index_optimizer.py | 926 +++++++++++++ .../database-designer/migration_generator.py | 1199 +++++++++++++++++ .../database_selection_decision_tree.md | 373 +++++ .../references/index_strategy_patterns.md | 424 ++++++ .../references/normalization_guide.md | 354 +++++ .../database-designer/schema_analyzer.py | 982 ++++++++++++++ 14 files changed, 6539 insertions(+) create mode 100644 engineering/database-designer/README.md create mode 100644 engineering/database-designer/SKILL.md create mode 100644 engineering/database-designer/assets/sample_query_patterns.json create mode 100644 engineering/database-designer/assets/sample_schema.json create mode 100644 engineering/database-designer/assets/sample_schema.sql create mode 100644 engineering/database-designer/expected_outputs/index_optimization_sample.txt create mode 100644 engineering/database-designer/expected_outputs/migration_sample.txt create mode 100644 engineering/database-designer/expected_outputs/schema_analysis_sample.txt create mode 100644 engineering/database-designer/index_optimizer.py create mode 100644 engineering/database-designer/migration_generator.py create mode 100644 engineering/database-designer/references/database_selection_decision_tree.md create mode 100644 engineering/database-designer/references/index_strategy_patterns.md create mode 100644 engineering/database-designer/references/normalization_guide.md create mode 100644 
engineering/database-designer/schema_analyzer.py diff --git a/engineering/database-designer/README.md b/engineering/database-designer/README.md new file mode 100644 index 0000000..9d38831 --- /dev/null +++ b/engineering/database-designer/README.md @@ -0,0 +1,388 @@ +# Database Designer - POWERFUL Tier Skill + +A comprehensive database design and analysis toolkit that provides expert-level schema analysis, index optimization, and migration generation capabilities for modern database systems. + +## Features + +### 🔍 Schema Analyzer +- **Normalization Analysis**: Automated detection of 1NF through BCNF violations +- **Data Type Optimization**: Identifies antipatterns and inappropriate types +- **Constraint Analysis**: Finds missing foreign keys, unique constraints, and checks +- **ERD Generation**: Creates Mermaid diagrams from DDL or JSON schema +- **Naming Convention Validation**: Ensures consistent naming patterns + +### ⚡ Index Optimizer +- **Missing Index Detection**: Identifies indexes needed for query patterns +- **Composite Index Design**: Optimizes column ordering for maximum efficiency +- **Redundancy Analysis**: Finds duplicate and overlapping indexes +- **Performance Modeling**: Estimates selectivity and query performance impact +- **Covering Index Recommendations**: Eliminates table lookups + +### 🚀 Migration Generator +- **Zero-Downtime Migrations**: Implements expand-contract patterns +- **Schema Evolution**: Handles column changes, table renames, constraint updates +- **Data Migration Scripts**: Automated data transformation and validation +- **Rollback Planning**: Complete reversal capabilities for all changes +- **Execution Orchestration**: Dependency-aware migration ordering + +## Quick Start + +### Prerequisites +- Python 3.7+ (no external dependencies required) +- Database schema in SQL DDL format or JSON +- Query patterns (for index optimization) + +### Installation +```bash +# Clone or download the database-designer skill +cd 
engineering/database-designer/ + +# Make scripts executable +chmod +x *.py +``` + +## Usage Examples + +### Schema Analysis + +**Analyze SQL DDL file:** +```bash +python schema_analyzer.py --input assets/sample_schema.sql --output-format text +``` + +**Generate ERD diagram:** +```bash +python schema_analyzer.py --input assets/sample_schema.sql --generate-erd --output analysis.txt +``` + +**JSON schema analysis:** +```bash +python schema_analyzer.py --input assets/sample_schema.json --output-format json --output results.json +``` + +### Index Optimization + +**Basic index analysis:** +```bash +python index_optimizer.py --schema assets/sample_schema.json --queries assets/sample_query_patterns.json +``` + +**High-priority recommendations only:** +```bash +python index_optimizer.py --schema assets/sample_schema.json --queries assets/sample_query_patterns.json --min-priority 2 +``` + +**JSON output with existing index analysis:** +```bash +python index_optimizer.py --schema assets/sample_schema.json --queries assets/sample_query_patterns.json --format json --analyze-existing +``` + +### Migration Generation + +**Generate migration between schemas:** +```bash +python migration_generator.py --current assets/current_schema.json --target assets/target_schema.json +``` + +**Zero-downtime migration:** +```bash +python migration_generator.py --current current.json --target target.json --zero-downtime --format sql +``` + +**Include validation queries:** +```bash +python migration_generator.py --current current.json --target target.json --include-validations --output migration_plan.txt +``` + +## Tool Documentation + +### Schema Analyzer + +**Input Formats:** +- SQL DDL files (.sql) +- JSON schema definitions (.json) + +**Key Capabilities:** +- Detects 1NF violations (non-atomic values, repeating groups) +- Identifies 2NF issues (partial dependencies in composite keys) +- Finds 3NF problems (transitive dependencies) +- Checks BCNF compliance (determinant key requirements) +- 
Validates data types (VARCHAR(255) antipattern, inappropriate types) +- Missing constraints (NOT NULL, UNIQUE, CHECK, foreign keys) +- Naming convention adherence + +**Sample Command:** +```bash +python schema_analyzer.py \ + --input sample_schema.sql \ + --generate-erd \ + --output-format text \ + --output analysis.txt +``` + +**Output:** +- Comprehensive text or JSON analysis report +- Mermaid ERD diagram +- Prioritized recommendations +- SQL statements for improvements + +### Index Optimizer + +**Input Requirements:** +- Schema definition (JSON format) +- Query patterns with frequency and selectivity data + +**Analysis Features:** +- Selectivity estimation based on column patterns +- Composite index column ordering optimization +- Covering index recommendations for SELECT queries +- Foreign key index validation +- Redundancy detection (duplicates, overlaps, unused indexes) +- Performance impact modeling + +**Sample Command:** +```bash +python index_optimizer.py \ + --schema schema.json \ + --queries query_patterns.json \ + --format text \ + --min-priority 3 \ + --output recommendations.txt +``` + +**Output:** +- Prioritized index recommendations +- CREATE INDEX statements +- Drop statements for redundant indexes +- Performance impact analysis +- Storage size estimates + +### Migration Generator + +**Input Requirements:** +- Current schema (JSON format) +- Target schema (JSON format) + +**Migration Strategies:** +- Standard migrations with ALTER statements +- Zero-downtime expand-contract patterns +- Data migration and transformation scripts +- Constraint management (add/drop in correct order) +- Index management with timing estimates + +**Sample Command:** +```bash +python migration_generator.py \ + --current current_schema.json \ + --target target_schema.json \ + --zero-downtime \ + --include-validations \ + --format text +``` + +**Output:** +- Step-by-step migration plan +- Forward and rollback SQL statements +- Risk assessment for each step +- Validation 
queries +- Execution time estimates + +## File Structure + +``` +database-designer/ +├── README.md # This file +├── SKILL.md # Comprehensive database design guide +├── schema_analyzer.py # Schema analysis tool +├── index_optimizer.py # Index optimization tool +├── migration_generator.py # Migration generation tool +├── references/ # Reference documentation +│ ├── normalization_guide.md # Normalization principles and patterns +│ ├── index_strategy_patterns.md # Index design and optimization guide +│ └── database_selection_decision_tree.md # Database technology selection +├── assets/ # Sample files and test data +│ ├── sample_schema.sql # Sample DDL with various issues +│ ├── sample_schema.json # JSON schema definition +│ └── sample_query_patterns.json # Query patterns for index analysis +└── expected_outputs/ # Example tool outputs + ├── schema_analysis_sample.txt # Sample schema analysis report + ├── index_optimization_sample.txt # Sample index recommendations + └── migration_sample.txt # Sample migration plan +``` + +## JSON Schema Format + +The tools use a standardized JSON format for schema definitions: + +```json +{ + "tables": { + "table_name": { + "columns": { + "column_name": { + "type": "VARCHAR(255)", + "nullable": true, + "unique": false, + "foreign_key": "other_table.column", + "default": "default_value", + "cardinality_estimate": 1000 + } + }, + "primary_key": ["id"], + "unique_constraints": [["email"], ["username"]], + "check_constraints": { + "chk_positive_price": "price > 0" + }, + "indexes": [ + { + "name": "idx_table_column", + "columns": ["column_name"], + "unique": false, + "partial_condition": "status = 'active'" + } + ] + } + } +} +``` + +## Query Patterns Format + +For index optimization, provide query patterns in this format: + +```json +{ + "queries": [ + { + "id": "user_lookup", + "type": "SELECT", + "table": "users", + "where_conditions": [ + { + "column": "email", + "operator": "=", + "selectivity": 0.95 + } + ], + "join_conditions": [ + 
{ + "local_column": "user_id", + "foreign_table": "orders", + "foreign_column": "id", + "join_type": "INNER" + } + ], + "order_by": [ + {"column": "created_at", "direction": "DESC"} + ], + "frequency": 1000, + "avg_execution_time_ms": 5.2 + } + ] +} +``` + +## Best Practices + +### Schema Analysis +1. **Start with DDL**: Use actual CREATE TABLE statements when possible +2. **Include Constraints**: Capture all existing constraints and indexes +3. **Consider History**: Some denormalization may be intentional for performance +4. **Validate Results**: Review recommendations against business requirements + +### Index Optimization +1. **Real Query Patterns**: Use actual application queries, not theoretical ones +2. **Include Frequency**: Query frequency is crucial for prioritization +3. **Monitor Performance**: Validate recommendations with actual performance testing +4. **Gradual Implementation**: Add indexes incrementally and monitor impact + +### Migration Planning +1. **Test Migrations**: Always test on non-production environments first +2. **Backup First**: Ensure complete backups before running migrations +3. **Monitor Progress**: Watch for locks and performance impacts during execution +4. **Rollback Ready**: Have rollback procedures tested and ready + +## Advanced Usage + +### Custom Selectivity Estimation +The index optimizer uses pattern-based selectivity estimation. You can improve accuracy by providing cardinality estimates in your schema JSON (here, a `status` column with only 5 distinct values): + +```json +{ + "columns": { + "status": { + "type": "VARCHAR(20)", + "cardinality_estimate": 5 + } + } +} +``` + +### Zero-Downtime Migration Strategy +For production systems, use the zero-downtime flag to generate expand-contract migrations: + +1. **Expand Phase**: Add new columns/tables without constraints +2. **Dual Write**: Application writes to both old and new structures +3. **Backfill**: Populate new structures with existing data +4. 
**Contract Phase**: Remove old structures after validation + +### Integration with CI/CD +Integrate these tools into your deployment pipeline: + +```bash +# Schema validation in CI +python schema_analyzer.py --input schema.sql --output-format json | \ + jq '.constraint_analysis.total_issues' | \ + test $(cat) -eq 0 || exit 1 + +# Generate migrations automatically +python migration_generator.py \ + --current prod_schema.json \ + --target new_schema.json \ + --zero-downtime \ + --output migration.sql +``` + +## Troubleshooting + +### Common Issues + +**"No tables found in input file"** +- Ensure SQL DDL uses standard CREATE TABLE syntax +- Check for syntax errors in DDL +- Verify file encoding (UTF-8 recommended) + +**"Invalid JSON schema"** +- Validate JSON syntax with a JSON validator +- Ensure all required fields are present +- Check that foreign key references use "table.column" format + +**"Analysis shows no issues but problems exist"** +- Tools use heuristic analysis - review recommendations carefully +- Some design decisions may be intentional (denormalization for performance) +- Consider domain-specific requirements not captured by general rules + +### Performance Tips + +**Large Schemas:** +- Use `--output-format json` for machine processing +- Consider analyzing subsets of tables for very large schemas +- Provide cardinality estimates for better index recommendations + +**Complex Queries:** +- Include actual execution times in query patterns +- Provide realistic frequency estimates +- Consider seasonal or usage pattern variations + +## Contributing + +This is a self-contained skill with no external dependencies. To extend functionality: + +1. Follow the existing code patterns +2. Maintain Python standard library only requirement +3. Add comprehensive test cases for new features +4. Update documentation and examples + +## License + +This database designer skill is part of the claude-skills collection and follows the same licensing terms. 
\ No newline at end of file diff --git a/engineering/database-designer/SKILL.md b/engineering/database-designer/SKILL.md new file mode 100644 index 0000000..276d15d --- /dev/null +++ b/engineering/database-designer/SKILL.md @@ -0,0 +1,533 @@ +# Database Designer - POWERFUL Tier Skill + +## Overview + +A comprehensive database design skill that provides expert-level analysis, optimization, and migration capabilities for modern database systems. This skill combines theoretical principles with practical tools to help architects and developers create scalable, performant, and maintainable database schemas. + +## Core Competencies + +### Schema Design & Analysis +- **Normalization Analysis**: Automated detection of normalization levels (1NF through BCNF) +- **Denormalization Strategy**: Smart recommendations for performance optimization +- **Data Type Optimization**: Identification of inappropriate types and size issues +- **Constraint Analysis**: Missing foreign keys, unique constraints, and null checks +- **Naming Convention Validation**: Consistent table and column naming patterns +- **ERD Generation**: Automatic Mermaid diagram creation from DDL + +### Index Optimization +- **Index Gap Analysis**: Identification of missing indexes on foreign keys and query patterns +- **Composite Index Strategy**: Optimal column ordering for multi-column indexes +- **Index Redundancy Detection**: Elimination of overlapping and unused indexes +- **Performance Impact Modeling**: Selectivity estimation and query cost analysis +- **Index Type Selection**: B-tree, hash, partial, covering, and specialized indexes + +### Migration Management +- **Zero-Downtime Migrations**: Expand-contract pattern implementation +- **Schema Evolution**: Safe column additions, deletions, and type changes +- **Data Migration Scripts**: Automated data transformation and validation +- **Rollback Strategy**: Complete reversal capabilities with validation +- **Execution Planning**: Ordered migration steps with 
dependency resolution + +## Database Design Principles + +### Normalization Forms + +#### First Normal Form (1NF) +- **Atomic Values**: Each column contains indivisible values +- **Unique Column Names**: No duplicate column names within a table +- **Uniform Data Types**: Each column contains the same type of data +- **Row Uniqueness**: No duplicate rows in the table + +**Example Violation:** +```sql +-- BAD: Multiple phone numbers in one column +CREATE TABLE contacts ( + id INT PRIMARY KEY, + name VARCHAR(100), + phones VARCHAR(200) -- "123-456-7890, 098-765-4321" +); + +-- GOOD: Separate table for phone numbers +CREATE TABLE contacts ( + id INT PRIMARY KEY, + name VARCHAR(100) +); + +CREATE TABLE contact_phones ( + id INT PRIMARY KEY, + contact_id INT REFERENCES contacts(id), + phone_number VARCHAR(20), + phone_type VARCHAR(10) +); +``` + +#### Second Normal Form (2NF) +- **1NF Compliance**: Must satisfy First Normal Form +- **Full Functional Dependency**: Non-key attributes depend on the entire primary key +- **Partial Dependency Elimination**: Remove attributes that depend on part of a composite key + +**Example Violation:** +```sql +-- BAD: Student course table with partial dependencies +CREATE TABLE student_courses ( + student_id INT, + course_id INT, + student_name VARCHAR(100), -- Depends only on student_id + course_name VARCHAR(100), -- Depends only on course_id + grade CHAR(1), + PRIMARY KEY (student_id, course_id) +); + +-- GOOD: Separate tables eliminate partial dependencies +CREATE TABLE students ( + id INT PRIMARY KEY, + name VARCHAR(100) +); + +CREATE TABLE courses ( + id INT PRIMARY KEY, + name VARCHAR(100) +); + +CREATE TABLE enrollments ( + student_id INT REFERENCES students(id), + course_id INT REFERENCES courses(id), + grade CHAR(1), + PRIMARY KEY (student_id, course_id) +); +``` + +#### Third Normal Form (3NF) +- **2NF Compliance**: Must satisfy Second Normal Form +- **Transitive Dependency Elimination**: Non-key attributes should not depend on 
other non-key attributes +- **Direct Dependency**: Non-key attributes depend directly on the primary key + +**Example Violation:** +```sql +-- BAD: Employee table with transitive dependency +CREATE TABLE employees ( + id INT PRIMARY KEY, + name VARCHAR(100), + department_id INT, + department_name VARCHAR(100), -- Depends on department_id, not employee id + department_budget DECIMAL(10,2) -- Transitive dependency +); + +-- GOOD: Separate department information +CREATE TABLE departments ( + id INT PRIMARY KEY, + name VARCHAR(100), + budget DECIMAL(10,2) +); + +CREATE TABLE employees ( + id INT PRIMARY KEY, + name VARCHAR(100), + department_id INT REFERENCES departments(id) +); +``` + +#### Boyce-Codd Normal Form (BCNF) +- **3NF Compliance**: Must satisfy Third Normal Form +- **Determinant Key Rule**: Every determinant must be a candidate key +- **Stricter 3NF**: Handles anomalies not covered by 3NF + +### Denormalization Strategies + +#### When to Denormalize +1. **Read-Heavy Workloads**: High query frequency with acceptable write trade-offs +2. **Performance Bottlenecks**: Join operations causing significant latency +3. **Aggregation Needs**: Frequent calculation of derived values +4. 
**Caching Requirements**: Pre-computed results for common queries + +#### Common Denormalization Patterns + +**Redundant Storage** +```sql +-- Store calculated values to avoid expensive joins +CREATE TABLE orders ( + id INT PRIMARY KEY, + customer_id INT REFERENCES customers(id), + customer_name VARCHAR(100), -- Denormalized from customers table + order_total DECIMAL(10,2), -- Denormalized calculation + created_at TIMESTAMP +); +``` + +**Materialized Aggregates** +```sql +-- Pre-computed summary tables +CREATE TABLE customer_statistics ( + customer_id INT PRIMARY KEY, + total_orders INT, + lifetime_value DECIMAL(12,2), + last_order_date DATE, + updated_at TIMESTAMP +); +``` + +## Index Optimization Strategies + +### B-Tree Indexes +- **Default Choice**: Best for range queries, sorting, and equality matches +- **Column Order**: Most selective columns first for composite indexes +- **Prefix Matching**: Supports leading column subset queries +- **Maintenance Cost**: Balanced tree structure with logarithmic operations + +### Hash Indexes +- **Equality Queries**: Optimal for exact match lookups +- **Memory Efficiency**: Constant-time access for single-value queries +- **Range Limitations**: Cannot support range or partial matches +- **Use Cases**: Primary keys, unique constraints, cache keys + +### Composite Indexes +```sql +-- Query pattern determines optimal column order +-- Query: WHERE status = 'active' AND created_date > '2023-01-01' ORDER BY priority DESC +CREATE INDEX idx_task_status_date_priority +ON tasks (status, created_date, priority DESC); + +-- Query: WHERE user_id = 123 AND category IN ('A', 'B') AND date_field BETWEEN '...' AND '...' 
+CREATE INDEX idx_user_category_date +ON user_activities (user_id, category, date_field); +``` + +### Covering Indexes +```sql +-- Include additional columns to avoid table lookups +CREATE INDEX idx_user_email_covering +ON users (email) +INCLUDE (first_name, last_name, status); + +-- Query can be satisfied entirely from the index +-- SELECT first_name, last_name, status FROM users WHERE email = 'user@example.com'; +``` + +### Partial Indexes +```sql +-- Index only relevant subset of data +CREATE INDEX idx_active_users_email +ON users (email) +WHERE status = 'active'; + +-- Index for recent orders only +CREATE INDEX idx_recent_orders_customer +ON orders (customer_id, created_at) +WHERE created_at > CURRENT_DATE - INTERVAL '30 days'; +``` + +## Query Analysis & Optimization + +### Query Patterns Recognition +1. **Equality Filters**: Single-column B-tree indexes +2. **Range Queries**: B-tree with proper column ordering +3. **Text Search**: Full-text indexes or trigram indexes +4. **Join Operations**: Foreign key indexes on both sides +5. **Sorting Requirements**: Indexes matching ORDER BY clauses + +### Index Selection Algorithm +``` +1. Identify WHERE clause columns +2. Determine most selective columns first +3. Consider JOIN conditions +4. Include ORDER BY columns if possible +5. Evaluate covering index opportunities +6. 
Check for existing overlapping indexes +``` + +## Data Modeling Patterns + +### Star Schema (Data Warehousing) +```sql +-- Central fact table +CREATE TABLE sales_facts ( + sale_id BIGINT PRIMARY KEY, + product_id INT REFERENCES products(id), + customer_id INT REFERENCES customers(id), + date_id INT REFERENCES date_dimension(id), + store_id INT REFERENCES stores(id), + quantity INT, + unit_price DECIMAL(8,2), + total_amount DECIMAL(10,2) +); + +-- Dimension tables +CREATE TABLE date_dimension ( + id INT PRIMARY KEY, + date_value DATE, + year INT, + quarter INT, + month INT, + day_of_week INT, + is_weekend BOOLEAN +); +``` + +### Snowflake Schema +```sql +-- Normalized dimension tables +CREATE TABLE products ( + id INT PRIMARY KEY, + name VARCHAR(200), + category_id INT REFERENCES product_categories(id), + brand_id INT REFERENCES brands(id) +); + +CREATE TABLE product_categories ( + id INT PRIMARY KEY, + name VARCHAR(100), + parent_category_id INT REFERENCES product_categories(id) +); +``` + +### Document Model (JSON Storage) +```sql +-- Flexible document storage with indexing +CREATE TABLE documents ( + id UUID PRIMARY KEY, + document_type VARCHAR(50), + data JSONB, + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() +); + +-- Index on JSON properties (B-tree expression index; ->> yields text, which +-- has no default GIN operator class) +CREATE INDEX idx_documents_user_id +ON documents ((data->>'user_id')); + +CREATE INDEX idx_documents_status +ON documents ((data->>'status')) +WHERE document_type = 'order'; +``` + +### Graph Data Patterns +```sql +-- Adjacency list for hierarchical data +CREATE TABLE categories ( + id INT PRIMARY KEY, + name VARCHAR(100), + parent_id INT REFERENCES categories(id), + level INT, + path VARCHAR(500) -- Materialized path: "/1/5/12/" +); + +-- Many-to-many relationships +CREATE TABLE relationships ( + id UUID PRIMARY KEY, + from_entity_id UUID, + to_entity_id UUID, + relationship_type VARCHAR(50), + created_at TIMESTAMP, + INDEX (from_entity_id, relationship_type), + INDEX (to_entity_id, 
relationship_type) +); +``` + +## Migration Strategies + +### Zero-Downtime Migration (Expand-Contract Pattern) + +**Phase 1: Expand** +```sql +-- Add new column without constraints +ALTER TABLE users ADD COLUMN new_email VARCHAR(255); + +-- Backfill data in batches +UPDATE users SET new_email = email WHERE id BETWEEN 1 AND 1000; +-- Continue in batches... + +-- Add constraints after backfill +ALTER TABLE users ADD CONSTRAINT users_new_email_unique UNIQUE (new_email); +ALTER TABLE users ALTER COLUMN new_email SET NOT NULL; +``` + +**Phase 2: Contract** +```sql +-- Update application to use new column +-- Deploy application changes +-- Verify new column is being used + +-- Remove old column +ALTER TABLE users DROP COLUMN email; +-- Rename new column +ALTER TABLE users RENAME COLUMN new_email TO email; +``` + +### Data Type Changes +```sql +-- Safe string to integer conversion +ALTER TABLE products ADD COLUMN sku_number INTEGER; +UPDATE products SET sku_number = CAST(sku AS INTEGER) WHERE sku ~ '^[0-9]+$'; +-- Validate conversion success before dropping old column +``` + +## Partitioning Strategies + +### Horizontal Partitioning (Sharding) +```sql +-- Range partitioning by date +CREATE TABLE sales_2023 PARTITION OF sales +FOR VALUES FROM ('2023-01-01') TO ('2024-01-01'); + +CREATE TABLE sales_2024 PARTITION OF sales +FOR VALUES FROM ('2024-01-01') TO ('2025-01-01'); + +-- Hash partitioning by user_id +CREATE TABLE user_data_0 PARTITION OF user_data +FOR VALUES WITH (MODULUS 4, REMAINDER 0); + +CREATE TABLE user_data_1 PARTITION OF user_data +FOR VALUES WITH (MODULUS 4, REMAINDER 1); +``` + +### Vertical Partitioning +```sql +-- Separate frequently accessed columns +CREATE TABLE users_core ( + id INT PRIMARY KEY, + email VARCHAR(255), + status VARCHAR(20), + created_at TIMESTAMP +); + +-- Less frequently accessed profile data +CREATE TABLE users_profile ( + user_id INT PRIMARY KEY REFERENCES users_core(id), + bio TEXT, + preferences JSONB, + last_login TIMESTAMP +); 
+``` + +## Connection Management + +### Connection Pooling +- **Pool Size**: CPU cores × 2 + effective spindle count +- **Connection Lifetime**: Rotate connections to prevent resource leaks +- **Timeout Settings**: Connection, idle, and query timeouts +- **Health Checks**: Regular connection validation + +### Read Replicas Strategy +```sql +-- Write queries to primary +INSERT INTO users (email, name) VALUES ('user@example.com', 'John Doe'); + +-- Read queries to replicas (with appropriate read preference) +SELECT * FROM users WHERE status = 'active'; -- Route to read replica + +-- Consistent reads when required +SELECT * FROM users WHERE id = LAST_INSERT_ID(); -- Route to primary +``` + +## Caching Layers + +### Cache-Aside Pattern +```python +def get_user(user_id): + # Try cache first + user = cache.get(f"user:{user_id}") + if user is None: + # Cache miss - query database + user = db.query("SELECT * FROM users WHERE id = %s", user_id) + # Store in cache + cache.set(f"user:{user_id}", user, ttl=3600) + return user +``` + +### Write-Through Cache +- **Consistency**: Always keep cache and database in sync +- **Write Latency**: Higher due to dual writes +- **Data Safety**: No data loss on cache failures + +### Cache Invalidation Strategies +1. **TTL-Based**: Time-based expiration +2. **Event-Driven**: Invalidate on data changes +3. **Version-Based**: Use version numbers for consistency +4. 
**Tag-Based**: Group related cache entries + +## Database Selection Guide + +### SQL Databases +**PostgreSQL** +- **Strengths**: ACID compliance, complex queries, JSON support, extensibility +- **Use Cases**: OLTP applications, data warehousing, geospatial data +- **Scale**: Vertical scaling with read replicas + +**MySQL** +- **Strengths**: Performance, replication, wide ecosystem support +- **Use Cases**: Web applications, content management, e-commerce +- **Scale**: Horizontal scaling through sharding + +### NoSQL Databases + +**Document Stores (MongoDB, CouchDB)** +- **Strengths**: Flexible schema, horizontal scaling, developer productivity +- **Use Cases**: Content management, catalogs, user profiles +- **Trade-offs**: Eventual consistency, complex queries limitations + +**Key-Value Stores (Redis, DynamoDB)** +- **Strengths**: High performance, simple model, excellent caching +- **Use Cases**: Session storage, real-time analytics, gaming leaderboards +- **Trade-offs**: Limited query capabilities, data modeling constraints + +**Column-Family (Cassandra, HBase)** +- **Strengths**: Write-heavy workloads, linear scalability, fault tolerance +- **Use Cases**: Time-series data, IoT applications, messaging systems +- **Trade-offs**: Query flexibility, consistency model complexity + +**Graph Databases (Neo4j, Amazon Neptune)** +- **Strengths**: Relationship queries, pattern matching, recommendation engines +- **Use Cases**: Social networks, fraud detection, knowledge graphs +- **Trade-offs**: Specialized use cases, learning curve + +### NewSQL Databases +**Distributed SQL (CockroachDB, TiDB, Spanner)** +- **Strengths**: SQL compatibility with horizontal scaling +- **Use Cases**: Global applications requiring ACID guarantees +- **Trade-offs**: Complexity, latency for distributed transactions + +## Tools & Scripts + +### Schema Analyzer +- **Input**: SQL DDL files, JSON schema definitions +- **Analysis**: Normalization compliance, constraint validation, naming 
conventions +- **Output**: Analysis report, Mermaid ERD, improvement recommendations + +### Index Optimizer +- **Input**: Schema definition, query patterns +- **Analysis**: Missing indexes, redundancy detection, selectivity estimation +- **Output**: Index recommendations, CREATE INDEX statements, performance projections + +### Migration Generator +- **Input**: Current and target schemas +- **Analysis**: Schema differences, dependency resolution, risk assessment +- **Output**: Migration scripts, rollback plans, validation queries + +## Best Practices + +### Schema Design +1. **Use meaningful names**: Clear, consistent naming conventions +2. **Choose appropriate data types**: Right-sized columns for storage efficiency +3. **Define proper constraints**: Foreign keys, check constraints, unique indexes +4. **Consider future growth**: Plan for scale from the beginning +5. **Document relationships**: Clear foreign key relationships and business rules + +### Performance Optimization +1. **Index strategically**: Cover common query patterns without over-indexing +2. **Monitor query performance**: Regular analysis of slow queries +3. **Partition large tables**: Improve query performance and maintenance +4. **Use appropriate isolation levels**: Balance consistency with performance +5. **Implement connection pooling**: Efficient resource utilization + +### Security Considerations +1. **Principle of least privilege**: Grant minimal necessary permissions +2. **Encrypt sensitive data**: At rest and in transit +3. **Audit access patterns**: Monitor and log database access +4. **Validate inputs**: Prevent SQL injection attacks +5. **Regular security updates**: Keep database software current + +## Conclusion + +Effective database design requires balancing multiple competing concerns: performance, scalability, maintainability, and business requirements. 
This skill provides the tools and knowledge to make informed decisions throughout the database lifecycle, from initial schema design through production optimization and evolution. + +The included tools automate common analysis and optimization tasks, while the comprehensive guides provide the theoretical foundation for making sound architectural decisions. Whether building a new system or optimizing an existing one, these resources provide expert-level guidance for creating robust, scalable database solutions. \ No newline at end of file diff --git a/engineering/database-designer/assets/sample_query_patterns.json b/engineering/database-designer/assets/sample_query_patterns.json new file mode 100644 index 0000000..17c21f9 --- /dev/null +++ b/engineering/database-designer/assets/sample_query_patterns.json @@ -0,0 +1,375 @@ +{ + "queries": [ + { + "id": "user_login", + "type": "SELECT", + "table": "users", + "description": "User authentication lookup by email", + "where_conditions": [ + { + "column": "email", + "operator": "=", + "selectivity": 0.95 + } + ], + "join_conditions": [], + "order_by": [], + "group_by": [], + "frequency": 5000, + "avg_execution_time_ms": 2.5 + }, + { + "id": "product_search_category", + "type": "SELECT", + "table": "products", + "description": "Product search within category with pagination", + "where_conditions": [ + { + "column": "category_id", + "operator": "=", + "selectivity": 0.2 + }, + { + "column": "is_active", + "operator": "=", + "selectivity": 0.1 + } + ], + "join_conditions": [], + "order_by": [ + {"column": "created_at", "direction": "DESC"} + ], + "group_by": [], + "frequency": 2500, + "avg_execution_time_ms": 15.2 + }, + { + "id": "product_search_price_range", + "type": "SELECT", + "table": "products", + "description": "Product search by price range and brand", + "where_conditions": [ + { + "column": "price", + "operator": "BETWEEN", + "selectivity": 0.3 + }, + { + "column": "brand", + "operator": "=", + "selectivity": 0.05 + 
}, + { + "column": "is_active", + "operator": "=", + "selectivity": 0.1 + } + ], + "join_conditions": [], + "order_by": [ + {"column": "price", "direction": "ASC"} + ], + "group_by": [], + "frequency": 800, + "avg_execution_time_ms": 25.7 + }, + { + "id": "user_orders_history", + "type": "SELECT", + "table": "orders", + "description": "User order history with pagination", + "where_conditions": [ + { + "column": "user_id", + "operator": "=", + "selectivity": 0.8 + } + ], + "join_conditions": [], + "order_by": [ + {"column": "created_at", "direction": "DESC"} + ], + "group_by": [], + "frequency": 1200, + "avg_execution_time_ms": 8.3 + }, + { + "id": "order_details_with_items", + "type": "SELECT", + "table": "orders", + "description": "Order details with order items (JOIN query)", + "where_conditions": [ + { + "column": "id", + "operator": "=", + "selectivity": 1.0 + } + ], + "join_conditions": [ + { + "local_column": "id", + "foreign_table": "order_items", + "foreign_column": "order_id", + "join_type": "INNER" + } + ], + "order_by": [], + "group_by": [], + "frequency": 3000, + "avg_execution_time_ms": 12.1 + }, + { + "id": "pending_orders_processing", + "type": "SELECT", + "table": "orders", + "description": "Processing queue - pending orders by date", + "where_conditions": [ + { + "column": "status", + "operator": "=", + "selectivity": 0.15 + }, + { + "column": "created_at", + "operator": ">=", + "selectivity": 0.3 + } + ], + "join_conditions": [], + "order_by": [ + {"column": "created_at", "direction": "ASC"} + ], + "group_by": [], + "frequency": 150, + "avg_execution_time_ms": 45.2 + }, + { + "id": "user_orders_by_status", + "type": "SELECT", + "table": "orders", + "description": "User orders filtered by status", + "where_conditions": [ + { + "column": "user_id", + "operator": "=", + "selectivity": 0.8 + }, + { + "column": "status", + "operator": "IN", + "selectivity": 0.4 + } + ], + "join_conditions": [], + "order_by": [ + {"column": "created_at", "direction": 
"DESC"} + ], + "group_by": [], + "frequency": 600, + "avg_execution_time_ms": 18.5 + }, + { + "id": "product_reviews_summary", + "type": "SELECT", + "table": "product_reviews", + "description": "Product review aggregation", + "where_conditions": [ + { + "column": "product_id", + "operator": "=", + "selectivity": 0.85 + } + ], + "join_conditions": [], + "order_by": [], + "group_by": ["product_id"], + "frequency": 1800, + "avg_execution_time_ms": 22.3 + }, + { + "id": "inventory_low_stock", + "type": "SELECT", + "table": "products", + "description": "Low inventory alert query", + "where_conditions": [ + { + "column": "inventory_count", + "operator": "<=", + "selectivity": 0.1 + }, + { + "column": "is_active", + "operator": "=", + "selectivity": 0.1 + } + ], + "join_conditions": [], + "order_by": [ + {"column": "inventory_count", "direction": "ASC"} + ], + "group_by": [], + "frequency": 50, + "avg_execution_time_ms": 35.8 + }, + { + "id": "popular_products_by_category", + "type": "SELECT", + "table": "order_items", + "description": "Popular products analysis with category join", + "where_conditions": [ + { + "column": "created_at", + "operator": ">=", + "selectivity": 0.2 + } + ], + "join_conditions": [ + { + "local_column": "product_id", + "foreign_table": "products", + "foreign_column": "id", + "join_type": "INNER" + }, + { + "local_column": "category_id", + "foreign_table": "categories", + "foreign_column": "id", + "join_type": "INNER" + } + ], + "order_by": [ + {"column": "total_quantity", "direction": "DESC"} + ], + "group_by": ["product_id", "category_id"], + "frequency": 25, + "avg_execution_time_ms": 180.5 + }, + { + "id": "customer_purchase_history", + "type": "SELECT", + "table": "orders", + "description": "Customer analytics - purchase history with items", + "where_conditions": [ + { + "column": "user_id", + "operator": "=", + "selectivity": 0.8 + }, + { + "column": "status", + "operator": "IN", + "selectivity": 0.6 + } + ], + "join_conditions": [ + { + 
"local_column": "id", + "foreign_table": "order_items", + "foreign_column": "order_id", + "join_type": "INNER" + } + ], + "order_by": [ + {"column": "created_at", "direction": "DESC"} + ], + "group_by": [], + "frequency": 300, + "avg_execution_time_ms": 65.2 + }, + { + "id": "daily_sales_report", + "type": "SELECT", + "table": "orders", + "description": "Daily sales aggregation report", + "where_conditions": [ + { + "column": "created_at", + "operator": ">=", + "selectivity": 0.05 + }, + { + "column": "status", + "operator": "IN", + "selectivity": 0.6 + } + ], + "join_conditions": [], + "order_by": [ + {"column": "order_date", "direction": "DESC"} + ], + "group_by": ["DATE(created_at)"], + "frequency": 10, + "avg_execution_time_ms": 250.8 + }, + { + "id": "category_hierarchy_nav", + "type": "SELECT", + "table": "categories", + "description": "Category navigation - parent-child relationships", + "where_conditions": [ + { + "column": "parent_id", + "operator": "=", + "selectivity": 0.2 + }, + { + "column": "is_active", + "operator": "=", + "selectivity": 0.1 + } + ], + "join_conditions": [], + "order_by": [ + {"column": "sort_order", "direction": "ASC"} + ], + "group_by": [], + "frequency": 800, + "avg_execution_time_ms": 5.1 + }, + { + "id": "recent_user_reviews", + "type": "SELECT", + "table": "product_reviews", + "description": "Recent product reviews by user", + "where_conditions": [ + { + "column": "user_id", + "operator": "=", + "selectivity": 0.95 + } + ], + "join_conditions": [ + { + "local_column": "product_id", + "foreign_table": "products", + "foreign_column": "id", + "join_type": "INNER" + } + ], + "order_by": [ + {"column": "created_at", "direction": "DESC"} + ], + "group_by": [], + "frequency": 200, + "avg_execution_time_ms": 12.7 + }, + { + "id": "product_avg_rating", + "type": "SELECT", + "table": "product_reviews", + "description": "Product average rating calculation", + "where_conditions": [ + { + "column": "product_id", + "operator": "IN", + 
"selectivity": 0.1 + } + ], + "join_conditions": [], + "order_by": [], + "group_by": ["product_id"], + "frequency": 400, + "avg_execution_time_ms": 35.4 + } + ] +} \ No newline at end of file diff --git a/engineering/database-designer/assets/sample_schema.json b/engineering/database-designer/assets/sample_schema.json new file mode 100644 index 0000000..3b53eab --- /dev/null +++ b/engineering/database-designer/assets/sample_schema.json @@ -0,0 +1,372 @@ +{ + "tables": { + "users": { + "columns": { + "id": { + "type": "INTEGER", + "nullable": false, + "unique": true, + "cardinality_estimate": 50000 + }, + "email": { + "type": "VARCHAR(255)", + "nullable": false, + "unique": true, + "cardinality_estimate": 50000 + }, + "username": { + "type": "VARCHAR(50)", + "nullable": false, + "unique": true, + "cardinality_estimate": 50000 + }, + "password_hash": { + "type": "VARCHAR(255)", + "nullable": false, + "cardinality_estimate": 50000 + }, + "first_name": { + "type": "VARCHAR(100)", + "nullable": true, + "cardinality_estimate": 25000 + }, + "last_name": { + "type": "VARCHAR(100)", + "nullable": true, + "cardinality_estimate": 30000 + }, + "status": { + "type": "VARCHAR(20)", + "nullable": false, + "default": "active", + "cardinality_estimate": 5 + }, + "created_at": { + "type": "TIMESTAMP", + "nullable": false, + "default": "CURRENT_TIMESTAMP" + } + }, + "primary_key": ["id"], + "unique_constraints": [ + ["email"], + ["username"] + ], + "check_constraints": { + "chk_status_valid": "status IN ('active', 'inactive', 'suspended', 'deleted')" + }, + "indexes": [ + { + "name": "idx_users_email", + "columns": ["email"], + "unique": true + }, + { + "name": "idx_users_status", + "columns": ["status"] + } + ] + }, + "products": { + "columns": { + "id": { + "type": "INTEGER", + "nullable": false, + "unique": true, + "cardinality_estimate": 10000 + }, + "name": { + "type": "VARCHAR(255)", + "nullable": false, + "cardinality_estimate": 9500 + }, + "sku": { + "type": "VARCHAR(50)", + 
"nullable": false, + "unique": true, + "cardinality_estimate": 10000 + }, + "price": { + "type": "DECIMAL(10,2)", + "nullable": false, + "cardinality_estimate": 5000 + }, + "category_id": { + "type": "INTEGER", + "nullable": false, + "foreign_key": "categories.id", + "cardinality_estimate": 50 + }, + "brand": { + "type": "VARCHAR(100)", + "nullable": true, + "cardinality_estimate": 200 + }, + "is_active": { + "type": "BOOLEAN", + "nullable": false, + "default": true, + "cardinality_estimate": 2 + }, + "inventory_count": { + "type": "INTEGER", + "nullable": false, + "default": 0, + "cardinality_estimate": 1000 + }, + "created_at": { + "type": "TIMESTAMP", + "nullable": false, + "default": "CURRENT_TIMESTAMP" + } + }, + "primary_key": ["id"], + "unique_constraints": [ + ["sku"] + ], + "check_constraints": { + "chk_price_positive": "price > 0", + "chk_inventory_non_negative": "inventory_count >= 0" + }, + "indexes": [ + { + "name": "idx_products_category", + "columns": ["category_id"] + }, + { + "name": "idx_products_brand", + "columns": ["brand"] + }, + { + "name": "idx_products_price", + "columns": ["price"] + }, + { + "name": "idx_products_active_category", + "columns": ["is_active", "category_id"], + "partial_condition": "is_active = true" + } + ] + }, + "orders": { + "columns": { + "id": { + "type": "INTEGER", + "nullable": false, + "unique": true, + "cardinality_estimate": 200000 + }, + "order_number": { + "type": "VARCHAR(50)", + "nullable": false, + "unique": true, + "cardinality_estimate": 200000 + }, + "user_id": { + "type": "INTEGER", + "nullable": false, + "foreign_key": "users.id", + "cardinality_estimate": 40000 + }, + "status": { + "type": "VARCHAR(50)", + "nullable": false, + "default": "pending", + "cardinality_estimate": 8 + }, + "total_amount": { + "type": "DECIMAL(10,2)", + "nullable": false, + "cardinality_estimate": 50000 + }, + "payment_method": { + "type": "VARCHAR(50)", + "nullable": true, + "cardinality_estimate": 10 + }, + "created_at": { + 
"type": "TIMESTAMP", + "nullable": false, + "default": "CURRENT_TIMESTAMP" + }, + "shipped_at": { + "type": "TIMESTAMP", + "nullable": true + } + }, + "primary_key": ["id"], + "unique_constraints": [ + ["order_number"] + ], + "check_constraints": { + "chk_total_positive": "total_amount > 0", + "chk_status_valid": "status IN ('pending', 'processing', 'shipped', 'delivered', 'cancelled')" + }, + "indexes": [ + { + "name": "idx_orders_user", + "columns": ["user_id"] + }, + { + "name": "idx_orders_status", + "columns": ["status"] + }, + { + "name": "idx_orders_created", + "columns": ["created_at"] + }, + { + "name": "idx_orders_user_status", + "columns": ["user_id", "status"] + } + ] + }, + "order_items": { + "columns": { + "id": { + "type": "INTEGER", + "nullable": false, + "unique": true, + "cardinality_estimate": 800000 + }, + "order_id": { + "type": "INTEGER", + "nullable": false, + "foreign_key": "orders.id", + "cardinality_estimate": 200000 + }, + "product_id": { + "type": "INTEGER", + "nullable": false, + "foreign_key": "products.id", + "cardinality_estimate": 8000 + }, + "quantity": { + "type": "INTEGER", + "nullable": false, + "cardinality_estimate": 20 + }, + "unit_price": { + "type": "DECIMAL(10,2)", + "nullable": false, + "cardinality_estimate": 5000 + }, + "total_price": { + "type": "DECIMAL(10,2)", + "nullable": false, + "cardinality_estimate": 10000 + } + }, + "primary_key": ["id"], + "check_constraints": { + "chk_quantity_positive": "quantity > 0", + "chk_unit_price_positive": "unit_price > 0" + }, + "indexes": [ + { + "name": "idx_order_items_order", + "columns": ["order_id"] + }, + { + "name": "idx_order_items_product", + "columns": ["product_id"] + } + ] + }, + "categories": { + "columns": { + "id": { + "type": "INTEGER", + "nullable": false, + "unique": true, + "cardinality_estimate": 100 + }, + "name": { + "type": "VARCHAR(100)", + "nullable": false, + "cardinality_estimate": 100 + }, + "parent_id": { + "type": "INTEGER", + "nullable": true, + 
"foreign_key": "categories.id", + "cardinality_estimate": 20 + }, + "is_active": { + "type": "BOOLEAN", + "nullable": false, + "default": true, + "cardinality_estimate": 2 + } + }, + "primary_key": ["id"], + "indexes": [ + { + "name": "idx_categories_parent", + "columns": ["parent_id"] + }, + { + "name": "idx_categories_active", + "columns": ["is_active"] + } + ] + }, + "product_reviews": { + "columns": { + "id": { + "type": "INTEGER", + "nullable": false, + "unique": true, + "cardinality_estimate": 150000 + }, + "product_id": { + "type": "INTEGER", + "nullable": false, + "foreign_key": "products.id", + "cardinality_estimate": 8000 + }, + "user_id": { + "type": "INTEGER", + "nullable": false, + "foreign_key": "users.id", + "cardinality_estimate": 30000 + }, + "rating": { + "type": "INTEGER", + "nullable": false, + "cardinality_estimate": 5 + }, + "review_text": { + "type": "TEXT", + "nullable": true + }, + "created_at": { + "type": "TIMESTAMP", + "nullable": false, + "default": "CURRENT_TIMESTAMP" + } + }, + "primary_key": ["id"], + "unique_constraints": [ + ["product_id", "user_id"] + ], + "check_constraints": { + "chk_rating_valid": "rating BETWEEN 1 AND 5" + }, + "indexes": [ + { + "name": "idx_reviews_product", + "columns": ["product_id"] + }, + { + "name": "idx_reviews_user", + "columns": ["user_id"] + }, + { + "name": "idx_reviews_rating", + "columns": ["rating"] + } + ] + } + } +} \ No newline at end of file diff --git a/engineering/database-designer/assets/sample_schema.sql b/engineering/database-designer/assets/sample_schema.sql new file mode 100644 index 0000000..abc0294 --- /dev/null +++ b/engineering/database-designer/assets/sample_schema.sql @@ -0,0 +1,207 @@ +-- Sample E-commerce Database Schema +-- Demonstrates various normalization levels and common patterns + +-- Users table - well normalized +CREATE TABLE users ( + id INTEGER PRIMARY KEY, + email VARCHAR(255) NOT NULL UNIQUE, + username VARCHAR(50) NOT NULL UNIQUE, + password_hash VARCHAR(255) NOT 
NULL, + first_name VARCHAR(100), + last_name VARCHAR(100), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + status VARCHAR(20) DEFAULT 'active' +); + +-- Categories table - hierarchical structure +CREATE TABLE categories ( + id INTEGER PRIMARY KEY, + name VARCHAR(100) NOT NULL, + slug VARCHAR(100) NOT NULL UNIQUE, + parent_id INTEGER REFERENCES categories(id), + description TEXT, + is_active BOOLEAN DEFAULT true, + sort_order INTEGER DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Products table - potential normalization issues +CREATE TABLE products ( + id INTEGER PRIMARY KEY, + name VARCHAR(255) NOT NULL, + sku VARCHAR(50) NOT NULL UNIQUE, + description TEXT, + price DECIMAL(10,2) NOT NULL, + cost DECIMAL(10,2), + weight DECIMAL(8,2), + dimensions VARCHAR(50), -- Potential 1NF violation: "10x5x3 inches" + category_id INTEGER REFERENCES categories(id), + category_name VARCHAR(100), -- Redundant with categories.name (3NF violation) + brand VARCHAR(100), -- Should be normalized to separate brands table + tags VARCHAR(500), -- Potential 1NF violation: comma-separated tags + inventory_count INTEGER DEFAULT 0, + reorder_point INTEGER DEFAULT 10, + supplier_name VARCHAR(100), -- Should be normalized + supplier_contact VARCHAR(255), -- Should be normalized + is_active BOOLEAN DEFAULT true, + featured BOOLEAN DEFAULT false, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Addresses table - good normalization +CREATE TABLE addresses ( + id INTEGER PRIMARY KEY, + user_id INTEGER REFERENCES users(id), + address_type VARCHAR(20) DEFAULT 'shipping', -- 'shipping', 'billing' + street_address VARCHAR(255) NOT NULL, + street_address_2 VARCHAR(255), + city VARCHAR(100) NOT NULL, + state VARCHAR(50) NOT NULL, + postal_code VARCHAR(20) NOT NULL, + country VARCHAR(50) NOT NULL DEFAULT 'US', + is_default BOOLEAN DEFAULT false, + created_at TIMESTAMP 
DEFAULT CURRENT_TIMESTAMP +); + +-- Orders table - mixed normalization issues +CREATE TABLE orders ( + id INTEGER PRIMARY KEY, + order_number VARCHAR(50) NOT NULL UNIQUE, + user_id INTEGER REFERENCES users(id), + user_email VARCHAR(255), -- Denormalized for performance/historical reasons + user_name VARCHAR(200), -- Denormalized for performance/historical reasons + status VARCHAR(50) NOT NULL DEFAULT 'pending', + total_amount DECIMAL(10,2) NOT NULL, + tax_amount DECIMAL(10,2) NOT NULL, + shipping_amount DECIMAL(10,2) NOT NULL, + discount_amount DECIMAL(10,2) DEFAULT 0, + payment_method VARCHAR(50), -- Should be normalized to payment_methods + payment_status VARCHAR(50) DEFAULT 'pending', + shipping_address_id INTEGER REFERENCES addresses(id), + billing_address_id INTEGER REFERENCES addresses(id), + -- Denormalized shipping address for historical preservation + shipping_street VARCHAR(255), + shipping_city VARCHAR(100), + shipping_state VARCHAR(50), + shipping_postal_code VARCHAR(20), + shipping_country VARCHAR(50), + notes TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + shipped_at TIMESTAMP, + delivered_at TIMESTAMP +); + +-- Order items table - properly normalized +CREATE TABLE order_items ( + id INTEGER PRIMARY KEY, + order_id INTEGER REFERENCES orders(id), + product_id INTEGER REFERENCES products(id), + product_name VARCHAR(255), -- Denormalized for historical reasons + product_sku VARCHAR(50), -- Denormalized for historical reasons + quantity INTEGER NOT NULL, + unit_price DECIMAL(10,2) NOT NULL, + total_price DECIMAL(10,2) NOT NULL, -- Calculated field (could be computed) + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Shopping cart table - session-based data +CREATE TABLE shopping_cart ( + id INTEGER PRIMARY KEY, + user_id INTEGER REFERENCES users(id), + session_id VARCHAR(255), -- For anonymous users + product_id INTEGER REFERENCES products(id), + quantity INTEGER NOT NULL DEFAULT 1, + 
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + UNIQUE(user_id, product_id), + UNIQUE(session_id, product_id) +); + +-- Product reviews - user-generated content +CREATE TABLE product_reviews ( + id INTEGER PRIMARY KEY, + product_id INTEGER REFERENCES products(id), + user_id INTEGER REFERENCES users(id), + rating INTEGER NOT NULL CHECK (rating BETWEEN 1 AND 5), + title VARCHAR(200), + review_text TEXT, + verified_purchase BOOLEAN DEFAULT false, + helpful_count INTEGER DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + UNIQUE(product_id, user_id) -- One review per user per product +); + +-- Coupons table - promotional data +CREATE TABLE coupons ( + id INTEGER PRIMARY KEY, + code VARCHAR(50) NOT NULL UNIQUE, + description VARCHAR(255), + discount_type VARCHAR(20) NOT NULL, -- 'percentage', 'fixed_amount' + discount_value DECIMAL(8,2) NOT NULL, + minimum_amount DECIMAL(10,2), + maximum_discount DECIMAL(10,2), + usage_limit INTEGER, + usage_count INTEGER DEFAULT 0, + valid_from TIMESTAMP NOT NULL, + valid_until TIMESTAMP NOT NULL, + is_active BOOLEAN DEFAULT true, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Audit log table - for tracking changes +CREATE TABLE audit_log ( + id INTEGER PRIMARY KEY, + table_name VARCHAR(50) NOT NULL, + record_id INTEGER NOT NULL, + action VARCHAR(20) NOT NULL, -- 'INSERT', 'UPDATE', 'DELETE' + old_values TEXT, -- JSON format + new_values TEXT, -- JSON format + user_id INTEGER REFERENCES users(id), + ip_address VARCHAR(45), + user_agent TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Problematic table - multiple normalization violations +CREATE TABLE user_preferences ( + user_id INTEGER PRIMARY KEY REFERENCES users(id), + preferred_categories VARCHAR(500), -- CSV list - 1NF violation + email_notifications VARCHAR(255), -- "daily,weekly,promotions" - 1NF violation + user_name VARCHAR(200), -- 
Redundant with users table - 3NF violation + user_email VARCHAR(255), -- Redundant with users table - 3NF violation + theme VARCHAR(50) DEFAULT 'light', + language VARCHAR(10) DEFAULT 'en', + timezone VARCHAR(50) DEFAULT 'UTC', + currency VARCHAR(3) DEFAULT 'USD', + date_format VARCHAR(20) DEFAULT 'YYYY-MM-DD', + newsletter_subscribed BOOLEAN DEFAULT true, + sms_notifications BOOLEAN DEFAULT false, + push_notifications BOOLEAN DEFAULT true, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Create some basic indexes (some missing, some redundant for demonstration) +CREATE INDEX idx_users_email ON users (email); +CREATE INDEX idx_users_username ON users (username); -- Redundant due to UNIQUE constraint +CREATE INDEX idx_products_category ON products (category_id); +CREATE INDEX idx_products_brand ON products (brand); +CREATE INDEX idx_products_sku ON products (sku); -- Redundant due to UNIQUE constraint +CREATE INDEX idx_orders_user ON orders (user_id); +CREATE INDEX idx_orders_status ON orders (status); +CREATE INDEX idx_orders_created ON orders (created_at); +CREATE INDEX idx_order_items_order ON order_items (order_id); +CREATE INDEX idx_order_items_product ON order_items (product_id); +-- Missing index on addresses.user_id +-- Missing composite index on orders (user_id, status) +-- Missing index on product_reviews.product_id + +-- Constraints that should exist but are missing +-- ALTER TABLE products ADD CONSTRAINT chk_price_positive CHECK (price > 0); +-- ALTER TABLE products ADD CONSTRAINT chk_inventory_non_negative CHECK (inventory_count >= 0); +-- ALTER TABLE order_items ADD CONSTRAINT chk_quantity_positive CHECK (quantity > 0); +-- ALTER TABLE orders ADD CONSTRAINT chk_total_positive CHECK (total_amount > 0); \ No newline at end of file diff --git a/engineering/database-designer/expected_outputs/index_optimization_sample.txt 
b/engineering/database-designer/expected_outputs/index_optimization_sample.txt new file mode 100644 index 0000000..271f0be --- /dev/null +++ b/engineering/database-designer/expected_outputs/index_optimization_sample.txt @@ -0,0 +1,60 @@ +DATABASE INDEX OPTIMIZATION REPORT +================================================== + +ANALYSIS SUMMARY +---------------- +Tables Analyzed: 6 +Query Patterns: 15 +Existing Indexes: 12 +New Recommendations: 8 +High Priority: 4 +Redundancy Issues: 2 + +HIGH PRIORITY RECOMMENDATIONS (4) +---------------------------------- +1. orders: Optimize multi-column WHERE conditions: user_id, status, created_at + Columns: user_id, status, created_at + Benefit: Very High + SQL: CREATE INDEX idx_orders_user_status_created ON orders (user_id, status, created_at); + +2. products: Optimize WHERE category_id = AND is_active = queries + Columns: category_id, is_active + Benefit: High + SQL: CREATE INDEX idx_products_category_active ON products (category_id, is_active); + +3. order_items: Optimize JOIN with products table on product_id + Columns: product_id + Benefit: High (frequent JOINs) + SQL: CREATE INDEX idx_order_items_product_join ON order_items (product_id); + +4. 
product_reviews: Covering index for WHERE + ORDER BY optimization + Columns: product_id, created_at + Benefit: High (eliminates table lookups for SELECT) + SQL: CREATE INDEX idx_product_reviews_covering_product_created ON product_reviews (product_id, created_at) INCLUDE (rating, review_text); + +REDUNDANCY ISSUES (2) +--------------------- +• DUPLICATE: Indexes 'idx_users_email' and 'unique_users_email' are identical + Recommendation: Drop one of the duplicate indexes + SQL: DROP INDEX idx_users_email; + +• OVERLAPPING: Index 'idx_products_category' overlaps 85% with 'idx_products_category_active' + Recommendation: Consider dropping 'idx_products_category' as it's largely covered by 'idx_products_category_active' + SQL: DROP INDEX idx_products_category; + +PERFORMANCE IMPACT ANALYSIS +---------------------------- +Queries to be optimized: 12 +High impact optimizations: 6 +Estimated insert overhead: 40% + +RECOMMENDED CREATE INDEX STATEMENTS +------------------------------------ +1. CREATE INDEX idx_orders_user_status_created ON orders (user_id, status, created_at); +2. CREATE INDEX idx_products_category_active ON products (category_id, is_active); +3. CREATE INDEX idx_order_items_product_join ON order_items (product_id); +4. CREATE INDEX idx_product_reviews_covering_product_created ON product_reviews (product_id, created_at) INCLUDE (rating, review_text); +5. CREATE INDEX idx_products_price_brand ON products (price, brand); +6. CREATE INDEX idx_orders_status_created ON orders (status, created_at); +7. CREATE INDEX idx_categories_parent_active ON categories (parent_id, is_active); +8. 
CREATE INDEX idx_product_reviews_user_created ON product_reviews (user_id, created_at); \ No newline at end of file diff --git a/engineering/database-designer/expected_outputs/migration_sample.txt b/engineering/database-designer/expected_outputs/migration_sample.txt new file mode 100644 index 0000000..ced0795 --- /dev/null +++ b/engineering/database-designer/expected_outputs/migration_sample.txt @@ -0,0 +1,124 @@ +DATABASE MIGRATION PLAN +================================================== +Migration ID: a7b3c9d2 +Created: 2024-02-16T15:30:00Z +Zero Downtime: false + +MIGRATION SUMMARY +----------------- +Total Steps: 12 +Tables Added: 1 +Tables Dropped: 0 +Tables Renamed: 0 +Columns Added: 3 +Columns Dropped: 1 +Columns Modified: 2 +Constraints Added: 4 +Constraints Dropped: 1 +Indexes Added: 2 +Indexes Dropped: 1 + +RISK ASSESSMENT +--------------- +High Risk Steps: 3 +Medium Risk Steps: 4 +Low Risk Steps: 5 + +MIGRATION STEPS +--------------- +1. Create table brands with 4 columns (LOW risk) + Type: CREATE_TABLE + Forward SQL: CREATE TABLE brands ( + id INTEGER PRIMARY KEY, + name VARCHAR(100) NOT NULL, + description TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ); + Rollback SQL: DROP TABLE IF EXISTS brands; + +2. Add column brand_id to products (LOW risk) + Type: ADD_COLUMN + Forward SQL: ALTER TABLE products ADD COLUMN brand_id INTEGER; + Rollback SQL: ALTER TABLE products DROP COLUMN brand_id; + +3. Add column email_verified to users (LOW risk) + Type: ADD_COLUMN + Forward SQL: ALTER TABLE users ADD COLUMN email_verified BOOLEAN DEFAULT false; + Rollback SQL: ALTER TABLE users DROP COLUMN email_verified; + +4. Add column last_login to users (LOW risk) + Type: ADD_COLUMN + Forward SQL: ALTER TABLE users ADD COLUMN last_login TIMESTAMP; + Rollback SQL: ALTER TABLE users DROP COLUMN last_login; + +5. 
Modify column price: type: DECIMAL(10,2) -> DECIMAL(12,2) (LOW risk) + Type: MODIFY_COLUMN + Forward SQL: ALTER TABLE products + ALTER COLUMN price TYPE DECIMAL(12,2); + Rollback SQL: ALTER TABLE products + ALTER COLUMN price TYPE DECIMAL(10,2); + +6. Modify column inventory_count: nullable: true -> false (HIGH risk) + Type: MODIFY_COLUMN + Forward SQL: ALTER TABLE products + ALTER COLUMN inventory_count SET NOT NULL; + Rollback SQL: ALTER TABLE products + ALTER COLUMN inventory_count DROP NOT NULL; + +7. Add primary key on id (MEDIUM risk) + Type: ADD_CONSTRAINT + Forward SQL: ALTER TABLE brands ADD CONSTRAINT pk_brands PRIMARY KEY (id); + Rollback SQL: ALTER TABLE brands DROP CONSTRAINT pk_brands; + +8. Add foreign key constraint on brand_id (MEDIUM risk) + Type: ADD_CONSTRAINT + Forward SQL: ALTER TABLE products ADD CONSTRAINT fk_products_brand_id FOREIGN KEY (brand_id) REFERENCES brands(id); + Rollback SQL: ALTER TABLE products DROP CONSTRAINT fk_products_brand_id; + +9. Add unique constraint on name (MEDIUM risk) + Type: ADD_CONSTRAINT + Forward SQL: ALTER TABLE brands ADD CONSTRAINT uq_brands_name UNIQUE (name); + Rollback SQL: ALTER TABLE brands DROP CONSTRAINT uq_brands_name; + +10. Add check constraint: price > 0 (MEDIUM risk) + Type: ADD_CONSTRAINT + Forward SQL: ALTER TABLE products ADD CONSTRAINT chk_products_price_positive CHECK (price > 0); + Rollback SQL: ALTER TABLE products DROP CONSTRAINT chk_products_price_positive; + +11. Create index idx_products_brand_id on (brand_id) (LOW risk) + Type: ADD_INDEX + Forward SQL: CREATE INDEX idx_products_brand_id ON products (brand_id); + Rollback SQL: DROP INDEX idx_products_brand_id; + Estimated Time: 1-5 minutes depending on table size + +12. 
Create index idx_users_email_verified on (email_verified) (LOW risk) + Type: ADD_INDEX + Forward SQL: CREATE INDEX idx_users_email_verified ON users (email_verified); + Rollback SQL: DROP INDEX idx_users_email_verified; + Estimated Time: 1-5 minutes depending on table size + +VALIDATION CHECKS +----------------- +• Verify table brands exists + SQL: SELECT COUNT(*) FROM information_schema.tables WHERE table_name = 'brands'; + Expected: 1 + +• Verify column brand_id exists in products + SQL: SELECT COUNT(*) FROM information_schema.columns WHERE table_name = 'products' AND column_name = 'brand_id'; + Expected: 1 + +• Verify column email_verified exists in users + SQL: SELECT COUNT(*) FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'email_verified'; + Expected: 1 + +• Verify column modification in products + SQL: SELECT data_type, is_nullable FROM information_schema.columns WHERE table_name = 'products' AND column_name = 'price'; + Expected: 1 + +• Verify index idx_products_brand_id exists + SQL: SELECT COUNT(*) FROM information_schema.statistics WHERE index_name = 'idx_products_brand_id'; + Expected: 1 + +• Verify index idx_users_email_verified exists + SQL: SELECT COUNT(*) FROM information_schema.statistics WHERE index_name = 'idx_users_email_verified'; + Expected: 1 \ No newline at end of file diff --git a/engineering/database-designer/expected_outputs/schema_analysis_sample.txt b/engineering/database-designer/expected_outputs/schema_analysis_sample.txt new file mode 100644 index 0000000..b5caff5 --- /dev/null +++ b/engineering/database-designer/expected_outputs/schema_analysis_sample.txt @@ -0,0 +1,222 @@ +DATABASE SCHEMA ANALYSIS REPORT +================================================== + +SCHEMA OVERVIEW +--------------- +Total Tables: 8 +Total Columns: 52 +Tables with Primary Keys: 8 +Total Foreign Keys: 6 +Total Indexes: 15 + +KEY RECOMMENDATIONS +------------------ +1. Address 3 high-severity issues immediately +2. 
Add primary keys to tables: +3. Review 4 VARCHAR(255) columns for right-sizing +4. Consider adding 2 foreign key constraints for referential integrity +5. Review 8 normalization issues for schema optimization + +NORMALIZATION ISSUES (8 total) +------------------------------ +High: 2, Medium: 3, Low: 2, Warning: 1 + +• products: Column 'dimensions' appears to store delimited values + Suggestion: Create separate table for individual values with foreign key relationship + +• products: Column 'tags' appears to store delimited values + Suggestion: Create separate table for individual values with foreign key relationship + +• products: Columns ['category_name'] may have transitive dependency through 'category_id' + Suggestion: Consider creating separate 'category' table with these columns + +• orders: Columns ['shipping_street', 'shipping_city', 'shipping_state', 'shipping_postal_code', 'shipping_country'] may have transitive dependency through 'shipping_address_id' + Suggestion: Consider creating separate 'shipping_address' table with these columns + +• user_preferences: Column 'preferred_categories' appears to store delimited values + Suggestion: Create separate table for individual values with foreign key relationship + +DATA TYPE ISSUES (4 total) +-------------------------- +• products.dimensions: VARCHAR(255) antipattern + Current: VARCHAR(50) → Suggested: Appropriately sized VARCHAR or TEXT + Rationale: VARCHAR(255) is often used as default without considering actual data length requirements + +• products.tags: VARCHAR(255) antipattern + Current: VARCHAR(500) → Suggested: Appropriately sized VARCHAR or TEXT + Rationale: VARCHAR(255) is often used as default without considering actual data length requirements + +• user_preferences.preferred_categories: VARCHAR(255) antipattern + Current: VARCHAR(500) → Suggested: Appropriately sized VARCHAR or TEXT + Rationale: VARCHAR(255) is often used as default without considering actual data length requirements + +• 
user_preferences.email_notifications: VARCHAR(255) antipattern + Current: VARCHAR(255) → Suggested: Appropriately sized VARCHAR or TEXT + Rationale: VARCHAR(255) is often used as default without considering actual data length requirements + +CONSTRAINT ISSUES (12 total) +----------------------------- +High: 0, Medium: 4, Low: 8 + +• products: Column 'price' should validate positive values + Suggestion: Add CHECK constraint: price > 0 + +• products: Column 'inventory_count' should validate positive values + Suggestion: Add CHECK constraint: inventory_count > 0 + +• orders: Column 'total_amount' should validate positive values + Suggestion: Add CHECK constraint: total_amount > 0 + +• order_items: Column 'quantity' should validate positive values + Suggestion: Add CHECK constraint: quantity > 0 + +• order_items: Column 'unit_price' should validate positive values + Suggestion: Add CHECK constraint: unit_price > 0 + +MISSING INDEXES (3 total) +------------------------- +• addresses.user_id (foreign_key) + SQL: CREATE INDEX idx_addresses_user_id ON addresses (user_id); + +• product_reviews.product_id (foreign_key) + SQL: CREATE INDEX idx_product_reviews_product_id ON product_reviews (product_id); + +• shopping_cart.user_id (foreign_key) + SQL: CREATE INDEX idx_shopping_cart_user_id ON shopping_cart (user_id); + +MERMAID ERD +=========== +erDiagram + USERS { + INTEGER id "PK" + VARCHAR(255) email "NOT NULL" + VARCHAR(50) username "NOT NULL" + VARCHAR(255) password_hash "NOT NULL" + VARCHAR(100) first_name + VARCHAR(100) last_name + TIMESTAMP created_at + TIMESTAMP updated_at + VARCHAR(20) status + } + + CATEGORIES { + INTEGER id "PK" + VARCHAR(100) name "NOT NULL" + VARCHAR(100) slug "NOT NULL UNIQUE" + INTEGER parent_id "FK" + TEXT description + BOOLEAN is_active + INTEGER sort_order + TIMESTAMP created_at + } + + PRODUCTS { + INTEGER id "PK" + VARCHAR(255) name "NOT NULL" + VARCHAR(50) sku "NOT NULL UNIQUE" + TEXT description + DECIMAL(10,2) price "NOT NULL" + 
DECIMAL(10,2) cost + DECIMAL(8,2) weight + VARCHAR(50) dimensions + INTEGER category_id "FK" + VARCHAR(100) category_name + VARCHAR(100) brand + VARCHAR(500) tags + INTEGER inventory_count + INTEGER reorder_point + VARCHAR(100) supplier_name + VARCHAR(255) supplier_contact + BOOLEAN is_active + BOOLEAN featured + TIMESTAMP created_at + TIMESTAMP updated_at + } + + ADDRESSES { + INTEGER id "PK" + INTEGER user_id "FK" + VARCHAR(20) address_type + VARCHAR(255) street_address "NOT NULL" + VARCHAR(255) street_address_2 + VARCHAR(100) city "NOT NULL" + VARCHAR(50) state "NOT NULL" + VARCHAR(20) postal_code "NOT NULL" + VARCHAR(50) country "NOT NULL" + BOOLEAN is_default + TIMESTAMP created_at + } + + ORDERS { + INTEGER id "PK" + VARCHAR(50) order_number "NOT NULL UNIQUE" + INTEGER user_id "FK" + VARCHAR(255) user_email + VARCHAR(200) user_name + VARCHAR(50) status "NOT NULL" + DECIMAL(10,2) total_amount "NOT NULL" + DECIMAL(10,2) tax_amount "NOT NULL" + DECIMAL(10,2) shipping_amount "NOT NULL" + DECIMAL(10,2) discount_amount + VARCHAR(50) payment_method + VARCHAR(50) payment_status + INTEGER shipping_address_id "FK" + INTEGER billing_address_id "FK" + VARCHAR(255) shipping_street + VARCHAR(100) shipping_city + VARCHAR(50) shipping_state + VARCHAR(20) shipping_postal_code + VARCHAR(50) shipping_country + TEXT notes + TIMESTAMP created_at + TIMESTAMP updated_at + TIMESTAMP shipped_at + TIMESTAMP delivered_at + } + + ORDER_ITEMS { + INTEGER id "PK" + INTEGER order_id "FK" + INTEGER product_id "FK" + VARCHAR(255) product_name + VARCHAR(50) product_sku + INTEGER quantity "NOT NULL" + DECIMAL(10,2) unit_price "NOT NULL" + DECIMAL(10,2) total_price "NOT NULL" + TIMESTAMP created_at + } + + SHOPPING_CART { + INTEGER id "PK" + INTEGER user_id "FK" + VARCHAR(255) session_id + INTEGER product_id "FK" + INTEGER quantity "NOT NULL" + TIMESTAMP added_at + TIMESTAMP updated_at + } + + PRODUCT_REVIEWS { + INTEGER id "PK" + INTEGER product_id "FK" + INTEGER user_id "FK" + INTEGER rating 
"NOT NULL" + VARCHAR(200) title + TEXT review_text + BOOLEAN verified_purchase + INTEGER helpful_count + TIMESTAMP created_at + TIMESTAMP updated_at + } + + CATEGORIES ||--o{ CATEGORIES : has + CATEGORIES ||--o{ PRODUCTS : has + USERS ||--o{ ADDRESSES : has + USERS ||--o{ ORDERS : has + USERS ||--o{ SHOPPING_CART : has + USERS ||--o{ PRODUCT_REVIEWS : has + ADDRESSES ||--o{ ORDERS : has + ORDERS ||--o{ ORDER_ITEMS : has + PRODUCTS ||--o{ ORDER_ITEMS : has + PRODUCTS ||--o{ SHOPPING_CART : has + PRODUCTS ||--o{ PRODUCT_REVIEWS : has \ No newline at end of file diff --git a/engineering/database-designer/index_optimizer.py b/engineering/database-designer/index_optimizer.py new file mode 100644 index 0000000..933c334 --- /dev/null +++ b/engineering/database-designer/index_optimizer.py @@ -0,0 +1,926 @@ +#!/usr/bin/env python3 +""" +Database Index Optimizer + +Analyzes schema definitions and query patterns to recommend optimal indexes: +- Identifies missing indexes for common query patterns +- Detects redundant and overlapping indexes +- Suggests composite index column ordering +- Estimates selectivity and performance impact +- Generates CREATE INDEX statements with rationale + +Input: Schema JSON + Query patterns JSON +Output: Index recommendations + CREATE INDEX SQL + before/after analysis + +Usage: + python index_optimizer.py --schema schema.json --queries queries.json --output recommendations.json + python index_optimizer.py --schema schema.json --queries queries.json --format text + python index_optimizer.py --schema schema.json --queries queries.json --analyze-existing +""" + +import argparse +import json +import re +import sys +from collections import defaultdict, namedtuple, Counter +from typing import Dict, List, Set, Tuple, Optional, Any +from dataclasses import dataclass, asdict +import hashlib + + +@dataclass +class Column: + name: str + data_type: str + nullable: bool = True + unique: bool = False + cardinality_estimate: Optional[int] = None + + +@dataclass 
@dataclass
class Index:
    """A database index, either existing in the schema or recommended."""
    name: str
    table: str
    columns: List[str]
    unique: bool = False
    index_type: str = "btree"
    partial_condition: Optional[str] = None
    # FIX: was annotated `List[str] = None`; the default really is None,
    # so the annotation must be Optional to match.
    include_columns: Optional[List[str]] = None
    size_estimate: Optional[int] = None


@dataclass
class QueryPattern:
    """An observed query shape used to drive index recommendations."""
    query_id: str
    query_type: str  # SELECT, INSERT, UPDATE, DELETE
    table: str
    where_conditions: List[Dict[str, Any]]
    join_conditions: List[Dict[str, Any]]
    order_by: List[Dict[str, str]]  # column, direction
    group_by: List[str]
    frequency: int = 1
    avg_execution_time_ms: Optional[float] = None


@dataclass
class IndexRecommendation:
    """A proposed index plus the rationale and impact estimates behind it."""
    recommendation_id: str
    table: str
    recommended_index: Index
    reason: str
    query_patterns_helped: List[str]
    estimated_benefit: str
    estimated_overhead: str
    priority: int  # 1 = highest priority
    sql_statement: str
    selectivity_analysis: Dict[str, Any]


@dataclass
class RedundancyIssue:
    """A problem found among existing indexes, with remediation SQL."""
    issue_type: str  # DUPLICATE, OVERLAPPING, UNUSED
    affected_indexes: List[str]
    table: str
    description: str
    recommendation: str
    sql_statements: List[str]


class SelectivityEstimator:
    """Estimates column selectivity based on naming patterns and data types."""

    def __init__(self):
        # Selectivity patterns based on common column names and types.
        self.high_selectivity_patterns = [
            r'.*_id$', r'^id$', r'uuid', r'guid', r'email', r'username', r'ssn',
            r'account.*number', r'transaction.*id', r'reference.*number'
        ]

        self.medium_selectivity_patterns = [
            r'name$', r'title$', r'description$', r'address', r'phone', r'zip',
            r'postal.*code', r'serial.*number', r'sku', r'product.*code'
        ]

        self.low_selectivity_patterns = [
            r'status$', r'type$', r'category', r'state$', r'flag$', r'active$',
            r'enabled$', r'deleted$', r'visible$', r'gender$', r'priority$'
        ]

        self.very_low_selectivity_patterns = [
            r'is_.*', r'has_.*', r'can_.*', r'boolean', r'bool'
        ]

    def estimate_selectivity(self, column: "Column", table_size_estimate: int = 10000) -> float:
        """Estimate column selectivity (0.0 = all same values, 1.0 = all unique values).

        Precedence: explicit uniqueness, then schema-provided cardinality,
        then name-pattern heuristics, then data-type heuristics.
        """
        column_name_lower = column.name.lower()

        # Unique columns and primary-key-ish names are maximally selective.
        if column.unique or column_name_lower in ['id', 'uuid', 'guid']:
            return 1.0

        # Prefer real cardinality data when the schema provides it
        # (a 0/None estimate is treated as "unknown" and falls through).
        if column.cardinality_estimate:
            return min(column.cardinality_estimate / table_size_estimate, 1.0)

        # Pattern-based estimation, most selective bucket first.
        for pattern in self.high_selectivity_patterns:
            if re.search(pattern, column_name_lower):
                return 0.9  # Very high selectivity

        for pattern in self.medium_selectivity_patterns:
            if re.search(pattern, column_name_lower):
                return 0.7  # Good selectivity

        for pattern in self.low_selectivity_patterns:
            if re.search(pattern, column_name_lower):
                return 0.2  # Poor selectivity

        for pattern in self.very_low_selectivity_patterns:
            if re.search(pattern, column_name_lower):
                return 0.1  # Very poor selectivity

        # Data-type-based estimation.
        data_type_upper = column.data_type.upper()
        if data_type_upper.startswith('BOOL'):
            return 0.1
        elif data_type_upper.startswith(('TINYINT', 'SMALLINT')):
            return 0.3
        # FIX: BIGINT previously fell through to the 0.5 default even though
        # it is an integer type like INT; treat both as high-selectivity ints.
        elif data_type_upper.startswith(('INT', 'BIGINT')):
            return 0.8
        elif data_type_upper.startswith(('VARCHAR', 'TEXT')):
            # Estimate based on column name.
            if 'name' in column_name_lower:
                return 0.7
            elif 'description' in column_name_lower or 'comment' in column_name_lower:
                return 0.9
            else:
                return 0.6

        # Default moderate selectivity.
        return 0.5
if 'tables' not in schema_data: + raise ValueError("Schema must contain 'tables' key") + + for table_name, table_def in schema_data['tables'].items(): + self.tables[table_name] = {} + self.existing_indexes[table_name] = [] + + # Load columns + for col_name, col_def in table_def.get('columns', {}).items(): + column = Column( + name=col_name, + data_type=col_def.get('type', 'VARCHAR(255)'), + nullable=col_def.get('nullable', True), + unique=col_def.get('unique', False), + cardinality_estimate=col_def.get('cardinality_estimate') + ) + self.tables[table_name][col_name] = column + + # Load existing indexes + for idx_def in table_def.get('indexes', []): + index = Index( + name=idx_def['name'], + table=table_name, + columns=idx_def['columns'], + unique=idx_def.get('unique', False), + index_type=idx_def.get('type', 'btree'), + partial_condition=idx_def.get('partial_condition'), + include_columns=idx_def.get('include_columns', []) + ) + self.existing_indexes[table_name].append(index) + + def load_query_patterns(self, query_data: Dict[str, Any]) -> None: + """Load query patterns for analysis.""" + if 'queries' not in query_data: + raise ValueError("Query data must contain 'queries' key") + + for query_def in query_data['queries']: + pattern = QueryPattern( + query_id=query_def['id'], + query_type=query_def.get('type', 'SELECT').upper(), + table=query_def['table'], + where_conditions=query_def.get('where_conditions', []), + join_conditions=query_def.get('join_conditions', []), + order_by=query_def.get('order_by', []), + group_by=query_def.get('group_by', []), + frequency=query_def.get('frequency', 1), + avg_execution_time_ms=query_def.get('avg_execution_time_ms') + ) + self.query_patterns.append(pattern) + + def analyze_missing_indexes(self) -> List[IndexRecommendation]: + """Identify missing indexes based on query patterns.""" + recommendations = [] + + for pattern in self.query_patterns: + table_name = pattern.table + if table_name not in self.tables: + continue + + # 
Analyze WHERE conditions for single-column indexes + for condition in pattern.where_conditions: + column = condition.get('column') + operator = condition.get('operator', '=') + + if column and column in self.tables[table_name]: + if not self._has_covering_index(table_name, [column]): + recommendation = self._create_single_column_recommendation( + table_name, column, pattern, operator + ) + if recommendation: + recommendations.append(recommendation) + + # Analyze composite indexes for multi-column WHERE conditions + where_columns = [cond.get('column') for cond in pattern.where_conditions + if cond.get('column') and cond.get('column') in self.tables[table_name]] + + if len(where_columns) > 1: + composite_recommendation = self._create_composite_recommendation( + table_name, where_columns, pattern + ) + if composite_recommendation: + recommendations.append(composite_recommendation) + + # Analyze covering indexes for SELECT with ORDER BY + if pattern.order_by and where_columns: + covering_recommendation = self._create_covering_index_recommendation( + table_name, where_columns, pattern + ) + if covering_recommendation: + recommendations.append(covering_recommendation) + + # Analyze JOIN conditions + for join_condition in pattern.join_conditions: + local_column = join_condition.get('local_column') + if local_column and local_column in self.tables[table_name]: + if not self._has_covering_index(table_name, [local_column]): + recommendation = self._create_join_index_recommendation( + table_name, local_column, pattern, join_condition + ) + if recommendation: + recommendations.append(recommendation) + + # Remove duplicates and prioritize + recommendations = self._deduplicate_recommendations(recommendations) + recommendations = self._prioritize_recommendations(recommendations) + + return recommendations + + def _has_covering_index(self, table_name: str, columns: List[str]) -> bool: + """Check if existing indexes cover the specified columns.""" + if table_name not in 
self.existing_indexes: + return False + + for index in self.existing_indexes[table_name]: + # Check if index starts with required columns (prefix match for composite) + if len(index.columns) >= len(columns): + if index.columns[:len(columns)] == columns: + return True + + return False + + def _create_single_column_recommendation( + self, + table_name: str, + column: str, + pattern: QueryPattern, + operator: str + ) -> Optional[IndexRecommendation]: + """Create recommendation for single-column index.""" + column_obj = self.tables[table_name][column] + selectivity = self.selectivity_estimator.estimate_selectivity(column_obj) + + # Skip very low selectivity columns unless frequently used + if selectivity < self.min_selectivity_for_index and pattern.frequency < 100: + return None + + index_name = f"idx_{table_name}_{column}" + index = Index( + name=index_name, + table=table_name, + columns=[column], + unique=column_obj.unique, + index_type="btree" + ) + + reason = f"Optimize WHERE {column} {operator} queries" + if pattern.frequency > 10: + reason += f" (used {pattern.frequency} times)" + + return IndexRecommendation( + recommendation_id=self._generate_recommendation_id(table_name, [column]), + table=table_name, + recommended_index=index, + reason=reason, + query_patterns_helped=[pattern.query_id], + estimated_benefit=self._estimate_benefit(selectivity, pattern.frequency), + estimated_overhead="Low (single column)", + priority=self._calculate_priority(selectivity, pattern.frequency, 1), + sql_statement=f"CREATE INDEX {index_name} ON {table_name} ({column});", + selectivity_analysis={ + "column_selectivity": selectivity, + "estimated_reduction": f"{int(selectivity * 100)}%" + } + ) + + def _create_composite_recommendation( + self, + table_name: str, + columns: List[str], + pattern: QueryPattern + ) -> Optional[IndexRecommendation]: + """Create recommendation for composite index.""" + if len(columns) > self.max_composite_index_columns: + columns = 
columns[:self.max_composite_index_columns] + + # Order columns by selectivity (most selective first) + column_selectivities = [] + for col in columns: + col_obj = self.tables[table_name][col] + selectivity = self.selectivity_estimator.estimate_selectivity(col_obj) + column_selectivities.append((col, selectivity)) + + # Sort by selectivity descending + column_selectivities.sort(key=lambda x: x[1], reverse=True) + ordered_columns = [col for col, _ in column_selectivities] + + # Calculate combined selectivity + combined_selectivity = min(sum(sel for _, sel in column_selectivities) / len(columns), 0.95) + + index_name = f"idx_{table_name}_{'_'.join(ordered_columns)}" + if len(index_name) > 63: # PostgreSQL limit + index_name = f"idx_{table_name}_composite_{abs(hash('_'.join(ordered_columns))) % 10000}" + + index = Index( + name=index_name, + table=table_name, + columns=ordered_columns, + index_type="btree" + ) + + reason = f"Optimize multi-column WHERE conditions: {', '.join(ordered_columns)}" + + return IndexRecommendation( + recommendation_id=self._generate_recommendation_id(table_name, ordered_columns), + table=table_name, + recommended_index=index, + reason=reason, + query_patterns_helped=[pattern.query_id], + estimated_benefit=self._estimate_benefit(combined_selectivity, pattern.frequency), + estimated_overhead=f"Medium (composite index with {len(ordered_columns)} columns)", + priority=self._calculate_priority(combined_selectivity, pattern.frequency, len(ordered_columns)), + sql_statement=f"CREATE INDEX {index_name} ON {table_name} ({', '.join(ordered_columns)});", + selectivity_analysis={ + "column_selectivities": {col: sel for col, sel in column_selectivities}, + "combined_selectivity": combined_selectivity, + "column_order_rationale": "Ordered by selectivity (most selective first)" + } + ) + + def _create_covering_index_recommendation( + self, + table_name: str, + where_columns: List[str], + pattern: QueryPattern + ) -> Optional[IndexRecommendation]: + 
"""Create recommendation for covering index.""" + order_columns = [col['column'] for col in pattern.order_by if col['column'] in self.tables[table_name]] + + # Combine WHERE and ORDER BY columns + index_columns = where_columns.copy() + include_columns = [] + + # Add ORDER BY columns to index columns + for col in order_columns: + if col not in index_columns: + index_columns.append(col) + + # Limit index columns + if len(index_columns) > self.max_composite_index_columns: + include_columns = index_columns[self.max_composite_index_columns:] + index_columns = index_columns[:self.max_composite_index_columns] + + index_name = f"idx_{table_name}_covering_{'_'.join(index_columns[:3])}" + if len(index_name) > 63: + index_name = f"idx_{table_name}_covering_{abs(hash('_'.join(index_columns))) % 10000}" + + index = Index( + name=index_name, + table=table_name, + columns=index_columns, + include_columns=include_columns, + index_type="btree" + ) + + reason = f"Covering index for WHERE + ORDER BY optimization" + + # Calculate selectivity for main columns + main_selectivity = 0.5 # Default for covering indexes + if where_columns: + selectivities = [ + self.selectivity_estimator.estimate_selectivity(self.tables[table_name][col]) + for col in where_columns[:2] # Consider first 2 columns + ] + main_selectivity = max(selectivities) + + sql_parts = [f"CREATE INDEX {index_name} ON {table_name} ({', '.join(index_columns)})"] + if include_columns: + sql_parts.append(f" INCLUDE ({', '.join(include_columns)})") + sql_statement = ''.join(sql_parts) + ";" + + return IndexRecommendation( + recommendation_id=self._generate_recommendation_id(table_name, index_columns, "covering"), + table=table_name, + recommended_index=index, + reason=reason, + query_patterns_helped=[pattern.query_id], + estimated_benefit="High (eliminates table lookups for SELECT)", + estimated_overhead=f"High (covering index with {len(index_columns)} columns)", + priority=self._calculate_priority(main_selectivity, 
pattern.frequency, len(index_columns)), + sql_statement=sql_statement, + selectivity_analysis={ + "main_columns_selectivity": main_selectivity, + "covering_benefit": "Eliminates table lookup for SELECT queries" + } + ) + + def _create_join_index_recommendation( + self, + table_name: str, + column: str, + pattern: QueryPattern, + join_condition: Dict[str, Any] + ) -> Optional[IndexRecommendation]: + """Create recommendation for JOIN optimization index.""" + column_obj = self.tables[table_name][column] + selectivity = self.selectivity_estimator.estimate_selectivity(column_obj) + + index_name = f"idx_{table_name}_{column}_join" + index = Index( + name=index_name, + table=table_name, + columns=[column], + index_type="btree" + ) + + foreign_table = join_condition.get('foreign_table', 'unknown') + reason = f"Optimize JOIN with {foreign_table} table on {column}" + + return IndexRecommendation( + recommendation_id=self._generate_recommendation_id(table_name, [column], "join"), + table=table_name, + recommended_index=index, + reason=reason, + query_patterns_helped=[pattern.query_id], + estimated_benefit=self._estimate_join_benefit(pattern.frequency), + estimated_overhead="Low (single column for JOIN)", + priority=2, # JOINs are generally high priority + sql_statement=f"CREATE INDEX {index_name} ON {table_name} ({column});", + selectivity_analysis={ + "column_selectivity": selectivity, + "join_optimization": True + } + ) + + def _generate_recommendation_id(self, table: str, columns: List[str], suffix: str = "") -> str: + """Generate unique recommendation ID.""" + content = f"{table}_{'_'.join(sorted(columns))}_{suffix}" + return hashlib.md5(content.encode()).hexdigest()[:8] + + def _estimate_benefit(self, selectivity: float, frequency: int) -> str: + """Estimate performance benefit of index.""" + if selectivity > 0.8 and frequency > 50: + return "Very High" + elif selectivity > 0.6 and frequency > 20: + return "High" + elif selectivity > 0.4 or frequency > 10: + return 
"Medium" + else: + return "Low" + + def _estimate_join_benefit(self, frequency: int) -> str: + """Estimate benefit for JOIN indexes.""" + if frequency > 50: + return "Very High (frequent JOINs)" + elif frequency > 20: + return "High (regular JOINs)" + elif frequency > 5: + return "Medium (occasional JOINs)" + else: + return "Low (rare JOINs)" + + def _calculate_priority(self, selectivity: float, frequency: int, column_count: int) -> int: + """Calculate priority score (1 = highest priority).""" + # Base score calculation + score = 0 + + # Selectivity contribution (0-50 points) + score += int(selectivity * 50) + + # Frequency contribution (0-30 points) + score += min(frequency, 30) + + # Penalty for complex indexes (subtract points) + score -= (column_count - 1) * 5 + + # Convert to priority levels + if score >= 70: + return 1 # Highest + elif score >= 50: + return 2 # High + elif score >= 30: + return 3 # Medium + else: + return 4 # Low + + def _deduplicate_recommendations(self, recommendations: List[IndexRecommendation]) -> List[IndexRecommendation]: + """Remove duplicate recommendations.""" + seen_indexes = set() + unique_recommendations = [] + + for rec in recommendations: + index_signature = (rec.table, tuple(rec.recommended_index.columns)) + if index_signature not in seen_indexes: + seen_indexes.add(index_signature) + unique_recommendations.append(rec) + else: + # Merge query patterns helped + for existing_rec in unique_recommendations: + if (existing_rec.table == rec.table and + existing_rec.recommended_index.columns == rec.recommended_index.columns): + existing_rec.query_patterns_helped.extend(rec.query_patterns_helped) + break + + return unique_recommendations + + def _prioritize_recommendations(self, recommendations: List[IndexRecommendation]) -> List[IndexRecommendation]: + """Sort recommendations by priority.""" + return sorted(recommendations, key=lambda x: (x.priority, -len(x.query_patterns_helped))) + + def analyze_redundant_indexes(self) -> 
List[RedundancyIssue]: + """Identify redundant, overlapping, and potentially unused indexes.""" + redundancy_issues = [] + + for table_name, indexes in self.existing_indexes.items(): + if len(indexes) < 2: + continue + + # Find duplicate indexes + duplicates = self._find_duplicate_indexes(table_name, indexes) + redundancy_issues.extend(duplicates) + + # Find overlapping indexes + overlapping = self._find_overlapping_indexes(table_name, indexes) + redundancy_issues.extend(overlapping) + + # Find potentially unused indexes + unused = self._find_unused_indexes(table_name, indexes) + redundancy_issues.extend(unused) + + return redundancy_issues + + def _find_duplicate_indexes(self, table_name: str, indexes: List[Index]) -> List[RedundancyIssue]: + """Find exactly duplicate indexes.""" + issues = [] + seen_signatures = {} + + for index in indexes: + signature = (tuple(index.columns), index.unique, index.partial_condition) + if signature in seen_signatures: + existing_index = seen_signatures[signature] + issues.append(RedundancyIssue( + issue_type="DUPLICATE", + affected_indexes=[existing_index.name, index.name], + table=table_name, + description=f"Indexes '{existing_index.name}' and '{index.name}' are identical", + recommendation=f"Drop one of the duplicate indexes", + sql_statements=[f"DROP INDEX {index.name};"] + )) + else: + seen_signatures[signature] = index + + return issues + + def _find_overlapping_indexes(self, table_name: str, indexes: List[Index]) -> List[RedundancyIssue]: + """Find overlapping indexes that might be redundant.""" + issues = [] + + for i, index1 in enumerate(indexes): + for index2 in indexes[i+1:]: + overlap_ratio = self._calculate_overlap_ratio(index1, index2) + + if overlap_ratio >= self.redundancy_overlap_threshold: + # Determine which index to keep + if len(index1.columns) <= len(index2.columns): + redundant_index = index1 + keep_index = index2 + else: + redundant_index = index2 + keep_index = index1 + + issues.append(RedundancyIssue( + 
issue_type="OVERLAPPING", + affected_indexes=[index1.name, index2.name], + table=table_name, + description=f"Index '{redundant_index.name}' overlaps {int(overlap_ratio * 100)}% " + f"with '{keep_index.name}'", + recommendation=f"Consider dropping '{redundant_index.name}' as it's largely " + f"covered by '{keep_index.name}'", + sql_statements=[f"DROP INDEX {redundant_index.name};"] + )) + + return issues + + def _calculate_overlap_ratio(self, index1: Index, index2: Index) -> float: + """Calculate overlap ratio between two indexes.""" + cols1 = set(index1.columns) + cols2 = set(index2.columns) + + if not cols1 or not cols2: + return 0.0 + + intersection = len(cols1.intersection(cols2)) + union = len(cols1.union(cols2)) + + return intersection / union if union > 0 else 0.0 + + def _find_unused_indexes(self, table_name: str, indexes: List[Index]) -> List[RedundancyIssue]: + """Find potentially unused indexes based on query patterns.""" + issues = [] + + # Collect all columns used in query patterns for this table + used_columns = set() + table_patterns = [p for p in self.query_patterns if p.table == table_name] + + for pattern in table_patterns: + # Add WHERE condition columns + for condition in pattern.where_conditions: + if condition.get('column'): + used_columns.add(condition['column']) + + # Add JOIN columns + for join in pattern.join_conditions: + if join.get('local_column'): + used_columns.add(join['local_column']) + + # Add ORDER BY columns + for order in pattern.order_by: + if order.get('column'): + used_columns.add(order['column']) + + # Add GROUP BY columns + used_columns.update(pattern.group_by) + + if not used_columns: + return issues # Can't determine usage without query patterns + + for index in indexes: + index_columns = set(index.columns) + if not index_columns.intersection(used_columns): + issues.append(RedundancyIssue( + issue_type="UNUSED", + affected_indexes=[index.name], + table=table_name, + description=f"Index '{index.name}' columns 
{index.columns} are not used in any query patterns", + recommendation="Consider dropping this index if it's truly unused (verify with query logs)", + sql_statements=[f"-- Review usage before dropping\n-- DROP INDEX {index.name};"] + )) + + return issues + + def estimate_index_sizes(self) -> Dict[str, Dict[str, Any]]: + """Estimate storage requirements for recommended indexes.""" + size_estimates = {} + + # This is a simplified estimation - in practice, would need actual table statistics + for table_name in self.tables: + size_estimates[table_name] = { + "estimated_table_rows": 10000, # Default estimate + "existing_indexes_size_mb": len(self.existing_indexes.get(table_name, [])) * 5, # Rough estimate + "index_overhead_per_column_mb": 2 # Rough estimate per column + } + + return size_estimates + + def generate_analysis_report(self) -> Dict[str, Any]: + """Generate comprehensive analysis report.""" + recommendations = self.analyze_missing_indexes() + redundancy_issues = self.analyze_redundant_indexes() + size_estimates = self.estimate_index_sizes() + + # Calculate statistics + total_existing_indexes = sum(len(indexes) for indexes in self.existing_indexes.values()) + tables_analyzed = len(self.tables) + query_patterns_analyzed = len(self.query_patterns) + + # Categorize recommendations by priority + high_priority = [r for r in recommendations if r.priority <= 2] + medium_priority = [r for r in recommendations if r.priority == 3] + low_priority = [r for r in recommendations if r.priority >= 4] + + return { + "analysis_summary": { + "tables_analyzed": tables_analyzed, + "query_patterns_analyzed": query_patterns_analyzed, + "existing_indexes": total_existing_indexes, + "total_recommendations": len(recommendations), + "high_priority_recommendations": len(high_priority), + "redundancy_issues_found": len(redundancy_issues) + }, + "index_recommendations": { + "high_priority": [asdict(r) for r in high_priority], + "medium_priority": [asdict(r) for r in medium_priority], + 
"low_priority": [asdict(r) for r in low_priority] + }, + "redundancy_analysis": [asdict(issue) for issue in redundancy_issues], + "size_estimates": size_estimates, + "sql_statements": { + "create_indexes": [rec.sql_statement for rec in recommendations], + "drop_redundant": [ + stmt for issue in redundancy_issues + for stmt in issue.sql_statements + ] + }, + "performance_impact": self._generate_performance_impact_analysis(recommendations) + } + + def _generate_performance_impact_analysis(self, recommendations: List[IndexRecommendation]) -> Dict[str, Any]: + """Generate performance impact analysis.""" + impact_analysis = { + "query_optimization": {}, + "write_overhead": {}, + "storage_impact": {} + } + + # Analyze query optimization impact + query_benefits = defaultdict(list) + for rec in recommendations: + for query_id in rec.query_patterns_helped: + query_benefits[query_id].append(rec.estimated_benefit) + + impact_analysis["query_optimization"] = { + "queries_improved": len(query_benefits), + "high_impact_queries": len([q for q, benefits in query_benefits.items() + if any("High" in benefit for benefit in benefits)]), + "benefit_distribution": dict(Counter( + rec.estimated_benefit for rec in recommendations + )) + } + + # Analyze write overhead + impact_analysis["write_overhead"] = { + "total_new_indexes": len(recommendations), + "estimated_insert_overhead": f"{len(recommendations) * 5}%", # Rough estimate + "tables_most_affected": list(Counter(rec.table for rec in recommendations).most_common(3)) + } + + return impact_analysis + + def format_text_report(self, analysis: Dict[str, Any]) -> str: + """Format analysis as human-readable text report.""" + lines = [] + lines.append("DATABASE INDEX OPTIMIZATION REPORT") + lines.append("=" * 50) + lines.append("") + + # Summary + summary = analysis["analysis_summary"] + lines.append("ANALYSIS SUMMARY") + lines.append("-" * 16) + lines.append(f"Tables Analyzed: {summary['tables_analyzed']}") + lines.append(f"Query Patterns: 
{summary['query_patterns_analyzed']}") + lines.append(f"Existing Indexes: {summary['existing_indexes']}") + lines.append(f"New Recommendations: {summary['total_recommendations']}") + lines.append(f"High Priority: {summary['high_priority_recommendations']}") + lines.append(f"Redundancy Issues: {summary['redundancy_issues_found']}") + lines.append("") + + # High Priority Recommendations + high_priority = analysis["index_recommendations"]["high_priority"] + if high_priority: + lines.append(f"HIGH PRIORITY RECOMMENDATIONS ({len(high_priority)})") + lines.append("-" * 35) + for i, rec in enumerate(high_priority[:10], 1): # Show top 10 + lines.append(f"{i}. {rec['table']}: {rec['reason']}") + lines.append(f" Columns: {', '.join(rec['recommended_index']['columns'])}") + lines.append(f" Benefit: {rec['estimated_benefit']}") + lines.append(f" SQL: {rec['sql_statement']}") + lines.append("") + + # Redundancy Issues + redundancy = analysis["redundancy_analysis"] + if redundancy: + lines.append(f"REDUNDANCY ISSUES ({len(redundancy)})") + lines.append("-" * 20) + for issue in redundancy[:5]: # Show first 5 + lines.append(f"• {issue['issue_type']}: {issue['description']}") + lines.append(f" Recommendation: {issue['recommendation']}") + if issue['sql_statements']: + lines.append(f" SQL: {issue['sql_statements'][0]}") + lines.append("") + + # Performance Impact + perf_impact = analysis["performance_impact"] + lines.append("PERFORMANCE IMPACT ANALYSIS") + lines.append("-" * 30) + query_opt = perf_impact["query_optimization"] + lines.append(f"Queries to be optimized: {query_opt['queries_improved']}") + lines.append(f"High impact optimizations: {query_opt['high_impact_queries']}") + + write_overhead = perf_impact["write_overhead"] + lines.append(f"Estimated insert overhead: {write_overhead['estimated_insert_overhead']}") + lines.append("") + + # SQL Statements Summary + sql_statements = analysis["sql_statements"] + create_statements = sql_statements["create_indexes"] + if 
create_statements: + lines.append("RECOMMENDED CREATE INDEX STATEMENTS") + lines.append("-" * 36) + for i, stmt in enumerate(create_statements[:10], 1): + lines.append(f"{i}. {stmt}") + + if len(create_statements) > 10: + lines.append(f"... and {len(create_statements) - 10} more") + lines.append("") + + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser(description="Optimize database indexes based on schema and query patterns") + parser.add_argument("--schema", "-s", required=True, help="Schema definition JSON file") + parser.add_argument("--queries", "-q", required=True, help="Query patterns JSON file") + parser.add_argument("--output", "-o", help="Output file (default: stdout)") + parser.add_argument("--format", "-f", choices=["json", "text"], default="text", + help="Output format") + parser.add_argument("--analyze-existing", "-e", action="store_true", + help="Include analysis of existing indexes") + parser.add_argument("--min-priority", "-p", type=int, default=4, + help="Minimum priority level to include (1=highest, 4=lowest)") + + args = parser.parse_args() + + try: + # Load schema + with open(args.schema, 'r') as f: + schema_data = json.load(f) + + # Load queries + with open(args.queries, 'r') as f: + query_data = json.load(f) + + # Initialize optimizer + optimizer = IndexOptimizer() + optimizer.load_schema(schema_data) + optimizer.load_query_patterns(query_data) + + # Generate analysis + analysis = optimizer.generate_analysis_report() + + # Filter by priority if specified + if args.min_priority < 4: + for priority_level in ["high_priority", "medium_priority", "low_priority"]: + analysis["index_recommendations"][priority_level] = [ + rec for rec in analysis["index_recommendations"][priority_level] + if rec["priority"] <= args.min_priority + ] + + # Format output + if args.format == "json": + output = json.dumps(analysis, indent=2) + else: + output = optimizer.format_text_report(analysis) + + # Write output + if args.output: + with 
open(args.output, 'w') as f: + f.write(output) + else: + print(output) + + return 0 + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/engineering/database-designer/migration_generator.py b/engineering/database-designer/migration_generator.py new file mode 100644 index 0000000..b98c910 --- /dev/null +++ b/engineering/database-designer/migration_generator.py @@ -0,0 +1,1199 @@ +#!/usr/bin/env python3 +""" +Database Migration Generator + +Generates safe migration scripts between schema versions: +- Compares current and target schemas +- Generates ALTER TABLE statements for schema changes +- Implements zero-downtime migration strategies (expand-contract pattern) +- Creates rollback scripts for all changes +- Generates validation queries to verify migrations +- Handles complex changes like table splits/merges + +Input: Current schema JSON + Target schema JSON +Output: Migration SQL + Rollback SQL + Validation queries + Execution plan + +Usage: + python migration_generator.py --current current_schema.json --target target_schema.json --output migration.sql + python migration_generator.py --current current.json --target target.json --format json + python migration_generator.py --current current.json --target target.json --zero-downtime + python migration_generator.py --current current.json --target target.json --validate-only +""" + +import argparse +import json +import re +import sys +from collections import defaultdict, OrderedDict +from typing import Dict, List, Set, Tuple, Optional, Any, Union +from dataclasses import dataclass, asdict +from datetime import datetime +import hashlib + + +@dataclass +class Column: + name: str + data_type: str + nullable: bool = True + primary_key: bool = False + unique: bool = False + foreign_key: Optional[str] = None + default_value: Optional[str] = None + check_constraint: Optional[str] = None + + +@dataclass +class 
@dataclass
class Table:
    """Schema-level description of one table as parsed from the input JSON."""
    name: str
    columns: Dict[str, 'Column']
    primary_key: List[str]
    foreign_keys: Dict[str, str]  # column -> referenced_table.column
    unique_constraints: List[List[str]]
    check_constraints: Dict[str, str]
    indexes: List[Dict[str, Any]]


@dataclass
class MigrationStep:
    """One atomic unit of a migration: forward SQL, rollback SQL, metadata."""
    step_id: str
    step_type: str
    table: str
    description: str
    sql_forward: str
    sql_rollback: str
    validation_sql: Optional[str] = None
    # Annotated Optional: the default really is None, not an empty list.
    dependencies: Optional[List[str]] = None
    risk_level: str = "LOW"  # LOW, MEDIUM, HIGH
    estimated_time: Optional[str] = None
    zero_downtime_phase: Optional[str] = None  # EXPAND, CONTRACT, or None


@dataclass
class MigrationPlan:
    """Complete ordered migration: steps plus execution/rollback ordering."""
    migration_id: str
    created_at: str
    source_schema_hash: str
    target_schema_hash: str
    steps: List[MigrationStep]
    summary: Dict[str, Any]
    execution_order: List[str]
    rollback_order: List[str]


@dataclass
class ValidationCheck:
    """A single post-migration verification query with its expected result."""
    check_id: str
    check_type: str
    table: str
    description: str
    sql_query: str
    expected_result: Any
    critical: bool = True


class SchemaComparator:
    """Compares two schema versions and identifies differences."""

    def __init__(self):
        self.current_schema: Dict[str, Table] = {}
        self.target_schema: Dict[str, Table] = {}
        # Every change category is pre-created so callers can index
        # unconditionally even when a category is empty.
        self.changes: Dict[str, List[Dict[str, Any]]] = {
            'tables_added': [],
            'tables_dropped': [],
            'tables_renamed': [],
            'columns_added': [],
            'columns_dropped': [],
            'columns_modified': [],
            'columns_renamed': [],
            'constraints_added': [],
            'constraints_dropped': [],
            'indexes_added': [],
            'indexes_dropped': []
        }

    def load_schemas(self, current_data: Dict[str, Any], target_data: Dict[str, Any]):
        """Load current and target schemas from their JSON representations."""
        self.current_schema = self._parse_schema(current_data)
        self.target_schema = self._parse_schema(target_data)

    def _parse_schema(self, schema_data: Dict[str, Any]) -> Dict[str, Table]:
        """Parse schema JSON into Table objects keyed by table name."""
        tables = {}

        if 'tables' not in schema_data:
            return tables

        for table_name, table_def in schema_data['tables'].items():
            columns = {}
            primary_key = table_def.get('primary_key', [])
            foreign_keys = {}

            # Parse columns
            for col_name, col_def in table_def.get('columns', {}).items():
                column = Column(
                    name=col_name,
                    data_type=col_def.get('type', 'VARCHAR(255)'),
                    nullable=col_def.get('nullable', True),
                    primary_key=col_name in primary_key,
                    unique=col_def.get('unique', False),
                    foreign_key=col_def.get('foreign_key'),
                    default_value=col_def.get('default'),
                    check_constraint=col_def.get('check_constraint')
                )
                columns[col_name] = column

                if column.foreign_key:
                    foreign_keys[col_name] = column.foreign_key

            table = Table(
                name=table_name,
                columns=columns,
                primary_key=primary_key,
                foreign_keys=foreign_keys,
                unique_constraints=table_def.get('unique_constraints', []),
                check_constraints=table_def.get('check_constraints', {}),
                indexes=table_def.get('indexes', [])
            )
            tables[table_name] = table

        return tables

    def compare_schemas(self) -> Dict[str, List[Dict[str, Any]]]:
        """Compare loaded schemas and return the populated changes dict."""
        self._compare_tables()
        self._compare_columns()
        self._compare_constraints()
        self._compare_indexes()
        return self.changes

    def _compare_tables(self):
        """Compare table-level changes (added / dropped / probable renames)."""
        current_tables = set(self.current_schema.keys())
        target_tables = set(self.target_schema.keys())

        # Tables added
        for table_name in target_tables - current_tables:
            self.changes['tables_added'].append({
                'table': table_name,
                'definition': self.target_schema[table_name]
            })

        # Tables dropped
        for table_name in current_tables - target_tables:
            self.changes['tables_dropped'].append({
                'table': table_name,
                'definition': self.current_schema[table_name]
            })

        # Tables renamed (heuristic based on column similarity)
        self._detect_renamed_tables(current_tables - target_tables, target_tables - current_tables)

    def _detect_renamed_tables(self, dropped_tables: Set[str], added_tables: Set[str]):
        """Reclassify drop+add pairs as renames when their column sets are
        highly similar (Jaccard > 0.7). Best matches win greedily."""
        if not dropped_tables or not added_tables:
            return

        # Calculate similarity scores
        similarity_scores = []
        for dropped_table in dropped_tables:
            for added_table in added_tables:
                score = self._calculate_table_similarity(dropped_table, added_table)
                if score > 0.7:  # High similarity threshold
                    similarity_scores.append((score, dropped_table, added_table))

        # Sort by similarity and identify renames
        similarity_scores.sort(reverse=True)
        used_tables = set()

        for score, old_name, new_name in similarity_scores:
            if old_name not in used_tables and new_name not in used_tables:
                self.changes['tables_renamed'].append({
                    'old_name': old_name,
                    'new_name': new_name,
                    'similarity_score': score
                })
                used_tables.add(old_name)
                used_tables.add(new_name)

                # Remove from added/dropped lists
                self.changes['tables_added'] = [t for t in self.changes['tables_added'] if t['table'] != new_name]
                self.changes['tables_dropped'] = [t for t in self.changes['tables_dropped'] if t['table'] != old_name]

    def _calculate_table_similarity(self, table1_name: str, table2_name: str) -> float:
        """Jaccard similarity of the two tables' column-name sets (0.0-1.0)."""
        table1 = self.current_schema[table1_name]
        table2 = self.target_schema[table2_name]

        cols1 = set(table1.columns.keys())
        cols2 = set(table2.columns.keys())

        if not cols1 and not cols2:
            return 1.0
        elif not cols1 or not cols2:
            return 0.0

        intersection = len(cols1.intersection(cols2))
        union = len(cols1.union(cols2))

        return intersection / union

    def _compare_columns(self):
        """Compare column-level changes for tables present in both schemas."""
        common_tables = set(self.current_schema.keys()).intersection(set(self.target_schema.keys()))

        for table_name in common_tables:
            current_table = self.current_schema[table_name]
            target_table = self.target_schema[table_name]

            current_columns = set(current_table.columns.keys())
            target_columns = set(target_table.columns.keys())

            # Columns added
            for col_name in target_columns - current_columns:
                self.changes['columns_added'].append({
                    'table': table_name,
                    'column': col_name,
                    'definition': target_table.columns[col_name]
                })

            # Columns dropped
            for col_name in current_columns - target_columns:
                self.changes['columns_dropped'].append({
                    'table': table_name,
                    'column': col_name,
                    'definition': current_table.columns[col_name]
                })

            # Columns modified
            for col_name in current_columns.intersection(target_columns):
                current_col = current_table.columns[col_name]
                target_col = target_table.columns[col_name]

                if self._columns_different(current_col, target_col):
                    self.changes['columns_modified'].append({
                        'table': table_name,
                        'column': col_name,
                        'current_definition': current_col,
                        'target_definition': target_col,
                        'changes': self._describe_column_changes(current_col, target_col)
                    })

    def _columns_different(self, col1: Column, col2: Column) -> bool:
        """Check if two columns have different definitions."""
        return (col1.data_type != col2.data_type or
                col1.nullable != col2.nullable or
                col1.default_value != col2.default_value or
                col1.unique != col2.unique or
                col1.foreign_key != col2.foreign_key or
                col1.check_constraint != col2.check_constraint)

    def _describe_column_changes(self, current_col: Column, target_col: Column) -> List[str]:
        """Return human-readable 'attr: old -> new' strings for each delta."""
        changes = []

        if current_col.data_type != target_col.data_type:
            changes.append(f"type: {current_col.data_type} -> {target_col.data_type}")

        if current_col.nullable != target_col.nullable:
            changes.append(f"nullable: {current_col.nullable} -> {target_col.nullable}")

        if current_col.default_value != target_col.default_value:
            changes.append(f"default: {current_col.default_value} -> {target_col.default_value}")

        if current_col.unique != target_col.unique:
            changes.append(f"unique: {current_col.unique} -> {target_col.unique}")

        if current_col.foreign_key != target_col.foreign_key:
            changes.append(f"foreign_key: {current_col.foreign_key} -> {target_col.foreign_key}")

        return changes

    def _compare_constraints(self):
        """Compare primary key, unique, and check constraint changes."""
        common_tables = set(self.current_schema.keys()).intersection(set(self.target_schema.keys()))

        for table_name in common_tables:
            current_table = self.current_schema[table_name]
            target_table = self.target_schema[table_name]

            # Compare primary keys (any difference is modeled as drop + add)
            if current_table.primary_key != target_table.primary_key:
                if current_table.primary_key:
                    self.changes['constraints_dropped'].append({
                        'table': table_name,
                        'constraint_type': 'PRIMARY_KEY',
                        'columns': current_table.primary_key
                    })

                if target_table.primary_key:
                    self.changes['constraints_added'].append({
                        'table': table_name,
                        'constraint_type': 'PRIMARY_KEY',
                        'columns': target_table.primary_key
                    })

            # Compare unique constraints
            current_unique = set(tuple(uc) for uc in current_table.unique_constraints)
            target_unique = set(tuple(uc) for uc in target_table.unique_constraints)

            for constraint in target_unique - current_unique:
                self.changes['constraints_added'].append({
                    'table': table_name,
                    'constraint_type': 'UNIQUE',
                    'columns': list(constraint)
                })

            for constraint in current_unique - target_unique:
                self.changes['constraints_dropped'].append({
                    'table': table_name,
                    'constraint_type': 'UNIQUE',
                    'columns': list(constraint)
                })

            # Compare check constraints (keyed by name + condition)
            current_checks = set(current_table.check_constraints.items())
            target_checks = set(target_table.check_constraints.items())

            for name, condition in target_checks - current_checks:
                self.changes['constraints_added'].append({
                    'table': table_name,
                    'constraint_type': 'CHECK',
                    'constraint_name': name,
                    'condition': condition
                })

            for name, condition in current_checks - target_checks:
                self.changes['constraints_dropped'].append({
                    'table': table_name,
                    'constraint_type': 'CHECK',
                    'constraint_name': name,
                    'condition': condition
                })

    def _compare_indexes(self):
        """Compare index changes by index name within common tables."""
        common_tables = set(self.current_schema.keys()).intersection(set(self.target_schema.keys()))

        for table_name in common_tables:
            current_indexes = {idx['name']: idx for idx in self.current_schema[table_name].indexes}
            target_indexes = {idx['name']: idx for idx in self.target_schema[table_name].indexes}

            current_names = set(current_indexes.keys())
            target_names = set(target_indexes.keys())

            # Indexes added
            for idx_name in target_names - current_names:
                self.changes['indexes_added'].append({
                    'table': table_name,
                    'index': target_indexes[idx_name]
                })

            # Indexes dropped
            for idx_name in current_names - target_names:
                self.changes['indexes_dropped'].append({
                    'table': table_name,
                    'index': current_indexes[idx_name]
                })


class MigrationGenerator:
    """Generates migration steps from schema differences."""

    def __init__(self, zero_downtime: bool = False):
        self.zero_downtime = zero_downtime
        self.migration_steps: List[MigrationStep] = []
        self.step_counter = 0

        # Data type conversions known to be lossless (widening only)
        self.safe_type_conversions = {
            ('VARCHAR(50)', 'VARCHAR(100)'): True,   # Expanding varchar
            ('INT', 'BIGINT'): True,                 # Expanding integer
            ('DECIMAL(10,2)', 'DECIMAL(12,2)'): True,  # Expanding decimal precision
        }

        # Narrowing conversions with a description of the risk
        self.risky_type_conversions = {
            ('VARCHAR(100)', 'VARCHAR(50)'): 'Data truncation possible',
            ('BIGINT', 'INT'): 'Data loss possible for large values',
            ('TEXT', 'VARCHAR(255)'): 'Data truncation possible'
        }

    def generate_migration(self, changes: Dict[str, List[Dict[str, Any]]]) -> MigrationPlan:
        """Generate a complete MigrationPlan from a SchemaComparator changes dict.

        Steps are emitted in dependency order: additive changes first
        (tables, columns, constraints, indexes), then modifications and
        renames, then destructive changes last.
        """
        self.migration_steps = []
        self.step_counter = 0

        # Generate steps in dependency order
        self._generate_table_creation_steps(changes['tables_added'])
        self._generate_column_addition_steps(changes['columns_added'])
        self._generate_constraint_addition_steps(changes['constraints_added'])
        self._generate_index_addition_steps(changes['indexes_added'])
        self._generate_column_modification_steps(changes['columns_modified'])
        self._generate_table_rename_steps(changes['tables_renamed'])
        self._generate_index_removal_steps(changes['indexes_dropped'])
        self._generate_constraint_removal_steps(changes['constraints_dropped'])
        self._generate_column_removal_steps(changes['columns_dropped'])
        self._generate_table_removal_steps(changes['tables_dropped'])

        # Create migration plan; rollback runs the steps in reverse
        migration_id = self._generate_migration_id(changes)
        execution_order = [step.step_id for step in self.migration_steps]
        rollback_order = list(reversed(execution_order))

        return MigrationPlan(
            migration_id=migration_id,
            created_at=datetime.now().isoformat(),
            source_schema_hash=self._calculate_changes_hash(changes),
            target_schema_hash="",  # Would be calculated from target schema
            steps=self.migration_steps,
            summary=self._generate_summary(changes),
            execution_order=execution_order,
            rollback_order=rollback_order
        )

    def _generate_step_id(self) -> str:
        """Generate unique, zero-padded sequential step ID."""
        self.step_counter += 1
        return f"step_{self.step_counter:03d}"

    def _generate_table_creation_steps(self, tables_added: List[Dict[str, Any]]):
        """Generate steps for creating new tables."""
        for table_info in tables_added:
            table = table_info['definition']
            step = self._create_table_step(table)
            self.migration_steps.append(step)

    def _create_table_step(self, table: Table) -> MigrationStep:
        """Create migration step for table creation."""
        columns_sql = []

        for col_name, column in table.columns.items():
            col_sql = f"{col_name} {column.data_type}"

            if not column.nullable:
                col_sql += " NOT NULL"

            if column.default_value:
                col_sql += f" DEFAULT {column.default_value}"

            if column.unique:
                col_sql += " UNIQUE"

            columns_sql.append(col_sql)

        # Add primary key
        if table.primary_key:
            pk_sql = f"PRIMARY KEY ({', '.join(table.primary_key)})"
            columns_sql.append(pk_sql)

        # Add foreign keys. References arrive as "table.column"; SQL needs
        # "table(column)", so split on the dot when one is present.
        for col_name, ref in table.foreign_keys.items():
            ref_table, _, ref_col = ref.partition('.')
            if ref_col:
                fk_sql = f"FOREIGN KEY ({col_name}) REFERENCES {ref_table}({ref_col})"
            else:
                fk_sql = f"FOREIGN KEY ({col_name}) REFERENCES {ref}"
            columns_sql.append(fk_sql)

        create_sql = f"CREATE TABLE {table.name} (\n    " + ",\n    ".join(columns_sql) + "\n);"
        drop_sql = f"DROP TABLE IF EXISTS {table.name};"

        return MigrationStep(
            step_id=self._generate_step_id(),
            step_type="CREATE_TABLE",
            table=table.name,
            description=f"Create table {table.name} with {len(table.columns)} columns",
            sql_forward=create_sql,
            sql_rollback=drop_sql,
            validation_sql=f"SELECT COUNT(*) FROM information_schema.tables WHERE table_name = '{table.name}';",
            risk_level="LOW"
        )

    def _generate_column_addition_steps(self, columns_added: List[Dict[str, Any]]):
        """Generate steps for adding columns."""
        for col_info in columns_added:
            if self.zero_downtime:
                # For zero-downtime, add columns as nullable first
                step = self._add_column_zero_downtime_step(col_info)
            else:
                step = self._add_column_step(col_info)
            self.migration_steps.append(step)

    def _add_column_step(self, col_info: Dict[str, Any]) -> MigrationStep:
        """Create step for adding a column."""
        table = col_info['table']
        column = col_info['definition']

        col_sql = f"{column.name} {column.data_type}"

        if not column.nullable:
            if column.default_value:
                col_sql += f" DEFAULT {column.default_value} NOT NULL"
            else:
                # This is risky - adding NOT NULL without default
                col_sql += " NOT NULL"
        elif column.default_value:
            col_sql += f" DEFAULT {column.default_value}"

        add_sql = f"ALTER TABLE {table} ADD COLUMN {col_sql};"
        drop_sql = f"ALTER TABLE {table} DROP COLUMN {column.name};"

        # NOT NULL with no default fails on any non-empty table -> HIGH risk
        risk_level = "HIGH" if not column.nullable and not column.default_value else "LOW"

        return MigrationStep(
            step_id=self._generate_step_id(),
            step_type="ADD_COLUMN",
            table=table,
            description=f"Add column {column.name} to {table}",
            sql_forward=add_sql,
            sql_rollback=drop_sql,
            validation_sql=f"SELECT COUNT(*) FROM information_schema.columns WHERE table_name = '{table}' AND column_name = '{column.name}';",
            risk_level=risk_level
        )

    def _add_column_zero_downtime_step(self, col_info: Dict[str, Any]) -> MigrationStep:
        """Create zero-downtime step for adding column (EXPAND phase:
        always added nullable; NOT NULL deferred to a follow-up)."""
        table = col_info['table']
        column = col_info['definition']

        # Phase 1: Add as nullable with default if needed
        col_sql = f"{column.name} {column.data_type}"
        if column.default_value:
            col_sql += f" DEFAULT {column.default_value}"

        add_sql = f"ALTER TABLE {table} ADD COLUMN {col_sql};"

        # If column should be NOT NULL, handle in separate phase
        if not column.nullable:
            # Add comment about needing follow-up step
            add_sql += f"\n-- Follow-up needed: Add NOT NULL constraint after data population"

        drop_sql = f"ALTER TABLE {table} DROP COLUMN {column.name};"

        return MigrationStep(
            step_id=self._generate_step_id(),
            step_type="ADD_COLUMN_ZD",
            table=table,
            description=f"Add column {column.name} to {table} (zero-downtime phase 1)",
            sql_forward=add_sql,
            sql_rollback=drop_sql,
            validation_sql=f"SELECT COUNT(*) FROM information_schema.columns WHERE table_name = '{table}' AND column_name = '{column.name}';",
            risk_level="LOW",
            zero_downtime_phase="EXPAND"
        )

    def _generate_column_modification_steps(self, columns_modified: List[Dict[str, Any]]):
        """Generate steps for modifying columns."""
        for col_info in columns_modified:
            if self.zero_downtime:
                steps = self._modify_column_zero_downtime_steps(col_info)
                self.migration_steps.extend(steps)
            else:
                step = self._modify_column_step(col_info)
                self.migration_steps.append(step)

    def _modify_column_step(self, col_info: Dict[str, Any]) -> MigrationStep:
        """Create step for modifying a column in place (type, nullability,
        default). NOTE(review): unique/foreign-key/check deltas detected by
        the comparator are not translated to SQL here -- if only those
        changed, alter_statements is empty and the emitted SQL is a bare
        'ALTER TABLE t ;'. Confirm intended handling."""
        table = col_info['table']
        column = col_info['column']
        current_def = col_info['current_definition']
        target_def = col_info['target_definition']
        changes = col_info['changes']

        alter_statements = []
        rollback_statements = []

        # Handle different types of changes
        if current_def.data_type != target_def.data_type:
            alter_statements.append(f"ALTER COLUMN {column} TYPE {target_def.data_type}")
            rollback_statements.append(f"ALTER COLUMN {column} TYPE {current_def.data_type}")

        if current_def.nullable != target_def.nullable:
            if target_def.nullable:
                alter_statements.append(f"ALTER COLUMN {column} DROP NOT NULL")
                rollback_statements.append(f"ALTER COLUMN {column} SET NOT NULL")
            else:
                alter_statements.append(f"ALTER COLUMN {column} SET NOT NULL")
                rollback_statements.append(f"ALTER COLUMN {column} DROP NOT NULL")

        if current_def.default_value != target_def.default_value:
            if target_def.default_value:
                alter_statements.append(f"ALTER COLUMN {column} SET DEFAULT {target_def.default_value}")
            else:
                alter_statements.append(f"ALTER COLUMN {column} DROP DEFAULT")

            if current_def.default_value:
                rollback_statements.append(f"ALTER COLUMN {column} SET DEFAULT {current_def.default_value}")
            else:
                rollback_statements.append(f"ALTER COLUMN {column} DROP DEFAULT")

        # Build SQL
        alter_sql = f"ALTER TABLE {table}\n    " + ",\n    ".join(alter_statements) + ";"
        rollback_sql = f"ALTER TABLE {table}\n    " + ",\n    ".join(rollback_statements) + ";"

        # Assess risk
        risk_level = self._assess_column_modification_risk(current_def, target_def)

        return MigrationStep(
            step_id=self._generate_step_id(),
            step_type="MODIFY_COLUMN",
            table=table,
            description=f"Modify column {column}: {', '.join(changes)}",
            sql_forward=alter_sql,
            sql_rollback=rollback_sql,
            validation_sql=f"SELECT data_type, is_nullable FROM information_schema.columns WHERE table_name = '{table}' AND column_name = '{column}';",
            risk_level=risk_level
        )

    def _modify_column_zero_downtime_steps(self, col_info: Dict[str, Any]) -> List[MigrationStep]:
        """Create zero-downtime steps for column modification using the
        expand-contract pattern: add temp column, copy data, drop old,
        rename temp back."""
        table = col_info['table']
        column = col_info['column']
        current_def = col_info['current_definition']
        target_def = col_info['target_definition']

        steps = []

        # For zero-downtime, use expand-contract pattern
        temp_column = f"{column}_new"

        # Step 1: Add new column
        step1 = MigrationStep(
            step_id=self._generate_step_id(),
            step_type="ADD_TEMP_COLUMN",
            table=table,
            description=f"Add temporary column {temp_column} for zero-downtime migration",
            sql_forward=f"ALTER TABLE {table} ADD COLUMN {temp_column} {target_def.data_type};",
            sql_rollback=f"ALTER TABLE {table} DROP COLUMN {temp_column};",
            zero_downtime_phase="EXPAND"
        )
        steps.append(step1)

        # Step 2: Copy data
        step2 = MigrationStep(
            step_id=self._generate_step_id(),
            step_type="COPY_COLUMN_DATA",
            table=table,
            description=f"Copy data from {column} to {temp_column}",
            sql_forward=f"UPDATE {table} SET {temp_column} = {column};",
            sql_rollback=f"UPDATE {table} SET {temp_column} = NULL;",
            zero_downtime_phase="EXPAND"
        )
        steps.append(step2)

        # Step 3: Drop old column
        step3 = MigrationStep(
            step_id=self._generate_step_id(),
            step_type="DROP_OLD_COLUMN",
            table=table,
            description=f"Drop original column {column}",
            sql_forward=f"ALTER TABLE {table} DROP COLUMN {column};",
            sql_rollback=f"ALTER TABLE {table} ADD COLUMN {column} {current_def.data_type};",
            zero_downtime_phase="CONTRACT"
        )
        steps.append(step3)

        # Step 4: Rename new column
        step4 = MigrationStep(
            step_id=self._generate_step_id(),
            step_type="RENAME_COLUMN",
            table=table,
            description=f"Rename {temp_column} to {column}",
            sql_forward=f"ALTER TABLE {table} RENAME COLUMN {temp_column} TO {column};",
            sql_rollback=f"ALTER TABLE {table} RENAME COLUMN {column} TO {temp_column};",
            zero_downtime_phase="CONTRACT"
        )
        steps.append(step4)

        return steps

    def _assess_column_modification_risk(self, current: Column, target: Column) -> str:
        """Assess risk level of column modification based on the type
        conversion tables and nullability tightening."""
        if current.data_type != target.data_type:
            conversion_key = (current.data_type, target.data_type)
            if conversion_key in self.risky_type_conversions:
                return "HIGH"
            elif conversion_key not in self.safe_type_conversions:
                return "MEDIUM"

        if current.nullable and not target.nullable:
            return "HIGH"  # Adding NOT NULL constraint

        return "LOW"

    def _generate_constraint_addition_steps(self, constraints_added: List[Dict[str, Any]]):
        """Generate steps for adding constraints."""
        for constraint_info in constraints_added:
            step = self._add_constraint_step(constraint_info)
            # _add_constraint_step returns None for unknown constraint types;
            # guard so we never append a None step (mirrors removal path).
            if step:
                self.migration_steps.append(step)

    def _add_constraint_step(self, constraint_info: Dict[str, Any]) -> Optional[MigrationStep]:
        """Create step for adding constraint; None for unknown types."""
        table = constraint_info['table']
        constraint_type = constraint_info['constraint_type']

        if constraint_type == 'PRIMARY_KEY':
            columns = constraint_info['columns']
            constraint_name = f"pk_{table}"
            add_sql = f"ALTER TABLE {table} ADD CONSTRAINT {constraint_name} PRIMARY KEY ({', '.join(columns)});"
            drop_sql = f"ALTER TABLE {table} DROP CONSTRAINT {constraint_name};"
            description = f"Add primary key on {', '.join(columns)}"

        elif constraint_type == 'UNIQUE':
            columns = constraint_info['columns']
            constraint_name = f"uq_{table}_{'_'.join(columns)}"
            add_sql = f"ALTER TABLE {table} ADD CONSTRAINT {constraint_name} UNIQUE ({', '.join(columns)});"
            drop_sql = f"ALTER TABLE {table} DROP CONSTRAINT {constraint_name};"
            description = f"Add unique constraint on {', '.join(columns)}"

        elif constraint_type == 'CHECK':
            constraint_name = constraint_info['constraint_name']
            condition = constraint_info['condition']
            add_sql = f"ALTER TABLE {table} ADD CONSTRAINT {constraint_name} CHECK ({condition});"
            drop_sql = f"ALTER TABLE {table} DROP CONSTRAINT {constraint_name};"
            description = f"Add check constraint: {condition}"

        else:
            return None

        return MigrationStep(
            step_id=self._generate_step_id(),
            step_type="ADD_CONSTRAINT",
            table=table,
            description=description,
            sql_forward=add_sql,
            sql_rollback=drop_sql,
            risk_level="MEDIUM"  # Constraints can fail if data doesn't comply
        )

    def _generate_index_addition_steps(self, indexes_added: List[Dict[str, Any]]):
        """Generate steps for adding indexes."""
        for index_info in indexes_added:
            step = self._add_index_step(index_info)
            self.migration_steps.append(step)

    def _add_index_step(self, index_info: Dict[str, Any]) -> MigrationStep:
        """Create step for adding index."""
        table = index_info['table']
        index = index_info['index']

        unique_keyword = "UNIQUE " if index.get('unique', False) else ""
        columns_sql = ', '.join(index['columns'])

        create_sql = f"CREATE {unique_keyword}INDEX {index['name']} ON {table} ({columns_sql});"
        drop_sql = f"DROP INDEX {index['name']};"

        return MigrationStep(
            step_id=self._generate_step_id(),
            step_type="ADD_INDEX",
            table=table,
            description=f"Create index {index['name']} on ({columns_sql})",
            sql_forward=create_sql,
            sql_rollback=drop_sql,
            estimated_time="1-5 minutes depending on table size",
            risk_level="LOW"
        )

    def _generate_table_rename_steps(self, tables_renamed: List[Dict[str, Any]]):
        """Generate steps for renaming tables."""
        for rename_info in tables_renamed:
            step = self._rename_table_step(rename_info)
            self.migration_steps.append(step)

    def _rename_table_step(self, rename_info: Dict[str, Any]) -> MigrationStep:
        """Create step for renaming table."""
        old_name = rename_info['old_name']
        new_name = rename_info['new_name']

        rename_sql = f"ALTER TABLE {old_name} RENAME TO {new_name};"
        rollback_sql = f"ALTER TABLE {new_name} RENAME TO {old_name};"

        return MigrationStep(
            step_id=self._generate_step_id(),
            step_type="RENAME_TABLE",
            table=old_name,
            description=f"Rename table {old_name} to {new_name}",
            sql_forward=rename_sql,
            sql_rollback=rollback_sql,
            validation_sql=f"SELECT COUNT(*) FROM information_schema.tables WHERE table_name = '{new_name}';",
            risk_level="LOW"
        )

    def _generate_column_removal_steps(self, columns_dropped: List[Dict[str, Any]]):
        """Generate steps for removing columns."""
        for col_info in columns_dropped:
            step = self._drop_column_step(col_info)
            self.migration_steps.append(step)

    def _drop_column_step(self, col_info: Dict[str, Any]) -> MigrationStep:
        """Create step for dropping column (rollback re-adds the column
        definition, though the data itself cannot be restored)."""
        table = col_info['table']
        column = col_info['definition']

        drop_sql = f"ALTER TABLE {table} DROP COLUMN {column.name};"

        # Recreate column for rollback
        col_sql = f"{column.name} {column.data_type}"
        if not column.nullable:
            col_sql += " NOT NULL"
        if column.default_value:
            col_sql += f" DEFAULT {column.default_value}"

        add_sql = f"ALTER TABLE {table} ADD COLUMN {col_sql};"

        return MigrationStep(
            step_id=self._generate_step_id(),
            step_type="DROP_COLUMN",
            table=table,
            description=f"Drop column {column.name} from {table}",
            sql_forward=drop_sql,
            sql_rollback=add_sql,
            risk_level="HIGH"  # Data loss risk
        )

    def _generate_constraint_removal_steps(self, constraints_dropped: List[Dict[str, Any]]):
        """Generate steps for removing constraints."""
        for constraint_info in constraints_dropped:
            step = self._drop_constraint_step(constraint_info)
            if step:
                self.migration_steps.append(step)

    def _drop_constraint_step(self, constraint_info: Dict[str, Any]) -> Optional[MigrationStep]:
        """Create step for dropping constraint; None for unknown types."""
        table = constraint_info['table']
        constraint_type = constraint_info['constraint_type']

        if constraint_type == 'PRIMARY_KEY':
            constraint_name = f"pk_{table}"
            drop_sql = f"ALTER TABLE {table} DROP CONSTRAINT {constraint_name};"
            columns = constraint_info['columns']
            add_sql = f"ALTER TABLE {table} ADD CONSTRAINT {constraint_name} PRIMARY KEY ({', '.join(columns)});"
            description = f"Drop primary key constraint"

        elif constraint_type == 'UNIQUE':
            columns = constraint_info['columns']
            constraint_name = f"uq_{table}_{'_'.join(columns)}"
            drop_sql = f"ALTER TABLE {table} DROP CONSTRAINT {constraint_name};"
            add_sql = f"ALTER TABLE {table} ADD CONSTRAINT {constraint_name} UNIQUE ({', '.join(columns)});"
            description = f"Drop unique constraint on {', '.join(columns)}"

        elif constraint_type == 'CHECK':
            constraint_name = constraint_info['constraint_name']
            condition = constraint_info.get('condition', '')
            drop_sql = f"ALTER TABLE {table} DROP CONSTRAINT {constraint_name};"
            add_sql = f"ALTER TABLE {table} ADD CONSTRAINT {constraint_name} CHECK ({condition});"
            description = f"Drop check constraint {constraint_name}"

        else:
            return None

        return MigrationStep(
            step_id=self._generate_step_id(),
            step_type="DROP_CONSTRAINT",
            table=table,
            description=description,
            sql_forward=drop_sql,
            sql_rollback=add_sql,
            risk_level="MEDIUM"
        )

    def _generate_index_removal_steps(self, indexes_dropped: List[Dict[str, Any]]):
        """Generate steps for removing indexes."""
        for index_info in indexes_dropped:
            step = self._drop_index_step(index_info)
            self.migration_steps.append(step)

    def _drop_index_step(self, index_info: Dict[str, Any]) -> MigrationStep:
        """Create step for dropping index."""
        table = index_info['table']
        index = index_info['index']

        drop_sql = f"DROP INDEX {index['name']};"

        # Recreate for rollback
        unique_keyword = "UNIQUE " if index.get('unique', False) else ""
        columns_sql = ', '.join(index['columns'])
        create_sql = f"CREATE {unique_keyword}INDEX {index['name']} ON {table} ({columns_sql});"

        return MigrationStep(
            step_id=self._generate_step_id(),
            step_type="DROP_INDEX",
            table=table,
            description=f"Drop index {index['name']}",
            sql_forward=drop_sql,
            sql_rollback=create_sql,
            risk_level="LOW"
        )

    def _generate_table_removal_steps(self, tables_dropped: List[Dict[str, Any]]):
        """Generate steps for removing tables."""
        for table_info in tables_dropped:
            step = self._drop_table_step(table_info)
            self.migration_steps.append(step)

    def _drop_table_step(self, table_info: Dict[str, Any]) -> MigrationStep:
        """Create step for dropping table."""
        table = table_info['definition']

        drop_sql = f"DROP TABLE {table.name};"

        # Would need to recreate entire table for rollback
        # This is simplified - full implementation would generate CREATE TABLE statement
        create_sql = f"-- Recreate table {table.name} (implementation needed)"

        return MigrationStep(
            step_id=self._generate_step_id(),
            step_type="DROP_TABLE",
            table=table.name,
            description=f"Drop table {table.name}",
            sql_forward=drop_sql,
            sql_rollback=create_sql,
            risk_level="HIGH"  # Data loss risk
        )

    def _generate_migration_id(self, changes: Dict[str, List[Dict[str, Any]]]) -> str:
        """Generate unique migration ID.

        The changes dict contains Column/Table dataclass instances which are
        not JSON-serializable; default=str stringifies them so the hash is
        stable instead of raising TypeError.
        """
        content = json.dumps(changes, sort_keys=True, default=str)
        return hashlib.md5(content.encode()).hexdigest()[:8]

    def _calculate_changes_hash(self, changes: Dict[str, List[Dict[str, Any]]]) -> str:
        """Calculate hash of changes for versioning (see _generate_migration_id
        for why default=str is required)."""
        content = json.dumps(changes, sort_keys=True, default=str)
        return hashlib.md5(content.encode()).hexdigest()

    def _generate_summary(self, changes: Dict[str, List[Dict[str, Any]]]) -> Dict[str, Any]:
        """Generate migration summary: change counts and risk breakdown."""
        summary = {
            "total_steps": len(self.migration_steps),
            "changes_summary": {
                "tables_added": len(changes['tables_added']),
                "tables_dropped": len(changes['tables_dropped']),
                "tables_renamed": len(changes['tables_renamed']),
                "columns_added": len(changes['columns_added']),
                "columns_dropped": len(changes['columns_dropped']),
                "columns_modified": len(changes['columns_modified']),
                "constraints_added": len(changes['constraints_added']),
                "constraints_dropped": len(changes['constraints_dropped']),
                "indexes_added": len(changes['indexes_added']),
                "indexes_dropped": len(changes['indexes_dropped'])
            },
            "risk_assessment": {
                "high_risk_steps": len([s for s in self.migration_steps if s.risk_level == "HIGH"]),
                "medium_risk_steps": len([s for s in self.migration_steps if s.risk_level == "MEDIUM"]),
                "low_risk_steps": len([s for s in self.migration_steps if s.risk_level == "LOW"])
            },
            "zero_downtime": self.zero_downtime
        }

        return summary


class ValidationGenerator:
    """Generates validation queries for migration verification."""

    def generate_validations(self, migration_plan: MigrationPlan) -> List[ValidationCheck]:
        """Generate validation checks for migration plan.

        Dispatches per step type; the per-type builders other than
        _create_table_validation are defined further down this file.
        """
        validations = []

        for step in migration_plan.steps:
            if step.step_type == "CREATE_TABLE":
                validations.append(self._create_table_validation(step))
            elif step.step_type == "ADD_COLUMN":
                validations.append(self._add_column_validation(step))
            elif step.step_type == "MODIFY_COLUMN":
                validations.append(self._modify_column_validation(step))
            elif step.step_type == "ADD_INDEX":
                validations.append(self._add_index_validation(step))

        return validations

    def _create_table_validation(self, step: MigrationStep) -> ValidationCheck:
        """Create validation for table creation."""
        return ValidationCheck(
            check_id=f"validate_{step.step_id}",
            check_type="TABLE_EXISTS",
            table=step.table,
            description=f"Verify table {step.table} exists",
            sql_query=f"SELECT COUNT(*) FROM information_schema.tables WHERE table_name = '{step.table}';",
            expected_result=1
        )
description=f"Verify column {column_name} exists in {step.table}", + sql_query=f"SELECT COUNT(*) FROM information_schema.columns WHERE table_name = '{step.table}' AND column_name = '{column_name}';", + expected_result=1 + ) + + def _modify_column_validation(self, step: MigrationStep) -> ValidationCheck: + """Create validation for column modification.""" + return ValidationCheck( + check_id=f"validate_{step.step_id}", + check_type="COLUMN_MODIFIED", + table=step.table, + description=f"Verify column modification in {step.table}", + sql_query=step.validation_sql or f"SELECT 1;", # Use provided validation or default + expected_result=1 + ) + + def _add_index_validation(self, step: MigrationStep) -> ValidationCheck: + """Create validation for index addition.""" + # Extract index name from SQL + index_match = re.search(r'INDEX (\w+)', step.sql_forward) + index_name = index_match.group(1) if index_match else "unknown" + + return ValidationCheck( + check_id=f"validate_{step.step_id}", + check_type="INDEX_EXISTS", + table=step.table, + description=f"Verify index {index_name} exists", + sql_query=f"SELECT COUNT(*) FROM information_schema.statistics WHERE index_name = '{index_name}';", + expected_result=1 + ) + + +def format_migration_plan_text(plan: MigrationPlan, validations: List[ValidationCheck] = None) -> str: + """Format migration plan as human-readable text.""" + lines = [] + lines.append("DATABASE MIGRATION PLAN") + lines.append("=" * 50) + lines.append(f"Migration ID: {plan.migration_id}") + lines.append(f"Created: {plan.created_at}") + lines.append(f"Zero Downtime: {plan.summary['zero_downtime']}") + lines.append("") + + # Summary + summary = plan.summary + lines.append("MIGRATION SUMMARY") + lines.append("-" * 17) + lines.append(f"Total Steps: {summary['total_steps']}") + + changes = summary['changes_summary'] + for change_type, count in changes.items(): + if count > 0: + lines.append(f"{change_type.replace('_', ' ').title()}: {count}") + lines.append("") + + # 
Risk Assessment + risk = summary['risk_assessment'] + lines.append("RISK ASSESSMENT") + lines.append("-" * 15) + lines.append(f"High Risk Steps: {risk['high_risk_steps']}") + lines.append(f"Medium Risk Steps: {risk['medium_risk_steps']}") + lines.append(f"Low Risk Steps: {risk['low_risk_steps']}") + lines.append("") + + # Migration Steps + lines.append("MIGRATION STEPS") + lines.append("-" * 15) + for i, step in enumerate(plan.steps, 1): + lines.append(f"{i}. {step.description} ({step.risk_level} risk)") + lines.append(f" Type: {step.step_type}") + if step.zero_downtime_phase: + lines.append(f" Phase: {step.zero_downtime_phase}") + lines.append(f" Forward SQL: {step.sql_forward}") + lines.append(f" Rollback SQL: {step.sql_rollback}") + if step.estimated_time: + lines.append(f" Estimated Time: {step.estimated_time}") + lines.append("") + + # Validation Checks + if validations: + lines.append("VALIDATION CHECKS") + lines.append("-" * 17) + for validation in validations: + lines.append(f"• {validation.description}") + lines.append(f" SQL: {validation.sql_query}") + lines.append(f" Expected: {validation.expected_result}") + lines.append("") + + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser(description="Generate database migration scripts") + parser.add_argument("--current", "-c", required=True, help="Current schema JSON file") + parser.add_argument("--target", "-t", required=True, help="Target schema JSON file") + parser.add_argument("--output", "-o", help="Output file (default: stdout)") + parser.add_argument("--format", "-f", choices=["json", "text", "sql"], default="text", + help="Output format") + parser.add_argument("--zero-downtime", "-z", action="store_true", + help="Generate zero-downtime migration strategy") + parser.add_argument("--validate-only", "-v", action="store_true", + help="Only generate validation queries") + parser.add_argument("--include-validations", action="store_true", + help="Include validation queries in output") 
+ + args = parser.parse_args() + + try: + # Load schemas + with open(args.current, 'r') as f: + current_schema = json.load(f) + + with open(args.target, 'r') as f: + target_schema = json.load(f) + + # Compare schemas + comparator = SchemaComparator() + comparator.load_schemas(current_schema, target_schema) + changes = comparator.compare_schemas() + + if not any(changes.values()): + print("No schema changes detected.") + return 0 + + # Generate migration + generator = MigrationGenerator(zero_downtime=args.zero_downtime) + migration_plan = generator.generate_migration(changes) + + # Generate validations if requested + validations = None + if args.include_validations or args.validate_only: + validator = ValidationGenerator() + validations = validator.generate_validations(migration_plan) + + # Format output + if args.validate_only: + output = json.dumps([asdict(v) for v in validations], indent=2) + elif args.format == "json": + result = {"migration_plan": asdict(migration_plan)} + if validations: + result["validations"] = [asdict(v) for v in validations] + output = json.dumps(result, indent=2) + elif args.format == "sql": + sql_lines = [] + sql_lines.append("-- Database Migration Script") + sql_lines.append(f"-- Migration ID: {migration_plan.migration_id}") + sql_lines.append(f"-- Created: {migration_plan.created_at}") + sql_lines.append("") + + for step in migration_plan.steps: + sql_lines.append(f"-- Step: {step.description}") + sql_lines.append(step.sql_forward) + sql_lines.append("") + + output = "\n".join(sql_lines) + else: # text format + output = format_migration_plan_text(migration_plan, validations) + + # Write output + if args.output: + with open(args.output, 'w') as f: + f.write(output) + else: + print(output) + + return 0 + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git 
a/engineering/database-designer/references/database_selection_decision_tree.md b/engineering/database-designer/references/database_selection_decision_tree.md new file mode 100644 index 0000000..8c0913d --- /dev/null +++ b/engineering/database-designer/references/database_selection_decision_tree.md @@ -0,0 +1,373 @@ +# Database Selection Decision Tree + +## Overview + +Choosing the right database technology is crucial for application success. This guide provides a systematic approach to database selection based on specific requirements, data patterns, and operational constraints. + +## Decision Framework + +### Primary Questions + +1. **What is your primary use case?** + - OLTP (Online Transaction Processing) + - OLAP (Online Analytical Processing) + - Real-time analytics + - Content management + - Search and discovery + - Time-series data + - Graph relationships + +2. **What are your consistency requirements?** + - Strong consistency (ACID) + - Eventual consistency + - Causal consistency + - Session consistency + +3. **What are your scalability needs?** + - Vertical scaling sufficient + - Horizontal scaling required + - Global distribution needed + - Multi-region requirements + +4. **What is your data structure?** + - Structured (relational) + - Semi-structured (JSON/XML) + - Unstructured (documents, media) + - Graph relationships + - Time-series data + - Key-value pairs + +## Decision Tree + +``` +START: What is your primary use case? +│ +├── OLTP (Transactional Applications) +│ │ +│ ├── Do you need strong ACID guarantees? +│ │ ├── YES → Do you need horizontal scaling? +│ │ │ ├── YES → Distributed SQL +│ │ │ │ ├── CockroachDB (Global, multi-region) +│ │ │ │ ├── TiDB (MySQL compatibility) +│ │ │ │ └── Spanner (Google Cloud) +│ │ │ └── NO → Traditional SQL +│ │ │ ├── PostgreSQL (Feature-rich, extensions) +│ │ │ ├── MySQL (Performance, ecosystem) +│ │ │ └── SQL Server (Microsoft stack) +│ │ └── NO → Are you primarily key-value access? 
+│ │ ├── YES → Key-Value Stores +│ │ │ ├── Redis (In-memory, caching) +│ │ │ ├── DynamoDB (AWS managed) +│ │ │ └── Cassandra (High availability) +│ │ └── NO → Document Stores +│ │ ├── MongoDB (General purpose) +│ │ ├── CouchDB (Sync, replication) +│ │ └── Amazon DocumentDB (MongoDB compatible) +│ │ +├── OLAP (Analytics and Reporting) +│ │ +│ ├── What is your data volume? +│ │ ├── Small to Medium (< 1TB) → Traditional SQL with optimization +│ │ │ ├── PostgreSQL with columnar extensions +│ │ │ ├── MySQL with analytics engine +│ │ │ └── SQL Server with columnstore +│ │ ├── Large (1TB - 100TB) → Data Warehouse Solutions +│ │ │ ├── Snowflake (Cloud-native) +│ │ │ ├── BigQuery (Google Cloud) +│ │ │ ├── Redshift (AWS) +│ │ │ └── Synapse (Azure) +│ │ └── Very Large (> 100TB) → Big Data Platforms +│ │ ├── Databricks (Unified analytics) +│ │ ├── Apache Spark on cloud +│ │ └── Hadoop ecosystem +│ │ +├── Real-time Analytics +│ │ +│ ├── Do you need sub-second query responses? +│ │ ├── YES → Stream Processing + OLAP +│ │ │ ├── ClickHouse (Fast analytics) +│ │ │ ├── Apache Druid (Real-time OLAP) +│ │ │ ├── Pinot (LinkedIn's real-time DB) +│ │ │ └── TimescaleDB (Time-series) +│ │ └── NO → Traditional OLAP solutions +│ │ +├── Search and Discovery +│ │ +│ ├── What type of search? +│ │ ├── Full-text search → Search Engines +│ │ │ ├── Elasticsearch (Full-featured) +│ │ │ ├── OpenSearch (AWS fork of ES) +│ │ │ └── Solr (Apache Lucene-based) +│ │ ├── Vector/similarity search → Vector Databases +│ │ │ ├── Pinecone (Managed vector DB) +│ │ │ ├── Weaviate (Open source) +│ │ │ ├── Chroma (Embeddings) +│ │ │ └── PostgreSQL with pgvector +│ │ └── Faceted search → Search + SQL combination +│ │ +├── Graph Relationships +│ │ +│ ├── Do you need complex graph traversals? 
+│ │ ├── YES → Graph Databases +│ │ │ ├── Neo4j (Property graph) +│ │ │ ├── Amazon Neptune (Multi-model) +│ │ │ ├── ArangoDB (Multi-model) +│ │ │ └── TigerGraph (Analytics focused) +│ │ └── NO → SQL with recursive queries +│ │ └── PostgreSQL with recursive CTEs +│ │ +└── Time-series Data + │ + ├── What is your write volume? + ├── High (millions/sec) → Specialized Time-series + │ ├── InfluxDB (Purpose-built) + │ ├── TimescaleDB (PostgreSQL extension) + │ ├── Apache Druid (Analytics focused) + │ └── Prometheus (Monitoring) + └── Medium → SQL with time-series optimization + └── PostgreSQL with partitioning +``` + +## Database Categories Deep Dive + +### Traditional SQL Databases + +**PostgreSQL** +- **Best For**: Complex queries, JSON data, extensions, geospatial +- **Strengths**: Feature-rich, reliable, strong consistency, extensible +- **Use Cases**: OLTP, mixed workloads, JSON documents, geospatial applications +- **Scaling**: Vertical scaling, read replicas, partitioning +- **When to Choose**: Need SQL features, complex queries, moderate scale + +**MySQL** +- **Best For**: Web applications, read-heavy workloads, simple schema +- **Strengths**: Performance, replication, large ecosystem +- **Use Cases**: Web apps, content management, e-commerce +- **Scaling**: Read replicas, sharding, clustering (MySQL Cluster) +- **When to Choose**: Simple schema, performance priority, large community + +**SQL Server** +- **Best For**: Microsoft ecosystem, enterprise features, business intelligence +- **Strengths**: Integration, tooling, enterprise features +- **Use Cases**: Enterprise applications, .NET applications, BI +- **Scaling**: Always On availability groups, partitioning +- **When to Choose**: Microsoft stack, enterprise requirements + +### Distributed SQL (NewSQL) + +**CockroachDB** +- **Best For**: Global applications, strong consistency, horizontal scaling +- **Strengths**: ACID guarantees, automatic scaling, survival +- **Use Cases**: Multi-region apps, financial 
services, global SaaS +- **Trade-offs**: Complex setup, higher latency for global transactions +- **When to Choose**: Need SQL + global scale + consistency + +**TiDB** +- **Best For**: MySQL compatibility with horizontal scaling +- **Strengths**: MySQL protocol, HTAP (hybrid), cloud-native +- **Use Cases**: MySQL migrations, hybrid workloads +- **When to Choose**: Existing MySQL expertise, need scale + +### NoSQL Document Stores + +**MongoDB** +- **Best For**: Flexible schema, rapid development, document-centric data +- **Strengths**: Developer experience, flexible schema, rich queries +- **Use Cases**: Content management, catalogs, user profiles, IoT +- **Scaling**: Automatic sharding, replica sets +- **When to Choose**: Schema evolution, document structure, rapid development + +**CouchDB** +- **Best For**: Offline-first applications, multi-master replication +- **Strengths**: HTTP API, replication, conflict resolution +- **Use Cases**: Mobile apps, distributed systems, offline scenarios +- **When to Choose**: Need offline capabilities, bi-directional sync + +### Key-Value Stores + +**Redis** +- **Best For**: Caching, sessions, real-time applications, pub/sub +- **Strengths**: Performance, data structures, persistence options +- **Use Cases**: Caching, leaderboards, real-time analytics, queues +- **Scaling**: Clustering, sentinel for HA +- **When to Choose**: High performance, simple data model, caching + +**DynamoDB** +- **Best For**: Serverless applications, predictable performance, AWS ecosystem +- **Strengths**: Managed, auto-scaling, consistent performance +- **Use Cases**: Web applications, gaming, IoT, mobile backends +- **Trade-offs**: Vendor lock-in, limited querying +- **When to Choose**: AWS ecosystem, serverless, managed solution + +### Column-Family Stores + +**Cassandra** +- **Best For**: Write-heavy workloads, high availability, linear scalability +- **Strengths**: No single point of failure, tunable consistency +- **Use Cases**: Time-series, IoT, 
messaging, activity feeds +- **Trade-offs**: Complex operations, eventual consistency +- **When to Choose**: High write volume, availability over consistency + +**HBase** +- **Best For**: Big data applications, Hadoop ecosystem +- **Strengths**: Hadoop integration, consistent reads +- **Use Cases**: Analytics on big data, time-series at scale +- **When to Choose**: Hadoop ecosystem, very large datasets + +### Graph Databases + +**Neo4j** +- **Best For**: Complex relationships, graph algorithms, traversals +- **Strengths**: Mature ecosystem, Cypher query language, algorithms +- **Use Cases**: Social networks, recommendation engines, fraud detection +- **Trade-offs**: Specialized use case, learning curve +- **When to Choose**: Relationship-heavy data, graph algorithms + +### Time-Series Databases + +**InfluxDB** +- **Best For**: Time-series data, IoT, monitoring, analytics +- **Strengths**: Purpose-built, efficient storage, query language +- **Use Cases**: IoT sensors, monitoring, DevOps metrics +- **When to Choose**: High-volume time-series data + +**TimescaleDB** +- **Best For**: Time-series with SQL familiarity +- **Strengths**: PostgreSQL compatibility, SQL queries, ecosystem +- **Use Cases**: Financial data, IoT with complex queries +- **When to Choose**: Time-series + SQL requirements + +### Search Engines + +**Elasticsearch** +- **Best For**: Full-text search, log analysis, real-time search +- **Strengths**: Powerful search, analytics, ecosystem (ELK stack) +- **Use Cases**: Search applications, log analysis, monitoring +- **Trade-offs**: Complex operations, resource intensive +- **When to Choose**: Advanced search requirements, analytics + +### Data Warehouses + +**Snowflake** +- **Best For**: Cloud-native analytics, data sharing, varied workloads +- **Strengths**: Separation of compute/storage, automatic scaling +- **Use Cases**: Data warehousing, analytics, data science +- **When to Choose**: Cloud-native, analytics-focused, multi-cloud + +**BigQuery** +- 
**Best For**: Serverless analytics, Google ecosystem, machine learning +- **Strengths**: Serverless, petabyte scale, ML integration +- **Use Cases**: Analytics, data science, reporting +- **When to Choose**: Google Cloud, serverless analytics + +## Selection Criteria Matrix + +| Criterion | SQL | NewSQL | Document | Key-Value | Column-Family | Graph | Time-Series | +|-----------|-----|--------|----------|-----------|---------------|-------|-------------| +| ACID Guarantees | ✅ Strong | ✅ Strong | ⚠️ Eventual | ⚠️ Eventual | ⚠️ Tunable | ⚠️ Varies | ⚠️ Varies | +| Horizontal Scaling | ❌ Limited | ✅ Native | ✅ Native | ✅ Native | ✅ Native | ⚠️ Limited | ✅ Native | +| Query Flexibility | ✅ High | ✅ High | ⚠️ Moderate | ❌ Low | ❌ Low | ✅ High | ⚠️ Specialized | +| Schema Flexibility | ❌ Rigid | ❌ Rigid | ✅ High | ✅ High | ⚠️ Moderate | ✅ High | ⚠️ Structured | +| Performance (Reads) | ⚠️ Good | ⚠️ Good | ✅ Excellent | ✅ Excellent | ✅ Excellent | ⚠️ Good | ✅ Excellent | +| Performance (Writes) | ⚠️ Good | ⚠️ Good | ✅ Excellent | ✅ Excellent | ✅ Excellent | ⚠️ Good | ✅ Excellent | +| Operational Complexity | ✅ Low | ❌ High | ⚠️ Moderate | ✅ Low | ❌ High | ⚠️ Moderate | ⚠️ Moderate | +| Ecosystem Maturity | ✅ Mature | ⚠️ Growing | ✅ Mature | ✅ Mature | ✅ Mature | ✅ Mature | ⚠️ Growing | + +## Decision Checklist + +### Requirements Analysis +- [ ] **Data Volume**: Current and projected data size +- [ ] **Transaction Volume**: Reads per second, writes per second +- [ ] **Consistency Requirements**: Strong vs eventual consistency needs +- [ ] **Query Patterns**: Simple lookups vs complex analytics +- [ ] **Schema Evolution**: How often does schema change? 
+- [ ] **Geographic Distribution**: Single region vs global +- [ ] **Availability Requirements**: Acceptable downtime +- [ ] **Team Expertise**: Existing knowledge and learning curve +- [ ] **Budget Constraints**: Licensing, infrastructure, operational costs +- [ ] **Compliance Requirements**: Data residency, audit trails + +### Technical Evaluation +- [ ] **Performance Testing**: Benchmark with realistic data and queries +- [ ] **Scalability Testing**: Test scaling limits and patterns +- [ ] **Failure Scenarios**: Test backup, recovery, and failure handling +- [ ] **Integration Testing**: APIs, connectors, ecosystem tools +- [ ] **Migration Path**: How to migrate from current system +- [ ] **Monitoring and Observability**: Available tooling and metrics + +### Operational Considerations +- [ ] **Management Complexity**: Setup, configuration, maintenance +- [ ] **Backup and Recovery**: Built-in vs external tools +- [ ] **Security Features**: Authentication, authorization, encryption +- [ ] **Upgrade Path**: Version compatibility and upgrade process +- [ ] **Support Options**: Community vs commercial support +- [ ] **Lock-in Risk**: Portability and vendor independence + +## Common Decision Patterns + +### E-commerce Platform +**Typical Choice**: PostgreSQL or MySQL +- **Primary Data**: Product catalog, orders, users (structured) +- **Query Patterns**: OLTP with some analytics +- **Consistency**: Strong consistency for financial data +- **Scale**: Moderate with read replicas +- **Additional**: Redis for caching, Elasticsearch for product search + +### IoT/Sensor Data Platform +**Typical Choice**: TimescaleDB or InfluxDB +- **Primary Data**: Time-series sensor readings +- **Query Patterns**: Time-based aggregations, trend analysis +- **Scale**: High write volume, moderate read volume +- **Additional**: Kafka for ingestion, PostgreSQL for metadata + +### Social Media Application +**Typical Choice**: Combination approach +- **User Profiles**: MongoDB (flexible schema) +- 
**Relationships**: Neo4j (graph relationships) +- **Activity Feeds**: Cassandra (high write volume) +- **Search**: Elasticsearch (content discovery) +- **Caching**: Redis (sessions, real-time data) + +### Analytics Platform +**Typical Choice**: Snowflake or BigQuery +- **Primary Use**: Complex analytical queries +- **Data Volume**: Large (TB to PB scale) +- **Query Patterns**: Ad-hoc analytics, reporting +- **Users**: Data analysts, data scientists +- **Additional**: Data lake (S3/GCS) for raw data storage + +### Global SaaS Application +**Typical Choice**: CockroachDB or DynamoDB +- **Requirements**: Multi-region, strong consistency +- **Scale**: Global user base +- **Compliance**: Data residency requirements +- **Availability**: High availability across regions + +## Migration Strategies + +### From Monolithic to Distributed +1. **Assessment**: Identify scaling bottlenecks +2. **Data Partitioning**: Plan how to split data +3. **Gradual Migration**: Move non-critical data first +4. **Dual Writes**: Run both systems temporarily +5. **Validation**: Verify data consistency +6. **Cutover**: Switch reads and writes gradually + +### Technology Stack Evolution +1. **Start Simple**: Begin with PostgreSQL or MySQL +2. **Identify Bottlenecks**: Monitor performance and scaling issues +3. **Selective Scaling**: Move specific workloads to specialized databases +4. **Polyglot Persistence**: Use multiple databases for different use cases +5. **Service Boundaries**: Align database choice with service boundaries + +## Conclusion + +Database selection should be driven by: + +1. **Specific Use Case Requirements**: Not all applications need the same database +2. **Data Characteristics**: Structure, volume, and access patterns matter +3. **Non-functional Requirements**: Consistency, availability, performance targets +4. **Team and Organizational Factors**: Expertise, operational capacity, budget +5. 
**Evolution Path**: How requirements and scale will change over time

The best database choice is often not a single technology, but a combination of databases that each excel at their specific use case within your application architecture.
\ No newline at end of file
diff --git a/engineering/database-designer/references/index_strategy_patterns.md b/engineering/database-designer/references/index_strategy_patterns.md
new file mode 100644
index 0000000..b3e63f9
--- /dev/null
+++ b/engineering/database-designer/references/index_strategy_patterns.md
@@ -0,0 +1,424 @@
# Index Strategy Patterns

## Overview

Database indexes are critical for query performance, but they come with trade-offs. This guide covers proven patterns for index design, optimization strategies, and common pitfalls to avoid.

## Index Types and Use Cases

### B-Tree Indexes (Default)

**Best For:**
- Equality queries (`WHERE column = value`)
- Range queries (`WHERE column BETWEEN x AND y`)
- Sorting (`ORDER BY column`)
- Prefix pattern matching with a trailing wildcard (`WHERE column LIKE 'prefix%'`); a leading wildcard (`LIKE '%suffix'`) cannot use a B-tree index

**Characteristics:**
- Logarithmic lookup time O(log n)
- Supports partial matches on composite indexes
- Most versatile index type

**Example:**
```sql
-- Single column B-tree index
CREATE INDEX idx_customers_email ON customers (email);

-- Composite B-tree index
CREATE INDEX idx_orders_customer_date ON orders (customer_id, order_date);
```

### Hash Indexes

**Best For:**
- Exact equality matches only
- High-cardinality columns
- Primary key lookups

**Characteristics:**
- Constant lookup time O(1) for exact matches
- Cannot support range queries or sorting
- Memory-efficient for equality operations

**Example:**
```sql
-- Hash index for exact lookups (PostgreSQL)
CREATE INDEX idx_users_id_hash ON users USING HASH (user_id);
```

### Partial Indexes

**Best For:**
- Filtering on subset of data
- Reducing index size and maintenance overhead
- Query
patterns that consistently use specific filters + +**Example:** +```sql +-- Index only active users +CREATE INDEX idx_active_users_email +ON users (email) +WHERE status = 'active'; + +-- Index recent orders only +CREATE INDEX idx_recent_orders +ON orders (customer_id, created_at) +WHERE created_at > CURRENT_DATE - INTERVAL '90 days'; + +-- Index non-null values only +CREATE INDEX idx_customers_phone +ON customers (phone_number) +WHERE phone_number IS NOT NULL; +``` + +### Covering Indexes + +**Best For:** +- Eliminating table lookups for SELECT queries +- Frequently accessed column combinations +- Read-heavy workloads + +**Example:** +```sql +-- Covering index with INCLUDE clause (SQL Server/PostgreSQL) +CREATE INDEX idx_orders_customer_covering +ON orders (customer_id, order_date) +INCLUDE (order_total, status); + +-- Query can be satisfied entirely from index: +-- SELECT order_total, status FROM orders +-- WHERE customer_id = 123 AND order_date > '2024-01-01'; +``` + +### Functional/Expression Indexes + +**Best For:** +- Queries on transformed column values +- Case-insensitive searches +- Complex calculations + +**Example:** +```sql +-- Case-insensitive email searches +CREATE INDEX idx_users_email_lower +ON users (LOWER(email)); + +-- Date part extraction +CREATE INDEX idx_orders_month +ON orders (EXTRACT(MONTH FROM order_date)); + +-- JSON field indexing +CREATE INDEX idx_users_preferences_theme +ON users ((preferences->>'theme')); +``` + +## Composite Index Design Patterns + +### Column Ordering Strategy + +**Rule: Most Selective First** +```sql +-- Query: WHERE status = 'active' AND city = 'New York' AND age > 25 +-- Assume: status has 3 values, city has 100 values, age has 80 values + +-- GOOD: Most selective column first +CREATE INDEX idx_users_city_age_status ON users (city, age, status); + +-- BAD: Least selective first +CREATE INDEX idx_users_status_city_age ON users (status, city, age); +``` + +**Selectivity Calculation:** +```sql +-- Estimate 
selectivity for each column +SELECT + 'status' as column_name, + COUNT(DISTINCT status)::float / COUNT(*) as selectivity +FROM users +UNION ALL +SELECT + 'city' as column_name, + COUNT(DISTINCT city)::float / COUNT(*) as selectivity +FROM users +UNION ALL +SELECT + 'age' as column_name, + COUNT(DISTINCT age)::float / COUNT(*) as selectivity +FROM users; +``` + +### Query Pattern Matching + +**Pattern 1: Equality + Range** +```sql +-- Query: WHERE customer_id = 123 AND order_date BETWEEN '2024-01-01' AND '2024-03-31' +CREATE INDEX idx_orders_customer_date ON orders (customer_id, order_date); +``` + +**Pattern 2: Multiple Equality Conditions** +```sql +-- Query: WHERE status = 'active' AND category = 'premium' AND region = 'US' +CREATE INDEX idx_users_status_category_region ON users (status, category, region); +``` + +**Pattern 3: Equality + Sorting** +```sql +-- Query: WHERE category = 'electronics' ORDER BY price DESC, created_at DESC +CREATE INDEX idx_products_category_price_date ON products (category, price DESC, created_at DESC); +``` + +### Prefix Optimization + +**Efficient Prefix Usage:** +```sql +-- Index supports all these queries efficiently: +CREATE INDEX idx_users_lastname_firstname_email ON users (last_name, first_name, email); + +-- ✓ Uses index: WHERE last_name = 'Smith' +-- ✓ Uses index: WHERE last_name = 'Smith' AND first_name = 'John' +-- ✓ Uses index: WHERE last_name = 'Smith' AND first_name = 'John' AND email = 'john@...' +-- ✗ Cannot use index: WHERE first_name = 'John' +-- ✗ Cannot use index: WHERE email = 'john@...' 
+``` + +## Performance Optimization Patterns + +### Index Intersection vs Composite Indexes + +**Scenario: Multiple single-column indexes** +```sql +CREATE INDEX idx_users_age ON users (age); +CREATE INDEX idx_users_city ON users (city); +CREATE INDEX idx_users_status ON users (status); + +-- Query: WHERE age > 25 AND city = 'NYC' AND status = 'active' +-- Database may use index intersection (combining multiple indexes) +-- Performance varies by database engine and data distribution +``` + +**Better: Purpose-built composite index** +```sql +-- More efficient for the specific query pattern +CREATE INDEX idx_users_city_status_age ON users (city, status, age); +``` + +### Index Size vs Performance Trade-off + +**Wide Indexes (Many Columns):** +```sql +-- Pros: Covers many query patterns, excellent for covering queries +-- Cons: Large index size, slower writes, more memory usage +CREATE INDEX idx_orders_comprehensive +ON orders (customer_id, order_date, status, total_amount, shipping_method, created_at) +INCLUDE (order_notes, billing_address); +``` + +**Narrow Indexes (Few Columns):** +```sql +-- Pros: Smaller size, faster writes, less memory +-- Cons: May not cover all query patterns +CREATE INDEX idx_orders_customer_date ON orders (customer_id, order_date); +CREATE INDEX idx_orders_status ON orders (status); +``` + +### Maintenance Optimization + +**Regular Index Analysis:** +```sql +-- PostgreSQL: Check index usage statistics +SELECT + schemaname, + tablename, + indexname, + idx_scan as index_scans, + idx_tup_read as tuples_read, + idx_tup_fetch as tuples_fetched +FROM pg_stat_user_indexes +WHERE idx_scan = 0 -- Potentially unused indexes +ORDER BY schemaname, tablename; + +-- Check index size +SELECT + indexname, + pg_size_pretty(pg_relation_size(indexname::regclass)) as index_size +FROM pg_indexes +WHERE schemaname = 'public' +ORDER BY pg_relation_size(indexname::regclass) DESC; +``` + +## Common Anti-Patterns + +### 1. 
Over-Indexing + +**Problem:** +```sql +-- Too many similar indexes +CREATE INDEX idx_orders_customer ON orders (customer_id); +CREATE INDEX idx_orders_customer_date ON orders (customer_id, order_date); +CREATE INDEX idx_orders_customer_status ON orders (customer_id, status); +CREATE INDEX idx_orders_customer_date_status ON orders (customer_id, order_date, status); +``` + +**Solution:** +```sql +-- One well-designed composite index can often replace several +CREATE INDEX idx_orders_customer_date_status ON orders (customer_id, order_date, status); +-- Drop redundant indexes: idx_orders_customer, idx_orders_customer_date, idx_orders_customer_status +``` + +### 2. Wrong Column Order + +**Problem:** +```sql +-- Query: WHERE active = true AND user_type = 'premium' AND city = 'Chicago' +-- Bad order: boolean first (lowest selectivity) +CREATE INDEX idx_users_active_type_city ON users (active, user_type, city); +``` + +**Solution:** +```sql +-- Good order: most selective first +CREATE INDEX idx_users_city_type_active ON users (city, user_type, active); +``` + +### 3. Ignoring Query Patterns + +**Problem:** +```sql +-- Index doesn't match common query patterns +CREATE INDEX idx_products_name ON products (product_name); + +-- But queries are: WHERE category = 'electronics' AND price BETWEEN 100 AND 500 +-- Index is not helpful for these queries +``` + +**Solution:** +```sql +-- Match actual query patterns +CREATE INDEX idx_products_category_price ON products (category, price); +``` + +### 4. 
Function in WHERE Without Functional Index + +**Problem:** +```sql +-- Query uses function but no functional index +SELECT * FROM users WHERE LOWER(email) = 'john@example.com'; +-- Regular index on email won't help +``` + +**Solution:** +```sql +-- Create functional index +CREATE INDEX idx_users_email_lower ON users (LOWER(email)); +``` + +## Advanced Patterns + +### Multi-Column Statistics + +**When Columns Are Correlated:** +```sql +-- If city and state are highly correlated, create extended statistics +CREATE STATISTICS stats_address_correlation ON city, state FROM addresses; +ANALYZE addresses; + +-- Helps query planner make better decisions for: +-- WHERE city = 'New York' AND state = 'NY' +``` + +### Conditional Indexes for Data Lifecycle + +**Pattern: Different indexes for different data ages** +```sql +-- Hot data (recent orders) - optimized for OLTP +CREATE INDEX idx_orders_hot_customer_date +ON orders (customer_id, order_date DESC) +WHERE order_date > CURRENT_DATE - INTERVAL '30 days'; + +-- Warm data (older orders) - optimized for analytics +CREATE INDEX idx_orders_warm_date_total +ON orders (order_date, total_amount) +WHERE order_date <= CURRENT_DATE - INTERVAL '30 days' + AND order_date > CURRENT_DATE - INTERVAL '1 year'; + +-- Cold data (archived orders) - minimal indexing +CREATE INDEX idx_orders_cold_date +ON orders (order_date) +WHERE order_date <= CURRENT_DATE - INTERVAL '1 year'; +``` + +### Index-Only Scan Optimization + +**Design indexes to avoid table access:** +```sql +-- Query: SELECT order_id, total_amount, status FROM orders WHERE customer_id = ? 
+CREATE INDEX idx_orders_customer_covering +ON orders (customer_id) +INCLUDE (order_id, total_amount, status); + +-- Or as composite index (if database doesn't support INCLUDE) +CREATE INDEX idx_orders_customer_covering +ON orders (customer_id, order_id, total_amount, status); +``` + +## Index Monitoring and Maintenance + +### Performance Monitoring Queries + +**Find slow queries that might benefit from indexes:** +```sql +-- PostgreSQL: Find queries with high cost +SELECT + query, + calls, + total_time, + mean_time, + rows +FROM pg_stat_statements +WHERE mean_time > 1000 -- Queries taking > 1 second +ORDER BY mean_time DESC; +``` + +**Identify missing indexes:** +```sql +-- Look for sequential scans on large tables +SELECT + schemaname, + tablename, + seq_scan, + seq_tup_read, + idx_scan, + n_tup_ins + n_tup_upd + n_tup_del as write_activity +FROM pg_stat_user_tables +WHERE seq_scan > 100 + AND seq_tup_read > 100000 -- Large sequential scans + AND (idx_scan = 0 OR seq_scan > idx_scan * 2) +ORDER BY seq_tup_read DESC; +``` + +### Index Maintenance Schedule + +**Regular Maintenance Tasks:** +```sql +-- Rebuild fragmented indexes (SQL Server) +ALTER INDEX ALL ON orders REBUILD; + +-- Update statistics (PostgreSQL) +ANALYZE orders; + +-- Check for unused indexes monthly +SELECT * FROM pg_stat_user_indexes WHERE idx_scan = 0; +``` + +## Conclusion + +Effective index strategy requires: + +1. **Understanding Query Patterns**: Analyze actual application queries, not theoretical scenarios +2. **Measuring Performance**: Use query execution plans and timing to validate index effectiveness +3. **Balancing Trade-offs**: More indexes improve reads but slow writes and increase storage +4. **Regular Maintenance**: Monitor index usage and performance, remove unused indexes +5. 
**Iterative Improvement**: Start with essential indexes, add and optimize based on real usage + +The goal is not to index every possible query pattern, but to create a focused set of indexes that provide maximum benefit for your application's specific workload while minimizing maintenance overhead. \ No newline at end of file diff --git a/engineering/database-designer/references/normalization_guide.md b/engineering/database-designer/references/normalization_guide.md new file mode 100644 index 0000000..f57cc6f --- /dev/null +++ b/engineering/database-designer/references/normalization_guide.md @@ -0,0 +1,354 @@ +# Database Normalization Guide + +## Overview + +Database normalization is the process of organizing data to minimize redundancy and dependency issues. It involves decomposing tables to eliminate data anomalies and improve data integrity. + +## Normal Forms + +### First Normal Form (1NF) + +**Requirements:** +- Each column contains atomic (indivisible) values +- Each column contains values of the same type +- Each column has a unique name +- The order of data storage doesn't matter + +**Violations and Solutions:** + +**Problem: Multiple values in single column** +```sql +-- BAD: Multiple phone numbers in one column +CREATE TABLE customers ( + id INT PRIMARY KEY, + name VARCHAR(100), + phones VARCHAR(500) -- "555-1234, 555-5678, 555-9012" +); + +-- GOOD: Separate table for multiple phones +CREATE TABLE customers ( + id INT PRIMARY KEY, + name VARCHAR(100) +); + +CREATE TABLE customer_phones ( + id INT PRIMARY KEY, + customer_id INT REFERENCES customers(id), + phone VARCHAR(20), + phone_type VARCHAR(10) -- 'mobile', 'home', 'work' +); +``` + +**Problem: Repeating groups** +```sql +-- BAD: Repeating column patterns +CREATE TABLE orders ( + order_id INT PRIMARY KEY, + customer_id INT, + item1_name VARCHAR(100), + item1_qty INT, + item1_price DECIMAL(8,2), + item2_name VARCHAR(100), + item2_qty INT, + item2_price DECIMAL(8,2), + item3_name VARCHAR(100), + 
item3_qty INT, + item3_price DECIMAL(8,2) +); + +-- GOOD: Separate table for order items +CREATE TABLE orders ( + order_id INT PRIMARY KEY, + customer_id INT, + order_date DATE +); + +CREATE TABLE order_items ( + id INT PRIMARY KEY, + order_id INT REFERENCES orders(order_id), + item_name VARCHAR(100), + quantity INT, + unit_price DECIMAL(8,2) +); +``` + +### Second Normal Form (2NF) + +**Requirements:** +- Must be in 1NF +- All non-key attributes must be fully functionally dependent on the primary key +- No partial dependencies (applies only to tables with composite primary keys) + +**Violations and Solutions:** + +**Problem: Partial dependency on composite key** +```sql +-- BAD: Student course enrollment with partial dependencies +CREATE TABLE student_courses ( + student_id INT, + course_id INT, + student_name VARCHAR(100), -- Depends only on student_id + student_major VARCHAR(50), -- Depends only on student_id + course_title VARCHAR(200), -- Depends only on course_id + course_credits INT, -- Depends only on course_id + grade CHAR(2), -- Depends on both student_id AND course_id + PRIMARY KEY (student_id, course_id) +); + +-- GOOD: Separate tables eliminate partial dependencies +CREATE TABLE students ( + student_id INT PRIMARY KEY, + student_name VARCHAR(100), + student_major VARCHAR(50) +); + +CREATE TABLE courses ( + course_id INT PRIMARY KEY, + course_title VARCHAR(200), + course_credits INT +); + +CREATE TABLE enrollments ( + student_id INT, + course_id INT, + grade CHAR(2), + enrollment_date DATE, + PRIMARY KEY (student_id, course_id), + FOREIGN KEY (student_id) REFERENCES students(student_id), + FOREIGN KEY (course_id) REFERENCES courses(course_id) +); +``` + +### Third Normal Form (3NF) + +**Requirements:** +- Must be in 2NF +- No transitive dependencies (non-key attributes should not depend on other non-key attributes) +- All non-key attributes must depend directly on the primary key + +**Violations and Solutions:** + +**Problem: Transitive dependency** 
+```sql +-- BAD: Employee table with transitive dependency +CREATE TABLE employees ( + employee_id INT PRIMARY KEY, + employee_name VARCHAR(100), + department_id INT, + department_name VARCHAR(100), -- Depends on department_id, not employee_id + department_location VARCHAR(100), -- Transitive dependency through department_id + department_budget DECIMAL(10,2), -- Transitive dependency through department_id + salary DECIMAL(8,2) +); + +-- GOOD: Separate department information +CREATE TABLE departments ( + department_id INT PRIMARY KEY, + department_name VARCHAR(100), + department_location VARCHAR(100), + department_budget DECIMAL(10,2) +); + +CREATE TABLE employees ( + employee_id INT PRIMARY KEY, + employee_name VARCHAR(100), + department_id INT, + salary DECIMAL(8,2), + FOREIGN KEY (department_id) REFERENCES departments(department_id) +); +``` + +### Boyce-Codd Normal Form (BCNF) + +**Requirements:** +- Must be in 3NF +- Every determinant must be a candidate key +- Stricter than 3NF - handles cases where 3NF doesn't eliminate all anomalies + +**Violations and Solutions:** + +**Problem: Determinant that's not a candidate key** +```sql +-- BAD: Student advisor relationship with BCNF violation +-- Assumption: Each student has one advisor per subject, +-- each advisor teaches only one subject, but can advise multiple students +CREATE TABLE student_advisor ( + student_id INT, + subject VARCHAR(50), + advisor_id INT, + PRIMARY KEY (student_id, subject) +); +-- Problem: advisor_id determines subject, but advisor_id is not a candidate key + +-- GOOD: Separate the functional dependencies +CREATE TABLE advisors ( + advisor_id INT PRIMARY KEY, + subject VARCHAR(50) +); + +CREATE TABLE student_advisor_assignments ( + student_id INT, + advisor_id INT, + PRIMARY KEY (student_id, advisor_id), + FOREIGN KEY (advisor_id) REFERENCES advisors(advisor_id) +); +``` + +## Denormalization Strategies + +### When to Denormalize + +1. 
**Performance Requirements**: When query performance is more critical than storage efficiency +2. **Read-Heavy Workloads**: When data is read much more frequently than it's updated +3. **Reporting Systems**: When complex joins negatively impact reporting performance +4. **Caching Strategies**: When pre-computed values eliminate expensive calculations + +### Common Denormalization Patterns + +**1. Redundant Storage for Performance** +```sql +-- Store frequently accessed calculated values +CREATE TABLE orders ( + order_id INT PRIMARY KEY, + customer_id INT, + order_total DECIMAL(10,2), -- Denormalized: sum of order_items.total + item_count INT, -- Denormalized: count of order_items + created_at TIMESTAMP +); + +CREATE TABLE order_items ( + item_id INT PRIMARY KEY, + order_id INT, + product_id INT, + quantity INT, + unit_price DECIMAL(8,2), + total DECIMAL(10,2) -- quantity * unit_price (denormalized) +); +``` + +**2. Materialized Aggregates** +```sql +-- Pre-computed summary tables for reporting +CREATE TABLE monthly_sales_summary ( + year_month VARCHAR(7), -- '2024-03' + product_category VARCHAR(50), + total_sales DECIMAL(12,2), + total_units INT, + avg_order_value DECIMAL(8,2), + unique_customers INT, + updated_at TIMESTAMP +); +``` + +**3. 
Historical Data Snapshots**
+```sql
+-- Store historical state to avoid complex temporal queries
+CREATE TABLE customer_status_history (
+    id INT PRIMARY KEY,
+    customer_id INT,
+    status VARCHAR(20),
+    tier VARCHAR(10),
+    total_lifetime_value DECIMAL(12,2), -- Snapshot at this point in time
+    snapshot_date DATE
+);
+```
+
+## Trade-offs Analysis
+
+### Normalization Benefits
+- **Data Integrity**: Reduced risk of inconsistent data
+- **Storage Efficiency**: Less data duplication
+- **Update Efficiency**: Changes need to be made in only one place
+- **Flexibility**: Easier to modify schema as requirements change
+
+### Normalization Costs
+- **Query Complexity**: More joins required for data retrieval
+- **Performance Impact**: Joins can be expensive on large datasets
+- **Development Complexity**: More complex data access patterns
+
+### Denormalization Benefits
+- **Query Performance**: Fewer joins, faster queries
+- **Simplified Queries**: Direct access to related data
+- **Read Optimization**: Optimized for data retrieval patterns
+- **Reduced Load**: Less database processing for common operations
+
+### Denormalization Costs
+- **Data Redundancy**: Increased storage requirements
+- **Update Complexity**: Multiple places may need updates
+- **Consistency Risk**: Higher risk of data inconsistencies
+- **Maintenance Overhead**: Additional code to maintain derived values
+
+## Best Practices
+
+### 1. Start with Full Normalization
+- Begin with a fully normalized design
+- Identify performance bottlenecks through testing
+- Selectively denormalize based on actual performance needs
+
+### 2. Use Triggers for Consistency
+```sql
+-- Trigger to maintain denormalized order_total.
+-- COALESCE is required because NEW is not available in DELETE triggers
+-- and OLD is not available in INSERT triggers.
+CREATE TRIGGER update_order_total
+AFTER INSERT OR UPDATE OR DELETE ON order_items
+FOR EACH ROW
+BEGIN
+    UPDATE orders
+    SET order_total = (
+        SELECT SUM(quantity * unit_price)
+        FROM order_items
+        WHERE order_id = COALESCE(NEW.order_id, OLD.order_id)
+    )
+    WHERE order_id = COALESCE(NEW.order_id, OLD.order_id);
+END;
+```
+
+### 3. 
Consider Materialized Views +```sql +-- Materialized view for complex aggregations +CREATE MATERIALIZED VIEW customer_summary AS +SELECT + c.customer_id, + c.customer_name, + COUNT(o.order_id) as order_count, + SUM(o.order_total) as lifetime_value, + AVG(o.order_total) as avg_order_value, + MAX(o.created_at) as last_order_date +FROM customers c +LEFT JOIN orders o ON c.customer_id = o.customer_id +GROUP BY c.customer_id, c.customer_name; +``` + +### 4. Document Denormalization Decisions +- Clearly document why denormalization was chosen +- Specify which data is derived and how it's maintained +- Include performance benchmarks that justify the decision + +### 5. Monitor and Validate +- Implement validation checks for denormalized data +- Regular audits to ensure data consistency +- Performance monitoring to validate denormalization benefits + +## Common Anti-Patterns + +### 1. Premature Denormalization +Starting with denormalized design without understanding actual performance requirements. + +### 2. Over-Normalization +Creating too many small tables that require excessive joins for simple queries. + +### 3. Inconsistent Approach +Mixing normalized and denormalized patterns without clear strategy. + +### 4. Ignoring Maintenance +Denormalizing without proper mechanisms to maintain data consistency. + +## Conclusion + +Normalization and denormalization are both valuable tools in database design. 
The key is understanding when to apply each approach: + +- **Use normalization** for transactional systems where data integrity is paramount +- **Consider denormalization** for analytical systems or when performance testing reveals bottlenecks +- **Apply selectively** based on actual usage patterns and performance requirements +- **Maintain consistency** through proper design patterns and validation mechanisms + +The goal is not to achieve perfect normalization or denormalization, but to create a design that best serves your application's specific needs while maintaining data quality and system performance. \ No newline at end of file diff --git a/engineering/database-designer/schema_analyzer.py b/engineering/database-designer/schema_analyzer.py new file mode 100644 index 0000000..391c76b --- /dev/null +++ b/engineering/database-designer/schema_analyzer.py @@ -0,0 +1,982 @@ +#!/usr/bin/env python3 +""" +Database Schema Analyzer + +Analyzes SQL DDL statements and JSON schema definitions for: +- Normalization level compliance (1NF-BCNF) +- Missing constraints (FK, NOT NULL, UNIQUE) +- Data type issues and antipatterns +- Naming convention violations +- Missing indexes on foreign key columns +- Table relationship mapping +- Generates Mermaid ERD diagrams + +Input: SQL DDL file or JSON schema definition +Output: Analysis report + Mermaid ERD + recommendations + +Usage: + python schema_analyzer.py --input schema.sql --output-format json + python schema_analyzer.py --input schema.json --output-format text + python schema_analyzer.py --input schema.sql --generate-erd --output analysis.json +""" + +import argparse +import json +import re +import sys +from collections import defaultdict, namedtuple +from typing import Dict, List, Set, Tuple, Optional, Any +from dataclasses import dataclass, asdict + + +@dataclass +class Column: + name: str + data_type: str + nullable: bool = True + primary_key: bool = False + unique: bool = False + foreign_key: Optional[str] = None + 
default_value: Optional[str] = None + check_constraint: Optional[str] = None + + +@dataclass +class Index: + name: str + table: str + columns: List[str] + unique: bool = False + index_type: str = "btree" + + +@dataclass +class Table: + name: str + columns: List[Column] + primary_key: List[str] + foreign_keys: List[Tuple[str, str]] # (column, referenced_table.column) + unique_constraints: List[List[str]] + check_constraints: Dict[str, str] + indexes: List[Index] + + +@dataclass +class NormalizationIssue: + table: str + issue_type: str + severity: str + description: str + suggestion: str + columns_affected: List[str] + + +@dataclass +class DataTypeIssue: + table: str + column: str + current_type: str + issue: str + suggested_type: str + rationale: str + + +@dataclass +class ConstraintIssue: + table: str + issue_type: str + severity: str + description: str + suggestion: str + columns_affected: List[str] + + +@dataclass +class NamingIssue: + table: str + column: Optional[str] + issue: str + current_name: str + suggested_name: str + + +class SchemaAnalyzer: + def __init__(self): + self.tables: Dict[str, Table] = {} + self.normalization_issues: List[NormalizationIssue] = [] + self.datatype_issues: List[DataTypeIssue] = [] + self.constraint_issues: List[ConstraintIssue] = [] + self.naming_issues: List[NamingIssue] = [] + + # Data type antipatterns + self.varchar_255_pattern = re.compile(r'VARCHAR\(255\)', re.IGNORECASE) + self.bad_datetime_patterns = [ + re.compile(r'VARCHAR\(\d+\)', re.IGNORECASE), + re.compile(r'CHAR\(\d+\)', re.IGNORECASE) + ] + + # Naming conventions + self.table_naming_pattern = re.compile(r'^[a-z][a-z0-9_]*[a-z0-9]$') + self.column_naming_pattern = re.compile(r'^[a-z][a-z0-9_]*[a-z0-9]$') + + def parse_sql_ddl(self, ddl_content: str) -> None: + """Parse SQL DDL statements and extract schema information.""" + # Remove comments and normalize whitespace + ddl_content = re.sub(r'--.*$', '', ddl_content, flags=re.MULTILINE) + ddl_content = 
re.sub(r'/\*.*?\*/', '', ddl_content, flags=re.DOTALL) + ddl_content = re.sub(r'\s+', ' ', ddl_content.strip()) + + # Extract CREATE TABLE statements + create_table_pattern = re.compile( + r'CREATE\s+TABLE\s+(\w+)\s*\(\s*(.*?)\s*\)', + re.IGNORECASE | re.DOTALL + ) + + for match in create_table_pattern.finditer(ddl_content): + table_name = match.group(1).lower() + table_definition = match.group(2) + + table = self._parse_table_definition(table_name, table_definition) + self.tables[table_name] = table + + # Extract CREATE INDEX statements + self._parse_indexes(ddl_content) + + def _parse_table_definition(self, table_name: str, definition: str) -> Table: + """Parse individual table definition.""" + columns = [] + primary_key = [] + foreign_keys = [] + unique_constraints = [] + check_constraints = {} + + # Split by commas, but handle nested parentheses + parts = self._split_table_parts(definition) + + for part in parts: + part = part.strip() + if not part: + continue + + if part.upper().startswith('PRIMARY KEY'): + primary_key = self._parse_primary_key(part) + elif part.upper().startswith('FOREIGN KEY'): + fk = self._parse_foreign_key(part) + if fk: + foreign_keys.append(fk) + elif part.upper().startswith('UNIQUE'): + unique = self._parse_unique_constraint(part) + if unique: + unique_constraints.append(unique) + elif part.upper().startswith('CHECK'): + check = self._parse_check_constraint(part) + if check: + check_constraints.update(check) + else: + # Column definition + column = self._parse_column_definition(part) + if column: + columns.append(column) + if column.primary_key: + primary_key.append(column.name) + + return Table( + name=table_name, + columns=columns, + primary_key=primary_key, + foreign_keys=foreign_keys, + unique_constraints=unique_constraints, + check_constraints=check_constraints, + indexes=[] + ) + + def _split_table_parts(self, definition: str) -> List[str]: + """Split table definition by commas, respecting nested parentheses.""" + parts = [] + 
current_part = "" + paren_count = 0 + + for char in definition: + if char == '(': + paren_count += 1 + elif char == ')': + paren_count -= 1 + elif char == ',' and paren_count == 0: + parts.append(current_part.strip()) + current_part = "" + continue + + current_part += char + + if current_part.strip(): + parts.append(current_part.strip()) + + return parts + + def _parse_column_definition(self, definition: str) -> Optional[Column]: + """Parse individual column definition.""" + # Pattern for column definition + pattern = re.compile( + r'(\w+)\s+([A-Z]+(?:\(\d+(?:,\d+)?\))?)\s*(.*)', + re.IGNORECASE + ) + + match = pattern.match(definition.strip()) + if not match: + return None + + column_name = match.group(1).lower() + data_type = match.group(2).upper() + constraints = match.group(3).upper() if match.group(3) else "" + + column = Column( + name=column_name, + data_type=data_type, + nullable='NOT NULL' not in constraints, + primary_key='PRIMARY KEY' in constraints, + unique='UNIQUE' in constraints + ) + + # Parse foreign key reference + fk_pattern = re.compile(r'REFERENCES\s+(\w+)\s*\(\s*(\w+)\s*\)', re.IGNORECASE) + fk_match = fk_pattern.search(constraints) + if fk_match: + column.foreign_key = f"{fk_match.group(1).lower()}.{fk_match.group(2).lower()}" + + # Parse default value + default_pattern = re.compile(r'DEFAULT\s+([^,\s]+)', re.IGNORECASE) + default_match = default_pattern.search(constraints) + if default_match: + column.default_value = default_match.group(1) + + return column + + def _parse_primary_key(self, definition: str) -> List[str]: + """Parse PRIMARY KEY constraint.""" + pattern = re.compile(r'PRIMARY\s+KEY\s*\(\s*(.*?)\s*\)', re.IGNORECASE) + match = pattern.search(definition) + if match: + columns = [col.strip().lower() for col in match.group(1).split(',')] + return columns + return [] + + def _parse_foreign_key(self, definition: str) -> Optional[Tuple[str, str]]: + """Parse FOREIGN KEY constraint.""" + pattern = re.compile( + 
r'FOREIGN\s+KEY\s*\(\s*(\w+)\s*\)\s+REFERENCES\s+(\w+)\s*\(\s*(\w+)\s*\)', + re.IGNORECASE + ) + match = pattern.search(definition) + if match: + column = match.group(1).lower() + ref_table = match.group(2).lower() + ref_column = match.group(3).lower() + return (column, f"{ref_table}.{ref_column}") + return None + + def _parse_unique_constraint(self, definition: str) -> Optional[List[str]]: + """Parse UNIQUE constraint.""" + pattern = re.compile(r'UNIQUE\s*\(\s*(.*?)\s*\)', re.IGNORECASE) + match = pattern.search(definition) + if match: + columns = [col.strip().lower() for col in match.group(1).split(',')] + return columns + return None + + def _parse_check_constraint(self, definition: str) -> Optional[Dict[str, str]]: + """Parse CHECK constraint.""" + pattern = re.compile(r'CHECK\s*\(\s*(.*?)\s*\)', re.IGNORECASE) + match = pattern.search(definition) + if match: + constraint_name = f"check_constraint_{len(self.tables)}" + return {constraint_name: match.group(1)} + return None + + def _parse_indexes(self, ddl_content: str) -> None: + """Parse CREATE INDEX statements.""" + index_pattern = re.compile( + r'CREATE\s+(?:(UNIQUE)\s+)?INDEX\s+(\w+)\s+ON\s+(\w+)\s*\(\s*(.*?)\s*\)', + re.IGNORECASE + ) + + for match in index_pattern.finditer(ddl_content): + unique = match.group(1) is not None + index_name = match.group(2).lower() + table_name = match.group(3).lower() + columns_str = match.group(4) + + columns = [col.strip().lower() for col in columns_str.split(',')] + + index = Index( + name=index_name, + table=table_name, + columns=columns, + unique=unique + ) + + if table_name in self.tables: + self.tables[table_name].indexes.append(index) + + def parse_json_schema(self, json_content: str) -> None: + """Parse JSON schema definition.""" + try: + schema = json.loads(json_content) + + if 'tables' not in schema: + raise ValueError("JSON schema must contain 'tables' key") + + for table_name, table_def in schema['tables'].items(): + table = 
self._parse_json_table(table_name.lower(), table_def)
                self.tables[table_name.lower()] = table

        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON: {e}")

    def _parse_json_table(self, table_name: str, table_def: Dict[str, Any]) -> Table:
        """Parse JSON table definition.

        Expected shape: {"columns": {name: {...}}, "primary_key": [...],
        "unique_constraints": [...], "check_constraints": {...}} — all keys
        optional; missing column type falls back to 'VARCHAR(255)'.
        """
        columns = []
        primary_key = table_def.get('primary_key', [])
        foreign_keys = []
        unique_constraints = table_def.get('unique_constraints', [])
        check_constraints = table_def.get('check_constraints', {})

        for col_name, col_def in table_def.get('columns', {}).items():
            column = Column(
                name=col_name.lower(),
                # NOTE: absent 'type' defaults to VARCHAR(255), which the
                # data-type analysis will itself flag as an antipattern.
                data_type=col_def.get('type', 'VARCHAR(255)').upper(),
                nullable=col_def.get('nullable', True),
                primary_key=col_name.lower() in [pk.lower() for pk in primary_key],
                unique=col_def.get('unique', False),
                foreign_key=col_def.get('foreign_key'),
                default_value=col_def.get('default')
            )

            columns.append(column)

            # Inline per-column FK declarations also feed the table-level list.
            if column.foreign_key:
                foreign_keys.append((column.name, column.foreign_key))

        return Table(
            name=table_name,
            columns=columns,
            primary_key=[pk.lower() for pk in primary_key],
            foreign_keys=foreign_keys,
            unique_constraints=unique_constraints,
            check_constraints=check_constraints,
            indexes=[]
        )

    def analyze_normalization(self) -> None:
        """Analyze normalization compliance.

        Appends findings for all parsed tables to self.normalization_issues.
        """
        for table_name, table in self.tables.items():
            self._check_first_normal_form(table)
            self._check_second_normal_form(table)
            self._check_third_normal_form(table)
            self._check_bcnf(table)

    def _check_first_normal_form(self, table: Table) -> None:
        """Check First Normal Form compliance.

        Name/type heuristics only — no data is inspected, so findings are
        advisory rather than definitive.
        """
        # Check for atomic values (no arrays or delimited strings)
        for column in table.columns:
            if any(pattern in column.data_type.upper() for pattern in ['ARRAY', 'JSON', 'TEXT']):
                # NOTE(review): ARRAY and TEXT match the outer test but only
                # JSON is reported below — confirm whether ARRAY columns
                # should also be flagged as 1NF violations.
                if 'JSON' in column.data_type.upper():
                    # JSON columns can violate 1NF if storing arrays
                    self.normalization_issues.append(NormalizationIssue(
                        table=table.name,
                        issue_type="1NF_VIOLATION",
                        severity="WARNING",
                        description=f"Column '{column.name}' uses JSON type which may contain non-atomic values",
                        suggestion="Consider normalizing JSON arrays into separate tables",
                        columns_affected=[column.name]
                    ))

            # Check for potential delimited values in VARCHAR/TEXT
            if column.data_type.upper().startswith(('VARCHAR', 'CHAR', 'TEXT')):
                # Plural/collection-ish names suggest comma-separated storage.
                if any(delimiter in column.name.lower() for delimiter in ['list', 'array', 'tags', 'items']):
                    self.normalization_issues.append(NormalizationIssue(
                        table=table.name,
                        issue_type="1NF_VIOLATION",
                        severity="HIGH",
                        description=f"Column '{column.name}' appears to store delimited values",
                        suggestion="Create separate table for individual values with foreign key relationship",
                        columns_affected=[column.name]
                    ))

    def _check_second_normal_form(self, table: Table) -> None:
        """Check Second Normal Form compliance (name-based heuristic)."""
        if len(table.primary_key) <= 1:
            return  # 2NF only applies to tables with composite primary keys

        # Look for potential partial dependencies
        non_key_columns = [col for col in table.columns if col.name not in table.primary_key]

        for column in non_key_columns:
            # Heuristic: columns that seem related to only part of the composite key
            # NOTE(review): purely lexical — can false-positive on coincidental
            # name prefixes; severity is kept at MEDIUM for that reason.
            for pk_part in table.primary_key:
                if pk_part in column.name or column.name.startswith(pk_part.split('_')[0]):
                    self.normalization_issues.append(NormalizationIssue(
                        table=table.name,
                        issue_type="2NF_VIOLATION",
                        severity="MEDIUM",
                        description=f"Column '{column.name}' may have partial dependency on '{pk_part}'",
                        suggestion=f"Consider moving '{column.name}' to a separate table related to '{pk_part}'",
                        columns_affected=[column.name, pk_part]
                    ))
                    break

    def _check_third_normal_form(self, table: Table) -> None:
        """Check Third Normal Form compliance (name-prefix heuristic)."""
        # Look for transitive dependencies
        non_key_columns = [col for col in table.columns if col.name not in table.primary_key]

        # Group columns by potential entities
they describe
        entity_groups = defaultdict(list)
        for column in non_key_columns:
            # Simple heuristic: group by prefix before underscore
            prefix = column.name.split('_')[0]
            if prefix != column.name:  # Has underscore
                entity_groups[prefix].append(column.name)

        for entity, columns in entity_groups.items():
            # Skip the table's own prefix: 'user_name' inside 'users' (or
            # 'user_accounts') describes the table itself, not another entity.
            if len(columns) > 1 and entity != table.name.split('_')[0]:
                # Potential entity that should be in its own table
                id_column = f"{entity}_id"
                # Only report when a matching <entity>_id column exists — the
                # likely determinant of the transitive dependency.
                if id_column in [col.name for col in table.columns]:
                    self.normalization_issues.append(NormalizationIssue(
                        table=table.name,
                        issue_type="3NF_VIOLATION",
                        severity="MEDIUM",
                        description=f"Columns {columns} may have transitive dependency through '{id_column}'",
                        suggestion=f"Consider creating separate '{entity}' table with these columns",
                        columns_affected=columns + [id_column]
                    ))

    def _check_bcnf(self, table: Table) -> None:
        """Check Boyce-Codd Normal Form compliance.

        True BCNF detection needs the functional dependencies, which are not
        available from DDL alone, so this only emits a low-severity advisory
        for wide composite keys.
        """
        # BCNF violations are complex to detect without functional dependencies
        # Provide general guidance for composite keys
        if len(table.primary_key) > 2:
            self.normalization_issues.append(NormalizationIssue(
                table=table.name,
                issue_type="BCNF_WARNING",
                severity="LOW",
                description=f"Table has composite primary key with {len(table.primary_key)} columns",
                suggestion="Review functional dependencies to ensure BCNF compliance",
                columns_affected=table.primary_key
            ))

    def analyze_data_types(self) -> None:
        """Analyze data type usage for antipatterns.

        Appends findings for every column of every table to self.datatype_issues.
        """
        for table_name, table in self.tables.items():
            for column in table.columns:
                self._check_varchar_255_antipattern(table.name, column)
                self._check_inappropriate_types(table.name, column)
                self._check_size_optimization(table.name, column)

    def _check_varchar_255_antipattern(self, table_name: str, column: Column) -> None:
        """Check for VARCHAR(255) antipattern.

        match() anchors at the start; the literal ')' in the pattern keeps
        VARCHAR(2550) from matching.
        """
        if self.varchar_255_pattern.match(column.data_type):
            self.datatype_issues.append(DataTypeIssue(
                table=table_name,
                column=column.name,
                current_type=column.data_type,
                issue="VARCHAR(255) antipattern",
                suggested_type="Appropriately sized VARCHAR or TEXT",
                rationale="VARCHAR(255) is often used as default without considering actual data length requirements"
            ))

    def _check_inappropriate_types(self, table_name: str, column: Column) -> None:
        """Check for inappropriate data types (all checks are name-based heuristics)."""
        # Date/time stored as string
        if column.name.lower() in ['date', 'time', 'created', 'updated', 'modified', 'timestamp']:
            if column.data_type.upper().startswith(('VARCHAR', 'CHAR', 'TEXT')):
                self.datatype_issues.append(DataTypeIssue(
                    table=table_name,
                    column=column.name,
                    current_type=column.data_type,
                    issue="Date/time stored as string",
                    suggested_type="TIMESTAMP, DATE, or TIME",
                    rationale="Proper date/time types enable date arithmetic and indexing optimization"
                ))

        # Boolean stored as string/integer
        if column.name.lower() in ['active', 'enabled', 'deleted', 'visible', 'published']:
            if not column.data_type.upper().startswith('BOOL'):
                self.datatype_issues.append(DataTypeIssue(
                    table=table_name,
                    column=column.name,
                    current_type=column.data_type,
                    issue="Boolean value stored as non-boolean type",
                    suggested_type="BOOLEAN",
                    rationale="Boolean type is more explicit and can be more storage efficient"
                ))

        # Numeric IDs as VARCHAR
        if column.name.lower().endswith('_id') or column.name.lower() == 'id':
            if column.data_type.upper().startswith(('VARCHAR', 'CHAR')):
                self.datatype_issues.append(DataTypeIssue(
                    table=table_name,
                    column=column.name,
                    current_type=column.data_type,
                    issue="Numeric ID stored as string",
                    suggested_type="INTEGER, BIGINT, or UUID",
                    rationale="Numeric types are more efficient for ID columns and enable better indexing"
                ))

    def _check_size_optimization(self, table_name: str, column: Column) -> None:
        """Check for size optimization opportunities."""
        # Oversized integer types
        if 
column.data_type.upper() == 'BIGINT': + if not any(keyword in column.name.lower() for keyword in ['timestamp', 'big', 'large', 'count']): + self.datatype_issues.append(DataTypeIssue( + table=table_name, + column=column.name, + current_type=column.data_type, + issue="Potentially oversized integer type", + suggested_type="INTEGER", + rationale="INTEGER is sufficient for most ID and count fields unless very large values are expected" + )) + + def analyze_constraints(self) -> None: + """Analyze missing constraints.""" + for table_name, table in self.tables.items(): + self._check_missing_primary_key(table) + self._check_missing_foreign_key_constraints(table) + self._check_missing_not_null_constraints(table) + self._check_missing_unique_constraints(table) + self._check_missing_check_constraints(table) + + def _check_missing_primary_key(self, table: Table) -> None: + """Check for missing primary key.""" + if not table.primary_key: + self.constraint_issues.append(ConstraintIssue( + table=table.name, + issue_type="MISSING_PRIMARY_KEY", + severity="HIGH", + description="Table has no primary key defined", + suggestion="Add a primary key column (e.g., 'id' with auto-increment)", + columns_affected=[] + )) + + def _check_missing_foreign_key_constraints(self, table: Table) -> None: + """Check for missing foreign key constraints.""" + for column in table.columns: + if column.name.endswith('_id') and column.name != 'id': + # Potential foreign key column + if not column.foreign_key: + referenced_table = column.name[:-3] # Remove '_id' suffix + if referenced_table in self.tables or referenced_table + 's' in self.tables: + self.constraint_issues.append(ConstraintIssue( + table=table.name, + issue_type="MISSING_FOREIGN_KEY", + severity="MEDIUM", + description=f"Column '{column.name}' appears to be a foreign key but has no constraint", + suggestion=f"Add foreign key constraint referencing {referenced_table} table", + columns_affected=[column.name] + )) + + def 
_check_missing_not_null_constraints(self, table: Table) -> None: + """Check for missing NOT NULL constraints.""" + for column in table.columns: + if column.nullable and column.name in ['email', 'name', 'title', 'status']: + self.constraint_issues.append(ConstraintIssue( + table=table.name, + issue_type="MISSING_NOT_NULL", + severity="LOW", + description=f"Column '{column.name}' allows NULL but typically should not", + suggestion=f"Consider adding NOT NULL constraint to '{column.name}'", + columns_affected=[column.name] + )) + + def _check_missing_unique_constraints(self, table: Table) -> None: + """Check for missing unique constraints.""" + for column in table.columns: + if column.name in ['email', 'username', 'slug', 'code'] and not column.unique: + if column.name not in table.primary_key: + self.constraint_issues.append(ConstraintIssue( + table=table.name, + issue_type="MISSING_UNIQUE", + severity="MEDIUM", + description=f"Column '{column.name}' should likely have UNIQUE constraint", + suggestion=f"Add UNIQUE constraint to '{column.name}'", + columns_affected=[column.name] + )) + + def _check_missing_check_constraints(self, table: Table) -> None: + """Check for missing check constraints.""" + for column in table.columns: + # Email format validation + if column.name == 'email' and 'email' not in str(table.check_constraints): + self.constraint_issues.append(ConstraintIssue( + table=table.name, + issue_type="MISSING_CHECK_CONSTRAINT", + severity="LOW", + description=f"Email column lacks format validation", + suggestion="Add CHECK constraint for email format validation", + columns_affected=[column.name] + )) + + # Positive values for counts, prices, etc. 
+ if column.name.lower() in ['price', 'amount', 'count', 'quantity', 'age']: + if column.name not in str(table.check_constraints): + self.constraint_issues.append(ConstraintIssue( + table=table.name, + issue_type="MISSING_CHECK_CONSTRAINT", + severity="LOW", + description=f"Column '{column.name}' should validate positive values", + suggestion=f"Add CHECK constraint: {column.name} > 0", + columns_affected=[column.name] + )) + + def analyze_naming_conventions(self) -> None: + """Analyze naming convention compliance.""" + for table_name, table in self.tables.items(): + self._check_table_naming(table_name) + for column in table.columns: + self._check_column_naming(table_name, column.name) + + def _check_table_naming(self, table_name: str) -> None: + """Check table naming conventions.""" + if not self.table_naming_pattern.match(table_name): + suggested_name = self._suggest_table_name(table_name) + self.naming_issues.append(NamingIssue( + table=table_name, + column=None, + issue="Invalid table naming convention", + current_name=table_name, + suggested_name=suggested_name + )) + + # Check for plural naming + if not table_name.endswith('s') and table_name not in ['data', 'information']: + self.naming_issues.append(NamingIssue( + table=table_name, + column=None, + issue="Table name should be plural", + current_name=table_name, + suggested_name=table_name + 's' + )) + + def _check_column_naming(self, table_name: str, column_name: str) -> None: + """Check column naming conventions.""" + if not self.column_naming_pattern.match(column_name): + suggested_name = self._suggest_column_name(column_name) + self.naming_issues.append(NamingIssue( + table=table_name, + column=column_name, + issue="Invalid column naming convention", + current_name=column_name, + suggested_name=suggested_name + )) + + def _suggest_table_name(self, table_name: str) -> str: + """Suggest corrected table name.""" + # Convert to snake_case and make plural + name = re.sub(r'([A-Z])', r'_\1', 
table_name).lower().strip('_') + return name + 's' if not name.endswith('s') else name + + def _suggest_column_name(self, column_name: str) -> str: + """Suggest corrected column name.""" + # Convert to snake_case + return re.sub(r'([A-Z])', r'_\1', column_name).lower().strip('_') + + def check_missing_indexes(self) -> List[Dict[str, Any]]: + """Check for missing indexes on foreign key columns.""" + missing_indexes = [] + + for table_name, table in self.tables.items(): + existing_indexed_columns = set() + + # Collect existing indexed columns + for index in table.indexes: + existing_indexed_columns.update(index.columns) + + # Primary key columns are automatically indexed + existing_indexed_columns.update(table.primary_key) + + # Check foreign key columns + for column in table.columns: + if column.foreign_key and column.name not in existing_indexed_columns: + missing_indexes.append({ + 'table': table_name, + 'column': column.name, + 'type': 'foreign_key', + 'suggestion': f"CREATE INDEX idx_{table_name}_{column.name} ON {table_name} ({column.name});" + }) + + return missing_indexes + + def generate_mermaid_erd(self) -> str: + """Generate Mermaid ERD diagram.""" + erd_lines = ["erDiagram"] + + # Add table definitions + for table_name, table in self.tables.items(): + erd_lines.append(f" {table_name.upper()} {{") + + for column in table.columns: + data_type = column.data_type + constraints = [] + + if column.primary_key: + constraints.append("PK") + if column.foreign_key: + constraints.append("FK") + if not column.nullable: + constraints.append("NOT NULL") + if column.unique: + constraints.append("UNIQUE") + + constraint_str = " ".join(constraints) + if constraint_str: + constraint_str = f" \"{constraint_str}\"" + + erd_lines.append(f" {data_type} {column.name}{constraint_str}") + + erd_lines.append(" }") + + # Add relationships + relationships = set() + for table_name, table in self.tables.items(): + for column in table.columns: + if column.foreign_key: + ref_table = 
column.foreign_key.split('.')[0] + if ref_table in self.tables: + relationship = f" {ref_table.upper()} ||--o{{ {table_name.upper()} : has" + relationships.add(relationship) + + erd_lines.extend(sorted(relationships)) + + return "\n".join(erd_lines) + + def get_analysis_summary(self) -> Dict[str, Any]: + """Get comprehensive analysis summary.""" + return { + "schema_overview": { + "total_tables": len(self.tables), + "total_columns": sum(len(table.columns) for table in self.tables.values()), + "tables_with_primary_keys": len([t for t in self.tables.values() if t.primary_key]), + "total_foreign_keys": sum(len(table.foreign_keys) for table in self.tables.values()), + "total_indexes": sum(len(table.indexes) for table in self.tables.values()) + }, + "normalization_analysis": { + "total_issues": len(self.normalization_issues), + "by_severity": { + "high": len([i for i in self.normalization_issues if i.severity == "HIGH"]), + "medium": len([i for i in self.normalization_issues if i.severity == "MEDIUM"]), + "low": len([i for i in self.normalization_issues if i.severity == "LOW"]), + "warning": len([i for i in self.normalization_issues if i.severity == "WARNING"]) + }, + "issues": [asdict(issue) for issue in self.normalization_issues] + }, + "data_type_analysis": { + "total_issues": len(self.datatype_issues), + "issues": [asdict(issue) for issue in self.datatype_issues] + }, + "constraint_analysis": { + "total_issues": len(self.constraint_issues), + "by_severity": { + "high": len([i for i in self.constraint_issues if i.severity == "HIGH"]), + "medium": len([i for i in self.constraint_issues if i.severity == "MEDIUM"]), + "low": len([i for i in self.constraint_issues if i.severity == "LOW"]) + }, + "issues": [asdict(issue) for issue in self.constraint_issues] + }, + "naming_analysis": { + "total_issues": len(self.naming_issues), + "issues": [asdict(issue) for issue in self.naming_issues] + }, + "missing_indexes": self.check_missing_indexes(), + "recommendations": 
self._generate_recommendations() + } + + def _generate_recommendations(self) -> List[str]: + """Generate high-level recommendations.""" + recommendations = [] + + # High severity issues + high_severity_issues = [ + i for i in self.normalization_issues + self.constraint_issues + if i.severity == "HIGH" + ] + + if high_severity_issues: + recommendations.append(f"Address {len(high_severity_issues)} high-severity issues immediately") + + # Missing primary keys + tables_without_pk = [name for name, table in self.tables.items() if not table.primary_key] + if tables_without_pk: + recommendations.append(f"Add primary keys to tables: {', '.join(tables_without_pk)}") + + # Data type improvements + varchar_255_issues = [i for i in self.datatype_issues if "VARCHAR(255)" in i.issue] + if varchar_255_issues: + recommendations.append(f"Review {len(varchar_255_issues)} VARCHAR(255) columns for right-sizing") + + # Missing foreign keys + missing_fks = [i for i in self.constraint_issues if i.issue_type == "MISSING_FOREIGN_KEY"] + if missing_fks: + recommendations.append(f"Consider adding {len(missing_fks)} foreign key constraints for referential integrity") + + # Normalization improvements + normalization_issues_count = len(self.normalization_issues) + if normalization_issues_count > 0: + recommendations.append(f"Review {normalization_issues_count} normalization issues for schema optimization") + + return recommendations + + def format_text_report(self, analysis: Dict[str, Any]) -> str: + """Format analysis as human-readable text report.""" + lines = [] + lines.append("DATABASE SCHEMA ANALYSIS REPORT") + lines.append("=" * 50) + lines.append("") + + # Overview + overview = analysis["schema_overview"] + lines.append("SCHEMA OVERVIEW") + lines.append("-" * 15) + lines.append(f"Total Tables: {overview['total_tables']}") + lines.append(f"Total Columns: {overview['total_columns']}") + lines.append(f"Tables with Primary Keys: {overview['tables_with_primary_keys']}") + lines.append(f"Total 
Foreign Keys: {overview['total_foreign_keys']}") + lines.append(f"Total Indexes: {overview['total_indexes']}") + lines.append("") + + # Recommendations + if analysis["recommendations"]: + lines.append("KEY RECOMMENDATIONS") + lines.append("-" * 18) + for i, rec in enumerate(analysis["recommendations"], 1): + lines.append(f"{i}. {rec}") + lines.append("") + + # Normalization Issues + norm_analysis = analysis["normalization_analysis"] + if norm_analysis["total_issues"] > 0: + lines.append(f"NORMALIZATION ISSUES ({norm_analysis['total_issues']} total)") + lines.append("-" * 25) + severity_counts = norm_analysis["by_severity"] + lines.append(f"High: {severity_counts['high']}, Medium: {severity_counts['medium']}, " + f"Low: {severity_counts['low']}, Warning: {severity_counts['warning']}") + lines.append("") + + for issue in norm_analysis["issues"][:5]: # Show first 5 + lines.append(f"• {issue['table']}: {issue['description']}") + lines.append(f" Suggestion: {issue['suggestion']}") + lines.append("") + + # Data Type Issues + dt_analysis = analysis["data_type_analysis"] + if dt_analysis["total_issues"] > 0: + lines.append(f"DATA TYPE ISSUES ({dt_analysis['total_issues']} total)") + lines.append("-" * 20) + for issue in dt_analysis["issues"][:5]: # Show first 5 + lines.append(f"• {issue['table']}.{issue['column']}: {issue['issue']}") + lines.append(f" Current: {issue['current_type']} → Suggested: {issue['suggested_type']}") + lines.append(f" Rationale: {issue['rationale']}") + lines.append("") + + # Constraint Issues + const_analysis = analysis["constraint_analysis"] + if const_analysis["total_issues"] > 0: + lines.append(f"CONSTRAINT ISSUES ({const_analysis['total_issues']} total)") + lines.append("-" * 20) + severity_counts = const_analysis["by_severity"] + lines.append(f"High: {severity_counts['high']}, Medium: {severity_counts['medium']}, " + f"Low: {severity_counts['low']}") + lines.append("") + + for issue in const_analysis["issues"][:5]: # Show first 5 + 
lines.append(f"• {issue['table']}: {issue['description']}") + lines.append(f" Suggestion: {issue['suggestion']}") + lines.append("") + + # Missing Indexes + missing_idx = analysis["missing_indexes"] + if missing_idx: + lines.append(f"MISSING INDEXES ({len(missing_idx)} total)") + lines.append("-" * 17) + for idx in missing_idx[:5]: # Show first 5 + lines.append(f"• {idx['table']}.{idx['column']} ({idx['type']})") + lines.append(f" SQL: {idx['suggestion']}") + lines.append("") + + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser(description="Analyze database schema for design issues and generate ERD") + parser.add_argument("--input", "-i", required=True, help="Input file (SQL DDL or JSON schema)") + parser.add_argument("--output", "-o", help="Output file (default: stdout)") + parser.add_argument("--output-format", "-f", choices=["json", "text"], default="text", + help="Output format") + parser.add_argument("--generate-erd", "-e", action="store_true", help="Include Mermaid ERD in output") + parser.add_argument("--erd-only", action="store_true", help="Output only the Mermaid ERD") + + args = parser.parse_args() + + try: + # Read input file + with open(args.input, 'r') as f: + content = f.read() + + # Initialize analyzer + analyzer = SchemaAnalyzer() + + # Parse input based on file extension + if args.input.lower().endswith('.json'): + analyzer.parse_json_schema(content) + else: + analyzer.parse_sql_ddl(content) + + if not analyzer.tables: + print("Error: No tables found in input file", file=sys.stderr) + return 1 + + if args.erd_only: + # Output only ERD + erd = analyzer.generate_mermaid_erd() + if args.output: + with open(args.output, 'w') as f: + f.write(erd) + else: + print(erd) + return 0 + + # Perform analysis + analyzer.analyze_normalization() + analyzer.analyze_data_types() + analyzer.analyze_constraints() + analyzer.analyze_naming_conventions() + + # Generate report + analysis = analyzer.get_analysis_summary() + + if 
args.generate_erd: + analysis["mermaid_erd"] = analyzer.generate_mermaid_erd() + + # Output results + if args.output_format == "json": + output = json.dumps(analysis, indent=2) + else: + output = analyzer.format_text_report(analysis) + if args.generate_erd: + output += "\n\nMERMAID ERD\n" + "=" * 11 + "\n" + output += analysis["mermaid_erd"] + + if args.output: + with open(args.output, 'w') as f: + f.write(output) + else: + print(output) + + return 0 + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file