diff --git a/.codex/skills-index.json b/.codex/skills-index.json index 7392a7b..229cfb6 100644 --- a/.codex/skills-index.json +++ b/.codex/skills-index.json @@ -45,7 +45,7 @@ "name": "senior-backend", "source": "../../engineering-team/senior-backend", "category": "engineering", - "description": "Comprehensive backend development skill for building scalable backend systems using NodeJS, Express, Go, Python, Postgres, GraphQL, REST APIs. Includes API scaffolding, database optimization, security implementation, and performance tuning. Use when designing APIs, optimizing database queries, implementing business logic, handling authentication/authorization, or reviewing backend code." + "description": "This skill should be used when the user asks to \"design REST APIs\", \"optimize database queries\", \"implement authentication\", \"build microservices\", \"review backend code\", \"set up GraphQL\", \"handle database migrations\", or \"load test APIs\". Use for Node.js/Express/Fastify development, PostgreSQL optimization, API security, and backend architecture patterns." }, { "name": "senior-computer-vision", diff --git a/engineering-team/senior-backend/SKILL.md b/engineering-team/senior-backend/SKILL.md index 3cf41a9..0ae08a6 100644 --- a/engineering-team/senior-backend/SKILL.md +++ b/engineering-team/senior-backend/SKILL.md @@ -1,209 +1,434 @@ --- name: senior-backend -description: Comprehensive backend development skill for building scalable backend systems using NodeJS, Express, Go, Python, Postgres, GraphQL, REST APIs. Includes API scaffolding, database optimization, security implementation, and performance tuning. Use when designing APIs, optimizing database queries, implementing business logic, handling authentication/authorization, or reviewing backend code. +description: This skill should be used when the user asks to "design REST APIs", "optimize database queries", "implement authentication", "build microservices", "review backend code", "set up GraphQL", "handle database migrations", or "load test APIs". Use for Node.js/Express/Fastify development, PostgreSQL optimization, API security, and backend architecture patterns. --- -# Senior Backend +# Senior Backend Engineer -Complete toolkit for senior backend with modern tools and best practices. +Backend development patterns, API design, database optimization, and security practices. 
+ +## Table of Contents + +- [Quick Start](#quick-start) +- [Tools Overview](#tools-overview) + - [API Scaffolder](#1-api-scaffolder) + - [Database Migration Tool](#2-database-migration-tool) + - [API Load Tester](#3-api-load-tester) +- [Backend Development Workflows](#backend-development-workflows) + - [API Design Workflow](#api-design-workflow) + - [Database Optimization Workflow](#database-optimization-workflow) + - [Security Hardening Workflow](#security-hardening-workflow) +- [Reference Documentation](#reference-documentation) +- [Common Patterns Quick Reference](#common-patterns-quick-reference) + +--- ## Quick Start -### Main Capabilities - -This skill provides three core capabilities through automated scripts: - ```bash -# Script 1: Api Scaffolder -python scripts/api_scaffolder.py [options] +# Generate API routes from OpenAPI spec +python scripts/api_scaffolder.py openapi.yaml --framework express --output src/routes/ -# Script 2: Database Migration Tool -python scripts/database_migration_tool.py [options] +# Analyze database schema and generate migrations +python scripts/database_migration_tool.py --connection postgres://localhost/mydb --analyze -# Script 3: Api Load Tester -python scripts/api_load_tester.py [options] +# Load test an API endpoint +python scripts/api_load_tester.py https://api.example.com/users --concurrency 50 --duration 30 ``` -## Core Capabilities +--- -### 1. Api Scaffolder +## Tools Overview -Automated tool for api scaffolder tasks. +### 1. API Scaffolder -**Features:** -- Automated scaffolding -- Best practices built-in -- Configurable templates -- Quality checks +Generates API route handlers, middleware, and OpenAPI specifications from schema definitions. + +**Input:** OpenAPI spec (YAML/JSON) or database schema +**Output:** Route handlers, validation middleware, TypeScript types **Usage:** ```bash -python scripts/api_scaffolder.py [options] +# Generate Express routes from OpenAPI spec +python scripts/api_scaffolder.py openapi.yaml --framework express --output src/routes/ + +# Output: +# Generated 12 route handlers in src/routes/ +# - GET /users (listUsers) +# - POST /users (createUser) +# - GET /users/{id} (getUser) +# - PUT /users/{id} (updateUser) +# - DELETE /users/{id} (deleteUser) +# ... +# Created validation middleware: src/middleware/validators.ts +# Created TypeScript types: src/types/api.ts + +# Generate from database schema +python scripts/api_scaffolder.py --from-db postgres://localhost/mydb --output src/routes/ + +# Generate OpenAPI spec from existing routes +python scripts/api_scaffolder.py src/routes/ --generate-spec --output openapi.yaml ``` +**Supported Frameworks:** +- Express.js (`--framework express`) +- Fastify (`--framework fastify`) +- Koa (`--framework koa`) + +--- + ### 2. Database Migration Tool -Comprehensive analysis and optimization tool. +Analyzes database schemas, detects changes, and generates migration files with rollback support. 
-**Features:** -- Deep analysis -- Performance metrics -- Recommendations -- Automated fixes +**Input:** Database connection string or schema files +**Output:** Migration files, schema diff report, optimization suggestions **Usage:** ```bash -python scripts/database_migration_tool.py [--verbose] +# Analyze current schema and suggest optimizations +python scripts/database_migration_tool.py --connection postgres://localhost/mydb --analyze + +# Output: +# === Database Analysis Report === +# Tables: 24 +# Total rows: 1,247,832 +# +# MISSING INDEXES (5 found): +# orders.user_id - 847ms avg query time, ADD INDEX recommended +# products.category_id - 234ms avg query time, ADD INDEX recommended +# +# N+1 QUERY RISKS (3 found): +# users -> orders relationship (no eager loading) +# +# SUGGESTED MIGRATIONS: +# 1. Add index on orders(user_id) +# 2. Add index on products(category_id) +# 3. Add composite index on order_items(order_id, product_id) + +# Generate migration from schema diff +python scripts/database_migration_tool.py --connection postgres://localhost/mydb \ + --compare schema/v2.sql --output migrations/ + +# Output: +# Generated migration: migrations/20240115_add_user_indexes.sql +# Generated rollback: migrations/20240115_add_user_indexes_rollback.sql + +# Dry-run a migration +python scripts/database_migration_tool.py --connection postgres://localhost/mydb \ + --migrate migrations/20240115_add_user_indexes.sql --dry-run ``` -### 3. Api Load Tester +--- -Advanced tooling for specialized tasks. +### 3. API Load Tester -**Features:** -- Expert-level automation -- Custom configurations -- Integration ready -- Production-grade output +Performs HTTP load testing with configurable concurrency, measuring latency percentiles and throughput. + +**Input:** API endpoint URL and test configuration +**Output:** Performance report with latency distribution, error rates, throughput metrics **Usage:** ```bash -python scripts/api_load_tester.py [arguments] [options] +# Basic load test +python scripts/api_load_tester.py https://api.example.com/users --concurrency 50 --duration 30 + +# Output: +# === Load Test Results === +# Target: https://api.example.com/users +# Duration: 30s | Concurrency: 50 +# +# THROUGHPUT: +# Total requests: 15,247 +# Requests/sec: 508.2 +# Successful: 15,102 (99.0%) +# Failed: 145 (1.0%) +# +# LATENCY (ms): +# Min: 12 +# Avg: 89 +# P50: 67 +# P95: 198 +# P99: 423 +# Max: 1,247 +# +# ERRORS: +# Connection timeout: 89 +# HTTP 503: 56 +# +# RECOMMENDATION: P99 latency (423ms) exceeds 200ms target. +# Consider: connection pooling, query optimization, or horizontal scaling. + +# Test with custom headers and body +python scripts/api_load_tester.py https://api.example.com/orders \ + --method POST \ + --header "Authorization: Bearer token123" \ + --body '{"product_id": 1, "quantity": 2}' \ + --concurrency 100 \ + --duration 60 + +# Compare two endpoints +python scripts/api_load_tester.py https://api.example.com/v1/users https://api.example.com/v2/users \ + --compare --concurrency 50 --duration 30 ``` +--- + +## Backend Development Workflows + +### API Design Workflow + +Use when designing a new API or refactoring existing endpoints. 
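+
+The steps below assume a project laid out roughly like this (illustrative -- your structure may differ):
+
+```
+openapi.yaml       # contract, source of truth (Steps 1 and 5)
+src/
+  routes/          # generated handlers (Step 2)
+  middleware/      # generated validators (Step 4)
+  types/           # generated TypeScript types
+```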
+ +**Step 1: Define resources and operations** +```yaml +# openapi.yaml +openapi: 3.0.3 +info: + title: User Service API + version: 1.0.0 +paths: + /users: + get: + summary: List users + parameters: + - name: limit + in: query + schema: + type: integer + default: 20 + post: + summary: Create user + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateUser' +``` + +**Step 2: Generate route scaffolding** +```bash +python scripts/api_scaffolder.py openapi.yaml --framework express --output src/routes/ +``` + +**Step 3: Implement business logic** +```typescript +// src/routes/users.ts (generated, then customized) +export const createUser = async (req: Request, res: Response) => { + const { email, name } = req.body; + + // Add business logic + const user = await userService.create({ email, name }); + + res.status(201).json(user); +}; +``` + +**Step 4: Add validation middleware** +```bash +# Validation is auto-generated from OpenAPI schema +# src/middleware/validators.ts includes: +# - Request body validation +# - Query parameter validation +# - Path parameter validation +``` + +**Step 5: Generate updated OpenAPI spec** +```bash +python scripts/api_scaffolder.py src/routes/ --generate-spec --output openapi.yaml +``` + +--- + +### Database Optimization Workflow + +Use when queries are slow or database performance needs improvement. + +**Step 1: Analyze current performance** +```bash +python scripts/database_migration_tool.py --connection $DATABASE_URL --analyze +``` + +**Step 2: Identify slow queries** +```sql +-- Check query execution plans +EXPLAIN ANALYZE SELECT * FROM orders +WHERE user_id = 123 +ORDER BY created_at DESC +LIMIT 10; + +-- Look for: Seq Scan (bad), Index Scan (good) +``` + +**Step 3: Generate index migrations** +```bash +python scripts/database_migration_tool.py --connection $DATABASE_URL \ + --suggest-indexes --output migrations/ +``` + +**Step 4: Test migration (dry-run)** +```bash +python scripts/database_migration_tool.py --connection $DATABASE_URL \ + --migrate migrations/add_indexes.sql --dry-run +``` + +**Step 5: Apply and verify** +```bash +# Apply migration +python scripts/database_migration_tool.py --connection $DATABASE_URL \ + --migrate migrations/add_indexes.sql + +# Verify improvement +python scripts/database_migration_tool.py --connection $DATABASE_URL --analyze +``` + +--- + +### Security Hardening Workflow + +Use when preparing an API for production or after a security review. 
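+
+The steps assume an Express app where security middleware is registered before the routes it protects; a minimal sketch of that ordering (`apiLimiter` and `routes` are the illustrative names used in Steps 2-3):
+
+```typescript
+import express from 'express';
+import helmet from 'helmet';
+
+const app = express();
+app.use(helmet());                          // headers first (Step 5)
+app.use('/api/', apiLimiter);               // then rate limiting (Step 2)
+app.use(express.json({ limit: '100kb' }));  // bounded body parsing
+app.use('/api/', routes);                   // validated handlers last (Step 3)
+```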
+ +**Step 1: Review authentication setup** +```typescript +// Verify JWT configuration +const jwtConfig = { + secret: process.env.JWT_SECRET, // Must be from env, never hardcoded + expiresIn: '1h', // Short-lived tokens + algorithm: 'RS256' // Prefer asymmetric +}; +``` + +**Step 2: Add rate limiting** +```typescript +import rateLimit from 'express-rate-limit'; + +const apiLimiter = rateLimit({ + windowMs: 15 * 60 * 1000, // 15 minutes + max: 100, // 100 requests per window + standardHeaders: true, + legacyHeaders: false, +}); + +app.use('/api/', apiLimiter); +``` + +**Step 3: Validate all inputs** +```typescript +import { z } from 'zod'; + +const CreateUserSchema = z.object({ + email: z.string().email().max(255), + name: z.string().min(1).max(100), + age: z.number().int().positive().optional() +}); + +// Use in route handler +const data = CreateUserSchema.parse(req.body); +``` + +**Step 4: Load test with attack patterns** +```bash +# Test rate limiting +python scripts/api_load_tester.py https://api.example.com/login \ + --concurrency 200 --duration 10 --expect-rate-limit + +# Test input validation +python scripts/api_load_tester.py https://api.example.com/users \ + --method POST \ + --body '{"email": "not-an-email"}' \ + --expect-status 400 +``` + +**Step 5: Review security headers** +```typescript +import helmet from 'helmet'; + +app.use(helmet({ + contentSecurityPolicy: true, + crossOriginEmbedderPolicy: true, + crossOriginOpenerPolicy: true, + crossOriginResourcePolicy: true, + hsts: { maxAge: 31536000, includeSubDomains: true }, +})); +``` + +--- + ## Reference Documentation -### Api Design Patterns +| File | Contains | Use When | +|------|----------|----------| +| `references/api_design_patterns.md` | REST vs GraphQL, versioning, error handling, pagination | Designing new APIs | +| `references/database_optimization_guide.md` | Indexing strategies, query optimization, N+1 solutions | Fixing slow queries | +| `references/backend_security_practices.md` | OWASP Top 10, auth patterns, input validation | Security hardening | -Comprehensive guide available in `references/api_design_patterns.md`: +--- -- Detailed patterns and practices -- Code examples -- Best practices -- Anti-patterns to avoid -- Real-world scenarios +## Common Patterns Quick Reference -### Database Optimization Guide - -Complete workflow documentation in `references/database_optimization_guide.md`: - -- Step-by-step processes -- Optimization strategies -- Tool integrations -- Performance tuning -- Troubleshooting guide - -### Backend Security Practices - -Technical reference guide in `references/backend_security_practices.md`: - -- Technology stack details -- Configuration examples -- Integration patterns -- Security considerations -- Scalability guidelines - -## Tech Stack - -**Languages:** TypeScript, JavaScript, Python, Go, Swift, Kotlin -**Frontend:** React, Next.js, React Native, Flutter -**Backend:** Node.js, Express, GraphQL, REST APIs -**Database:** PostgreSQL, Prisma, NeonDB, Supabase -**DevOps:** Docker, Kubernetes, Terraform, GitHub Actions, CircleCI -**Cloud:** AWS, GCP, Azure - -## Development Workflow - -### 1. Setup and Configuration - -```bash -# Install dependencies -npm install -# or -pip install -r requirements.txt - -# Configure environment -cp .env.example .env +### REST API Response Format +```json +{ + "data": { "id": 1, "name": "John" }, + "meta": { "requestId": "abc-123" } +} ``` -### 2. Run Quality Checks - -```bash -# Use the analyzer script -python scripts/database_migration_tool.py . 
- -# Review recommendations -# Apply fixes +### Error Response Format +```json +{ + "error": { + "code": "VALIDATION_ERROR", + "message": "Invalid email format", + "details": [{ "field": "email", "message": "must be valid email" }] + }, + "meta": { "requestId": "abc-123" } +} ``` -### 3. Implement Best Practices +### HTTP Status Codes +| Code | Use Case | +|------|----------| +| 200 | Success (GET, PUT, PATCH) | +| 201 | Created (POST) | +| 204 | No Content (DELETE) | +| 400 | Validation error | +| 401 | Authentication required | +| 403 | Permission denied | +| 404 | Resource not found | +| 429 | Rate limit exceeded | +| 500 | Internal server error | -Follow the patterns and practices documented in: -- `references/api_design_patterns.md` -- `references/database_optimization_guide.md` -- `references/backend_security_practices.md` +### Database Index Strategy +```sql +-- Single column (equality lookups) +CREATE INDEX idx_users_email ON users(email); -## Best Practices Summary +-- Composite (multi-column queries) +CREATE INDEX idx_orders_user_status ON orders(user_id, status); -### Code Quality -- Follow established patterns -- Write comprehensive tests -- Document decisions -- Review regularly +-- Partial (filtered queries) +CREATE INDEX idx_orders_active ON orders(created_at) WHERE status = 'active'; -### Performance -- Measure before optimizing -- Use appropriate caching -- Optimize critical paths -- Monitor in production +-- Covering (avoid table lookup) +CREATE INDEX idx_users_email_name ON users(email) INCLUDE (name); +``` -### Security -- Validate all inputs -- Use parameterized queries -- Implement proper authentication -- Keep dependencies updated - -### Maintainability -- Write clear code -- Use consistent naming -- Add helpful comments -- Keep it simple +--- ## Common Commands ```bash -# Development -npm run dev -npm run build -npm run test -npm run lint +# API Development +python scripts/api_scaffolder.py openapi.yaml --framework express +python scripts/api_scaffolder.py src/routes/ --generate-spec -# Analysis -python scripts/database_migration_tool.py . -python scripts/api_load_tester.py --analyze +# Database Operations +python scripts/database_migration_tool.py --connection $DATABASE_URL --analyze +python scripts/database_migration_tool.py --connection $DATABASE_URL --migrate file.sql -# Deployment -docker build -t app:latest . -docker-compose up -d -kubectl apply -f k8s/ +# Performance Testing +python scripts/api_load_tester.py https://api.example.com/endpoint --concurrency 50 +python scripts/api_load_tester.py https://api.example.com/endpoint --compare baseline.json ``` - -## Troubleshooting - -### Common Issues - -Check the comprehensive troubleshooting section in `references/backend_security_practices.md`. 
- -### Getting Help - -- Review reference documentation -- Check script output messages -- Consult tech stack documentation -- Review error logs - -## Resources - -- Pattern Reference: `references/api_design_patterns.md` -- Workflow Guide: `references/database_optimization_guide.md` -- Technical Guide: `references/backend_security_practices.md` -- Tool Scripts: `scripts/` directory diff --git a/engineering-team/senior-backend/references/api_design_patterns.md b/engineering-team/senior-backend/references/api_design_patterns.md index 3d1f653..e45a976 100644 --- a/engineering-team/senior-backend/references/api_design_patterns.md +++ b/engineering-team/senior-backend/references/api_design_patterns.md @@ -1,103 +1,530 @@ -# Api Design Patterns +# API Design Patterns -## Overview +Concrete patterns for REST and GraphQL API design with examples. -This reference guide provides comprehensive information for senior backend. +## Patterns Index -## Patterns and Practices +1. [REST vs GraphQL Decision](#1-rest-vs-graphql-decision) +2. [Resource Naming Conventions](#2-resource-naming-conventions) +3. [API Versioning Strategies](#3-api-versioning-strategies) +4. [Error Handling Patterns](#4-error-handling-patterns) +5. [Pagination Patterns](#5-pagination-patterns) +6. [Authentication Patterns](#6-authentication-patterns) +7. [Rate Limiting Design](#7-rate-limiting-design) +8. [Idempotency Patterns](#8-idempotency-patterns) -### Pattern 1: Best Practice Implementation +--- -**Description:** -Detailed explanation of the pattern. +## 1. REST vs GraphQL Decision -**When to Use:** -- Scenario 1 -- Scenario 2 -- Scenario 3 +### When to Use REST + +| Scenario | Why REST | +|----------|----------| +| Simple CRUD operations | Less complexity, widely understood | +| Public APIs | Better caching, easier documentation | +| File uploads/downloads | Native HTTP support | +| Microservices communication | Simpler service-to-service calls | +| Caching is critical | HTTP caching built-in | + +### When to Use GraphQL + +| Scenario | Why GraphQL | +|----------|-------------| +| Mobile apps with bandwidth constraints | Request only needed fields | +| Complex nested data | Single request for related data | +| Rapidly changing frontend requirements | Frontend-driven queries | +| Multiple client types | Each client queries what it needs | +| Real-time subscriptions needed | Built-in subscription support | + +### Hybrid Approach + +``` +┌─────────────────────────────────────────────────────┐ +│ API Gateway │ +├─────────────────────────────────────────────────────┤ +│ /api/v1/* → REST (Public API, webhooks) │ +│ /graphql → GraphQL (Mobile apps, dashboards) │ +│ /files/* → REST (File uploads/downloads) │ +└─────────────────────────────────────────────────────┘ +``` + +--- + +## 2. 
Resource Naming Conventions
+
+### REST Endpoint Patterns
+
+```
+# Collections (plural nouns)
+GET    /users                        # List users
+POST   /users                        # Create user
+GET    /users/{id}                   # Get user
+PUT    /users/{id}                   # Replace user
+PATCH  /users/{id}                   # Update user
+DELETE /users/{id}                   # Delete user
+
+# Nested resources
+GET    /users/{id}/orders            # User's orders
+POST   /users/{id}/orders            # Create order for user
+GET    /users/{id}/orders/{orderId}  # Specific order
+
+# Actions (when CRUD doesn't fit)
+POST   /users/{id}/activate          # Activate user
+POST   /orders/{id}/cancel           # Cancel order
+POST   /payments/{id}/refund         # Refund payment
+
+# Filtering, sorting, pagination
+GET /users?status=active&sort=-created_at&limit=20&offset=40
+GET /orders?user_id=123&status=pending
+```
+
+### Naming Rules
+
+| Rule | Good | Bad |
+|------|------|-----|
+| Use plural nouns | `/users` | `/user` |
+| Use lowercase | `/user-profiles` | `/userProfiles` |
+| Use hyphens | `/order-items` | `/order_items` |
+| No verbs in URLs | `POST /orders` | `POST /createOrder` |
+| No file extensions | `/users/123` | `/users/123.json` |
+
+---
+
+## 3. API Versioning Strategies
+
+### Strategy Comparison
+
+| Strategy | Example | Pros | Cons |
+|----------|---------|------|------|
+| URL Path | `/api/v1/users` | Explicit, easy routing | URL changes |
+| Header | `Accept: application/vnd.api+json;version=1` | Clean URLs | Hidden version |
+| Query Param | `/users?version=1` | Easy to test | Pollutes query string |
+
+### Recommended: URL Path Versioning
-**Implementation:**
```typescript
-// Example code implementation
-export class Example {
-  // Implementation details
+// Express routing
+import v1Routes from './routes/v1';
+import v2Routes from './routes/v2';
+
+app.use('/api/v1', v1Routes);
+app.use('/api/v2', v2Routes);
+```
+
+### Deprecation Strategy
+
+```typescript
+// Add deprecation headers
+app.use('/api/v1', (req, res, next) => {
+  res.set('Deprecation', 'true');
+  res.set('Sunset', 'Sat, 01 Jun 2025 00:00:00 GMT');
+  res.set('Link', '<https://api.example.com/api/v2>; rel="successor-version"');
+  next();
+}, v1Routes);
+```
+
+### Breaking vs Non-Breaking Changes
+
+**Non-breaking (safe):**
+- Adding new endpoints
+- Adding optional fields
+- Adding new enum values at end
+
+**Breaking (requires new version):**
+- Removing endpoints or fields
+- Renaming fields
+- Changing field types
+- Changing required/optional status
+
+---
+
+## 4. Error Handling Patterns
+
+### Standard Error Response Format
+
+```json
+{
+  "error": {
+    "code": "VALIDATION_ERROR",
+    "message": "Request validation failed",
+    "details": [
+      {
+        "field": "email",
+        "code": "INVALID_FORMAT",
+        "message": "Must be a valid email address"
+      },
+      {
+        "field": "age",
+        "code": "OUT_OF_RANGE",
+        "message": "Must be between 18 and 120"
+      }
+    ],
+    "documentation_url": "https://api.example.com/docs/errors#validation"
+  },
+  "meta": {
+    "request_id": "req_abc123",
+    "timestamp": "2024-01-15T10:30:00Z"
+  }
+}
+```
+
+### Error Codes by Category
-**Benefits:**
-- Benefit 1
-- Benefit 2
-- Benefit 3
-
-**Trade-offs:**
-- Consider 1
-- Consider 2
-- Consider 3
-
-### Pattern 2: Advanced Technique
-
-**Description:**
-Another important pattern for senior backend.
-**Implementation:**
```typescript
-// Advanced example
-async function advancedExample() {
-  // Code here
+// Client errors (4xx)
+const ClientErrors = {
+  VALIDATION_ERROR: 400,
+  INVALID_JSON: 400,
+  AUTHENTICATION_REQUIRED: 401,
+  INVALID_TOKEN: 401,
+  TOKEN_EXPIRED: 401,
+  PERMISSION_DENIED: 403,
+  RESOURCE_NOT_FOUND: 404,
+  METHOD_NOT_ALLOWED: 405,
+  CONFLICT: 409,
+  RATE_LIMIT_EXCEEDED: 429,
+};
+
+// Server errors (5xx)
+const ServerErrors = {
+  INTERNAL_ERROR: 500,
+  DATABASE_ERROR: 500,
+  EXTERNAL_SERVICE_ERROR: 502,
+  SERVICE_UNAVAILABLE: 503,
+};
+```
+
+### Error Handler Implementation
+
+```typescript
+// Express error handler
+interface ApiError extends Error {
+  code: string;
+  statusCode: number;
+  details?: Array<{ field: string; message: string }>;
+}
+
+const errorHandler: ErrorRequestHandler = (err: ApiError, req, res, next) => {
+  const statusCode = err.statusCode || 500;
+  const code = err.code || 'INTERNAL_ERROR';
+
+  // Log server errors
+  if (statusCode >= 500) {
+    logger.error({ err, requestId: req.id }, 'Server error');
+  }
+
+  res.status(statusCode).json({
+    error: {
+      code,
+      message: statusCode >= 500 ? 'An unexpected error occurred' : err.message,
+      details: err.details,
+      ...(process.env.NODE_ENV === 'development' && { stack: err.stack }),
+    },
+    meta: {
+      request_id: req.id,
+      timestamp: new Date().toISOString(),
+    },
+  });
+};
+```
+
+---
+
+## 5. Pagination Patterns
+
+### Offset-Based Pagination
+
+```
+GET /users?limit=20&offset=40
+
+Response:
+{
+  "data": [...],
+  "pagination": {
+    "total": 1250,
+    "limit": 20,
+    "offset": 40,
+    "has_more": true
+  }
+}
+```
-## Guidelines
+
+**Pros:** Simple, supports random access
+**Cons:** Inconsistent with concurrent inserts/deletes
-### Code Organization
-- Clear structure
-- Logical separation
-- Consistent naming
-- Proper documentation
+
+### Cursor-Based Pagination
-### Performance Considerations
-- Optimization strategies
-- Bottleneck identification
-- Monitoring approaches
-- Scaling techniques
+
+```
+GET /users?limit=20&cursor=eyJpZCI6MTIzfQ==
-### Security Best Practices
-- Input validation
-- Authentication
-- Authorization
-- Data protection
+
+Response:
+{
+  "data": [...],
+  "pagination": {
+    "limit": 20,
+    "next_cursor": "eyJpZCI6MTQzfQ==",
+    "prev_cursor": "eyJpZCI6MTIzfQ==",
+    "has_more": true
+  }
+}
-## Common Patterns
+```
-### Pattern A
-Implementation details and examples.
+
+**Pros:** Consistent with real-time data, efficient
+**Cons:** No random access, cursor encoding required
-### Pattern B
-Implementation details and examples.
+
+### Implementation Example
-### Pattern C
-Implementation details and examples.
+
+```typescript
+// Cursor-based pagination
+interface CursorPagination {
+  limit: number;
+  cursor?: string;
+  direction?: 'forward' | 'backward';
+}
-## Anti-Patterns to Avoid
+
+async function paginatedQuery<T>(
+  query: QueryBuilder<T>,
+  { limit, cursor, direction = 'forward' }: CursorPagination
+): Promise<{ data: T[]; nextCursor?: string; hasMore: boolean }> {
+  // Decode cursor
+  const decoded = cursor ? JSON.parse(Buffer.from(cursor, 'base64').toString()) : null;
-### Anti-Pattern 1
-What not to do and why.
+
+  // Apply cursor condition
+  if (decoded) {
+    query = direction === 'forward'
+      ? query.where('id', '>', decoded.id)
+      : query.where('id', '<', decoded.id);
+  }
-### Anti-Pattern 2
-What not to do and why.
+
+  // Fetch one extra to check if more exist
+  const results = await query.limit(limit + 1).orderBy('id', direction === 'forward' ? 'asc' : 'desc');
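+
+  // The extra row is only a sentinel: its presence means another page exists,
+  // and it is sliced off below so callers never see it.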
+ const hasMore = results.length > limit; + const data = hasMore ? results.slice(0, -1) : results; -## Tools and Resources + // Encode next cursor + const nextCursor = hasMore + ? Buffer.from(JSON.stringify({ id: data[data.length - 1].id })).toString('base64') + : undefined; -### Recommended Tools -- Tool 1: Purpose -- Tool 2: Purpose -- Tool 3: Purpose + return { data, nextCursor, hasMore }; +} +``` -### Further Reading -- Resource 1 -- Resource 2 -- Resource 3 +--- -## Conclusion +## 6. Authentication Patterns -Key takeaways for using this reference guide effectively. +### JWT Authentication Flow + +``` +┌──────────┐ 1. Login ┌──────────┐ +│ Client │ ──────────────────▶ │ Server │ +└──────────┘ └──────────┘ + │ + 2. Return JWT │ +◀──────────────────────────────────────── + {access_token, refresh_token} │ + │ + 3. API Request │ +───────────────────────────────────────▶ + Authorization: Bearer {token} │ + │ + 4. Validate & Respond │ +◀──────────────────────────────────────── +``` + +### JWT Implementation + +```typescript +import jwt from 'jsonwebtoken'; + +interface TokenPayload { + userId: string; + email: string; + roles: string[]; +} + +// Generate tokens +function generateTokens(user: User): { accessToken: string; refreshToken: string } { + const payload: TokenPayload = { + userId: user.id, + email: user.email, + roles: user.roles, + }; + + const accessToken = jwt.sign(payload, process.env.JWT_SECRET!, { + expiresIn: '15m', + algorithm: 'RS256', + }); + + const refreshToken = jwt.sign( + { userId: user.id, tokenVersion: user.tokenVersion }, + process.env.JWT_REFRESH_SECRET!, + { expiresIn: '7d', algorithm: 'RS256' } + ); + + return { accessToken, refreshToken }; +} + +// Middleware +const authenticate: RequestHandler = async (req, res, next) => { + const authHeader = req.headers.authorization; + if (!authHeader?.startsWith('Bearer ')) { + return res.status(401).json({ error: { code: 'AUTHENTICATION_REQUIRED' } }); + } + + try { + const token = authHeader.slice(7); + const payload = jwt.verify(token, process.env.JWT_SECRET!) as TokenPayload; + req.user = payload; + next(); + } catch (err) { + if (err instanceof jwt.TokenExpiredError) { + return res.status(401).json({ error: { code: 'TOKEN_EXPIRED' } }); + } + return res.status(401).json({ error: { code: 'INVALID_TOKEN' } }); + } +}; +``` + +### API Key Authentication (Service-to-Service) + +```typescript +// API key middleware +const apiKeyAuth: RequestHandler = async (req, res, next) => { + const apiKey = req.headers['x-api-key'] as string; + + if (!apiKey) { + return res.status(401).json({ error: { code: 'API_KEY_REQUIRED' } }); + } + + // Hash and lookup (never store plain API keys) + const hashedKey = crypto.createHash('sha256').update(apiKey).digest('hex'); + const client = await db.apiClients.findByHashedKey(hashedKey); + + if (!client || !client.isActive) { + return res.status(401).json({ error: { code: 'INVALID_API_KEY' } }); + } + + req.apiClient = client; + next(); +}; +``` + +--- + +## 7. 
Rate Limiting Design + +### Rate Limit Headers + +``` +HTTP/1.1 200 OK +X-RateLimit-Limit: 100 +X-RateLimit-Remaining: 95 +X-RateLimit-Reset: 1705312800 +Retry-After: 60 +``` + +### Tiered Rate Limits + +```typescript +const rateLimits = { + anonymous: { requests: 60, window: '1m' }, + authenticated: { requests: 1000, window: '1h' }, + premium: { requests: 10000, window: '1h' }, +}; + +// Implementation with Redis +import { RateLimiterRedis } from 'rate-limiter-flexible'; + +const createRateLimiter = (tier: keyof typeof rateLimits) => { + const config = rateLimits[tier]; + return new RateLimiterRedis({ + storeClient: redisClient, + keyPrefix: `ratelimit:${tier}`, + points: config.requests, + duration: parseDuration(config.window), + }); +}; +``` + +### Rate Limit Response + +```json +{ + "error": { + "code": "RATE_LIMIT_EXCEEDED", + "message": "Too many requests", + "details": { + "limit": 100, + "window": "1 minute", + "retry_after": 45 + } + } +} +``` + +--- + +## 8. Idempotency Patterns + +### Idempotency Key Header + +``` +POST /payments +Idempotency-Key: payment_abc123_attempt1 +Content-Type: application/json + +{ + "amount": 1000, + "currency": "USD" +} +``` + +### Implementation + +```typescript +const idempotencyMiddleware: RequestHandler = async (req, res, next) => { + const idempotencyKey = req.headers['idempotency-key'] as string; + + if (!idempotencyKey) { + return next(); // Optional for some endpoints + } + + // Check for existing response + const cached = await redis.get(`idempotency:${idempotencyKey}`); + if (cached) { + const { statusCode, body } = JSON.parse(cached); + return res.status(statusCode).json(body); + } + + // Store response after processing + const originalJson = res.json.bind(res); + res.json = (body: any) => { + redis.setex( + `idempotency:${idempotencyKey}`, + 86400, // 24 hours + JSON.stringify({ statusCode: res.statusCode, body }) + ); + return originalJson(body); + }; + + next(); +}; +``` + +--- + +## Quick Reference: HTTP Methods + +| Method | Idempotent | Safe | Cacheable | Request Body | +|--------|------------|------|-----------|--------------| +| GET | Yes | Yes | Yes | No | +| HEAD | Yes | Yes | Yes | No | +| POST | No | No | Conditional | Yes | +| PUT | Yes | No | No | Yes | +| PATCH | No | No | No | Yes | +| DELETE | Yes | No | No | Optional | +| OPTIONS | Yes | Yes | No | No | diff --git a/engineering-team/senior-backend/references/backend_security_practices.md b/engineering-team/senior-backend/references/backend_security_practices.md index 892299d..e07c417 100644 --- a/engineering-team/senior-backend/references/backend_security_practices.md +++ b/engineering-team/senior-backend/references/backend_security_practices.md @@ -1,103 +1,1075 @@ # Backend Security Practices -## Overview +Security patterns and OWASP Top 10 mitigations for Node.js/Express applications. -This reference guide provides comprehensive information for senior backend. +## Guide Index -## Patterns and Practices +1. [OWASP Top 10 Mitigations](#1-owasp-top-10-mitigations) +2. [Input Validation](#2-input-validation) +3. [SQL Injection Prevention](#3-sql-injection-prevention) +4. [XSS Prevention](#4-xss-prevention) +5. [Authentication Security](#5-authentication-security) +6. [Authorization Patterns](#6-authorization-patterns) +7. [Security Headers](#7-security-headers) +8. [Secrets Management](#8-secrets-management) +9. [Logging and Monitoring](#9-logging-and-monitoring) -### Pattern 1: Best Practice Implementation +--- -**Description:** -Detailed explanation of the pattern. 
+## 1. OWASP Top 10 Mitigations
-**When to Use:**
-- Scenario 1
-- Scenario 2
-- Scenario 3
+
+### A01: Broken Access Control
-**Implementation:**
```typescript
-// Example code implementation
-export class Example {
-  // Implementation details
+// BAD: Direct object reference
+app.get('/users/:id/profile', async (req, res) => {
+  const user = await db.users.findById(req.params.id);
+  res.json(user); // Anyone can access any user!
+});
+
+// GOOD: Verify ownership
+app.get('/users/:id/profile', authenticate, async (req, res) => {
+  const userId = req.params.id;
+
+  // Verify user can only access their own data
+  if (req.user.id !== userId && !req.user.roles.includes('admin')) {
+    return res.status(403).json({ error: { code: 'FORBIDDEN' } });
+  }
+
+  const user = await db.users.findById(userId);
+  res.json(user);
+});
+```
+
+### A02: Cryptographic Failures
+
+```typescript
+// BAD: Weak hashing
+const hash = crypto.createHash('md5').update(password).digest('hex');
+
+// GOOD: bcrypt with appropriate cost factor
+import bcrypt from 'bcrypt';
+
+const SALT_ROUNDS = 12; // Adjust based on hardware
+
+async function hashPassword(password: string): Promise<string> {
+  return bcrypt.hash(password, SALT_ROUNDS);
+}
+
+async function verifyPassword(password: string, hash: string): Promise<boolean> {
+  return bcrypt.compare(password, hash);
+}
+```
-**Benefits:**
-- Benefit 1
-- Benefit 2
-- Benefit 3
+
+### A03: Injection
-**Trade-offs:**
-- Consider 1
-- Consider 2
-- Consider 3
-
-### Pattern 2: Advanced Technique
-
-**Description:**
-Another important pattern for senior backend.
-
-**Implementation:**
```typescript
-// Advanced example
-async function advancedExample() {
-  // Code here
+// BAD: String concatenation in SQL
+const query = `SELECT * FROM users WHERE email = '${email}'`;
+
+// GOOD: Parameterized queries
+const result = await db.query(
+  'SELECT * FROM users WHERE email = $1',
+  [email]
+);
+```
+
+### A04: Insecure Design
+
+```typescript
+// BAD: No rate limiting on sensitive operations
+app.post('/forgot-password', async (req, res) => {
+  await sendResetEmail(req.body.email);
+  res.json({ message: 'If email exists, reset link sent' });
+});
+
+// GOOD: Rate limit + consistent response time
+import rateLimit from 'express-rate-limit';
+
+const passwordResetLimiter = rateLimit({
+  windowMs: 15 * 60 * 1000,
+  max: 3, // 3 attempts per 15 minutes
+  skipSuccessfulRequests: false,
+});
+
+app.post('/forgot-password', passwordResetLimiter, async (req, res) => {
+  const startTime = Date.now();
+
+  try {
+    const user = await db.users.findByEmail(req.body.email);
+    if (user) {
+      await sendResetEmail(user.email);
+    }
+  } catch (err) {
+    logger.error(err);
+  }
+
+  // Consistent response time prevents timing attacks
+  const elapsed = Date.now() - startTime;
+  const minDelay = 500;
+  if (elapsed < minDelay) {
+    await sleep(minDelay - elapsed);
+  }
+
+  // Same response regardless of email existence
+  res.json({ message: 'If email exists, reset link sent' });
+});
+```
+
+### A05: Security Misconfiguration
+
+```typescript
+// BAD: Detailed errors in production
+app.use((err, req, res, next) => {
+  res.status(500).json({
+    error: err.message,
+    stack: err.stack, // Exposes internals!
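+    // (a raw stack reveals file paths and dependency versions to anyone probing the API)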
+  });
+});
+
+// GOOD: Environment-aware error handling
+app.use((err: Error, req: Request, res: Response, next: NextFunction) => {
+  const requestId = req.id;
+
+  // Always log full error internally
+  logger.error({ err, requestId }, 'Unhandled error');
+
+  // Return safe response
+  res.status(500).json({
+    error: {
+      code: 'INTERNAL_ERROR',
+      message: process.env.NODE_ENV === 'development'
+        ? err.message
+        : 'An unexpected error occurred',
+      requestId,
+    },
+  });
+});
+```
+
+### A06: Vulnerable Components
+
+```bash
+# Check for vulnerabilities
+npm audit
+
+# Fix automatically where possible
+npm audit fix
+
+# Check specific package
+npm audit --package-lock-only
+
+# Use Snyk for deeper analysis
+npx snyk test
+```
+
+```typescript
+// Automated dependency updates (package.json)
+{
+  "scripts": {
+    "security:audit": "npm audit --audit-level=high",
+    "security:check": "snyk test",
+    "preinstall": "npm audit"
+  }
+}
+```
-## Guidelines
+
+### A07: Authentication Failures
-### Code Organization
-- Clear structure
-- Logical separation
-- Consistent naming
-- Proper documentation
+
+```typescript
+// BAD: Weak session management
+app.post('/login', async (req, res) => {
+  const user = await authenticate(req.body);
+  req.session.userId = user.id; // Session fixation risk
+  res.json({ success: true });
+});
-### Performance Considerations
-- Optimization strategies
-- Bottleneck identification
-- Monitoring approaches
-- Scaling techniques
+
+// GOOD: Regenerate session on authentication
+app.post('/login', async (req, res, next) => {
+  const user = await authenticate(req.body);
-### Security Best Practices
-- Input validation
-- Authentication
-- Authorization
-- Data protection
+
+  // Regenerate session to prevent fixation
+  req.session.regenerate((err) => {
+    if (err) return next(err);
-## Common Patterns
+
+    req.session.userId = user.id;
+    req.session.createdAt = Date.now();
-### Pattern A
-Implementation details and examples.
+
+    req.session.save((err) => {
+      if (err) return next(err);
+      res.json({ success: true });
+    });
+  });
+});
-### Pattern B
-Implementation details and examples.
+```
-### Pattern C
-Implementation details and examples.
+
+### A08: Software and Data Integrity Failures
-## Anti-Patterns to Avoid
+
+```typescript
+// Verify webhook signatures (e.g., Stripe)
+import Stripe from 'stripe';
-### Anti-Pattern 1
-What not to do and why.
+
+app.post('/webhooks/stripe',
+  express.raw({ type: 'application/json' }),
+  async (req, res) => {
+    const sig = req.headers['stripe-signature'] as string;
+    const endpointSecret = process.env.STRIPE_WEBHOOK_SECRET!;
-### Anti-Pattern 2
-What not to do and why.
+
+    let event: Stripe.Event;
-## Tools and Resources
+
+    try {
+      event = stripe.webhooks.constructEvent(
+        req.body,
+        sig,
+        endpointSecret
+      );
+    } catch (err) {
+      logger.warn({ err }, 'Webhook signature verification failed');
+      return res.status(400).json({ error: 'Invalid signature' });
+    }
-### Recommended Tools
-- Tool 1: Purpose
-- Tool 2: Purpose
-- Tool 3: Purpose
+
+    // Process verified event
+    await handleStripeEvent(event);
+    res.json({ received: true });
+  }
+);
+```
-### Further Reading
-- Resource 1
-- Resource 2
-- Resource 3
+
+### A09: Security Logging Failures
+
+```typescript
+// Comprehensive security logging
+import pino from 'pino';
+
+const logger = pino({
+  level: process.env.LOG_LEVEL || 'info',
+  redact: ['req.headers.authorization', 'req.body.password'], // Redact sensitive
+});
-## Conclusion
-
-Key takeaways for using this reference guide effectively.
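+
+// NOTE: redaction is applied when each line is serialized, so the paths listed
+// above never reach any log transport or file.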
+// Log security events
+function logSecurityEvent(event: {
+  type: 'LOGIN_SUCCESS' | 'LOGIN_FAILURE' | 'ACCESS_DENIED' | 'SUSPICIOUS_ACTIVITY';
+  userId?: string;
+  ip: string;
+  userAgent: string;
+  details?: Record<string, unknown>;
+}) {
+  logger.info({
+    security: true,
+    ...event,
+    timestamp: new Date().toISOString(),
+  }, `Security event: ${event.type}`);
+}
+
+// Usage
+app.post('/login', async (req, res) => {
+  try {
+    const user = await authenticate(req.body);
+    logSecurityEvent({
+      type: 'LOGIN_SUCCESS',
+      userId: user.id,
+      ip: req.ip,
+      userAgent: req.headers['user-agent'] || '',
+    });
+    // ...
+  } catch (err) {
+    logSecurityEvent({
+      type: 'LOGIN_FAILURE',
+      ip: req.ip,
+      userAgent: req.headers['user-agent'] || '',
+      details: { email: req.body.email },
+    });
+    // ...
+  }
+});
+```
+
+### A10: Server-Side Request Forgery (SSRF)
+
+```typescript
+// BAD: Unvalidated URL fetch
+app.post('/fetch-url', async (req, res) => {
+  const response = await fetch(req.body.url); // SSRF vulnerability!
+  res.json({ data: await response.text() });
+});
+
+// GOOD: URL allowlist and validation
+import { URL } from 'url';
+
+const ALLOWED_HOSTS = ['api.example.com', 'cdn.example.com'];
+
+function isAllowedUrl(urlString: string): boolean {
+  try {
+    const url = new URL(urlString);
+
+    // Block internal IPs
+    const blockedPatterns = [
+      /^localhost$/i,
+      /^127\./,
+      /^10\./,
+      /^172\.(1[6-9]|2[0-9]|3[0-1])\./,
+      /^192\.168\./,
+      /^0\./,
+      /^169\.254\./,
+      /^\[::1\]$/,
+      /^metadata\.google\.internal$/,
+      /^169\.254\.169\.254$/,
+    ];
+
+    if (blockedPatterns.some(p => p.test(url.hostname))) {
+      return false;
+    }
+
+    // Only allow HTTPS
+    if (url.protocol !== 'https:') {
+      return false;
+    }
+
+    // Check allowlist
+    return ALLOWED_HOSTS.includes(url.hostname);
+  } catch {
+    return false;
+  }
+}
+
+app.post('/fetch-url', async (req, res) => {
+  const { url } = req.body;
+
+  if (!isAllowedUrl(url)) {
+    return res.status(400).json({ error: { code: 'INVALID_URL' } });
+  }
+
+  const response = await fetch(url, {
+    timeout: 5000,
+    follow: 0, // Don't follow redirects
+  });
+
+  res.json({ data: await response.text() });
+});
+```
+
+---
+
+## 2.
Input Validation + +### Schema Validation with Zod + +```typescript +import { z } from 'zod'; + +// Define schemas +const CreateUserSchema = z.object({ + email: z.string().email().max(255).toLowerCase(), + password: z.string() + .min(8, 'Password must be at least 8 characters') + .max(72, 'Password must be at most 72 characters') // bcrypt limit + .regex(/[A-Z]/, 'Password must contain uppercase letter') + .regex(/[a-z]/, 'Password must contain lowercase letter') + .regex(/[0-9]/, 'Password must contain number'), + name: z.string().min(1).max(100).trim(), + age: z.number().int().min(18).max(120).optional(), +}); + +const PaginationSchema = z.object({ + limit: z.coerce.number().int().min(1).max(100).default(20), + offset: z.coerce.number().int().min(0).default(0), + sort: z.enum(['asc', 'desc']).default('desc'), +}); + +// Validation middleware +function validate(schema: z.ZodSchema) { + return (req: Request, res: Response, next: NextFunction) => { + const result = schema.safeParse(req.body); + + if (!result.success) { + const details = result.error.errors.map(err => ({ + field: err.path.join('.'), + code: err.code, + message: err.message, + })); + + return res.status(400).json({ + error: { + code: 'VALIDATION_ERROR', + message: 'Request validation failed', + details, + }, + }); + } + + req.body = result.data; + next(); + }; +} + +// Usage +app.post('/users', validate(CreateUserSchema), async (req, res) => { + // req.body is now typed and validated + const user = await userService.create(req.body); + res.status(201).json(user); +}); +``` + +### Sanitization + +```typescript +import DOMPurify from 'isomorphic-dompurify'; +import xss from 'xss'; + +// HTML sanitization for rich text fields +function sanitizeHtml(dirty: string): string { + return DOMPurify.sanitize(dirty, { + ALLOWED_TAGS: ['b', 'i', 'em', 'strong', 'a', 'p', 'br'], + ALLOWED_ATTR: ['href'], + }); +} + +// Plain text sanitization (strip all HTML) +function sanitizePlainText(dirty: string): string { + return xss(dirty, { + whiteList: {}, + stripIgnoreTag: true, + stripIgnoreTagBody: ['script'], + }); +} + +// File path sanitization +import path from 'path'; + +function sanitizePath(userPath: string, baseDir: string): string | null { + const resolved = path.resolve(baseDir, userPath); + + // Prevent directory traversal + if (!resolved.startsWith(baseDir)) { + return null; + } + + return resolved; +} +``` + +--- + +## 3. 
SQL Injection Prevention
+
+### Parameterized Queries
+
+```typescript
+// BAD: String interpolation
+const email = "'; DROP TABLE users; --";
+db.query(`SELECT * FROM users WHERE email = '${email}'`);
+
+// GOOD: Parameterized query (pg)
+const result = await db.query(
+  'SELECT * FROM users WHERE email = $1',
+  [email]
+);
+
+// GOOD: Parameterized query (mysql2)
+const [rows] = await connection.execute(
+  'SELECT * FROM users WHERE email = ?',
+  [email]
+);
+```
+
+### Query Builders
+
+```typescript
+// Using Knex.js
+const users = await knex('users')
+  .where('email', email) // Automatically parameterized
+  .andWhere('status', 'active')
+  .select('id', 'name', 'email');
+
+// Dynamic WHERE with safe column names
+const ALLOWED_COLUMNS = ['name', 'email', 'created_at'] as const;
+
+function buildUserQuery(filters: Record<string, unknown>) {
+  let query = knex('users').select('id', 'name', 'email');
+
+  for (const [column, value] of Object.entries(filters)) {
+    // Validate column name against allowlist
+    if (ALLOWED_COLUMNS.includes(column as any)) {
+      query = query.where(column, value);
+    }
+  }
+
+  return query;
+}
+```
+
+### ORM Safety
+
+```typescript
+// Prisma (safe by default)
+const user = await prisma.user.findUnique({
+  where: { email }, // Automatically escaped
+});
+
+// TypeORM (safe by default)
+const user = await userRepository.findOne({
+  where: { email }, // Automatically escaped
+});
+
+// DANGER: Raw queries still require parameterization
+// BAD
+await prisma.$queryRawUnsafe(`SELECT * FROM users WHERE email = '${email}'`);
+
+// GOOD
+await prisma.$queryRaw`SELECT * FROM users WHERE email = ${email}`;
+```
+
+---
+
+## 4. XSS Prevention
+
+### Output Encoding
+
+```typescript
+// Server-side template rendering (EJS)
+// In template: <%= userInput %> (escaped)
+// NOT: <%- userInput %> (raw, dangerous)
+
+// Manual HTML encoding
+function escapeHtml(str: string): string {
+  return str
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#39;');
+}
+
+// JSON response (automatically safe in modern frameworks)
+res.json({ message: userInput }); // JSON.stringify escapes by default
+```
+
+### Content Security Policy
+
+```typescript
+import helmet from 'helmet';
+
+app.use(helmet.contentSecurityPolicy({
+  directives: {
+    defaultSrc: ["'self'"],
+    scriptSrc: ["'self'", "'strict-dynamic'"],
+    styleSrc: ["'self'", "'unsafe-inline'"], // Consider using nonces
+    imgSrc: ["'self'", "data:", "https:"],
+    fontSrc: ["'self'"],
+    objectSrc: ["'none'"],
+    frameAncestors: ["'none'"],
+    baseUri: ["'self'"],
+    formAction: ["'self'"],
+    upgradeInsecureRequests: [],
+  },
+}));
+```
+
+### API Response Safety
+
+```typescript
+// Set correct Content-Type for JSON APIs
+app.use((req, res, next) => {
+  res.setHeader('Content-Type', 'application/json; charset=utf-8');
+  res.setHeader('X-Content-Type-Options', 'nosniff');
+  next();
+});
+
+// Disable JSONP (if not needed)
+// Don't implement callback parameter handling
+
+// Safe JSON response
+res.json({
+  data: sanitizedData,
+  // Never reflect raw user input
+});
+```
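+
+### Pinning Headers in Tests
+
+A quick regression test keeps the header and encoding setup above from silently drifting. A minimal sketch, assuming `supertest` and Jest (the `app` import path and `/api/health` route are illustrative):
+
+```typescript
+import request from 'supertest';
+import app from '../src/app'; // illustrative path
+
+describe('security headers', () => {
+  it('sets nosniff and a CSP on API responses', async () => {
+    const res = await request(app).get('/api/health');
+
+    expect(res.headers['x-content-type-options']).toBe('nosniff');
+    expect(res.headers['content-security-policy']).toContain("default-src 'self'");
+  });
+});
+```
+
+---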
+## 5. Authentication Security
+
+### Password Storage
+
+```typescript
+import bcrypt from 'bcrypt';
+import { randomBytes } from 'crypto';
+
+const SALT_ROUNDS = 12;
+
+async function hashPassword(password: string): Promise<string> {
+  return bcrypt.hash(password, SALT_ROUNDS);
+}
+
+async function verifyPassword(password: string, hash: string): Promise<boolean> {
+  return bcrypt.compare(password, hash);
+}
+
+// For password reset tokens
+function generateSecureToken(): string {
+  return randomBytes(32).toString('hex');
+}
+
+// Token expiration (store in DB)
+interface PasswordResetToken {
+  token: string; // Hashed
+  userId: string;
+  expiresAt: Date; // 1 hour from creation
+}
+```
+
+### JWT Best Practices
+
+```typescript
+import jwt from 'jsonwebtoken';
+
+// Use asymmetric keys in production
+const PRIVATE_KEY = process.env.JWT_PRIVATE_KEY!;
+const PUBLIC_KEY = process.env.JWT_PUBLIC_KEY!;
+
+interface AccessTokenPayload {
+  sub: string; // User ID
+  email: string;
+  roles: string[];
+  iat: number;
+  exp: number;
+}
+
+function generateAccessToken(user: User): string {
+  const payload: Omit<AccessTokenPayload, 'iat' | 'exp'> = {
+    sub: user.id,
+    email: user.email,
+    roles: user.roles,
+  };
+
+  return jwt.sign(payload, PRIVATE_KEY, {
+    algorithm: 'RS256',
+    expiresIn: '15m',
+    issuer: 'api.example.com',
+    audience: 'example.com',
+  });
+}
+
+function verifyAccessToken(token: string): AccessTokenPayload {
+  return jwt.verify(token, PUBLIC_KEY, {
+    algorithms: ['RS256'],
+    issuer: 'api.example.com',
+    audience: 'example.com',
+  }) as AccessTokenPayload;
+}
+
+// Refresh tokens should be stored in DB and rotated
+interface RefreshToken {
+  id: string;
+  token: string; // Hashed
+  userId: string;
+  expiresAt: Date;
+  family: string; // For rotation detection
+  isRevoked: boolean;
+}
+```
+
+### Session Management
+
+```typescript
+import session from 'express-session';
+import RedisStore from 'connect-redis';
+import { createClient } from 'redis';
+
+const redisClient = createClient({ url: process.env.REDIS_URL });
+
+app.use(session({
+  store: new RedisStore({ client: redisClient }),
+  name: 'sessionId', // Don't use default 'connect.sid'
+  secret: process.env.SESSION_SECRET!,
+  resave: false,
+  saveUninitialized: false,
+  cookie: {
+    secure: process.env.NODE_ENV === 'production',
+    httpOnly: true,
+    sameSite: 'strict',
+    maxAge: 24 * 60 * 60 * 1000, // 24 hours
+    domain: process.env.COOKIE_DOMAIN,
+  },
+}));
+
+// Regenerate session on privilege change
+async function elevateSession(req: Request): Promise<void> {
+  return new Promise<void>((resolve, reject) => {
+    const userId = req.session.userId;
+    req.session.regenerate((err) => {
+      if (err) return reject(err);
+      req.session.userId = userId;
+      req.session.elevated = true;
+      req.session.elevatedAt = Date.now();
+      resolve();
+    });
+  });
+}
+```
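+
+### Refresh Token Rotation
+
+A sketch of the rotation flow implied by the `RefreshToken` shape above: each refresh token is single-use, and reuse of an already-rotated token revokes the whole family. The `db.refreshTokens.*` helpers, `AuthError`, and `issueTokenPair` are illustrative names, not a specific library API:
+
+```typescript
+import { createHash } from 'crypto';
+
+async function rotateRefreshToken(presented: string): Promise<{ accessToken: string; refreshToken: string }> {
+  // Tokens are stored hashed, so hash the presented value before lookup
+  const hashed = createHash('sha256').update(presented).digest('hex');
+  const stored = await db.refreshTokens.findByHashedToken(hashed);
+
+  if (!stored || stored.expiresAt < new Date()) {
+    throw new AuthError('INVALID_TOKEN');
+  }
+
+  if (stored.isRevoked) {
+    // A revoked token coming back means it was stolen or replayed:
+    // revoke every token descended from the same login (the family)
+    await db.refreshTokens.revokeFamily(stored.family);
+    throw new AuthError('TOKEN_REVOKED');
+  }
+
+  await db.refreshTokens.revoke(stored.id); // single-use: the old token dies here
+  const user = await db.users.findById(stored.userId);
+  return issueTokenPair(user, stored.family); // new pair, same family
+}
+```
+
+---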
+## 6. Authorization Patterns
+
+### Role-Based Access Control (RBAC)
+
+```typescript
+type Role = 'user' | 'moderator' | 'admin';
+type Permission = 'read:users' | 'write:users' | 'delete:users' | 'read:admin';
+
+const ROLE_PERMISSIONS: Record<Role, Permission[]> = {
+  user: ['read:users'],
+  moderator: ['read:users', 'write:users'],
+  admin: ['read:users', 'write:users', 'delete:users', 'read:admin'],
+};
+
+function hasPermission(userRoles: Role[], required: Permission): boolean {
+  return userRoles.some(role =>
+    ROLE_PERMISSIONS[role]?.includes(required)
+  );
+}
+
+// Middleware
+function requirePermission(permission: Permission) {
+  return (req: Request, res: Response, next: NextFunction) => {
+    if (!hasPermission(req.user.roles, permission)) {
+      return res.status(403).json({
+        error: { code: 'FORBIDDEN', message: 'Insufficient permissions' },
+      });
+    }
+    next();
+  };
+}
+
+// Usage
+app.delete('/users/:id',
+  authenticate,
+  requirePermission('delete:users'),
+  deleteUserHandler
+);
+```
+
+### Attribute-Based Access Control (ABAC)
+
+```typescript
+interface AccessContext {
+  user: { id: string; roles: string[]; department: string };
+  resource: { ownerId: string; department: string; sensitivity: string };
+  action: 'read' | 'write' | 'delete';
+  environment: { time: Date; ip: string };
+}
+
+interface Policy {
+  name: string;
+  condition: (ctx: AccessContext) => boolean;
+}
+
+const policies: Policy[] = [
+  {
+    name: 'owner-full-access',
+    condition: (ctx) => ctx.resource.ownerId === ctx.user.id,
+  },
+  {
+    name: 'same-department-read',
+    condition: (ctx) =>
+      ctx.action === 'read' &&
+      ctx.resource.department === ctx.user.department,
+  },
+  {
+    name: 'admin-override',
+    condition: (ctx) => ctx.user.roles.includes('admin'),
+  },
+  {
+    name: 'no-sensitive-outside-hours',
+    condition: (ctx) => {
+      const hour = ctx.environment.time.getHours();
+      return ctx.resource.sensitivity !== 'high' || (hour >= 9 && hour <= 17);
+    },
+  },
+];
+
+function evaluateAccess(ctx: AccessContext): boolean {
+  return policies.some(policy => policy.condition(ctx));
+}
+```
+
+---
+
+## 7.
Security Headers + +### Complete Helmet Configuration + +```typescript +import helmet from 'helmet'; + +app.use(helmet({ + // Content Security Policy + contentSecurityPolicy: { + directives: { + defaultSrc: ["'self'"], + scriptSrc: ["'self'"], + styleSrc: ["'self'", "'unsafe-inline'"], + imgSrc: ["'self'", "data:", "https:"], + connectSrc: ["'self'", "https://api.example.com"], + fontSrc: ["'self'"], + objectSrc: ["'none'"], + mediaSrc: ["'none'"], + frameSrc: ["'none'"], + }, + }, + // Strict Transport Security + hsts: { + maxAge: 31536000, + includeSubDomains: true, + preload: true, + }, + // Prevent clickjacking + frameguard: { action: 'deny' }, + // Prevent MIME sniffing + noSniff: true, + // XSS filter (legacy browsers) + xssFilter: true, + // Hide X-Powered-By + hidePoweredBy: true, + // Referrer policy + referrerPolicy: { policy: 'strict-origin-when-cross-origin' }, + // Cross-origin policies + crossOriginEmbedderPolicy: false, // Enable if using SharedArrayBuffer + crossOriginOpenerPolicy: { policy: 'same-origin' }, + crossOriginResourcePolicy: { policy: 'same-origin' }, +})); + +// CORS configuration +import cors from 'cors'; + +app.use(cors({ + origin: ['https://example.com', 'https://app.example.com'], + methods: ['GET', 'POST', 'PUT', 'DELETE', 'PATCH'], + allowedHeaders: ['Content-Type', 'Authorization'], + credentials: true, + maxAge: 86400, // 24 hours +})); +``` + +### Header Reference + +| Header | Purpose | Value | +|--------|---------|-------| +| `Strict-Transport-Security` | Force HTTPS | `max-age=31536000; includeSubDomains; preload` | +| `Content-Security-Policy` | Prevent XSS | See above | +| `X-Content-Type-Options` | Prevent MIME sniffing | `nosniff` | +| `X-Frame-Options` | Prevent clickjacking | `DENY` | +| `Referrer-Policy` | Control referrer info | `strict-origin-when-cross-origin` | +| `Permissions-Policy` | Feature restrictions | `geolocation=(), microphone=()` | + +--- + +## 8. 
Secrets Management
+
+### Environment Variables
+
+```typescript
+// config/secrets.ts
+import { z } from 'zod';
+
+const SecretsSchema = z.object({
+  DATABASE_URL: z.string().url(),
+  JWT_SECRET: z.string().min(32),
+  JWT_PRIVATE_KEY: z.string(),
+  JWT_PUBLIC_KEY: z.string(),
+  REDIS_URL: z.string().url(),
+  STRIPE_SECRET_KEY: z.string().startsWith('sk_'),
+  STRIPE_WEBHOOK_SECRET: z.string().startsWith('whsec_'),
+});
+
+// Validate on startup
+export const secrets = SecretsSchema.parse(process.env);
+
+// NEVER log secrets
+console.log('Config loaded:', {
+  database: secrets.DATABASE_URL.replace(/\/\/.*@/, '//***@'),
+  redis: 'configured',
+  stripe: 'configured',
+});
+```
+
+### Secret Rotation
+
+```typescript
+// Support multiple keys during rotation
+const JWT_SECRETS = [
+  process.env.JWT_SECRET_CURRENT!,
+  process.env.JWT_SECRET_PREVIOUS!, // Keep for grace period
+].filter(Boolean);
+
+function verifyTokenWithRotation(token: string): TokenPayload | null {
+  for (const secret of JWT_SECRETS) {
+    try {
+      return jwt.verify(token, secret) as TokenPayload;
+    } catch {
+      continue;
+    }
+  }
+  return null;
+}
+```
+
+### Vault Integration
+
+```typescript
+import Vault from 'node-vault';
+
+const vault = Vault({
+  endpoint: process.env.VAULT_ADDR,
+  token: process.env.VAULT_TOKEN,
+});
+
+async function getSecret(path: string): Promise<string> {
+  const result = await vault.read(`secret/data/${path}`);
+  return result.data.data.value;
+}
+
+// Cache secrets with TTL
+const secretsCache = new Map<string, { value: string; expiresAt: number }>();
+const CACHE_TTL = 5 * 60 * 1000; // 5 minutes
+
+async function getCachedSecret(path: string): Promise<string> {
+  const cached = secretsCache.get(path);
+  if (cached && cached.expiresAt > Date.now()) {
+    return cached.value;
+  }
+
+  const value = await getSecret(path);
+  secretsCache.set(path, { value, expiresAt: Date.now() + CACHE_TTL });
+  return value;
+}
+```
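+
+### Fail Fast on Missing Secrets
+
+Because `SecretsSchema.parse` runs at import time, wiring it into the entry point makes a bad deploy crash before the server accepts traffic. A minimal sketch, assuming an ESM entry point (the `./config/secrets` path matches the module above; `./server` is illustrative):
+
+```typescript
+// src/index.ts - validate configuration before opening any ports
+import { ZodError } from 'zod';
+
+try {
+  await import('./config/secrets'); // runs SecretsSchema.parse(process.env)
+} catch (err) {
+  if (err instanceof ZodError) {
+    console.error('Invalid or missing secrets:', err.errors.map(e => e.path.join('.')));
+  }
+  process.exit(1); // never boot with a partial config
+}
+
+const { startServer } = await import('./server'); // illustrative module
+startServer();
+```
+
+---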
+## 9. Logging and Monitoring
+
+### Security Event Logging
+
+```typescript
+import pino from 'pino';
+
+const logger = pino({
+  level: 'info',
+  redact: {
+    paths: [
+      'req.headers.authorization',
+      'req.headers.cookie',
+      'req.body.password',
+      'req.body.token',
+      '*.password',
+      '*.secret',
+      '*.apiKey',
+    ],
+    censor: '[REDACTED]',
+  },
+});
+
+// Security event types
+type SecurityEventType =
+  | 'AUTH_SUCCESS'
+  | 'AUTH_FAILURE'
+  | 'AUTH_LOCKOUT'
+  | 'PASSWORD_CHANGED'
+  | 'PASSWORD_RESET_REQUEST'
+  | 'PERMISSION_DENIED'
+  | 'RATE_LIMIT_EXCEEDED'
+  | 'SUSPICIOUS_ACTIVITY'
+  | 'TOKEN_REVOKED';
+
+interface SecurityEvent {
+  type: SecurityEventType;
+  userId?: string;
+  ip: string;
+  userAgent: string;
+  path: string;
+  details?: Record<string, unknown>;
+}
+
+function logSecurityEvent(event: SecurityEvent): void {
+  logger.info({
+    security: true,
+    ...event,
+    timestamp: new Date().toISOString(),
+  }, `Security: ${event.type}`);
+}
+```
+
+### Request Logging
+
+```typescript
+import pinoHttp from 'pino-http';
+
+app.use(pinoHttp({
+  logger,
+  genReqId: (req) => req.headers['x-request-id'] || crypto.randomUUID(),
+  serializers: {
+    req: (req) => ({
+      id: req.id,
+      method: req.method,
+      url: req.url,
+      remoteAddress: req.remoteAddress,
+      // Don't log headers by default (may contain sensitive data)
+    }),
+    res: (res) => ({
+      statusCode: res.statusCode,
+    }),
+  },
+  customLogLevel: (req, res, err) => {
+    if (res.statusCode >= 500 || err) return 'error';
+    if (res.statusCode >= 400) return 'warn';
+    return 'info';
+  },
+}));
+```
+
+### Alerting Thresholds
+
+| Metric | Warning | Critical |
+|--------|---------|----------|
+| Failed logins per IP (15 min) | > 5 | > 10 |
+| Failed logins per account (1 hour) | > 3 | > 5 |
+| 403 responses per IP (5 min) | > 10 | > 50 |
+| 500 errors (5 min) | > 5 | > 20 |
+| Request rate per IP (1 min) | > 100 | > 500 |
+
+---
+
+## Quick Reference: Security Checklist
+
+### Authentication
+- [ ] bcrypt with cost >= 12 for password hashing
+- [ ] JWT with RS256, short expiry (15-30 min)
+- [ ] Refresh token rotation with family detection
+- [ ] Session regeneration on login
+- [ ] Secure cookie flags (httpOnly, secure, sameSite)
+
+### Input Validation
+- [ ] Schema validation on all inputs (Zod)
+- [ ] Parameterized queries (never string concat)
+- [ ] File path sanitization
+- [ ] Content-Type validation
+
+### Headers
+- [ ] Strict-Transport-Security
+- [ ] Content-Security-Policy
+- [ ] X-Content-Type-Options: nosniff
+- [ ] X-Frame-Options: DENY
+- [ ] CORS with specific origins
+
+### Logging
+- [ ] Redact sensitive fields
+- [ ] Log security events
+- [ ] Include request IDs
+- [ ] Alert on anomalies
+
+### Dependencies
+- [ ] npm audit in CI
+- [ ] Automated dependency updates
+- [ ] Lock file committed
diff --git a/engineering-team/senior-backend/references/database_optimization_guide.md b/engineering-team/senior-backend/references/database_optimization_guide.md
index d7e7125..03412ed 100644
--- a/engineering-team/senior-backend/references/database_optimization_guide.md
+++ b/engineering-team/senior-backend/references/database_optimization_guide.md
@@ -1,103 +1,593 @@
 # Database Optimization Guide
-## Overview
+Practical strategies for PostgreSQL query optimization, indexing, and performance tuning.
-This reference guide provides comprehensive information for senior backend.
+## Guide Index
-## Patterns and Practices
+1. [Query Analysis with EXPLAIN](#1-query-analysis-with-explain)
+2. [Indexing Strategies](#2-indexing-strategies)
+3.
[N+1 Query Problem](#3-n1-query-problem) +4. [Connection Pooling](#4-connection-pooling) +5. [Query Optimization Patterns](#5-query-optimization-patterns) +6. [Database Migrations](#6-database-migrations) +7. [Monitoring and Alerting](#7-monitoring-and-alerting) -### Pattern 1: Best Practice Implementation +--- -**Description:** -Detailed explanation of the pattern. +## 1. Query Analysis with EXPLAIN -**When to Use:** -- Scenario 1 -- Scenario 2 -- Scenario 3 +### Basic EXPLAIN Usage -**Implementation:** -```typescript -// Example code implementation -export class Example { - // Implementation details -} +```sql +-- Show query plan +EXPLAIN SELECT * FROM orders WHERE user_id = 123; + +-- Show plan with actual execution times +EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 123; + +-- Show buffers and I/O statistics +EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) +SELECT * FROM orders WHERE user_id = 123; ``` -**Benefits:** -- Benefit 1 -- Benefit 2 -- Benefit 3 +### Reading EXPLAIN Output -**Trade-offs:** -- Consider 1 -- Consider 2 -- Consider 3 - -### Pattern 2: Advanced Technique - -**Description:** -Another important pattern for senior backend. - -**Implementation:** -```typescript -// Advanced example -async function advancedExample() { - // Code here -} +``` + QUERY PLAN +--------------------------------------------------------------------------- + Index Scan using idx_orders_user_id on orders (cost=0.43..8.45 rows=10 width=120) + Index Cond: (user_id = 123) + Buffers: shared hit=3 + Planning Time: 0.152 ms + Execution Time: 0.089 ms ``` -## Guidelines +**Key metrics:** +- `cost`: Estimated cost (startup..total) +- `rows`: Estimated row count +- `width`: Average row size in bytes +- `actual time`: Real execution time (with ANALYZE) +- `Buffers: shared hit`: Pages read from cache -### Code Organization -- Clear structure -- Logical separation -- Consistent naming -- Proper documentation +### Scan Types (Best to Worst) -### Performance Considerations -- Optimization strategies -- Bottleneck identification -- Monitoring approaches -- Scaling techniques +| Scan Type | Description | Performance | +|-----------|-------------|-------------| +| Index Only Scan | Data from index alone | Best | +| Index Scan | Index lookup + heap fetch | Good | +| Bitmap Index Scan | Multiple index conditions | Good | +| Index Scan + Filter | Index + row filtering | Okay | +| Seq Scan (small table) | Full table scan | Okay | +| Seq Scan (large table) | Full table scan | Bad | +| Nested Loop (large) | O(n*m) join | Very Bad | -### Security Best Practices -- Input validation -- Authentication -- Authorization -- Data protection +### Warning Signs -## Common Patterns +```sql +-- BAD: Sequential scan on large table +Seq Scan on orders (cost=0.00..1854231.00 rows=50000000 width=120) + Filter: (status = 'pending') + Rows Removed by Filter: 49500000 -### Pattern A -Implementation details and examples. +-- BAD: Nested loop with high iterations +Nested Loop (cost=0.43..2847593.20 rows=12500000 width=240) + -> Seq Scan on users (cost=0.00..1250.00 rows=50000 width=120) + -> Index Scan on orders (cost=0.43..45.73 rows=250 width=120) + Index Cond: (orders.user_id = users.id) +``` -### Pattern B -Implementation details and examples. +--- -### Pattern C -Implementation details and examples. +## 2. Indexing Strategies -## Anti-Patterns to Avoid +### Index Types -### Anti-Pattern 1 -What not to do and why. 
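+To confirm which access method the planner actually chooses, EXPLAIN the
+query; an illustrative check against the GIN index created in the block
+below:
+
+```sql
+EXPLAIN SELECT * FROM products WHERE tags @> ARRAY['sale'];
+-- Expect: Bitmap Index Scan on idx_products_tags (not a Seq Scan)
+```
+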
+```sql
+-- B-tree (default, most common)
+CREATE INDEX idx_users_email ON users(email);
 
-### Anti-Pattern 2
-What not to do and why.
+-- Hash (equality only, rarely better than B-tree)
+CREATE INDEX idx_users_id_hash ON users USING hash(id);
 
-## Tools and Resources
+-- GIN (arrays, JSONB, full-text search)
+CREATE INDEX idx_products_tags ON products USING gin(tags);
+CREATE INDEX idx_users_data ON users USING gin(metadata jsonb_path_ops);
 
-### Recommended Tools
-- Tool 1: Purpose
-- Tool 2: Purpose
-- Tool 3: Purpose
+-- GiST (geometric, range types, full-text)
+CREATE INDEX idx_locations_point ON locations USING gist(coordinates);
+```
 
-### Further Reading
-- Resource 1
-- Resource 2
-- Resource 3
+### Composite Indexes
 
-## Conclusion
+```sql
+-- Order matters! Column with = first, then range/sort
+CREATE INDEX idx_orders_user_status_date
+ON orders(user_id, status, created_at DESC);
 
-Key takeaways for using this reference guide effectively.
+-- This index supports:
+-- WHERE user_id = ?
+-- WHERE user_id = ? AND status = ?
+-- WHERE user_id = ? AND status = ? ORDER BY created_at DESC
+-- WHERE user_id = ? ORDER BY created_at DESC
+
+-- This index does NOT efficiently support:
+-- WHERE status = ? (user_id not in query)
+-- WHERE created_at > ? (leftmost column not in query)
+```
+
+### Partial Indexes
+
+```sql
+-- Index only active users (smaller, faster)
+CREATE INDEX idx_users_active_email
+ON users(email)
+WHERE status = 'active';
+
+-- Index only recent orders
+CREATE INDEX idx_orders_recent
+ON orders(created_at DESC)
+WHERE created_at > CURRENT_DATE - INTERVAL '90 days';
+
+-- Index only unprocessed items
+CREATE INDEX idx_queue_pending
+ON job_queue(priority DESC, created_at)
+WHERE processed_at IS NULL;
+```
+
+### Covering Indexes (Index-Only Scans)
+
+```sql
+-- Include non-indexed columns to avoid heap lookup
+CREATE INDEX idx_users_email_covering
+ON users(email)
+INCLUDE (name, created_at);
+
+-- Query can be satisfied from index alone
+SELECT name, created_at FROM users WHERE email = 'test@example.com';
+-- Result: Index Only Scan
+```
+
+### Index Maintenance
+
+```sql
+-- Check index usage
+SELECT
+    schemaname,
+    relname,
+    indexrelname,
+    idx_scan,
+    idx_tup_read,
+    idx_tup_fetch,
+    pg_size_pretty(pg_relation_size(indexrelid)) as size
+FROM pg_stat_user_indexes
+ORDER BY idx_scan ASC;
+
+-- Find unused indexes (candidates for removal)
+SELECT indexrelid::regclass as index,
+       relid::regclass as table,
+       pg_size_pretty(pg_relation_size(indexrelid)) as size
+FROM pg_stat_user_indexes
+WHERE idx_scan = 0
+  AND indexrelid NOT IN (SELECT conindid FROM pg_constraint);
+
+-- Rebuild bloated indexes
+REINDEX INDEX CONCURRENTLY idx_orders_user_id;
+```
+
+---
+
+## 3. N+1 Query Problem
+
+### The Problem
+
+```typescript
+// BAD: N+1 queries
+const users = await db.query('SELECT * FROM users LIMIT 100');
+
+for (const user of users) {
+  // This runs 100 times!
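+  // Even at ~1 ms per round trip (an optimistic assumption for a nearby
+  // database), that is ~100 ms of pure query latency for this one page.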
+ const orders = await db.query( + 'SELECT * FROM orders WHERE user_id = $1', + [user.id] + ); + user.orders = orders; +} +// Total queries: 1 + 100 = 101 +``` + +### Solution 1: JOIN + +```typescript +// GOOD: Single query with JOIN +const usersWithOrders = await db.query(` + SELECT u.*, o.id as order_id, o.total, o.status + FROM users u + LEFT JOIN orders o ON o.user_id = u.id + LIMIT 100 +`); +// Total queries: 1 +``` + +### Solution 2: Batch Loading (DataLoader pattern) + +```typescript +// GOOD: Two queries with batch loading +const users = await db.query('SELECT * FROM users LIMIT 100'); +const userIds = users.map(u => u.id); + +const orders = await db.query( + 'SELECT * FROM orders WHERE user_id = ANY($1)', + [userIds] +); + +// Group orders by user_id +const ordersByUser = groupBy(orders, 'user_id'); +users.forEach(user => { + user.orders = ordersByUser[user.id] || []; +}); +// Total queries: 2 +``` + +### Solution 3: ORM Eager Loading + +```typescript +// Prisma +const users = await prisma.user.findMany({ + take: 100, + include: { orders: true } +}); + +// TypeORM +const users = await userRepository.find({ + take: 100, + relations: ['orders'] +}); + +// Sequelize +const users = await User.findAll({ + limit: 100, + include: [{ model: Order }] +}); +``` + +### Detecting N+1 in Production + +```typescript +// Query logging middleware +let queryCount = 0; +const originalQuery = db.query; + +db.query = async (...args) => { + queryCount++; + if (queryCount > 10) { + console.warn(`High query count: ${queryCount} in single request`); + console.trace(); + } + return originalQuery.apply(db, args); +}; +``` + +--- + +## 4. Connection Pooling + +### Why Pooling Matters + +``` +Without pooling: +Request → Create connection → Query → Close connection + (50-100ms overhead) + +With pooling: +Request → Get connection from pool → Query → Return to pool + (0-1ms overhead) +``` + +### pg-pool Configuration + +```typescript +import { Pool } from 'pg'; + +const pool = new Pool({ + host: process.env.DB_HOST, + port: 5432, + database: process.env.DB_NAME, + user: process.env.DB_USER, + password: process.env.DB_PASSWORD, + + // Pool settings + min: 5, // Minimum connections + max: 20, // Maximum connections + idleTimeoutMillis: 30000, // Close idle connections after 30s + connectionTimeoutMillis: 5000, // Fail if can't connect in 5s + + // Statement timeout (cancel long queries) + statement_timeout: 30000, +}); + +// Health check +pool.on('error', (err, client) => { + console.error('Unexpected pool error', err); +}); +``` + +### Pool Sizing Formula + +``` +Optimal connections = (CPU cores * 2) + effective_spindle_count + +For SSD with 4 cores: +connections = (4 * 2) + 1 = 9 + +For multiple app servers: +connections_per_server = total_connections / num_servers +``` + +### PgBouncer for High Scale + +```ini +# pgbouncer.ini +[databases] +mydb = host=localhost port=5432 dbname=mydb + +[pgbouncer] +listen_port = 6432 +listen_addr = 0.0.0.0 +auth_type = md5 +auth_file = /etc/pgbouncer/userlist.txt +pool_mode = transaction +max_client_conn = 1000 +default_pool_size = 20 +reserve_pool_size = 5 +``` + +--- + +## 5. 
Query Optimization Patterns + +### Pagination Optimization + +```sql +-- BAD: OFFSET is slow for large values +SELECT * FROM orders ORDER BY created_at DESC LIMIT 20 OFFSET 10000; +-- Must scan 10,020 rows, discard 10,000 + +-- GOOD: Cursor-based pagination +SELECT * FROM orders +WHERE created_at < '2024-01-15T10:00:00Z' +ORDER BY created_at DESC +LIMIT 20; +-- Only scans 20 rows +``` + +### Batch Updates + +```sql +-- BAD: Individual updates +UPDATE orders SET status = 'shipped' WHERE id = 1; +UPDATE orders SET status = 'shipped' WHERE id = 2; +-- ...repeat 1000 times + +-- GOOD: Batch update +UPDATE orders +SET status = 'shipped' +WHERE id = ANY(ARRAY[1, 2, 3, ...1000]); + +-- GOOD: Update from values +UPDATE orders o +SET status = v.new_status +FROM (VALUES + (1, 'shipped'), + (2, 'delivered'), + (3, 'cancelled') +) AS v(id, new_status) +WHERE o.id = v.id; +``` + +### Avoiding SELECT * + +```sql +-- BAD: Fetches all columns including large text/blob +SELECT * FROM articles WHERE published = true; + +-- GOOD: Only fetch needed columns +SELECT id, title, summary, author_id, published_at +FROM articles +WHERE published = true; +``` + +### Using EXISTS vs IN + +```sql +-- For checking existence, EXISTS is often faster +-- BAD +SELECT * FROM users +WHERE id IN (SELECT user_id FROM orders WHERE total > 1000); + +-- GOOD (for large subquery results) +SELECT * FROM users u +WHERE EXISTS ( + SELECT 1 FROM orders o + WHERE o.user_id = u.id AND o.total > 1000 +); +``` + +### Materialized Views for Complex Aggregations + +```sql +-- Create materialized view for expensive aggregations +CREATE MATERIALIZED VIEW daily_sales_summary AS +SELECT + date_trunc('day', created_at) as date, + product_id, + COUNT(*) as order_count, + SUM(quantity) as total_quantity, + SUM(total) as total_revenue +FROM orders +GROUP BY date_trunc('day', created_at), product_id; + +-- Create index on materialized view +CREATE INDEX idx_daily_sales_date ON daily_sales_summary(date); + +-- Refresh periodically +REFRESH MATERIALIZED VIEW CONCURRENTLY daily_sales_summary; +``` + +--- + +## 6. 
Database Migrations
+
+### Migration Best Practices
+
+```sql
+-- Always include rollback
+-- migrations/20240115_001_add_user_status.sql
+-- UP
+ALTER TABLE users ADD COLUMN status VARCHAR(20) DEFAULT 'active';
+CREATE INDEX CONCURRENTLY idx_users_status ON users(status);
+
+-- DOWN (in separate file or comment)
+DROP INDEX CONCURRENTLY IF EXISTS idx_users_status;
+ALTER TABLE users DROP COLUMN IF EXISTS status;
+```
+
+### Safe Column Addition
+
+```sql
+-- SAFE: Add nullable column (no table rewrite)
+ALTER TABLE users ADD COLUMN phone VARCHAR(20);
+
+-- SAFE: Add column with constant default (PG 11+ stores it as metadata, no rewrite)
+ALTER TABLE users ADD COLUMN score INTEGER DEFAULT 0;
+
+-- UNSAFE: Add column with volatile default (forces a full table rewrite)
+-- ALTER TABLE users ADD COLUMN created_at TIMESTAMP DEFAULT NOW();
+
+-- SAFE alternative for volatile defaults (batch the UPDATE on large tables):
+ALTER TABLE users ADD COLUMN created_at TIMESTAMP;
+UPDATE users SET created_at = NOW() WHERE created_at IS NULL;
+ALTER TABLE users ALTER COLUMN created_at SET DEFAULT NOW();
+ALTER TABLE users ALTER COLUMN created_at SET NOT NULL;
+```
+
+### Safe Index Creation
+
+```sql
+-- UNSAFE: Locks table
+CREATE INDEX idx_orders_user ON orders(user_id);
+
+-- SAFE: Non-blocking
+CREATE INDEX CONCURRENTLY idx_orders_user ON orders(user_id);
+
+-- Note: CONCURRENTLY cannot run in a transaction
+```
+
+### Safe Column Removal
+
+```sql
+-- Step 1: Stop writing to column (application change)
+-- Step 2: Wait for all deployments
+-- Step 3: Drop column
+ALTER TABLE users DROP COLUMN IF EXISTS legacy_field;
+```
+
+---
+
+## 7. Monitoring and Alerting
+
+### Key Metrics to Monitor
+
+```sql
+-- Active connections
+SELECT count(*) FROM pg_stat_activity WHERE state = 'active';
+
+-- Connection by state
+SELECT state, count(*)
+FROM pg_stat_activity
+GROUP BY state;
+
+-- Long-running queries
+SELECT
+    pid,
+    now() - pg_stat_activity.query_start AS duration,
+    query,
+    state
+FROM pg_stat_activity
+WHERE (now() - pg_stat_activity.query_start) > interval '5 minutes'
+  AND state != 'idle';
+
+-- Largest tables (total, heap, and index sizes)
+SELECT
+    schemaname,
+    tablename,
+    pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as total_size,
+    pg_size_pretty(pg_relation_size(schemaname||'.'||tablename)) as table_size,
+    pg_size_pretty(pg_indexes_size(schemaname||'.'||tablename)) as index_size
+FROM pg_tables
+WHERE schemaname = 'public'
+ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
+LIMIT 10;
+```
+
+### pg_stat_statements for Query Analysis
+
+```sql
+-- Enable extension
+CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
+
+-- Find slowest queries
+SELECT
+    round(total_exec_time::numeric, 2) as total_time_ms,
+    calls,
+    round(mean_exec_time::numeric, 2) as avg_time_ms,
+    round((100 * total_exec_time / sum(total_exec_time) over())::numeric, 2) as percentage,
+    query
+FROM pg_stat_statements
+ORDER BY total_exec_time DESC
+LIMIT 10;
+
+-- Find most frequent queries
+SELECT
+    calls,
+    round(total_exec_time::numeric, 2) as total_time_ms,
+    round(mean_exec_time::numeric, 2) as avg_time_ms,
+    query
+FROM pg_stat_statements
+ORDER BY calls DESC
+LIMIT 10;
+```
+
+### Alert Thresholds
+
+| Metric | Warning | Critical |
+|--------|---------|----------|
+| Connection usage | > 70% | > 90% |
+| Query time P95 | > 500ms | > 2s |
+| Replication lag | > 30s | > 5m |
+| Disk usage | > 70% | > 85% |
+| Cache hit ratio | < 95% | < 90% |
+
+---
+
+## Quick Reference: PostgreSQL Commands
+
+```sql
+-- Check table sizes
+SELECT pg_size_pretty(pg_total_relation_size('orders'));
+
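+-- Cache hit ratio (pairs with the alert thresholds above)
+SELECT round(sum(blks_hit) * 100.0 / nullif(sum(blks_hit) + sum(blks_read), 0), 2)
+       AS cache_hit_pct
+FROM pg_stat_database;
+
+-- Check index sizes
+SELECT 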
pg_size_pretty(pg_indexes_size('orders')); + +-- Kill a query +SELECT pg_cancel_backend(pid); -- Graceful +SELECT pg_terminate_backend(pid); -- Force + +-- Check locks +SELECT * FROM pg_locks WHERE granted = false; + +-- Vacuum analyze (update statistics) +VACUUM ANALYZE orders; + +-- Check autovacuum status +SELECT * FROM pg_stat_user_tables WHERE relname = 'orders'; +``` diff --git a/engineering-team/senior-backend/scripts/api_load_tester.py b/engineering-team/senior-backend/scripts/api_load_tester.py index 3cad305..afa35aa 100755 --- a/engineering-team/senior-backend/scripts/api_load_tester.py +++ b/engineering-team/senior-backend/scripts/api_load_tester.py @@ -1,81 +1,545 @@ #!/usr/bin/env python3 """ -Api Load Tester -Automated tool for senior backend tasks +API Load Tester + +Performs HTTP load testing with configurable concurrency, measuring latency +percentiles, throughput, and error rates. + +Usage: + python api_load_tester.py https://api.example.com/users --concurrency 50 --duration 30 + python api_load_tester.py https://api.example.com/orders --method POST --body '{"item": 1}' + python api_load_tester.py https://api.example.com/v1/users https://api.example.com/v2/users --compare """ import os import sys import json import argparse -from pathlib import Path -from typing import Dict, List, Optional +import time +import statistics +import threading +import queue +from concurrent.futures import ThreadPoolExecutor, as_completed +from dataclasses import dataclass, field, asdict +from typing import Dict, List, Optional, Tuple +from datetime import datetime +from urllib.request import Request, urlopen +from urllib.error import URLError, HTTPError +from urllib.parse import urlparse +import ssl + + +@dataclass +class RequestResult: + """Result of a single HTTP request.""" + success: bool + status_code: int + latency_ms: float + error: Optional[str] = None + response_size: int = 0 + + +@dataclass +class LoadTestResults: + """Aggregated load test results.""" + target_url: str + method: str + duration_seconds: float + concurrency: int + total_requests: int + successful_requests: int + failed_requests: int + requests_per_second: float + + # Latency metrics (milliseconds) + latency_min: float + latency_max: float + latency_avg: float + latency_p50: float + latency_p90: float + latency_p95: float + latency_p99: float + latency_stddev: float + + # Error breakdown + errors_by_type: Dict[str, int] = field(default_factory=dict) + + # Transfer metrics + total_bytes_received: int = 0 + throughput_mbps: float = 0.0 + + def success_rate(self) -> float: + """Calculate success rate percentage.""" + if self.total_requests == 0: + return 0.0 + return (self.successful_requests / self.total_requests) * 100 + + +def calculate_percentile(data: List[float], percentile: float) -> float: + """Calculate percentile from sorted data.""" + if not data: + return 0.0 + k = (len(data) - 1) * (percentile / 100) + f = int(k) + c = f + 1 if f + 1 < len(data) else f + return data[f] + (data[c] - data[f]) * (k - f) + + +class HTTPClient: + """HTTP client with configurable settings.""" + + def __init__(self, timeout: float = 30.0, headers: Optional[Dict[str, str]] = None, + verify_ssl: bool = True): + self.timeout = timeout + self.headers = headers or {} + self.verify_ssl = verify_ssl + + # Create SSL context + if not verify_ssl: + self.ssl_context = ssl.create_default_context() + self.ssl_context.check_hostname = False + self.ssl_context.verify_mode = ssl.CERT_NONE + else: + self.ssl_context = None + + def request(self, url: 
str, method: str = 'GET', body: Optional[bytes] = None) -> RequestResult: + """Execute HTTP request and return result.""" + start_time = time.perf_counter() -class ApiLoadTester: - """Main class for api load tester functionality""" - - def __init__(self, target_path: str, verbose: bool = False): - self.target_path = Path(target_path) - self.verbose = verbose - self.results = {} - - def run(self) -> Dict: - """Execute the main functionality""" - print(f"🚀 Running {self.__class__.__name__}...") - print(f"📁 Target: {self.target_path}") - try: - self.validate_target() - self.analyze() - self.generate_report() - - print("✅ Completed successfully!") - return self.results - + request = Request(url, data=body, method=method) + + # Add headers + for key, value in self.headers.items(): + request.add_header(key, value) + + # Add content-type for POST/PUT + if body and method in ['POST', 'PUT', 'PATCH']: + if 'Content-Type' not in self.headers: + request.add_header('Content-Type', 'application/json') + + # Execute request + with urlopen(request, timeout=self.timeout, context=self.ssl_context) as response: + response_data = response.read() + elapsed = (time.perf_counter() - start_time) * 1000 + + return RequestResult( + success=True, + status_code=response.status, + latency_ms=elapsed, + response_size=len(response_data), + ) + + except HTTPError as e: + elapsed = (time.perf_counter() - start_time) * 1000 + return RequestResult( + success=False, + status_code=e.code, + latency_ms=elapsed, + error=f"HTTP {e.code}: {e.reason}", + ) + + except URLError as e: + elapsed = (time.perf_counter() - start_time) * 1000 + return RequestResult( + success=False, + status_code=0, + latency_ms=elapsed, + error=f"Connection error: {str(e.reason)}", + ) + + except TimeoutError: + elapsed = (time.perf_counter() - start_time) * 1000 + return RequestResult( + success=False, + status_code=0, + latency_ms=elapsed, + error="Connection timeout", + ) + except Exception as e: - print(f"❌ Error: {e}") - sys.exit(1) - - def validate_target(self): - """Validate the target path exists and is accessible""" - if not self.target_path.exists(): - raise ValueError(f"Target path does not exist: {self.target_path}") - - if self.verbose: - print(f"✓ Target validated: {self.target_path}") - - def analyze(self): - """Perform the main analysis or operation""" - if self.verbose: - print("📊 Analyzing...") - - # Main logic here - self.results['status'] = 'success' - self.results['target'] = str(self.target_path) - self.results['findings'] = [] - - # Add analysis results - if self.verbose: - print(f"✓ Analysis complete: {len(self.results.get('findings', []))} findings") - - def generate_report(self): - """Generate and display the report""" - print("\n" + "="*50) - print("REPORT") - print("="*50) - print(f"Target: {self.results.get('target')}") - print(f"Status: {self.results.get('status')}") - print(f"Findings: {len(self.results.get('findings', []))}") - print("="*50 + "\n") + elapsed = (time.perf_counter() - start_time) * 1000 + return RequestResult( + success=False, + status_code=0, + latency_ms=elapsed, + error=str(e), + ) + + +class LoadTester: + """HTTP load testing engine.""" + + def __init__(self, url: str, method: str = 'GET', body: Optional[str] = None, + headers: Optional[Dict[str, str]] = None, concurrency: int = 10, + duration: float = 10.0, timeout: float = 30.0, verify_ssl: bool = True): + self.url = url + self.method = method.upper() + self.body = body.encode() if body else None + self.headers = headers or {} + self.concurrency = 
concurrency + self.duration = duration + self.timeout = timeout + self.verify_ssl = verify_ssl + + self.results: List[RequestResult] = [] + self.stop_event = threading.Event() + self.results_lock = threading.Lock() + + def run(self) -> LoadTestResults: + """Execute load test and return results.""" + print(f"Load Testing: {self.url}") + print(f"Method: {self.method}") + print(f"Concurrency: {self.concurrency}") + print(f"Duration: {self.duration}s") + print("-" * 50) + + self.results = [] + self.stop_event.clear() + + start_time = time.time() + + # Start worker threads + with ThreadPoolExecutor(max_workers=self.concurrency) as executor: + futures = [] + for _ in range(self.concurrency): + future = executor.submit(self._worker) + futures.append(future) + + # Wait for duration + time.sleep(self.duration) + self.stop_event.set() + + # Wait for workers to finish + for future in as_completed(futures): + try: + future.result() + except Exception as e: + print(f"Worker error: {e}") + + elapsed_time = time.time() - start_time + + return self._aggregate_results(elapsed_time) + + def _worker(self): + """Worker thread that continuously sends requests.""" + client = HTTPClient( + timeout=self.timeout, + headers=self.headers, + verify_ssl=self.verify_ssl, + ) + + while not self.stop_event.is_set(): + result = client.request(self.url, self.method, self.body) + + with self.results_lock: + self.results.append(result) + + def _aggregate_results(self, elapsed_time: float) -> LoadTestResults: + """Aggregate individual results into summary.""" + if not self.results: + return LoadTestResults( + target_url=self.url, + method=self.method, + duration_seconds=elapsed_time, + concurrency=self.concurrency, + total_requests=0, + successful_requests=0, + failed_requests=0, + requests_per_second=0, + latency_min=0, + latency_max=0, + latency_avg=0, + latency_p50=0, + latency_p90=0, + latency_p95=0, + latency_p99=0, + latency_stddev=0, + ) + + # Separate successful and failed + successful = [r for r in self.results if r.success] + failed = [r for r in self.results if not r.success] + + # Latency calculations (from successful requests) + latencies = sorted([r.latency_ms for r in successful]) if successful else [0] + + # Error breakdown + errors_by_type: Dict[str, int] = {} + for r in failed: + error_type = r.error or 'Unknown' + errors_by_type[error_type] = errors_by_type.get(error_type, 0) + 1 + + # Calculate throughput + total_bytes = sum(r.response_size for r in successful) + throughput_mbps = (total_bytes * 8) / (elapsed_time * 1_000_000) if elapsed_time > 0 else 0 + + return LoadTestResults( + target_url=self.url, + method=self.method, + duration_seconds=elapsed_time, + concurrency=self.concurrency, + total_requests=len(self.results), + successful_requests=len(successful), + failed_requests=len(failed), + requests_per_second=len(self.results) / elapsed_time if elapsed_time > 0 else 0, + latency_min=min(latencies), + latency_max=max(latencies), + latency_avg=statistics.mean(latencies) if latencies else 0, + latency_p50=calculate_percentile(latencies, 50), + latency_p90=calculate_percentile(latencies, 90), + latency_p95=calculate_percentile(latencies, 95), + latency_p99=calculate_percentile(latencies, 99), + latency_stddev=statistics.stdev(latencies) if len(latencies) > 1 else 0, + errors_by_type=errors_by_type, + total_bytes_received=total_bytes, + throughput_mbps=throughput_mbps, + ) + + +def print_results(results: LoadTestResults, verbose: bool = False): + """Print formatted load test results.""" + print("\n" + "=" 
* 60) + print("LOAD TEST RESULTS") + print("=" * 60) + + print(f"\nTarget: {results.target_url}") + print(f"Method: {results.method}") + print(f"Duration: {results.duration_seconds:.1f}s") + print(f"Concurrency: {results.concurrency}") + + print(f"\nTHROUGHPUT:") + print(f" Total requests: {results.total_requests:,}") + print(f" Requests/sec: {results.requests_per_second:.1f}") + print(f" Successful: {results.successful_requests:,} ({results.success_rate():.1f}%)") + print(f" Failed: {results.failed_requests:,}") + + print(f"\nLATENCY (ms):") + print(f" Min: {results.latency_min:.1f}") + print(f" Avg: {results.latency_avg:.1f}") + print(f" P50: {results.latency_p50:.1f}") + print(f" P90: {results.latency_p90:.1f}") + print(f" P95: {results.latency_p95:.1f}") + print(f" P99: {results.latency_p99:.1f}") + print(f" Max: {results.latency_max:.1f}") + print(f" StdDev: {results.latency_stddev:.1f}") + + if results.errors_by_type: + print(f"\nERRORS:") + for error_type, count in sorted(results.errors_by_type.items(), key=lambda x: -x[1]): + print(f" {error_type}: {count}") + + if verbose: + print(f"\nTRANSFER:") + print(f" Total bytes: {results.total_bytes_received:,}") + print(f" Throughput: {results.throughput_mbps:.2f} Mbps") + + # Recommendations + print(f"\nRECOMMENDATIONS:") + + if results.latency_p99 > 500: + print(f" Warning: P99 latency ({results.latency_p99:.0f}ms) exceeds 500ms") + print(f" Consider: Connection pooling, query optimization, caching") + + if results.latency_p95 > 200: + print(f" Warning: P95 latency ({results.latency_p95:.0f}ms) exceeds 200ms target") + + if results.success_rate() < 99.0: + print(f" Warning: Success rate ({results.success_rate():.1f}%) below 99%") + print(f" Check server capacity and error logs") + + if results.latency_stddev > results.latency_avg: + print(f" Warning: High latency variance (stddev > avg)") + print(f" Indicates inconsistent performance") + + if results.success_rate() >= 99.0 and results.latency_p95 <= 200: + print(f" Performance looks good for this load level") + + print("=" * 60) + + +def compare_results(results1: LoadTestResults, results2: LoadTestResults): + """Compare two load test results.""" + print("\n" + "=" * 60) + print("COMPARISON RESULTS") + print("=" * 60) + + print(f"\n{'Metric':<25} {'Endpoint 1':<15} {'Endpoint 2':<15} {'Diff':<15}") + print("-" * 70) + + # Helper to format diff + def diff_str(v1: float, v2: float, lower_better: bool = True) -> str: + if v1 == 0: + return "N/A" + diff_pct = ((v2 - v1) / v1) * 100 + symbol = "-" if (diff_pct < 0) == lower_better else "+" + color_good = diff_pct < 0 if lower_better else diff_pct > 0 + return f"{symbol}{abs(diff_pct):.1f}%" + + metrics = [ + ("Requests/sec", results1.requests_per_second, results2.requests_per_second, False), + ("Success rate (%)", results1.success_rate(), results2.success_rate(), False), + ("Latency Avg (ms)", results1.latency_avg, results2.latency_avg, True), + ("Latency P50 (ms)", results1.latency_p50, results2.latency_p50, True), + ("Latency P90 (ms)", results1.latency_p90, results2.latency_p90, True), + ("Latency P95 (ms)", results1.latency_p95, results2.latency_p95, True), + ("Latency P99 (ms)", results1.latency_p99, results2.latency_p99, True), + ] + + for name, v1, v2, lower_better in metrics: + print(f"{name:<25} {v1:<15.1f} {v2:<15.1f} {diff_str(v1, v2, lower_better):<15}") + + print("-" * 70) + + # Summary + print(f"\nEndpoint 1: {results1.target_url}") + print(f"Endpoint 2: {results2.target_url}") + + # Determine winner + score1, score2 = 0, 0 + + 
if results1.requests_per_second > results2.requests_per_second: + score1 += 1 + else: + score2 += 1 + + if results1.latency_p95 < results2.latency_p95: + score1 += 1 + else: + score2 += 1 + + if results1.success_rate() > results2.success_rate(): + score1 += 1 + else: + score2 += 1 + + print(f"\nOverall: {'Endpoint 1' if score1 > score2 else 'Endpoint 2'} performs better") + + print("=" * 60) + + +class APILoadTester: + """Main load tester class with CLI integration.""" + + def __init__(self, urls: List[str], method: str = 'GET', body: Optional[str] = None, + headers: Optional[Dict[str, str]] = None, concurrency: int = 10, + duration: float = 10.0, timeout: float = 30.0, compare: bool = False, + verbose: bool = False, verify_ssl: bool = True): + self.urls = urls + self.method = method + self.body = body + self.headers = headers or {} + self.concurrency = concurrency + self.duration = duration + self.timeout = timeout + self.compare = compare + self.verbose = verbose + self.verify_ssl = verify_ssl + + def run(self) -> Dict: + """Execute load test(s) and return results.""" + results = [] + + for url in self.urls: + tester = LoadTester( + url=url, + method=self.method, + body=self.body, + headers=self.headers, + concurrency=self.concurrency, + duration=self.duration, + timeout=self.timeout, + verify_ssl=self.verify_ssl, + ) + + result = tester.run() + results.append(result) + + if not self.compare: + print_results(result, self.verbose) + + if self.compare and len(results) >= 2: + compare_results(results[0], results[1]) + + return { + 'status': 'success', + 'results': [asdict(r) for r in results], + } + + +def parse_headers(header_args: Optional[List[str]]) -> Dict[str, str]: + """Parse header arguments into dictionary.""" + headers = {} + if header_args: + for h in header_args: + if ':' in h: + key, value = h.split(':', 1) + headers[key.strip()] = value.strip() + return headers + def main(): - """Main entry point""" + """CLI entry point.""" parser = argparse.ArgumentParser( - description="Api Load Tester" + description='HTTP load testing tool', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=''' +Examples: + %(prog)s https://api.example.com/users --concurrency 50 --duration 30 + %(prog)s https://api.example.com/orders --method POST --body '{"item": 1}' + %(prog)s https://api.example.com/v1 https://api.example.com/v2 --compare + %(prog)s https://api.example.com/health --header "Authorization: Bearer token" + ''' + ) + + parser.add_argument( + 'urls', + nargs='+', + help='URL(s) to test' ) parser.add_argument( - 'target', - help='Target path to analyze or process' + '--method', '-m', + default='GET', + choices=['GET', 'POST', 'PUT', 'PATCH', 'DELETE'], + help='HTTP method (default: GET)' + ) + parser.add_argument( + '--body', '-b', + help='Request body (JSON string)' + ) + parser.add_argument( + '--header', '-H', + action='append', + dest='headers', + help='HTTP header (format: "Name: Value")' + ) + parser.add_argument( + '--concurrency', '-c', + type=int, + default=10, + help='Number of concurrent requests (default: 10)' + ) + parser.add_argument( + '--duration', '-d', + type=float, + default=10.0, + help='Test duration in seconds (default: 10)' + ) + parser.add_argument( + '--timeout', '-t', + type=float, + default=30.0, + help='Request timeout in seconds (default: 30)' + ) + parser.add_argument( + '--compare', + action='store_true', + help='Compare two endpoints (requires two URLs)' + ) + parser.add_argument( + '--no-verify-ssl', + action='store_true', + help='Disable SSL 
certificate verification' ) parser.add_argument( '--verbose', '-v', @@ -89,26 +553,55 @@ def main(): ) parser.add_argument( '--output', '-o', - help='Output file path' + help='Output file path for results' ) - + args = parser.parse_args() - - tool = ApiLoadTester( - args.target, - verbose=args.verbose - ) - - results = tool.run() - - if args.json: - output = json.dumps(results, indent=2) - if args.output: + + # Validate + if args.compare and len(args.urls) < 2: + print("Error: --compare requires two URLs", file=sys.stderr) + sys.exit(1) + + # Parse headers + headers = parse_headers(args.headers) + + try: + tester = APILoadTester( + urls=args.urls, + method=args.method, + body=args.body, + headers=headers, + concurrency=args.concurrency, + duration=args.duration, + timeout=args.timeout, + compare=args.compare, + verbose=args.verbose, + verify_ssl=not args.no_verify_ssl, + ) + + results = tester.run() + + if args.json: + output = json.dumps(results, indent=2) + if args.output: + with open(args.output, 'w') as f: + f.write(output) + print(f"\nResults written to: {args.output}") + else: + print(output) + elif args.output: with open(args.output, 'w') as f: - f.write(output) - print(f"Results written to {args.output}") - else: - print(output) + json.dump(results, f, indent=2) + print(f"\nResults written to: {args.output}") + + except KeyboardInterrupt: + print("\nTest interrupted by user") + sys.exit(1) + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + if __name__ == '__main__': main() diff --git a/engineering-team/senior-backend/scripts/api_scaffolder.py b/engineering-team/senior-backend/scripts/api_scaffolder.py index cc548b0..2207861 100755 --- a/engineering-team/senior-backend/scripts/api_scaffolder.py +++ b/engineering-team/senior-backend/scripts/api_scaffolder.py @@ -1,81 +1,608 @@ #!/usr/bin/env python3 """ -Api Scaffolder -Automated tool for senior backend tasks +API Scaffolder + +Generates Express.js route handlers, validation middleware, and TypeScript types +from OpenAPI specifications (YAML/JSON). 
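+
+Parses specs with PyYAML when installed, falling back to a bundled
+minimal YAML parser so there is no hard third-party dependency.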
+
+Usage:
+    python api_scaffolder.py openapi.yaml --output src/routes/
+    python api_scaffolder.py openapi.json --framework fastify --output src/
+    python api_scaffolder.py spec.yaml --types-only --output src/types/
 """
 
 import os
 import sys
 import json
 import argparse
+import re
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Any
+from datetime import datetime
 
 
-class ApiScaffolder:
-    """Main class for api scaffolder functionality"""
-    
-    def __init__(self, target_path: str, verbose: bool = False):
-        self.target_path = Path(target_path)
-        self.verbose = verbose
-        self.results = {}
-    
-    def run(self) -> Dict:
-        """Execute the main functionality"""
-        print(f"🚀 Running {self.__class__.__name__}...")
-        print(f"📁 Target: {self.target_path}")
-        
+
+def load_yaml_as_json(content: str) -> Dict:
+    """Parse YAML content without PyYAML dependency (basic subset)."""
+    lines = content.split('\n')
+    result = {}
+    stack = [(result, -1)]
+    current_key = None
+    in_array = False
+    array_indent = -1
+
+    for line in lines:
+        stripped = line.lstrip()
+        if not stripped or stripped.startswith('#'):
+            continue
+
+        indent = len(line) - len(stripped)
+
+        # Pop stack until we find the right level
+        while len(stack) > 1 and stack[-1][1] >= indent:
+            stack.pop()
+
+        current_obj = stack[-1][0]
+
+        if stripped.startswith('- '):
+            # Array item
+            value = stripped[2:].strip()
+            if isinstance(current_obj, list):
+                if ':' in value:
+                    # Object in array
+                    key, val = value.split(':', 1)
+                    new_obj = {key.strip(): val.strip().strip('"').strip("'")}
+                    current_obj.append(new_obj)
+                    stack.append((new_obj, indent))
+                else:
+                    current_obj.append(value.strip('"').strip("'"))
+        elif ':' in stripped:
+            key, value = stripped.split(':', 1)
+            key = key.strip()
+            value = value.strip()
+
+            if value == '':
+                # Check next line for array or object
+                new_obj = {}
+                current_obj[key] = new_obj
+                stack.append((new_obj, indent))
+            elif value.startswith('[') and value.endswith(']'):
+                # Inline array
+                items = value[1:-1].split(',')
+                current_obj[key] = [i.strip().strip('"').strip("'") for i in items if i.strip()]
+            else:
+                # Simple value
+                value = value.strip('"').strip("'")
+                if value.lower() == 'true':
+                    value = True
+                elif value.lower() == 'false':
+                    value = False
+                elif value.isdigit():
+                    value = int(value)
+                current_obj[key] = value
+
+    return result
+
+
+def load_spec(spec_path: Path) -> Dict:
+    """Load OpenAPI spec from YAML or JSON file."""
+    content = spec_path.read_text()
+
+    if spec_path.suffix in ['.yaml', '.yml']:
         try:
-            self.validate_target()
-            self.analyze()
-            self.generate_report()
-            
-            print("✅ Completed successfully!")
-            return self.results
-            
-        except Exception as e:
-            print(f"❌ Error: {e}")
-            sys.exit(1)
-    
-    def validate_target(self):
-        """Validate the target path exists and is accessible"""
-        if not self.target_path.exists():
-            raise ValueError(f"Target path does not exist: {self.target_path}")
-        
+            import yaml
+            return yaml.safe_load(content)
+        except ImportError:
+            # Fallback to basic YAML parser
+            return load_yaml_as_json(content)
+    else:
+        return json.loads(content)
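+
+
+# Illustrative mapping (for orientation): openapi_type_to_ts below turns
+#   {"type": "array", "items": {"$ref": "#/components/schemas/User"}}
+# into the TypeScript type
+#   User[]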
+
+
+def openapi_type_to_ts(schema: Dict) -> str:
+    """Convert OpenAPI schema type to TypeScript type."""
+    if not schema:
+        return 'unknown'
+
+    if '$ref' in schema:
+        ref = schema['$ref']
+        return ref.split('/')[-1]
+
+    type_map = {
+        'string': 'string',
+        'integer': 'number',
+        'number': 'number',
+        'boolean': 'boolean',
+        'object': 'Record<string, unknown>',
+        'array': 'unknown[]',
+    }
+
+    schema_type = schema.get('type', 'unknown')
+
+    if schema_type == 'array':
+        items = schema.get('items', {})
+        item_type = openapi_type_to_ts(items)
+        return f'{item_type}[]'
+
+    if schema_type == 'object':
+        properties = schema.get('properties', {})
+        if properties:
+            props = []
+            required = schema.get('required', [])
+            for name, prop in properties.items():
+                ts_type = openapi_type_to_ts(prop)
+                optional = '?' if name not in required else ''
+                props.append(f'  {name}{optional}: {ts_type};')
+            return '{\n' + '\n'.join(props) + '\n}'
+        return 'Record<string, unknown>'
+
+    if 'enum' in schema:
+        values = ' | '.join(f"'{v}'" for v in schema['enum'])
+        return values
+
+    return type_map.get(schema_type, 'unknown')
+
+
+def generate_zod_schema(schema: Dict, name: str) -> str:
+    """Generate Zod validation schema from OpenAPI schema."""
+    if not schema:
+        return f'export const {name}Schema = z.unknown();'
+
+    def schema_to_zod(s: Dict) -> str:
+        if '$ref' in s:
+            ref_name = s['$ref'].split('/')[-1]
+            return f'{ref_name}Schema'
+
+        s_type = s.get('type', 'unknown')
+
+        if s_type == 'string':
+            zod = 'z.string()'
+            if 'minLength' in s:
+                zod += f'.min({s["minLength"]})'
+            if 'maxLength' in s:
+                zod += f'.max({s["maxLength"]})'
+            if 'pattern' in s:
+                zod += f'.regex(/{s["pattern"]}/)'
+            if s.get('format') == 'email':
+                zod += '.email()'
+            if s.get('format') == 'uuid':
+                zod += '.uuid()'
+            if 'enum' in s:
+                values = ', '.join(f"'{v}'" for v in s['enum'])
+                return f'z.enum([{values}])'
+            return zod
+
+        if s_type == 'integer':
+            zod = 'z.number().int()'
+            if 'minimum' in s:
+                zod += f'.min({s["minimum"]})'
+            if 'maximum' in s:
+                zod += f'.max({s["maximum"]})'
+            return zod
+
+        if s_type == 'number':
+            zod = 'z.number()'
+            if 'minimum' in s:
+                zod += f'.min({s["minimum"]})'
+            if 'maximum' in s:
+                zod += f'.max({s["maximum"]})'
+            return zod
+
+        if s_type == 'boolean':
+            return 'z.boolean()'
+
+        if s_type == 'array':
+            items_zod = schema_to_zod(s.get('items', {}))
+            return f'z.array({items_zod})'
+
+        if s_type == 'object':
+            properties = s.get('properties', {})
+            required = s.get('required', [])
+            if not properties:
+                return 'z.record(z.unknown())'
+
+            props = []
+            for prop_name, prop_schema in properties.items():
+                prop_zod = schema_to_zod(prop_schema)
+                if prop_name not in required:
+                    prop_zod += '.optional()'
+                props.append(f'  {prop_name}: {prop_zod},')
+
+            return 'z.object({\n' + '\n'.join(props) + '\n})'
+
+        return 'z.unknown()'
+
+    return f'export const {name}Schema = {schema_to_zod(schema)};'
+
+
+def to_camel_case(s: str) -> str:
+    """Convert string to camelCase."""
+    s = re.sub(r'[^a-zA-Z0-9]', ' ', s)
+    words = s.split()
+    if not words:
+        return s
+    return words[0].lower() + ''.join(w.capitalize() for w in words[1:])
+
+
+def to_pascal_case(s: str) -> str:
+    """Convert string to PascalCase."""
+    s = re.sub(r'[^a-zA-Z0-9]', ' ', s)
+    return ''.join(w.capitalize() for w in s.split())
+
+
+def extract_path_params(path: str) -> List[str]:
+    """Extract path parameters from OpenAPI path."""
+    return re.findall(r'\{(\w+)\}', path)
+
+
+def openapi_path_to_express(path: str) -> str:
+    """Convert OpenAPI path to Express path format."""
+    return re.sub(r'\{(\w+)\}', r':\1', path)
+
+
+class APIScaffolder:
+    """Generate Express.js routes from OpenAPI specification."""
+
+    SUPPORTED_FRAMEWORKS = ['express', 'fastify', 'koa']
+
+    def __init__(self, spec_path: str, output_dir: str, framework: str = 'express',
+                 types_only: bool = False, verbose: bool = False):
+        self.spec_path = Path(spec_path)
+        self.output_dir = Path(output_dir)
+        self.framework = framework
+        
self.types_only = types_only + self.verbose = verbose + self.spec: Dict = {} + self.generated_files: List[str] = [] + + def run(self) -> Dict: + """Execute scaffolding process.""" + print(f"API Scaffolder - {self.framework.capitalize()}") + print(f"Spec: {self.spec_path}") + print(f"Output: {self.output_dir}") + print("-" * 50) + + self.validate() + self.load_spec() + self.ensure_output_dir() + + if self.types_only: + self.generate_types() + else: + self.generate_types() + self.generate_validators() + self.generate_routes() + self.generate_index() + + return { + 'status': 'success', + 'spec': str(self.spec_path), + 'output': str(self.output_dir), + 'framework': self.framework, + 'generated_files': self.generated_files, + 'routes_count': len(self.get_operations()), + 'types_count': len(self.get_schemas()), + } + + def validate(self): + """Validate inputs.""" + if not self.spec_path.exists(): + raise FileNotFoundError(f"Spec file not found: {self.spec_path}") + + if self.framework not in self.SUPPORTED_FRAMEWORKS: + raise ValueError(f"Unsupported framework: {self.framework}") + + def load_spec(self): + """Load and parse OpenAPI specification.""" + self.spec = load_spec(self.spec_path) + if self.verbose: - print(f"✓ Target validated: {self.target_path}") - - def analyze(self): - """Perform the main analysis or operation""" - if self.verbose: - print("📊 Analyzing...") - - # Main logic here - self.results['status'] = 'success' - self.results['target'] = str(self.target_path) - self.results['findings'] = [] - - # Add analysis results - if self.verbose: - print(f"✓ Analysis complete: {len(self.results.get('findings', []))} findings") - - def generate_report(self): - """Generate and display the report""" - print("\n" + "="*50) - print("REPORT") - print("="*50) - print(f"Target: {self.results.get('target')}") - print(f"Status: {self.results.get('status')}") - print(f"Findings: {len(self.results.get('findings', []))}") - print("="*50 + "\n") + title = self.spec.get('info', {}).get('title', 'Unknown') + version = self.spec.get('info', {}).get('version', '0.0.0') + print(f"Loaded: {title} v{version}") + + def ensure_output_dir(self): + """Create output directory if needed.""" + self.output_dir.mkdir(parents=True, exist_ok=True) + + def get_schemas(self) -> Dict: + """Get component schemas from spec.""" + return self.spec.get('components', {}).get('schemas', {}) + + def get_operations(self) -> List[Dict]: + """Extract all operations from spec.""" + operations = [] + paths = self.spec.get('paths', {}) + + for path, methods in paths.items(): + if not isinstance(methods, dict): + continue + + for method, details in methods.items(): + if method.lower() not in ['get', 'post', 'put', 'patch', 'delete']: + continue + + if not isinstance(details, dict): + continue + + op_id = details.get('operationId', f'{method}_{path}'.replace('/', '_')) + + operations.append({ + 'path': path, + 'method': method.lower(), + 'operation_id': op_id, + 'summary': details.get('summary', ''), + 'parameters': details.get('parameters', []), + 'request_body': details.get('requestBody', {}), + 'responses': details.get('responses', {}), + 'tags': details.get('tags', ['default']), + }) + + return operations + + def generate_types(self): + """Generate TypeScript type definitions.""" + schemas = self.get_schemas() + + lines = [ + '// Auto-generated TypeScript types', + f'// Generated from: {self.spec_path.name}', + f'// Date: {datetime.now().isoformat()}', + '', + ] + + for name, schema in schemas.items(): + ts_type = 
openapi_type_to_ts(schema)
+            if ts_type.startswith('{'):
+                lines.append(f'export interface {name} {ts_type}')
+            else:
+                lines.append(f'export type {name} = {ts_type};')
+            lines.append('')
+
+        # Generate request/response types from operations
+        for op in self.get_operations():
+            op_name = to_pascal_case(op['operation_id'])
+
+            # Request body type
+            req_body = op.get('request_body', {})
+            if req_body:
+                content = req_body.get('content', {})
+                json_content = content.get('application/json', {})
+                schema = json_content.get('schema', {})
+                if schema and '$ref' not in schema:
+                    ts_type = openapi_type_to_ts(schema)
+                    lines.append(f'export interface {op_name}Request {ts_type}')
+                    lines.append('')
+
+            # Response type (200 response)
+            responses = op.get('responses', {})
+            success_resp = responses.get('200', responses.get('201', {}))
+            if success_resp:
+                content = success_resp.get('content', {})
+                json_content = content.get('application/json', {})
+                schema = json_content.get('schema', {})
+                if schema and '$ref' not in schema:
+                    ts_type = openapi_type_to_ts(schema)
+                    lines.append(f'export interface {op_name}Response {ts_type}')
+                    lines.append('')
+
+        types_file = self.output_dir / 'types.ts'
+        types_file.write_text('\n'.join(lines))
+        self.generated_files.append(str(types_file))
+        print(f"  Generated: {types_file}")
+
+    def generate_validators(self):
+        """Generate Zod validation schemas."""
+        schemas = self.get_schemas()
+
+        lines = [
+            "import { z } from 'zod';",
+            '',
+            '// Auto-generated Zod validation schemas',
+            f'// Generated from: {self.spec_path.name}',
+            '',
+        ]
+
+        for name, schema in schemas.items():
+            zod_schema = generate_zod_schema(schema, name)
+            lines.append(zod_schema)
+            lines.append(f'export type {name} = z.infer<typeof {name}Schema>;')
+            lines.append('')
+
+        # Generate validation middleware
+        lines.extend([
+            '// Validation middleware factory',
+            'import { Request, Response, NextFunction } from "express";',
+            '',
+            'export function validate(schema: z.ZodSchema) {',
+            '  return (req: Request, res: Response, next: NextFunction) => {',
+            '    const result = schema.safeParse(req.body);',
+            '    if (!result.success) {',
+            '      return res.status(400).json({',
+            '        error: {',
+            '          code: "VALIDATION_ERROR",',
+            '          message: "Request validation failed",',
+            '          details: result.error.errors.map(e => ({',
+            '            field: e.path.join("."),',
+            '            message: e.message,',
+            '          })),',
+            '        },',
+            '      });',
+            '    }',
+            '    req.body = result.data;',
+            '    next();',
+            '  };',
+            '}',
+        ])
+
+        validators_file = self.output_dir / 'validators.ts'
+        validators_file.write_text('\n'.join(lines))
+        self.generated_files.append(str(validators_file))
+        print(f"  Generated: {validators_file}")
+
+    def generate_routes(self):
+        """Generate route handlers."""
+        operations = self.get_operations()
+
+        # Group by tag
+        routes_by_tag: Dict[str, List[Dict]] = {}
+        for op in operations:
+            tag = op['tags'][0] if op['tags'] else 'default'
+            if tag not in routes_by_tag:
+                routes_by_tag[tag] = []
+            routes_by_tag[tag].append(op)
+
+        # Generate a route file per tag
+        for tag, ops in routes_by_tag.items():
+            self.generate_route_file(tag, ops)
+
+    def generate_route_file(self, tag: str, operations: List[Dict]):
+        """Generate a single route file."""
+        tag_name = to_camel_case(tag)
+
+        lines = [
+            "import { Router, Request, Response, NextFunction } from 'express';",
+            "import { validate } from './validators';",
+            "import * as schemas from './validators';",
+            '',
+            f'const router = Router();',
+            '',
+        ]
+
+        for op in operations:
+            method = op['method']
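+            # OpenAPI {param} placeholders become Express :param segments
+            path = 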
openapi_path_to_express(op['path']) + handler_name = to_camel_case(op['operation_id']) + summary = op.get('summary', '') + + # Check if has request body + req_body = op.get('request_body', {}) + has_body = bool(req_body.get('content', {}).get('application/json')) + + # Find schema reference + schema_ref = None + if has_body: + content = req_body.get('content', {}).get('application/json', {}) + schema = content.get('schema', {}) + if '$ref' in schema: + schema_ref = schema['$ref'].split('/')[-1] + + lines.append(f'/**') + if summary: + lines.append(f' * {summary}') + lines.append(f' * {method.upper()} {op["path"]}') + lines.append(f' */') + + middleware = '' + if schema_ref: + middleware = f'validate(schemas.{schema_ref}Schema), ' + + lines.append(f"router.{method}('{path}', {middleware}async (req: Request, res: Response, next: NextFunction) => {{") + lines.append(' try {') + + # Extract path params + path_params = extract_path_params(op['path']) + if path_params: + lines.append(f" const {{ {', '.join(path_params)} }} = req.params;") + + lines.append('') + lines.append(f' // TODO: Implement {handler_name}') + lines.append('') + + # Default response based on method + if method == 'post': + lines.append(" res.status(201).json({ message: 'Created' });") + elif method == 'delete': + lines.append(" res.status(204).send();") + else: + lines.append(" res.json({ message: 'OK' });") + + lines.append(' } catch (err) {') + lines.append(' next(err);') + lines.append(' }') + lines.append('});') + lines.append('') + + lines.append(f'export default router;') + + route_file = self.output_dir / f'{tag_name}.routes.ts' + route_file.write_text('\n'.join(lines)) + self.generated_files.append(str(route_file)) + print(f" Generated: {route_file} ({len(operations)} handlers)") + + def generate_index(self): + """Generate index file that combines all routes.""" + operations = self.get_operations() + + # Get unique tags + tags = set() + for op in operations: + tag = op['tags'][0] if op['tags'] else 'default' + tags.add(tag) + + lines = [ + "import { Router } from 'express';", + '', + ] + + for tag in sorted(tags): + tag_name = to_camel_case(tag) + lines.append(f"import {tag_name}Routes from './{tag_name}.routes';") + + lines.extend([ + '', + 'const router = Router();', + '', + ]) + + for tag in sorted(tags): + tag_name = to_camel_case(tag) + # Use tag as base path + base_path = '/' + tag.lower().replace(' ', '-') + lines.append(f"router.use('{base_path}', {tag_name}Routes);") + + lines.extend([ + '', + 'export default router;', + ]) + + index_file = self.output_dir / 'index.ts' + index_file.write_text('\n'.join(lines)) + self.generated_files.append(str(index_file)) + print(f" Generated: {index_file}") + def main(): - """Main entry point""" + """CLI entry point.""" parser = argparse.ArgumentParser( - description="Api Scaffolder" + description='Generate Express.js routes from OpenAPI specification', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=''' +Examples: + %(prog)s openapi.yaml --output src/routes/ + %(prog)s spec.json --framework fastify --output src/api/ + %(prog)s openapi.yaml --types-only --output src/types/ + ''' + ) + + parser.add_argument( + 'spec', + help='Path to OpenAPI specification (YAML or JSON)' ) parser.add_argument( - 'target', - help='Target path to analyze or process' + '--output', '-o', + default='./generated', + help='Output directory (default: ./generated)' + ) + parser.add_argument( + '--framework', '-f', + choices=['express', 'fastify', 'koa'], + default='express', + 
help='Target framework (default: express)' + ) + parser.add_argument( + '--types-only', + action='store_true', + help='Generate only TypeScript types' ) parser.add_argument( '--verbose', '-v', @@ -87,28 +614,32 @@ def main(): action='store_true', help='Output results as JSON' ) - parser.add_argument( - '--output', '-o', - help='Output file path' - ) - + args = parser.parse_args() - - tool = ApiScaffolder( - args.target, - verbose=args.verbose - ) - - results = tool.run() - - if args.json: - output = json.dumps(results, indent=2) - if args.output: - with open(args.output, 'w') as f: - f.write(output) - print(f"Results written to {args.output}") - else: - print(output) + + try: + scaffolder = APIScaffolder( + spec_path=args.spec, + output_dir=args.output, + framework=args.framework, + types_only=args.types_only, + verbose=args.verbose, + ) + + results = scaffolder.run() + + print("-" * 50) + print(f"Generated {results['routes_count']} route handlers") + print(f"Generated {results['types_count']} type definitions") + print(f"Output: {results['output']}") + + if args.json: + print(json.dumps(results, indent=2)) + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + if __name__ == '__main__': main() diff --git a/engineering-team/senior-backend/scripts/database_migration_tool.py b/engineering-team/senior-backend/scripts/database_migration_tool.py index 1fa3701..9bb0e27 100755 --- a/engineering-team/senior-backend/scripts/database_migration_tool.py +++ b/engineering-team/senior-backend/scripts/database_migration_tool.py @@ -1,81 +1,819 @@ #!/usr/bin/env python3 """ Database Migration Tool -Automated tool for senior backend tasks + +Analyzes SQL schema files, detects potential issues, suggests indexes, +and generates migration scripts with rollback support. 
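+
+Standard-library only; SQL parsing is regex-based and targets common
+PostgreSQL DDL (CREATE TABLE and CREATE INDEX statements).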
+ +Usage: + python database_migration_tool.py schema.sql --analyze + python database_migration_tool.py old.sql --compare new.sql --output migrations/ + python database_migration_tool.py schema.sql --suggest-indexes """ import os import sys import json import argparse +import re from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Set, Tuple +from datetime import datetime +from dataclasses import dataclass, field, asdict + + +@dataclass +class Column: + """Database column definition.""" + name: str + data_type: str + nullable: bool = True + default: Optional[str] = None + primary_key: bool = False + unique: bool = False + references: Optional[str] = None + + +@dataclass +class Index: + """Database index definition.""" + name: str + table: str + columns: List[str] + unique: bool = False + partial: Optional[str] = None + + +@dataclass +class Table: + """Database table definition.""" + name: str + columns: Dict[str, Column] = field(default_factory=dict) + indexes: List[Index] = field(default_factory=list) + primary_key: List[str] = field(default_factory=list) + foreign_keys: List[Dict] = field(default_factory=list) + + +@dataclass +class Issue: + """Schema issue or recommendation.""" + severity: str # 'error', 'warning', 'info' + category: str # 'index', 'naming', 'type', 'constraint' + table: str + message: str + suggestion: Optional[str] = None + + +class SQLParser: + """Parse SQL DDL statements.""" + + # Common patterns + CREATE_TABLE_PATTERN = re.compile( + r'CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?["`]?(\w+)["`]?\s*\((.*?)\)\s*;', + re.IGNORECASE | re.DOTALL + ) + + CREATE_INDEX_PATTERN = re.compile( + r'CREATE\s+(UNIQUE\s+)?INDEX\s+(?:IF\s+NOT\s+EXISTS\s+)?["`]?(\w+)["`]?\s+' + r'ON\s+["`]?(\w+)["`]?\s*\(([^)]+)\)(?:\s+WHERE\s+(.+?))?;', + re.IGNORECASE | re.DOTALL + ) + + COLUMN_PATTERN = re.compile( + r'["`]?(\w+)["`]?\s+' # Column name + r'(\w+(?:\s*\([^)]+\))?)' # Data type + r'([^,]*)', # Constraints + re.IGNORECASE + ) + + FK_PATTERN = re.compile( + r'FOREIGN\s+KEY\s*\(["`]?(\w+)["`]?\)\s+' + r'REFERENCES\s+["`]?(\w+)["`]?\s*\(["`]?(\w+)["`]?\)', + re.IGNORECASE + ) + + def parse(self, sql: str) -> Dict[str, Table]: + """Parse SQL and return table definitions.""" + tables = {} + + # Parse CREATE TABLE statements + for match in self.CREATE_TABLE_PATTERN.finditer(sql): + table_name = match.group(1) + body = match.group(2) + table = self._parse_table_body(table_name, body) + tables[table_name] = table + + # Parse CREATE INDEX statements + for match in self.CREATE_INDEX_PATTERN.finditer(sql): + unique = bool(match.group(1)) + index_name = match.group(2) + table_name = match.group(3) + columns = [c.strip().strip('"`') for c in match.group(4).split(',')] + where_clause = match.group(5) + + index = Index( + name=index_name, + table=table_name, + columns=columns, + unique=unique, + partial=where_clause.strip() if where_clause else None + ) + + if table_name in tables: + tables[table_name].indexes.append(index) + + return tables + + def _parse_table_body(self, table_name: str, body: str) -> Table: + """Parse table body (columns, constraints).""" + table = Table(name=table_name) + + # Split by comma, but respect parentheses + parts = self._split_by_comma(body) + + for part in parts: + part = part.strip() + + # Skip empty parts + if not part: + continue + + # Check for PRIMARY KEY constraint + if part.upper().startswith('PRIMARY KEY'): + pk_match = re.search(r'PRIMARY\s+KEY\s*\(([^)]+)\)', part, re.IGNORECASE) + if pk_match: + cols = 
[c.strip().strip('"`') for c in pk_match.group(1).split(',')] + table.primary_key = cols + + # Check for FOREIGN KEY constraint + elif part.upper().startswith('FOREIGN KEY'): + fk_match = self.FK_PATTERN.search(part) + if fk_match: + table.foreign_keys.append({ + 'column': fk_match.group(1), + 'ref_table': fk_match.group(2), + 'ref_column': fk_match.group(3), + }) + + # Check for CONSTRAINT + elif part.upper().startswith('CONSTRAINT'): + # Handle named constraints + if 'PRIMARY KEY' in part.upper(): + pk_match = re.search(r'PRIMARY\s+KEY\s*\(([^)]+)\)', part, re.IGNORECASE) + if pk_match: + cols = [c.strip().strip('"`') for c in pk_match.group(1).split(',')] + table.primary_key = cols + elif 'FOREIGN KEY' in part.upper(): + fk_match = self.FK_PATTERN.search(part) + if fk_match: + table.foreign_keys.append({ + 'column': fk_match.group(1), + 'ref_table': fk_match.group(2), + 'ref_column': fk_match.group(3), + }) + + # Regular column definition + else: + col_match = self.COLUMN_PATTERN.match(part) + if col_match: + col_name = col_match.group(1) + col_type = col_match.group(2) + # Keep the raw constraint text: uppercasing it would mangle + # case-sensitive DEFAULT values and referenced table names + constraints_raw = col_match.group(3) or '' + constraints = constraints_raw.upper() + + column = Column( + name=col_name, + data_type=col_type.upper(), + nullable='NOT NULL' not in constraints, + primary_key='PRIMARY KEY' in constraints, + unique='UNIQUE' in constraints, + ) + + # Extract default value (from the raw text, preserving case) + default_match = re.search(r'DEFAULT\s+(\S+)', constraints_raw, re.IGNORECASE) + if default_match: + column.default = default_match.group(1) + + # Extract references (from the raw text, preserving case) + ref_match = re.search( + r'REFERENCES\s+["`]?(\w+)["`]?\s*\(["`]?(\w+)["`]?\)', + constraints_raw, + re.IGNORECASE + ) + if ref_match: + column.references = f"{ref_match.group(1)}({ref_match.group(2)})" + table.foreign_keys.append({ + 'column': col_name, + 'ref_table': ref_match.group(1), + 'ref_column': ref_match.group(2), + }) + + if column.primary_key and col_name not in table.primary_key: + table.primary_key.append(col_name) + + table.columns[col_name] = column + + return table + + def _split_by_comma(self, s: str) -> List[str]: + """Split string by comma, respecting parentheses.""" + parts = [] + current = [] + depth = 0 + + for char in s: + if char == '(': + depth += 1 + elif char == ')': + depth -= 1 + elif char == ',' and depth == 0: + parts.append(''.join(current)) + current = [] + continue + current.append(char) + + if current: + parts.append(''.join(current)) + + return parts + + +class SchemaAnalyzer: + """Analyze database schema for issues and optimizations.""" + + # Columns that typically need indexes (foreign keys) + FK_COLUMN_PATTERNS = ['_id', 'Id', '_ID'] + + # Columns that typically need indexes for filtering + FILTER_COLUMN_PATTERNS = ['status', 'state', 'type', 'category', 'active', 'enabled', 'deleted'] + + # Columns that typically need indexes for sorting/ordering + SORT_COLUMN_PATTERNS = ['created_at', 'updated_at', 'date', 'timestamp', 'order', 'position'] + + def __init__(self, tables: Dict[str, Table]): + self.tables = tables + self.issues: List[Issue] = [] + + def analyze(self) -> List[Issue]: + """Run all analysis checks.""" + self.issues = [] + + for table_name, table in self.tables.items(): + self._check_naming_conventions(table) + self._check_primary_key(table) + self._check_foreign_key_indexes(table) + self._check_common_filter_columns(table) + self._check_timestamp_columns(table) + self._check_data_types(table) + + return self.issues + + def _check_naming_conventions(self, table: Table): + """Check table and column naming conventions.""" + # 
Table name should be lowercase + if table.name != table.name.lower(): + self.issues.append(Issue( + severity='warning', + category='naming', + table=table.name, + message=f"Table name '{table.name}' should be lowercase", + suggestion=f"Rename to '{table.name.lower()}'" + )) + + # Table name should be plural (basic check; an 's' suffix also covers 'es') + if not table.name.endswith('s'): + self.issues.append(Issue( + severity='info', + category='naming', + table=table.name, + message=f"Table name '{table.name}' should typically be plural", + )) + + for col_name, col in table.columns.items(): + # Column names should be lowercase with underscores + if col_name != col_name.lower(): + self.issues.append(Issue( + severity='warning', + category='naming', + table=table.name, + message=f"Column '{col_name}' should use snake_case", + suggestion=f"Rename to '{self._to_snake_case(col_name)}'" + )) + + def _check_primary_key(self, table: Table): + """Check for missing primary key.""" + if not table.primary_key: + self.issues.append(Issue( + severity='error', + category='constraint', + table=table.name, + message=f"Table '{table.name}' has no primary key", + suggestion="Add a primary key column (e.g., 'id SERIAL PRIMARY KEY')" + )) + + def _check_foreign_key_indexes(self, table: Table): + """Check that foreign key columns have indexes.""" + indexed_columns = set() + for index in table.indexes: + indexed_columns.update(index.columns) + + # Primary key columns are implicitly indexed + indexed_columns.update(table.primary_key) + + for fk in table.foreign_keys: + fk_col = fk['column'] + if fk_col not in indexed_columns: + self.issues.append(Issue( + severity='warning', + category='index', + table=table.name, + message=f"Foreign key column '{fk_col}' is not indexed", + suggestion=f"CREATE INDEX idx_{table.name}_{fk_col} ON {table.name}({fk_col});" + )) + + # Also check columns that look like foreign keys but aren't declared + for col_name in table.columns: + if any(col_name.endswith(pattern) for pattern in self.FK_COLUMN_PATTERNS): + if col_name not in indexed_columns: + # Check if it's actually a declared FK + is_declared_fk = any(fk['column'] == col_name for fk in table.foreign_keys) + if not is_declared_fk: + self.issues.append(Issue( + severity='info', + category='index', + table=table.name, + message=f"Column '{col_name}' looks like a foreign key but has no index", + suggestion=f"CREATE INDEX idx_{table.name}_{col_name} ON {table.name}({col_name});" + )) + + def _check_common_filter_columns(self, table: Table): + """Check for indexes on commonly filtered columns.""" + indexed_columns = set() + for index in table.indexes: + indexed_columns.update(index.columns) + indexed_columns.update(table.primary_key) + + for col_name in table.columns: + col_lower = col_name.lower() + if any(pattern in col_lower for pattern in self.FILTER_COLUMN_PATTERNS): + if col_name not in indexed_columns: + self.issues.append(Issue( + severity='info', + category='index', + table=table.name, + message=f"Column '{col_name}' is commonly used for filtering but has no index", + suggestion=f"CREATE INDEX idx_{table.name}_{col_name} ON {table.name}({col_name});" + )) + + def _check_timestamp_columns(self, table: Table): + """Check that standard created_at/updated_at timestamp columns exist.""" + has_created_at = 'created_at' in table.columns + has_updated_at = 'updated_at' in table.columns + + if not has_created_at: + self.issues.append(Issue( + severity='info', + category='convention', + table=table.name, + message=f"Table '{table.name}' has 
no 'created_at' column", + suggestion="Consider adding: created_at TIMESTAMP DEFAULT NOW()" + )) + + if not has_updated_at: + self.issues.append(Issue( + severity='info', + category='convention', + table=table.name, + message=f"Table '{table.name}' has no 'updated_at' column", + suggestion="Consider adding: updated_at TIMESTAMP DEFAULT NOW()" + )) + + def _check_data_types(self, table: Table): + """Check for potential data type issues.""" + for col_name, col in table.columns.items(): + dtype = col.data_type.upper() + + # Check for VARCHAR without length + if 'VARCHAR' in dtype and '(' not in dtype: + self.issues.append(Issue( + severity='warning', + category='type', + table=table.name, + message=f"Column '{col_name}' uses VARCHAR without length", + suggestion="Specify a maximum length, e.g., VARCHAR(255)" + )) + + # Check for FLOAT/DOUBLE for monetary values + if 'FLOAT' in dtype or 'DOUBLE' in dtype: + if 'price' in col_name.lower() or 'amount' in col_name.lower() or 'total' in col_name.lower(): + self.issues.append(Issue( + severity='warning', + category='type', + table=table.name, + message=f"Column '{col_name}' uses floating point for monetary value", + suggestion="Use DECIMAL or NUMERIC for monetary values" + )) + + # Check for TEXT columns that might benefit from length limits + if dtype == 'TEXT': + if 'email' in col_name.lower() or 'url' in col_name.lower(): + self.issues.append(Issue( + severity='info', + category='type', + table=table.name, + message=f"Column '{col_name}' uses TEXT but might benefit from VARCHAR", + suggestion=f"Consider VARCHAR(255) for {col_name}" + )) + + def _to_snake_case(self, name: str) -> str: + """Convert name to snake_case.""" + s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + + +class MigrationGenerator: + """Generate migration scripts from schema differences.""" + + def __init__(self, old_tables: Dict[str, Table], new_tables: Dict[str, Table]): + self.old_tables = old_tables + self.new_tables = new_tables + + def generate(self) -> Tuple[str, str]: + """Generate UP and DOWN migration scripts.""" + up_statements = [] + down_statements = [] + + # Find new tables + for table_name, table in self.new_tables.items(): + if table_name not in self.old_tables: + up_statements.append(self._generate_create_table(table)) + down_statements.append(f"DROP TABLE IF EXISTS {table_name};") + + # Find removed tables + for table_name, table in self.old_tables.items(): + if table_name not in self.new_tables: + up_statements.append(f"DROP TABLE IF EXISTS {table_name};") + down_statements.append(self._generate_create_table(table)) + + # Find modified tables + for table_name in set(self.old_tables.keys()) & set(self.new_tables.keys()): + old_table = self.old_tables[table_name] + new_table = self.new_tables[table_name] + up, down = self._compare_tables(old_table, new_table) + up_statements.extend(up) + down_statements.extend(down) + + up_sql = '\n\n'.join(up_statements) if up_statements else '-- No changes' + down_sql = '\n\n'.join(down_statements) if down_statements else '-- No changes' + + return up_sql, down_sql + + def _generate_create_table(self, table: Table) -> str: + """Generate CREATE TABLE statement.""" + lines = [f"CREATE TABLE {table.name} ("] + + col_defs = [] + for col_name, col in table.columns.items(): + col_def = f" {col_name} {col.data_type}" + if not col.nullable: + col_def += " NOT NULL" + if col.default: + col_def += f" DEFAULT {col.default}" + if col.primary_key and len(table.primary_key) == 1: + 
col_def += " PRIMARY KEY" + if col.unique: + col_def += " UNIQUE" + col_defs.append(col_def) + + # Add composite primary key + if len(table.primary_key) > 1: + pk_cols = ', '.join(table.primary_key) + col_defs.append(f" PRIMARY KEY ({pk_cols})") + + # Add foreign keys + for fk in table.foreign_keys: + col_defs.append( + f" FOREIGN KEY ({fk['column']}) REFERENCES {fk['ref_table']}({fk['ref_column']})" + ) + + lines.append(',\n'.join(col_defs)) + lines.append(");") + + return '\n'.join(lines) + + def _compare_tables(self, old: Table, new: Table) -> Tuple[List[str], List[str]]: + """Compare two tables and generate ALTER statements.""" + up = [] + down = [] + + # New columns + for col_name, col in new.columns.items(): + if col_name not in old.columns: + up.append(f"ALTER TABLE {new.name} ADD COLUMN {col_name} {col.data_type}" + + (" NOT NULL" if not col.nullable else "") + + (f" DEFAULT {col.default}" if col.default else "") + ";") + down.append(f"ALTER TABLE {new.name} DROP COLUMN IF EXISTS {col_name};") + + # Removed columns + for col_name, col in old.columns.items(): + if col_name not in new.columns: + up.append(f"ALTER TABLE {old.name} DROP COLUMN IF EXISTS {col_name};") + down.append(f"ALTER TABLE {old.name} ADD COLUMN {col_name} {col.data_type}" + + (" NOT NULL" if not col.nullable else "") + + (f" DEFAULT {col.default}" if col.default else "") + ";") + + # Modified columns (type changes) + for col_name in set(old.columns.keys()) & set(new.columns.keys()): + old_col = old.columns[col_name] + new_col = new.columns[col_name] + + if old_col.data_type != new_col.data_type: + up.append(f"ALTER TABLE {new.name} ALTER COLUMN {col_name} TYPE {new_col.data_type};") + down.append(f"ALTER TABLE {old.name} ALTER COLUMN {col_name} TYPE {old_col.data_type};") + + # New indexes. Use plain CREATE INDEX here: the generated migration is + # wrapped in BEGIN/COMMIT, and PostgreSQL rejects CREATE INDEX CONCURRENTLY + # inside a transaction block + old_index_names = {idx.name for idx in old.indexes} + for idx in new.indexes: + if idx.name not in old_index_names: + unique = "UNIQUE " if idx.unique else "" + cols = ', '.join(idx.columns) + where = f" WHERE {idx.partial}" if idx.partial else "" + up.append(f"CREATE {unique}INDEX {idx.name} ON {idx.table}({cols}){where};") + down.append(f"DROP INDEX IF EXISTS {idx.name};") + + # Removed indexes + new_index_names = {idx.name for idx in new.indexes} + for idx in old.indexes: + if idx.name not in new_index_names: + unique = "UNIQUE " if idx.unique else "" + cols = ', '.join(idx.columns) + where = f" WHERE {idx.partial}" if idx.partial else "" + up.append(f"DROP INDEX IF EXISTS {idx.name};") + down.append(f"CREATE {unique}INDEX {idx.name} ON {idx.table}({cols}){where};") + + return up, down + class DatabaseMigrationTool: - """Main class for database migration tool functionality""" - - def __init__(self, target_path: str, verbose: bool = False): - self.target_path = Path(target_path) + """Main tool for database migration analysis.""" + + def __init__(self, schema_path: str, compare_path: Optional[str] = None, + output_dir: Optional[str] = None, verbose: bool = False): + self.schema_path = Path(schema_path) + self.compare_path = Path(compare_path) if compare_path else None + self.output_dir = Path(output_dir) if output_dir else None self.verbose = verbose - self.results = {} - - def run(self) -> Dict: - """Execute the main functionality""" - print(f"🚀 Running {self.__class__.__name__}...") - print(f"📁 Target: {self.target_path}") - - try: - self.validate_target() - self.analyze() - self.generate_report() - - print("✅ Completed successfully!") - return self.results - - except Exception as e: - print(f"❌ Error: {e}") - 
sys.exit(1) - - def validate_target(self): - """Validate the target path exists and is accessible""" - if not self.target_path.exists(): - raise ValueError(f"Target path does not exist: {self.target_path}") - + self.parser = SQLParser() + + def run(self, mode: str = 'analyze') -> Dict: + """Execute the tool in specified mode.""" + print(f"Database Migration Tool") + print(f"Schema: {self.schema_path}") + print("-" * 50) + + if not self.schema_path.exists(): + raise FileNotFoundError(f"Schema file not found: {self.schema_path}") + + schema_sql = self.schema_path.read_text() + tables = self.parser.parse(schema_sql) + if self.verbose: - print(f"✓ Target validated: {self.target_path}") - - def analyze(self): - """Perform the main analysis or operation""" - if self.verbose: - print("📊 Analyzing...") - - # Main logic here - self.results['status'] = 'success' - self.results['target'] = str(self.target_path) - self.results['findings'] = [] - - # Add analysis results - if self.verbose: - print(f"✓ Analysis complete: {len(self.results.get('findings', []))} findings") - - def generate_report(self): - """Generate and display the report""" - print("\n" + "="*50) - print("REPORT") - print("="*50) - print(f"Target: {self.results.get('target')}") - print(f"Status: {self.results.get('status')}") - print(f"Findings: {len(self.results.get('findings', []))}") - print("="*50 + "\n") + print(f"Parsed {len(tables)} tables") + + if mode == 'analyze': + return self._analyze(tables) + elif mode == 'compare': + return self._compare(tables) + elif mode == 'suggest-indexes': + return self._suggest_indexes(tables) + else: + raise ValueError(f"Unknown mode: {mode}") + + def _analyze(self, tables: Dict[str, Table]) -> Dict: + """Analyze schema for issues.""" + analyzer = SchemaAnalyzer(tables) + issues = analyzer.analyze() + + # Group by severity + errors = [i for i in issues if i.severity == 'error'] + warnings = [i for i in issues if i.severity == 'warning'] + infos = [i for i in issues if i.severity == 'info'] + + print(f"\nAnalysis Results:") + print(f" Tables: {len(tables)}") + print(f" Errors: {len(errors)}") + print(f" Warnings: {len(warnings)}") + print(f" Suggestions: {len(infos)}") + + if errors: + print(f"\nERRORS:") + for issue in errors: + print(f" [{issue.table}] {issue.message}") + if issue.suggestion: + print(f" Suggestion: {issue.suggestion}") + + if warnings: + print(f"\nWARNINGS:") + for issue in warnings: + print(f" [{issue.table}] {issue.message}") + if issue.suggestion: + print(f" Suggestion: {issue.suggestion}") + + if self.verbose and infos: + print(f"\nSUGGESTIONS:") + for issue in infos: + print(f" [{issue.table}] {issue.message}") + if issue.suggestion: + print(f" {issue.suggestion}") + + return { + 'status': 'success', + 'tables_count': len(tables), + 'issues': { + 'errors': len(errors), + 'warnings': len(warnings), + 'suggestions': len(infos), + }, + 'issues_detail': [asdict(i) for i in issues], + } + + def _compare(self, old_tables: Dict[str, Table]) -> Dict: + """Compare two schemas and generate migration.""" + if not self.compare_path: + raise ValueError("Compare path required for compare mode") + + if not self.compare_path.exists(): + raise FileNotFoundError(f"Compare file not found: {self.compare_path}") + + new_sql = self.compare_path.read_text() + new_tables = self.parser.parse(new_sql) + + generator = MigrationGenerator(old_tables, new_tables) + up_sql, down_sql = generator.generate() + + print(f"\nComparing schemas:") + print(f" Old: {self.schema_path}") + print(f" New: 
{self.compare_path}") + + # Calculate changes + added_tables = set(new_tables.keys()) - set(old_tables.keys()) + removed_tables = set(old_tables.keys()) - set(new_tables.keys()) + + print(f"\nChanges detected:") + print(f" Added tables: {len(added_tables)}") + print(f" Removed tables: {len(removed_tables)}") + + if self.output_dir: + self.output_dir.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + + up_file = self.output_dir / f"{timestamp}_migration.sql" + down_file = self.output_dir / f"{timestamp}_migration_rollback.sql" + + up_file.write_text(f"-- Migration: {self.schema_path} -> {self.compare_path}\n" + f"-- Generated: {datetime.now().isoformat()}\n\n" + f"BEGIN;\n\n{up_sql}\n\nCOMMIT;\n") + + down_file.write_text(f"-- Rollback for migration {timestamp}\n" + f"-- Generated: {datetime.now().isoformat()}\n\n" + f"BEGIN;\n\n{down_sql}\n\nCOMMIT;\n") + + print(f"\nGenerated files:") + print(f" Migration: {up_file}") + print(f" Rollback: {down_file}") + else: + print(f"\n--- UP MIGRATION ---") + print(up_sql) + print(f"\n--- DOWN MIGRATION ---") + print(down_sql) + + return { + 'status': 'success', + 'added_tables': list(added_tables), + 'removed_tables': list(removed_tables), + 'up_sql': up_sql, + 'down_sql': down_sql, + } + + def _suggest_indexes(self, tables: Dict[str, Table]) -> Dict: + """Generate index suggestions.""" + suggestions = [] + + for table_name, table in tables.items(): + # Get existing indexed columns + indexed = set() + for idx in table.indexes: + indexed.update(idx.columns) + indexed.update(table.primary_key) + + # Suggest indexes for foreign keys + for fk in table.foreign_keys: + if fk['column'] not in indexed: + suggestions.append({ + 'table': table_name, + 'column': fk['column'], + 'reason': 'Foreign key', + 'sql': f"CREATE INDEX idx_{table_name}_{fk['column']} ON {table_name}({fk['column']});" + }) + + # Suggest indexes for common patterns, skipping columns already + # indexed or already covered by the foreign-key pass above (so a + # declared FK ending in '_id' is not suggested twice) + covered = indexed | {fk['column'] for fk in table.foreign_keys} + for col_name in table.columns: + if col_name in covered: + continue + + col_lower = col_name.lower() + + # Foreign key pattern + if col_name.endswith('_id'): + suggestions.append({ + 'table': table_name, + 'column': col_name, + 'reason': 'Likely foreign key', + 'sql': f"CREATE INDEX idx_{table_name}_{col_name} ON {table_name}({col_name});" + }) + + # Status/type columns + elif col_lower in ['status', 'state', 'type', 'category']: + suggestions.append({ + 'table': table_name, + 'column': col_name, + 'reason': 'Common filter column', + 'sql': f"CREATE INDEX idx_{table_name}_{col_name} ON {table_name}({col_name});" + }) + + # Timestamp columns + elif col_lower in ['created_at', 'updated_at']: + suggestions.append({ + 'table': table_name, + 'column': col_name, + 'reason': 'Common sort column', + 'sql': f"CREATE INDEX idx_{table_name}_{col_name} ON {table_name}({col_name} DESC);" + }) + + print(f"\nIndex Suggestions ({len(suggestions)} found):") + for s in suggestions: + print(f"\n [{s['table']}.{s['column']}] {s['reason']}") + print(f" {s['sql']}") + + if self.output_dir: + self.output_dir.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + output_file = self.output_dir / f"{timestamp}_add_indexes.sql" + + lines = [ + f"-- Suggested indexes", + f"-- Generated: {datetime.now().isoformat()}", + "", + ] + for s in suggestions: + lines.append(f"-- {s['table']}.{s['column']}: {s['reason']}") + lines.append(s['sql']) + lines.append("") + + output_file.write_text('\n'.join(lines)) + print(f"\nWritten to: {output_file}") + + return 
{ + 'status': 'success', + 'suggestions_count': len(suggestions), + 'suggestions': suggestions, + } + def main(): - """Main entry point""" + """CLI entry point.""" parser = argparse.ArgumentParser( - description="Database Migration Tool" + description='Analyze SQL schemas and generate migrations', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=''' +Examples: + %(prog)s schema.sql --analyze + %(prog)s old.sql --compare new.sql --output migrations/ + %(prog)s schema.sql --suggest-indexes --output migrations/ + ''' + ) + + parser.add_argument( + 'schema', + help='Path to SQL schema file' ) parser.add_argument( - 'target', - help='Target path to analyze or process' + '--analyze', + action='store_true', + help='Analyze schema for issues and optimizations' + ) + parser.add_argument( + '--compare', + metavar='FILE', + help='Compare with another schema file and generate migration' + ) + parser.add_argument( + '--suggest-indexes', + action='store_true', + help='Generate index suggestions' + ) + parser.add_argument( + '--output', '-o', + help='Output directory for generated files' ) parser.add_argument( '--verbose', '-v', @@ -87,28 +825,34 @@ def main(): action='store_true', help='Output results as JSON' ) - parser.add_argument( - '--output', '-o', - help='Output file path' - ) - + args = parser.parse_args() - - tool = DatabaseMigrationTool( - args.target, - verbose=args.verbose - ) - - results = tool.run() - - if args.json: - output = json.dumps(results, indent=2) - if args.output: - with open(args.output, 'w') as f: - f.write(output) - print(f"Results written to {args.output}") - else: - print(output) + + # Determine mode + if args.compare: + mode = 'compare' + elif args.suggest_indexes: + mode = 'suggest-indexes' + else: + mode = 'analyze' + + try: + tool = DatabaseMigrationTool( + schema_path=args.schema, + compare_path=args.compare, + output_dir=args.output, + verbose=args.verbose, + ) + + results = tool.run(mode=mode) + + if args.json: + print(json.dumps(results, indent=2)) + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + if __name__ == '__main__': main()
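
A minimal sketch (not part of the diff) of driving the new classes directly rather than through the CLI; it assumes `database_migration_tool.py` is importable from the working directory, and the inline schemas are hypothetical:

```python
# Sketch only: compose SQLParser, SchemaAnalyzer, and MigrationGenerator
# directly. Assumes database_migration_tool.py is on the import path (e.g.
# run from engineering-team/senior-backend/scripts/).
from database_migration_tool import MigrationGenerator, SchemaAnalyzer, SQLParser

OLD_SQL = """
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    email TEXT NOT NULL
);
"""

NEW_SQL = """
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    email TEXT NOT NULL,
    status VARCHAR(20) DEFAULT 'active'
);
CREATE INDEX idx_users_status ON users (status);
"""

parser = SQLParser()
old_tables = parser.parse(OLD_SQL)   # {'users': Table(...)}
new_tables = parser.parse(NEW_SQL)

# Static checks return Issue dataclasses; here the TEXT email column and the
# missing created_at/updated_at columns are reported as suggestions.
for issue in SchemaAnalyzer(new_tables).analyze():
    print(f"[{issue.severity}] {issue.table}: {issue.message}")

# Schema diff: one ADD COLUMN and one CREATE INDEX in the up script, with the
# matching DROP statements in the rollback script.
up_sql, down_sql = MigrationGenerator(old_tables, new_tables).generate()
print(up_sql)
print(down_sql)
```

Because `run()` parses the schema once and then dispatches on mode, the CLI's `--analyze`, `--compare`, and `--suggest-indexes` paths all consume the same parsed `Table` dicts that this sketch uses.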