diff --git a/.autoresearch/seo/skill-descriptions/program.md b/.autoresearch/seo/skill-descriptions/program.md new file mode 100644 index 0000000..eabf1ed --- /dev/null +++ b/.autoresearch/seo/skill-descriptions/program.md @@ -0,0 +1,36 @@ +# Experiment: Skill Description SEO Optimization + +## Objective +Optimize the `description` field in each skill's SKILL.md frontmatter to maximize: +1. **SEO discoverability** — include high-volume keywords: "agent skill", "plugin", "Claude Code", "Codex", "Gemini CLI", "Cursor" +2. **Trigger accuracy** — the description must accurately describe when the skill activates +3. **Clarity** — one read should convey what the skill does and who it's for +4. **Cross-platform appeal** — mention multi-tool compatibility where natural + +## Constraints +- Description must be under 200 characters (plugin.json limit) +- Must NOT be spammy or keyword-stuffed — natural language only +- Must preserve the skill's actual purpose and capabilities +- Do NOT modify anything outside the `description:` field in frontmatter +- One skill per experiment iteration + +## Strategy +- Start with the top 10 most-viewed skills (from GitHub traffic) +- For each: read current description → rewrite with SEO keywords → evaluate +- Terms to naturally incorporate: "agent skill", "plugin", "coding agent", tool names +- Avoid: generic filler, "AI-powered", "comprehensive solution" + +## Target Skills (in priority order) +1. marketing-skill/SKILL.md (all sub-skills) +2. engineering-team/SKILL.md (all sub-skills) +3. engineering/SKILL.md (all sub-skills) +4. c-level-advisor/SKILL.md (all sub-skills) +5. finance/SKILL.md (all sub-skills) +6. business-growth/SKILL.md (all sub-skills) +7. product-team/SKILL.md (all sub-skills) +8. project-management/SKILL.md (all sub-skills) + +## Evaluation +Use llm_judge_content evaluator customized for SEO scoring. 
+Metric: seo_quality_score (0-100) +Direction: higher is better diff --git a/.codex/skills-index.json b/.codex/skills-index.json index ca1ebfe..673a388 100644 --- a/.codex/skills-index.json +++ b/.codex/skills-index.json @@ -3,7 +3,7 @@ "name": "claude-code-skills", "description": "Production-ready skill packages for AI agents - Marketing, Engineering, Product, C-Level, PM, and RA/QM", "repository": "https://github.com/alirezarezvani/claude-skills", - "total_skills": 158, + "total_skills": 160, "skills": [ { "name": "contract-and-proposal-writer", @@ -413,6 +413,12 @@ "category": "engineering-advanced", "description": "Dependency Auditor" }, + { + "name": "docker-development", + "source": "../../engineering/docker-development", + "category": "engineering-advanced", + "description": "Docker and container development agent skill and plugin for Dockerfile optimization, docker-compose orchestration, multi-stage builds, and container security hardening. Use when: user wants to optimize a Dockerfile, create or improve docker-compose configurations, implement multi-stage builds, audit container security, reduce image size, or follow container best practices. Covers build performance, layer caching, secret management, and production-ready container patterns." + }, { "name": "env-secrets-manager", "source": "../../engineering/env-secrets-manager", @@ -821,6 +827,12 @@ "category": "product", "description": "Strategic product leadership toolkit for Head of Product covering OKR cascade generation, quarterly planning, competitive landscape analysis, product vision documents, and team scaling proposals. Use when creating quarterly OKR documents, defining product goals or KPIs, building product roadmaps, running competitive analysis, drafting team structure or hiring plans, aligning product strategy across engineering and design, or generating cascaded goal hierarchies from company to team level." 
}, + { + "name": "research-summarizer", + "source": "../../product-team/research-summarizer", + "category": "product", + "description": "Structured research summarization agent skill for non-dev users. Handles academic papers, web articles, reports, and documentation. Extracts key findings, generates comparative analyses, and produces properly formatted citations. Use when: user wants to summarize a research paper, compare multiple sources, extract citations from documents, or create structured research briefs. Plugin for Claude Code, Codex, Gemini CLI, and OpenClaw." + }, { "name": "roadmap-communicator", "source": "../../product-team/roadmap-communicator", @@ -971,7 +983,7 @@ "description": "Software engineering and technical skills" }, "engineering-advanced": { - "count": 26, + "count": 27, "source": "../../engineering", "description": "Advanced engineering skills - agents, RAG, MCP, CI/CD, databases, observability" }, @@ -986,7 +998,7 @@ "description": "Marketing, content, and demand generation skills" }, "product": { - "count": 12, + "count": 13, "source": "../../product-team", "description": "Product management and design skills" }, diff --git a/.codex/skills/docker-development b/.codex/skills/docker-development new file mode 120000 index 0000000..a10a770 --- /dev/null +++ b/.codex/skills/docker-development @@ -0,0 +1 @@ +../../engineering/docker-development \ No newline at end of file diff --git a/.codex/skills/research-summarizer b/.codex/skills/research-summarizer new file mode 120000 index 0000000..8ba9961 --- /dev/null +++ b/.codex/skills/research-summarizer @@ -0,0 +1 @@ +../../product-team/research-summarizer \ No newline at end of file diff --git a/README.md b/README.md index 97a0224..32dc1ff 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# Agent Skills & Plugins for Every Coding Agent +# Claude Code Skills & Plugins — Agent Skills for Every Coding Tool -**192 production-ready agent skills, 17 agents, 3 personas, and an orchestration protocol 
for 11 AI coding tools.** +**192 production-ready Claude Code skills, plugins, and agent skills for 11 AI coding tools.** -The most comprehensive open-source library of agent skills and plugins for Claude Code, OpenAI Codex, Gemini CLI, Cursor, and 7 more coding agents. Reusable expertise packages that give AI coding agents domain knowledge they don't have out of the box — from architecture and security to marketing, compliance, and C-level advisory. +The most comprehensive open-source library of Claude Code skills and agent plugins — also works with OpenAI Codex, Gemini CLI, Cursor, and 7 more coding agents. Reusable expertise packages covering engineering, DevOps, marketing, compliance, C-level advisory, and more. **Works with:** Claude Code · OpenAI Codex · Gemini CLI · OpenClaw · Cursor · Aider · Windsurf · Kilo Code · OpenCode · Augment · Antigravity @@ -14,13 +14,13 @@ The most comprehensive open-source library of agent skills and plugins for Claud [![Stars](https://img.shields.io/github/stars/alirezarezvani/claude-skills?style=for-the-badge)](https://github.com/alirezarezvani/claude-skills/stargazers) [![SkillCheck Validated](https://img.shields.io/badge/SkillCheck-Validated-4c1?style=for-the-badge)](https://getskillcheck.com) -> **5,200+ GitHub stars** — the most comprehensive open-source agent skills & plugins library for AI coding agents. +> **5,200+ GitHub stars** — the most comprehensive open-source Claude Code skills & agent plugins library. --- -## What Are Agent Skills & Plugins? +## What Are Claude Code Skills & Agent Plugins? -Agent skills (also called coding agent plugins) are modular instruction packages that give AI coding agents domain expertise they don't have out of the box. Each skill includes: +Claude Code skills (also called agent skills or coding agent plugins) are modular instruction packages that give AI coding agents domain expertise they don't have out of the box. 
Each skill includes: - **SKILL.md** — structured instructions, workflows, and decision frameworks - **Python tools** — 254 CLI scripts (all stdlib-only, zero pip installs) diff --git a/docs/guides/agent-skills-for-codex.md b/docs/guides/agent-skills-for-codex.md new file mode 100644 index 0000000..7fd199d --- /dev/null +++ b/docs/guides/agent-skills-for-codex.md @@ -0,0 +1,123 @@ +--- +title: "Agent Skills for OpenAI Codex CLI (2026)" +description: "Install and use 192 agent skills with OpenAI Codex CLI. Engineering, marketing, product, and DevOps plugins for Codex." +--- + +# Agent Skills for OpenAI Codex CLI + +Use 192 production-ready agent skills with OpenAI Codex CLI. Every skill in this collection works natively with Codex via the `.codex/skills/` directory format. + +--- + +## Quick Install + +```bash +# Clone the repository +git clone https://github.com/alirezarezvani/claude-skills.git +cd claude-skills + +# Option 1: Install all skills for Codex +./scripts/codex-install.sh + +# Option 2: Convert specific skills +./scripts/convert.sh --skill frontend-design --tool codex +./scripts/convert.sh --skill autoresearch-agent --tool codex + +# Option 3: Convert all skills at once +./scripts/convert.sh --all --tool codex +``` + +### How It Works + +Codex reads agent skills from `.codex/skills//SKILL.md` in your project or home directory. The `convert.sh` script transforms Claude Code's SKILL.md format into Codex-compatible instructions, preserving all workflows, slash commands, and references. + +--- + +## Top Skills for Codex Users + +### Engineering + +| Skill | Codex Command | What It Does | +|-------|--------------|-------------| +| **autoresearch-agent** | `/ar:run` | Autonomous experiment loop — edit, evaluate, keep or revert. Karpathy-inspired. | +| **pr-review-expert** | `/review:full` | Multi-pass code review catching logic bugs, security issues, missing tests. 
| +| **frontend-design** | `/design:component` | Production-grade React/Tailwind UI with high design quality. | +| **tdd-guide** | `/tdd:start` | Red-green-refactor TDD workflow with coverage tracking. | +| **senior-devops** | `/devops:deploy` | IaC, CI/CD, monitoring, and incident response playbooks. | +| **docker-development** | `/docker:optimize` | Dockerfile optimization, multi-stage builds, container security. | + +### Beyond Engineering + +| Skill | Codex Command | What It Does | +|-------|--------------|-------------| +| **content-creator** | `/content:write` | SEO-optimized content with brand voice analysis. | +| **cto-advisor** | `/cto:assess` | Tech debt scoring, team scaling, architecture decisions. | +| **agile-product-owner** | `/po:story` | User stories, acceptance criteria, sprint planning. | +| **research-summarizer** | `/research:summarize` | Structured research → summary → citations workflow. | + +--- + +## Codex-Specific Tips + +### AGENTS.md Fallback + +If your Codex setup uses `AGENTS.md` instead of the skills directory, you can use the generated agents file: + +```bash +# Copy the bundled AGENTS.md to your project +cp claude-skills/agents/AGENTS.md ~/.codex/AGENTS.md +``` + +### Using with `--full-auto` + +Skills work seamlessly with Codex's auto-approval mode: + +```bash +# Run a skill in full-auto mode +codex exec --full-auto "Use the frontend-design skill to build a dashboard component" + +# Run autoresearch overnight +codex exec --full-auto "Use autoresearch-agent to optimize src/api/search.py for response time" +``` + +### Project-Level vs Global + +```bash +# Project-level (only this repo) +cp -r claude-skills/.codex/skills/ ./.codex/skills/ + +# Global (available everywhere) +cp -r claude-skills/.codex/skills/ ~/.codex/skills/ +``` + +--- + +## Full Skill Catalog + +All 192 skills organized by domain: + +| Domain | Skills | Highlights | +|--------|--------|-----------| +| **Engineering** | 28 | autoresearch-agent, pr-review-expert, 
database-designer, migration-architect | +| **Engineering Team** | 15 | senior-frontend, senior-backend, senior-devops, senior-security, senior-qa | +| **Marketing** | 22 | content-creator, copywriting, email-sequence, SEO audit, app-store-optimization | +| **Product** | 12 | agile-product-owner, ux-researcher, research-summarizer, analytics-tracking | +| **Business Growth** | 10 | launch-strategy, competitor-alternatives, free-tool-strategy | +| **C-Level Advisory** | 8 | cto-advisor, ceo-advisor, cfo-advisor, marketing-strategy-pmm | +| **Finance** | 6 | financial modeling, fundraising, unit economics | +| **Compliance** | 8 | ISO 27001, ISO 13485, MDR, FDA, GDPR | +| **Project Management** | 5 | Jira expert, sprint planning, retrospective facilitator | + +--- + +## Cross-Platform + +These same skills work on 10 other coding agents: + +Claude Code · Gemini CLI · Cursor · OpenClaw · Aider · Windsurf · Kilo Code · OpenCode · Augment · Antigravity + +See the [full README](https://github.com/alirezarezvani/claude-skills) for platform-specific install guides. + +--- + +*Last updated: March 2026 · [alirezarezvani/claude-skills](https://github.com/alirezarezvani/claude-skills)* diff --git a/docs/guides/best-claude-code-plugins.md b/docs/guides/best-claude-code-plugins.md new file mode 100644 index 0000000..9b009e5 --- /dev/null +++ b/docs/guides/best-claude-code-plugins.md @@ -0,0 +1,120 @@ +--- +title: "Best Claude Code Plugins & Skills (2026)" +description: "The 20 best Claude Code plugins and agent skills for engineering, marketing, product, and DevOps. Install in one command." +--- + +# Best Claude Code Plugins & Skills (2026) + +Looking for the best Claude Code plugins to supercharge your workflow? This guide covers 20 production-ready plugins and agent skills — from engineering and DevOps to marketing, product management, and C-level advisory. + +All plugins listed here are open-source (MIT), tested in production, and installable in one command. 
+ +--- + +## What's the Difference Between Plugins and Skills? + +**Claude Code plugins** use the `.claude-plugin/plugin.json` format and install via `/plugin install`. **Agent skills** use `SKILL.md` files and work across Claude Code, Codex, Gemini CLI, Cursor, and 8 other coding agents. + +This repo provides **both formats** — every skill includes a `.claude-plugin` directory for native Claude Code plugin support, plus a `SKILL.md` for cross-platform compatibility. + +--- + +## Quick Install + +```bash +# Install the full marketplace (all 192 skills as Claude Code plugins) +claude /plugin install https://github.com/alirezarezvani/claude-skills + +# Install by domain +claude /plugin install engineering-skills +claude /plugin install marketing-skills +claude /plugin install devops-skills + +# Install individual plugins +claude /plugin install frontend-design +claude /plugin install pr-review-expert +claude /plugin install autoresearch-agent +``` + +--- + +## Top 20 Claude Code Plugins + +### Engineering & DevOps + +| Plugin | What It Does | Install | +|--------|-------------|---------| +| **frontend-design** | Production-grade UI with high design quality. React, Tailwind, shadcn/ui. | `/plugin install frontend-design` | +| **pr-review-expert** | Multi-pass code review: logic bugs, security, test coverage, architecture. | `/plugin install pr-review-expert` | +| **autoresearch-agent** | Autonomous experiment loop — optimizes any file by a measurable metric. | `/plugin install autoresearch-agent` | +| **senior-devops** | Infrastructure as Code, CI/CD pipelines, monitoring, incident response. | `/plugin install senior-devops` | +| **docker-development** | Dockerfile optimization, multi-stage builds, container security scanning. | `/plugin install docker-development` | +| **aws-solution-architect** | AWS architecture design with serverless patterns and IaC templates. 
| `/plugin install aws-solution-architect` | +| **tdd-guide** | Test-driven development workflows with red-green-refactor cycles. | `/plugin install tdd-guide` | +| **database-designer** | Schema design, migrations, indexing strategies, query optimization. | `/plugin install database-designer` | + +### Marketing & Content + +| Plugin | What It Does | Install | +|--------|-------------|---------| +| **content-creator** | SEO-optimized content with consistent brand voice and frameworks. | `/plugin install content-creator` | +| **copywriting** | Marketing copy for landing pages, pricing pages, CTAs. | `/plugin install copywriting` | +| **email-sequence** | Drip campaigns, nurture sequences, lifecycle email programs. | `/plugin install email-sequence` | +| **app-store-optimization** | ASO keyword research, metadata optimization, A/B testing. | `/plugin install app-store-optimization` | + +### Product & Business + +| Plugin | What It Does | Install | +|--------|-------------|---------| +| **research-summarizer** | Structured research → summary → citations for papers and reports. | `/plugin install research-summarizer` | +| **agile-product-owner** | User stories, acceptance criteria, sprint planning, velocity tracking. | `/plugin install agile-product-owner` | +| **ab-test-setup** | A/B test design, hypothesis creation, statistical significance. | `/plugin install ab-test-setup` | +| **analytics-tracking** | GA4, GTM, conversion tracking, UTM parameters, tracking plans. | `/plugin install analytics-tracking` | + +### C-Level & Strategy + +| Plugin | What It Does | Install | +|--------|-------------|---------| +| **cto-advisor** | Tech debt analysis, team scaling, architecture decisions, DORA metrics. | `/plugin install cto-advisor` | +| **ceo-advisor** | Strategy, board governance, investor relations, organizational development. | `/plugin install ceo-advisor` | +| **cfo-advisor** | Financial modeling, fundraising, burn rate analysis, unit economics. 
| `/plugin install cfo-advisor` | +| **marketing-strategy-pmm** | Positioning (April Dunford), GTM strategy, competitive intelligence. | `/plugin install marketing-strategy-pmm` | + +--- + +## Why These Plugins? + +Unlike single-purpose plugins, these are **POWERFUL-tier** agent skills — each includes: + +- **Structured workflows** with slash commands (not just prompts) +- **Python CLI tools** (254 total, zero pip dependencies) +- **Reference documents** — templates, checklists, domain-specific knowledge +- **Cross-platform support** — works on 11 coding agents, not just Claude Code + +--- + +## Cross-Platform Compatibility + +Every plugin in this collection works across multiple AI coding agents: + +| Tool | Format | Install Method | +|------|--------|---------------| +| Claude Code | `.claude-plugin` | `/plugin install` | +| OpenAI Codex | `.codex/skills/` | `./scripts/codex-install.sh` | +| Gemini CLI | `.gemini/skills/` | `./scripts/gemini-install.sh` | +| Cursor | `.cursor/skills/` | `./scripts/convert.sh --tool cursor` | +| OpenClaw | `clawhub install` | Via ClawHub marketplace | +| Aider, Windsurf, Kilo Code, OpenCode, Augment, Antigravity | Converted formats | `./scripts/convert.sh --tool ` | + +--- + +## Related Resources + +- [Full Skill Catalog](https://github.com/alirezarezvani/claude-skills) — all 192 skills +- [Agent Skills for Codex](./agent-skills-for-codex.md) — Codex-specific guide +- [Gemini CLI Skills Guide](./gemini-cli-skills-guide.md) — Gemini CLI setup +- [Cursor Skills Guide](./cursor-skills-guide.md) — Cursor integration + +--- + +*Last updated: March 2026 · [alirezarezvani/claude-skills](https://github.com/alirezarezvani/claude-skills)* diff --git a/docs/guides/cursor-skills-guide.md b/docs/guides/cursor-skills-guide.md new file mode 100644 index 0000000..786712c --- /dev/null +++ b/docs/guides/cursor-skills-guide.md @@ -0,0 +1,81 @@ +--- +title: "Cursor Agent Skills & Rules Guide (2026)" +description: "Install and use 192 agent skills 
with Cursor IDE. Engineering, marketing, and product plugins for Cursor's AI coding agent." +--- + +# Cursor Agent Skills Guide + +Use 192 production-ready agent skills with Cursor IDE. Every skill converts to Cursor's rules format and installs via the `.cursor/skills/` directory. + +--- + +## Quick Install + +```bash +# Clone the repository +git clone https://github.com/alirezarezvani/claude-skills.git +cd claude-skills + +# Convert all skills to Cursor format +./scripts/convert.sh --all --tool cursor + +# Or convert individual skills +./scripts/convert.sh --skill frontend-design --tool cursor +./scripts/convert.sh --skill pr-review-expert --tool cursor +``` + +### How It Works + +Cursor reads agent rules from `.cursor/rules/` and `.cursorrules` files. The convert script transforms SKILL.md files into Cursor-compatible rule sets, preserving workflows, decision frameworks, and domain knowledge. + +--- + +## Top Skills for Cursor Users + +| Skill | What It Does | Best For | +|-------|-------------|----------| +| **frontend-design** | Production-grade UI with React, Tailwind, shadcn/ui. | Building polished interfaces | +| **pr-review-expert** | Multi-pass code review catching logic, security, and test gaps. | Code quality | +| **senior-fullstack** | Full-stack patterns: API design, auth, state management. | Application architecture | +| **tdd-guide** | Test-driven development with red-green-refactor. | Writing tests first | +| **content-creator** | SEO-optimized content with brand voice frameworks. | Marketing content | +| **agile-product-owner** | User stories, acceptance criteria, sprint planning. | Product work | +| **cto-advisor** | Tech debt analysis, team scaling, architecture decisions. | Technical leadership | +| **database-designer** | Schema design, migrations, indexing, query optimization. 
| Database work | + +--- + +## Cursor-Specific Integration + +### Using with Cursor's Subagents + +Cursor's multi-model subagent system works well with skills: + +``` +# In Cursor's Composer, reference a skill: +@skill frontend-design Build a dashboard with charts and data tables + +# Or use slash commands from the skill: +/design:component Create a pricing card with toggle for monthly/annual +``` + +### Project Rules + +Add skills to your `.cursorrules` for project-wide availability: + +```bash +# Append skill instructions to cursor rules +cat .cursor/skills/frontend-design/SKILL.md >> .cursorrules +``` + +--- + +## Full Catalog + +All 192 skills across 9 domains. See the [full README](https://github.com/alirezarezvani/claude-skills) for the complete list. + +**Also works with:** Claude Code · OpenAI Codex · Gemini CLI · OpenClaw · Aider · Windsurf · Kilo Code · OpenCode · Augment · Antigravity + +--- + +*Last updated: March 2026 · [alirezarezvani/claude-skills](https://github.com/alirezarezvani/claude-skills)* diff --git a/docs/guides/gemini-cli-skills-guide.md b/docs/guides/gemini-cli-skills-guide.md new file mode 100644 index 0000000..faa0655 --- /dev/null +++ b/docs/guides/gemini-cli-skills-guide.md @@ -0,0 +1,116 @@ +--- +title: "Gemini CLI Skills & Plugins Guide (2026)" +description: "Install and use 192 agent skills with Gemini CLI. Free evaluation calls, engineering, marketing, and DevOps skills for Google's coding agent." +--- + +# Gemini CLI Agent Skills Guide + +Use 192 production-ready agent skills with Gemini CLI. Every skill in this collection is compatible with Gemini's agent skills specification and installs via the `.gemini/skills/` directory. 
+ +--- + +## Quick Install + +```bash +# Clone the repository +git clone https://github.com/alirezarezvani/claude-skills.git +cd claude-skills + +# Run the Gemini setup script (converts and installs all skills) +./scripts/gemini-install.sh + +# Or convert individual skills +./scripts/convert.sh --skill frontend-design --tool gemini +./scripts/convert.sh --skill autoresearch-agent --tool gemini +``` + +### How It Works + +Gemini CLI reads agent skills from `.gemini/skills//SKILL.md` in your project directory. The setup script converts all skills to Gemini-compatible format, including `gemini-extension.json` for the extension registry. + +--- + +## Why Use Skills with Gemini CLI? + +Gemini CLI's free tier gives you **unlimited evaluation calls** — perfect for: + +- **Autoresearch loops** — run overnight experiments with zero API cost +- **Content optimization** — LLM-judge evaluators for headlines, copy, prompts +- **Code review** — systematic multi-pass reviews without burning tokens + +### Free Evaluators with Gemini + +The autoresearch-agent skill includes LLM judge evaluators that work with Gemini's free tier: + +```bash +# Set up an autoresearch experiment using Gemini as the evaluator +python scripts/setup_experiment.py \ + --domain marketing \ + --name headline-optimization \ + --target content/headlines.md \ + --eval "python evaluate.py" \ + --metric ctr_score \ + --direction higher \ + --evaluator llm_judge_content + +# The evaluator calls gemini CLI for scoring — free! +``` + +--- + +## Top Skills for Gemini CLI + +| Skill | What It Does | +|-------|-------------| +| **autoresearch-agent** | Autonomous experiment loop — edit, evaluate, keep or revert. Free with Gemini. | +| **frontend-design** | Production-grade React/Tailwind UI with high design quality. | +| **pr-review-expert** | Multi-pass code review: logic, security, tests, architecture. | +| **content-creator** | SEO-optimized content with brand voice analysis and frameworks. 
| +| **senior-devops** | IaC, CI/CD, monitoring, and incident response. | +| **cto-advisor** | Tech debt analysis, team scaling, architecture decisions. | +| **research-summarizer** | Structured research → summary → citations workflow. | +| **docker-development** | Dockerfile optimization, multi-stage builds, security scanning. | + +--- + +## Gemini Extension Integration + +This repo includes `gemini-extension.json` for Gemini's extension registry: + +```json +{ + "name": "claude-skills", + "version": "2.0.0", + "description": "192 agent skills for engineering, marketing, product, and more", + "skills": ["engineering/*", "marketing-skill/*", "product-team/*", "..."] +} +``` + +See the [Gemini CLI extensions docs](https://geminicli.com/docs/cli/skills/) for integration details. + +--- + +## Project-Level vs User-Level + +```bash +# Project-level (scoped to one repo) +cp -r claude-skills/.gemini/skills/ ./.gemini/skills/ + +# User-level (available in all projects) +mkdir -p ~/.gemini/skills/ +cp -r claude-skills/.gemini/skills/* ~/.gemini/skills/ +``` + +--- + +## Cross-Platform + +These skills work on 10 other coding agents too: + +Claude Code · OpenAI Codex · Cursor · OpenClaw · Aider · Windsurf · Kilo Code · OpenCode · Augment · Antigravity + +See the [full catalog](https://github.com/alirezarezvani/claude-skills) for all 192 skills. + +--- + +*Last updated: March 2026 · [alirezarezvani/claude-skills](https://github.com/alirezarezvani/claude-skills)* diff --git a/docs/guides/openclaw-skills-guide.md b/docs/guides/openclaw-skills-guide.md new file mode 100644 index 0000000..6bc3396 --- /dev/null +++ b/docs/guides/openclaw-skills-guide.md @@ -0,0 +1,165 @@ +# OpenClaw Skills Guide — Install & Use Agent Skills with OpenClaw + +> **Last updated:** March 2026 · **Skills count:** 192+ · **Compatibility:** OpenClaw v2024.12+ + +## What Are OpenClaw Skills? 
+ +OpenClaw skills are modular instruction packages that extend your OpenClaw agent with domain expertise — from engineering and DevOps to marketing, compliance, and C-level advisory. Each skill drops into your OpenClaw workspace and works immediately with zero configuration. + +Unlike generic prompts, OpenClaw skills include structured workflows, decision frameworks, Python tools, and reference materials that your agent follows autonomously. + +## Why Use Skills with OpenClaw? + +| Without Skills | With Skills | +|---|---| +| Generic responses | Domain-expert-level outputs | +| Manual prompt engineering | Pre-built workflows with slash commands | +| No tooling | Python scripts for analysis, validation, formatting | +| Starts from scratch | References, templates, best practices included | + +OpenClaw's skill system is the most natural fit in the ecosystem — skills live in your workspace directory and are automatically loaded based on task context. + +## Installation + +### Quick Install (Recommended) + +```bash +bash <(curl -s https://raw.githubusercontent.com/alirezarezvani/claude-skills/main/scripts/openclaw-install.sh) +``` + +This installs all 192+ skills into your OpenClaw workspace with the correct directory structure. 
+ +### Manual Install + +```bash +git clone https://github.com/alirezarezvani/claude-skills.git +cd claude-skills +./scripts/install.sh --tool openclaw +``` + +### Install Specific Skill Packs + +```bash +# Engineering (49 skills) +./scripts/install.sh --tool openclaw --pack engineering + +# Marketing (43 skills) +./scripts/install.sh --tool openclaw --pack marketing + +# Product (12 skills) +./scripts/install.sh --tool openclaw --pack product + +# C-Level Advisory (28 skills) +./scripts/install.sh --tool openclaw --pack c-level + +# Regulatory & Quality (12 skills) +./scripts/install.sh --tool openclaw --pack regulatory +``` + +### ClawHub Install + +If you have the ClawHub CLI: + +```bash +clawhub install alirezarezvani/claude-skills +``` + +## How Skills Work in OpenClaw + +OpenClaw has native skill support — it scans the `skills/` directory in your workspace and auto-selects the right skill for each task. + +**Automatic selection:** When you ask your OpenClaw agent to "optimize this Dockerfile," it reads the `docker-development` skill's SKILL.md and follows its workflow. No manual activation needed. + +**Slash commands:** Each skill defines slash commands (e.g., `/docker:optimize`, `/research:summarize`) that trigger specific workflows. + +**Python tools:** Skills include executable scripts in `scripts/` that your agent can run for analysis, validation, and generation tasks. 
+ +## Top OpenClaw Skills by Category + +### Engineering +| Skill | What It Does | +|---|---| +| `docker-development` | Dockerfile optimization, multi-stage builds, security hardening | +| `terraform-patterns` | Infrastructure-as-code patterns and module design | +| `github` | PR workflows, CI/CD, code review automation | +| `frontend-design` | Production-grade UI components with high design quality | +| `mcp-builder` | Build MCP servers for external API integrations | + +### Marketing & Content +| Skill | What It Does | +|---|---| +| `content-creator` | SEO-optimized blog posts, social media, brand voice | +| `copywriting` | Landing pages, headlines, CTAs, product copy | +| `email-sequence` | Drip campaigns, onboarding flows, lifecycle emails | +| `launch-strategy` | Product launches, Product Hunt, feature announcements | +| `competitor-alternatives` | Comparison pages, vs pages, alternative pages | + +### Product & Research +| Skill | What It Does | +|---|---| +| `research-summarizer` | Academic papers, articles, structured briefs with citations | +| `agile-product-owner` | User stories, sprint planning, backlog management | +| `ab-test-setup` | Experiment design, hypothesis testing, variant analysis | + +### C-Level Advisory +| Skill | What It Does | +|---|---| +| `ceo-advisor` | Strategy, board prep, investor relations | +| `cto-advisor` | Tech debt, team scaling, architecture decisions | +| `cfo-advisor` | Financial modeling, fundraising, burn rate analysis | + +## OpenClaw vs Other Platforms + +| Feature | OpenClaw | Claude Code | Cursor | Codex | +|---|---|---|---|---| +| Native skill loading | ✅ Automatic | ✅ Manual | ⚠️ Rules only | ⚠️ Instructions | +| Slash commands | ✅ | ✅ | ❌ | ❌ | +| Python tool execution | ✅ | ✅ | ❌ | ✅ | +| Multi-agent delegation | ✅ Built-in | ❌ | ❌ | ❌ | +| Persistent memory | ✅ | ⚠️ Session | ❌ | ❌ | +| Cron/scheduled tasks | ✅ | ❌ | ❌ | ❌ | + +OpenClaw's architecture — persistent agents, memory, cron jobs, and multi-channel 
messaging — makes it the most capable platform for running agent skills autonomously. + +## Skill Anatomy + +Every skill in the repository follows the same structure: + +``` +skill-name/ +├── SKILL.md # Instructions, workflows, slash commands +├── .claude-plugin/ +│ └── plugin.json # Metadata for plugin registries +├── scripts/ +│ ├── tool_one.py # Executable Python tools +│ └── tool_two.py +└── references/ + ├── patterns.md # Domain knowledge, templates + └── best-practices.md +``` + +## Creating Custom OpenClaw Skills + +You can create your own skills following the same format: + +1. Create a directory under your workspace skills folder +2. Write a `SKILL.md` with description, slash commands, and workflows +3. Add Python scripts in `scripts/` for any automation +4. Add reference materials in `references/` +5. OpenClaw will auto-discover and use your skill + +Use the `skill-creator` meta-skill for guided skill creation: +``` +/skill:create my-custom-skill +``` + +## Resources + +- **GitHub:** [alirezarezvani/claude-skills](https://github.com/alirezarezvani/claude-skills) +- **ClawHub:** [clawhub.com](https://clawhub.com) +- **OpenClaw Docs:** [docs.openclaw.ai](https://docs.openclaw.ai) +- **Community:** [Discord](https://discord.com/invite/clawd) + +--- + +*Part of the [Claude Code Skills & Agent Plugins](https://github.com/alirezarezvani/claude-skills) repository — 192+ production-ready skills for 11 AI coding tools.* diff --git a/engineering/docker-development/.claude-plugin/plugin.json b/engineering/docker-development/.claude-plugin/plugin.json new file mode 100644 index 0000000..56b0dbb --- /dev/null +++ b/engineering/docker-development/.claude-plugin/plugin.json @@ -0,0 +1,13 @@ +{ + "name": "docker-development", + "description": "Docker and container development agent skill and plugin for Dockerfile optimization, docker-compose orchestration, multi-stage builds, and container security hardening. 
Covers production patterns.",
+  "version": "1.0.0",
+  "author": {
+    "name": "Alireza Rezvani",
+    "url": "https://alirezarezvani.com"
+  },
+  "homepage": "https://github.com/alirezarezvani/claude-skills/tree/main/engineering/docker-development",
+  "repository": "https://github.com/alirezarezvani/claude-skills",
+  "license": "MIT",
+  "skills": "./"
+}
diff --git a/engineering/docker-development/SKILL.md b/engineering/docker-development/SKILL.md
new file mode 100644
index 0000000..bd65d70
--- /dev/null
+++ b/engineering/docker-development/SKILL.md
@@ -0,0 +1,366 @@
+---
+name: "docker-development"
+description: "Docker and container development agent skill and plugin for Dockerfile optimization, docker-compose orchestration, multi-stage builds, and container security hardening. Use when: user wants to optimize a Dockerfile, create or improve docker-compose configurations, implement multi-stage builds, audit container security, reduce image size, or follow container best practices. Covers build performance, layer caching, secret management, and production-ready container patterns."
+license: MIT
+metadata:
+  version: 1.0.0
+  author: Alireza Rezvani
+  category: engineering
+  updated: 2026-03-16
+---
+
+# Docker Development
+
+> Smaller images. Faster builds. Secure containers. No guesswork.
+
+Opinionated Docker workflow that turns bloated Dockerfiles into production-grade containers. Covers optimization, multi-stage builds, compose orchestration, and security hardening.
+
+Not a Docker tutorial — a set of concrete decisions about how to build containers that don't waste time, space, or attack surface.
+ +--- + +## Slash Commands + +| Command | What it does | +|---------|-------------| +| `/docker:optimize` | Analyze and optimize a Dockerfile for size, speed, and layer caching | +| `/docker:compose` | Generate or improve docker-compose.yml with best practices | +| `/docker:security` | Audit a Dockerfile or running container for security issues | + +--- + +## When This Skill Activates + +Recognize these patterns from the user: + +- "Optimize this Dockerfile" +- "My Docker build is slow" +- "Create a docker-compose for this project" +- "Is this Dockerfile secure?" +- "Reduce my Docker image size" +- "Set up multi-stage builds" +- "Docker best practices for [language/framework]" +- Any request involving: Dockerfile, docker-compose, container, image size, build cache, Docker security + +If the user has a Dockerfile or wants to containerize something → this skill applies. + +--- + +## Workflow + +### `/docker:optimize` — Dockerfile Optimization + +1. **Analyze current state** + - Read the Dockerfile + - Identify base image and its size + - Count layers (each RUN/COPY/ADD = 1 layer) + - Check for common anti-patterns + +2. **Apply optimization checklist** + + ``` + BASE IMAGE + ├── Use specific tags, never :latest in production + ├── Prefer slim/alpine variants (debian-slim > ubuntu > debian) + ├── Pin digest for reproducibility in CI: image@sha256:... + └── Match base to runtime needs (don't use python:3.12 for a compiled binary) + + LAYER OPTIMIZATION + ├── Combine related RUN commands with && \ + ├── Order layers: least-changing first (deps before source code) + ├── Clean package manager cache in the same RUN layer + ├── Use .dockerignore to exclude unnecessary files + └── Separate build deps from runtime deps + + BUILD CACHE + ├── COPY dependency files before source code (package.json, requirements.txt, go.mod) + ├── Install deps in a separate layer from code copy + ├── Use BuildKit cache mounts: --mount=type=cache,target=/root/.cache + └── Avoid COPY . . 
before dependency installation + + MULTI-STAGE BUILDS + ├── Stage 1: build (full SDK, build tools, dev deps) + ├── Stage 2: runtime (minimal base, only production artifacts) + ├── COPY --from=builder only what's needed + └── Final image should have NO build tools, NO source code, NO dev deps + ``` + +3. **Generate optimized Dockerfile** + - Apply all relevant optimizations + - Add inline comments explaining each decision + - Report estimated size reduction + +4. **Validate** + ```bash + python3 scripts/dockerfile_analyzer.py Dockerfile + ``` + +### `/docker:compose` — Docker Compose Configuration + +1. **Identify services** + - Application (web, API, worker) + - Database (postgres, mysql, redis, mongo) + - Cache (redis, memcached) + - Queue (rabbitmq, kafka) + - Reverse proxy (nginx, traefik, caddy) + +2. **Apply compose best practices** + + ``` + SERVICES + ├── Use depends_on with condition: service_healthy + ├── Add healthchecks for every service + ├── Set resource limits (mem_limit, cpus) + ├── Use named volumes for persistent data + └── Pin image versions + + NETWORKING + ├── Create explicit networks (don't rely on default) + ├── Separate frontend and backend networks + ├── Only expose ports that need external access + └── Use internal: true for backend-only networks + + ENVIRONMENT + ├── Use env_file for secrets, not inline environment + ├── Never commit .env files (add to .gitignore) + ├── Use variable substitution: ${VAR:-default} + └── Document all required env vars + + DEVELOPMENT vs PRODUCTION + ├── Use compose profiles or override files + ├── Dev: bind mounts for hot reload, debug ports exposed + ├── Prod: named volumes, no debug ports, restart: unless-stopped + └── docker-compose.override.yml for dev-only config + ``` + +3. 
**Generate compose file** + - Output docker-compose.yml with healthchecks, networks, volumes + - Generate .env.example with all required variables documented + - Add dev/prod profile annotations + +### `/docker:security` — Container Security Audit + +1. **Dockerfile audit** + + | Check | Severity | Fix | + |-------|----------|-----| + | Running as root | Critical | Add `USER nonroot` after creating user | + | Using :latest tag | High | Pin to specific version | + | Secrets in ENV/ARG | Critical | Use BuildKit secrets: `--mount=type=secret` | + | COPY with broad glob | Medium | Use specific paths, add .dockerignore | + | Unnecessary EXPOSE | Low | Only expose ports the app uses | + | No HEALTHCHECK | Medium | Add HEALTHCHECK with appropriate interval | + | Privileged instructions | High | Avoid `--privileged`, drop capabilities | + | Package manager cache retained | Low | Clean in same RUN layer | + +2. **Runtime security checks** + + | Check | Severity | Fix | + |-------|----------|-----| + | Container running as root | Critical | Set user in Dockerfile or compose | + | Writable root filesystem | Medium | Use `read_only: true` in compose | + | All capabilities retained | High | Drop all, add only needed: `cap_drop: [ALL]` | + | No resource limits | Medium | Set `mem_limit` and `cpus` | + | Host network mode | High | Use bridge or custom network | + | Sensitive mounts | Critical | Never mount /etc, /var/run/docker.sock in prod | + | No log driver configured | Low | Set `logging:` with size limits | + +3. **Generate security report** + ``` + SECURITY AUDIT — [Dockerfile/Image name] + Date: [timestamp] + + CRITICAL: [count] + HIGH: [count] + MEDIUM: [count] + LOW: [count] + + [Detailed findings with fix recommendations] + ``` + +--- + +## Tooling + +### `scripts/dockerfile_analyzer.py` + +CLI utility for static analysis of Dockerfiles. 
+ +**Features:** +- Layer count and optimization suggestions +- Base image analysis with size estimates +- Anti-pattern detection (15+ rules) +- Security issue flagging +- Multi-stage build detection and validation +- JSON and text output + +**Usage:** +```bash +# Analyze a Dockerfile +python3 scripts/dockerfile_analyzer.py Dockerfile + +# JSON output +python3 scripts/dockerfile_analyzer.py Dockerfile --output json + +# Analyze with security focus +python3 scripts/dockerfile_analyzer.py Dockerfile --security + +# Check a specific directory +python3 scripts/dockerfile_analyzer.py path/to/Dockerfile +``` + +### `scripts/compose_validator.py` + +CLI utility for validating docker-compose files. + +**Features:** +- Service dependency validation +- Healthcheck presence detection +- Network configuration analysis +- Volume mount validation +- Environment variable audit +- Port conflict detection +- Best practice scoring + +**Usage:** +```bash +# Validate a compose file +python3 scripts/compose_validator.py docker-compose.yml + +# JSON output +python3 scripts/compose_validator.py docker-compose.yml --output json + +# Strict mode (fail on warnings) +python3 scripts/compose_validator.py docker-compose.yml --strict +``` + +--- + +## Multi-Stage Build Patterns + +### Pattern 1: Compiled Language (Go, Rust, C++) + +```dockerfile +# Build stage +FROM golang:1.22-alpine AS builder +WORKDIR /app +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 go build -ldflags="-s -w" -o /app/server ./cmd/server + +# Runtime stage +FROM gcr.io/distroless/static-debian12 +COPY --from=builder /app/server /server +USER nonroot:nonroot +ENTRYPOINT ["/server"] +``` + +### Pattern 2: Node.js / TypeScript + +```dockerfile +# Dependencies stage +FROM node:20-alpine AS deps +WORKDIR /app +COPY package.json package-lock.json ./ +RUN npm ci --production=false + +# Build stage +FROM deps AS builder +COPY . . 
+RUN npm run build + +# Runtime stage +FROM node:20-alpine +WORKDIR /app +RUN addgroup -g 1001 -S appgroup && adduser -S appuser -u 1001 +COPY --from=builder /app/dist ./dist +COPY --from=deps /app/node_modules ./node_modules +COPY package.json ./ +USER appuser +EXPOSE 3000 +CMD ["node", "dist/index.js"] +``` + +### Pattern 3: Python + +```dockerfile +# Build stage +FROM python:3.12-slim AS builder +WORKDIR /app +COPY requirements.txt . +RUN pip install --no-cache-dir --prefix=/install -r requirements.txt + +# Runtime stage +FROM python:3.12-slim +WORKDIR /app +RUN groupadd -r appgroup && useradd -r -g appgroup appuser +COPY --from=builder /install /usr/local +COPY . . +USER appuser +EXPOSE 8000 +CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] +``` + +--- + +## Base Image Decision Tree + +``` +Is it a compiled binary (Go, Rust, C)? +├── Yes → distroless/static or scratch +└── No + ├── Need a shell for debugging? + │ ├── Yes → alpine variant (e.g., node:20-alpine) + │ └── No → distroless variant + ├── Need glibc (not musl)? + │ ├── Yes → slim variant (e.g., python:3.12-slim) + │ └── No → alpine variant + └── Need specific OS packages? + ├── Many → debian-slim + └── Few → alpine + apk add +``` + +--- + +## Proactive Triggers + +Flag these without being asked: + +- **Dockerfile uses :latest** → Suggest pinning to a specific version tag. +- **No .dockerignore** → Create one. At minimum: `.git`, `node_modules`, `__pycache__`, `.env`. +- **COPY . . before dependency install** → Cache bust. Reorder to install deps first. +- **Running as root** → Add USER instruction. No exceptions for production. +- **Secrets in ENV or ARG** → Use BuildKit secret mounts. Never bake secrets into layers. +- **Image over 1GB** → Multi-stage build required. No reason for a production image this large. +- **No healthcheck** → Add one. Orchestrators (Compose, K8s) need it for proper lifecycle management. 
+- **apt-get without cleanup in same layer** → `rm -rf /var/lib/apt/lists/*` in the same RUN. + +--- + +## Installation + +### One-liner (any tool) +```bash +git clone https://github.com/alirezarezvani/claude-skills.git +cp -r claude-skills/engineering/docker-development ~/.claude/skills/ +``` + +### Multi-tool install +```bash +./scripts/convert.sh --skill docker-development --tool codex|gemini|cursor|windsurf|openclaw +``` + +### OpenClaw +```bash +clawhub install cs-docker-development +``` + +--- + +## Related Skills + +- **senior-devops** — Broader DevOps scope (CI/CD, IaC, monitoring). Complementary — use docker-development for container-specific work, senior-devops for pipeline and infrastructure. +- **senior-security** — Application security. Complementary — docker-development covers container security, senior-security covers application-level threats. +- **autoresearch-agent** — Can optimize Docker build times or image sizes as measurable experiments. +- **ci-cd-pipeline-builder** — Pipeline construction. Complementary — docker-development builds the containers, ci-cd-pipeline-builder deploys them. diff --git a/engineering/docker-development/references/compose-patterns.md b/engineering/docker-development/references/compose-patterns.md new file mode 100644 index 0000000..51d08b2 --- /dev/null +++ b/engineering/docker-development/references/compose-patterns.md @@ -0,0 +1,282 @@ +# Docker Compose Patterns Reference + +## Production-Ready Patterns + +### Web App + Database + Cache + +```yaml +services: + app: + build: + context: . 
+ dockerfile: Dockerfile + ports: + - "3000:3000" + env_file: + - .env + depends_on: + db: + condition: service_healthy + redis: + condition: service_healthy + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/health"] + interval: 30s + timeout: 3s + retries: 3 + start_period: 10s + restart: unless-stopped + networks: + - frontend + - backend + mem_limit: 512m + cpus: 1.0 + + db: + image: postgres:16-alpine + volumes: + - pgdata:/var/lib/postgresql/data + env_file: + - .env.db + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 10s + timeout: 5s + retries: 5 + restart: unless-stopped + networks: + - backend + mem_limit: 256m + + redis: + image: redis:7-alpine + command: redis-server --maxmemory 64mb --maxmemory-policy allkeys-lru + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 3s + retries: 3 + restart: unless-stopped + networks: + - backend + mem_limit: 128m + +volumes: + pgdata: + +networks: + frontend: + backend: + internal: true +``` + +### Key Patterns +- **Healthchecks on every service** — enables depends_on with condition +- **Named volumes** — data persists across container recreation +- **Explicit networks** — backend is internal (no external access) +- **env_file** — secrets not in compose file +- **Resource limits** — prevent runaway containers + +--- + +## Development Override Pattern + +### docker-compose.yml (base — production-like) +```yaml +services: + app: + build: . 
+ ports: + - "3000:3000" + restart: unless-stopped +``` + +### docker-compose.override.yml (dev — auto-loaded) +```yaml +services: + app: + build: + target: development + volumes: + - .:/app # Bind mount for hot reload + - /app/node_modules # Preserve container node_modules + environment: + - NODE_ENV=development + - DEBUG=true + ports: + - "9229:9229" # Debug port + restart: "no" +``` + +### Usage +```bash +# Development (auto-loads override) +docker compose up + +# Production (skip override) +docker compose -f docker-compose.yml up -d + +# Explicit profiles +docker compose --profile dev up +docker compose --profile prod up -d +``` + +--- + +## Network Isolation Pattern + +```yaml +services: + nginx: + image: nginx:alpine + ports: + - "80:80" + - "443:443" + networks: + - frontend + + app: + build: . + networks: + - frontend + - backend + + db: + image: postgres:16-alpine + networks: + - backend + + redis: + image: redis:7-alpine + networks: + - backend + +networks: + frontend: + # External traffic reaches nginx and app + backend: + internal: true + # DB and Redis only reachable by app +``` + +### Why This Matters +- Database and cache are **not accessible from outside** +- Only nginx and app handle external traffic +- Lateral movement limited if one container is compromised + +--- + +## Worker + Queue Pattern + +```yaml +services: + api: + build: + context: . + target: runtime + command: uvicorn main:app --host 0.0.0.0 --port 8000 + ports: + - "8000:8000" + depends_on: + rabbitmq: + condition: service_healthy + + worker: + build: + context: . + target: runtime + command: celery -A tasks worker --loglevel=info + depends_on: + rabbitmq: + condition: service_healthy + + scheduler: + build: + context: . 
+ target: runtime + command: celery -A tasks beat --loglevel=info + depends_on: + rabbitmq: + condition: service_healthy + + rabbitmq: + image: rabbitmq:3.13-management-alpine + ports: + - "15672:15672" # Management UI (dev only) + healthcheck: + test: ["CMD", "rabbitmq-diagnostics", "check_running"] + interval: 10s + timeout: 5s + retries: 5 +``` + +--- + +## Logging Configuration + +```yaml +services: + app: + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + tag: "{{.Name}}/{{.ID}}" +``` + +### Why +- **max-size** prevents disk exhaustion +- **max-file** rotates logs automatically +- Default Docker logging has NO size limit — production servers can run out of disk + +--- + +## Environment Variable Patterns + +### .env.example (committed to repo) +```env +# Database +DATABASE_URL=postgres://user:password@db:5432/appname +POSTGRES_USER=user +POSTGRES_PASSWORD=changeme +POSTGRES_DB=appname + +# Redis +REDIS_URL=redis://redis:6379/0 + +# Application +SECRET_KEY=changeme-generate-a-real-secret +NODE_ENV=production +LOG_LEVEL=info + +# External Services (BYOK) +# SMTP_HOST= +# SMTP_PORT=587 +# AWS_ACCESS_KEY_ID= +# AWS_SECRET_ACCESS_KEY= +``` + +### Variable Substitution in Compose +```yaml +services: + app: + image: myapp:${APP_VERSION:-latest} + environment: + - LOG_LEVEL=${LOG_LEVEL:-info} + - PORT=${PORT:-3000} +``` + +--- + +## Troubleshooting Checklist + +| Symptom | Likely Cause | Fix | +|---------|-------------|-----| +| Container exits immediately | CMD/ENTRYPOINT crashes, missing env vars | Check logs: `docker compose logs service` | +| Port already in use | Another service or host process on same port | Change host port: `"3001:3000"` | +| Volume permissions denied | Container user doesn't own mounted path | Match UID/GID or use named volumes | +| Build cache not working | COPY . . 
invalidates cache early | Reorder: copy deps first, then source | +| depends_on doesn't wait | No healthcheck condition | Add `condition: service_healthy` | +| Container OOM killed | No memory limit or limit too low | Set appropriate `mem_limit` | +| Network connectivity issues | Wrong network or service name | Services communicate by service name within shared network | diff --git a/engineering/docker-development/references/dockerfile-best-practices.md b/engineering/docker-development/references/dockerfile-best-practices.md new file mode 100644 index 0000000..eb96dd4 --- /dev/null +++ b/engineering/docker-development/references/dockerfile-best-practices.md @@ -0,0 +1,235 @@ +# Dockerfile Best Practices Reference + +## Layer Optimization + +### The Golden Rule +Every `RUN`, `COPY`, and `ADD` instruction creates a new layer. Fewer layers = smaller image. + +### Combine Related Commands +```dockerfile +# Bad — 3 layers +RUN apt-get update +RUN apt-get install -y curl git +RUN rm -rf /var/lib/apt/lists/* + +# Good — 1 layer +RUN apt-get update && \ + apt-get install -y --no-install-recommends curl git && \ + rm -rf /var/lib/apt/lists/* +``` + +### Order Layers by Change Frequency +```dockerfile +# Least-changing layers first +COPY package.json package-lock.json ./ # Changes rarely +RUN npm ci # Changes when deps change +COPY . . 
# Changes every build +RUN npm run build # Changes every build +``` + +### Use .dockerignore +``` +.git +node_modules +__pycache__ +*.pyc +.env +.env.* +dist +build +*.log +.DS_Store +.vscode +.idea +coverage +.pytest_cache +``` + +--- + +## Base Image Selection + +### Size Comparison (approximate) + +| Base | Size | Use Case | +|------|------|----------| +| `scratch` | 0MB | Static binaries (Go, Rust) | +| `distroless/static` | 2MB | Static binaries with CA certs | +| `alpine` | 7MB | Minimal Linux, shell access | +| `distroless/base` | 20MB | Dynamic binaries (C/C++) | +| `debian-slim` | 80MB | When you need glibc + apt | +| `ubuntu` | 78MB | Full Ubuntu ecosystem | +| `python:3.12-slim` | 130MB | Python apps (production) | +| `node:20-alpine` | 130MB | Node.js apps | +| `golang:1.22` | 800MB | Go build stage only | +| `python:3.12` | 900MB | Never use in production | +| `node:20` | 1000MB | Never use in production | + +### When to Use Alpine +- Small image size matters +- No dependency on glibc (musl works) +- Willing to handle occasional musl-related issues +- Not running Python with C extensions that need glibc + +### When to Use Slim +- Need glibc compatibility +- Python with compiled C extensions (numpy, pandas) +- Fewer musl compatibility issues +- Still much smaller than full images + +### When to Use Distroless +- Maximum security (no shell, no package manager) +- Compiled/static binaries +- Don't need debugging access inside container +- Production-only (not development) + +--- + +## Multi-Stage Builds + +### Why Multi-Stage +- Build tools and source code stay out of production image +- Final image contains only runtime artifacts +- Dramatically reduces image size and attack surface + +### Naming Stages +```dockerfile +FROM golang:1.22 AS builder # Named stage +FROM alpine:3.19 AS runtime # Named stage +COPY --from=builder /app /app # Reference by name +``` + +### Selective Copy +```dockerfile +# Only copy the built artifact — nothing else +COPY 
--from=builder /app/server /server +COPY --from=builder /app/config.yaml /config.yaml +# Don't COPY --from=builder /app/ /app/ (copies source code too) +``` + +--- + +## Security Hardening + +### Run as Non-Root +```dockerfile +# Create user +RUN groupadd -r appgroup && useradd -r -g appgroup -s /sbin/nologin appuser + +# Set ownership +COPY --chown=appuser:appgroup . . + +# Switch user (after all root-requiring operations) +USER appuser +``` + +### Secret Management +```dockerfile +# Bad — secret baked into layer +ENV API_KEY=sk-12345 + +# Good — BuildKit secret mount (never in layer) +RUN --mount=type=secret,id=api_key \ + export API_KEY=$(cat /run/secrets/api_key) && \ + ./configure --api-key=$API_KEY +``` + +Build with: +```bash +docker build --secret id=api_key,src=./api_key.txt . +``` + +### Read-Only Filesystem +```yaml +# docker-compose.yml +services: + app: + read_only: true + tmpfs: + - /tmp + - /var/run +``` + +### Drop Capabilities +```yaml +services: + app: + cap_drop: + - ALL + cap_add: + - NET_BIND_SERVICE # Only if binding to ports < 1024 +``` + +--- + +## Build Performance + +### BuildKit Cache Mounts +```dockerfile +# Cache pip downloads across builds +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install -r requirements.txt + +# Cache apt downloads +RUN --mount=type=cache,target=/var/cache/apt \ + apt-get update && apt-get install -y curl +``` + +### Parallel Builds +```dockerfile +# These stages build in parallel when using BuildKit +FROM node:20-alpine AS frontend +COPY frontend/ . +RUN npm ci && npm run build + +FROM golang:1.22 AS backend +COPY backend/ . +RUN go build -o server + +FROM alpine:3.19 +COPY --from=frontend /dist /static +COPY --from=backend /server /server +``` + +### Enable BuildKit +```bash +export DOCKER_BUILDKIT=1 +docker build . 
+ +# Or in daemon.json +{ "features": { "buildkit": true } } +``` + +--- + +## Health Checks + +### HTTP Service +```dockerfile +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 +``` + +### Without curl (using wget) +```dockerfile +HEALTHCHECK --interval=30s --timeout=3s --retries=3 \ + CMD wget --no-verbose --tries=1 --spider http://localhost:8000/health || exit 1 +``` + +### TCP Check +```dockerfile +HEALTHCHECK --interval=30s --timeout=3s --retries=3 \ + CMD nc -z localhost 8000 || exit 1 +``` + +### PostgreSQL +```dockerfile +HEALTHCHECK --interval=10s --timeout=5s --retries=5 \ + CMD pg_isready -U postgres || exit 1 +``` + +### Redis +```dockerfile +HEALTHCHECK --interval=10s --timeout=3s --retries=3 \ + CMD redis-cli ping | grep PONG || exit 1 +``` diff --git a/engineering/docker-development/scripts/compose_validator.py b/engineering/docker-development/scripts/compose_validator.py new file mode 100644 index 0000000..fa5c109 --- /dev/null +++ b/engineering/docker-development/scripts/compose_validator.py @@ -0,0 +1,390 @@ +#!/usr/bin/env python3 +""" +docker-development: Docker Compose Validator + +Validate docker-compose.yml files for best practices, missing healthchecks, +network configuration, port conflicts, and security issues. + +Usage: + python scripts/compose_validator.py docker-compose.yml + python scripts/compose_validator.py docker-compose.yml --output json + python scripts/compose_validator.py docker-compose.yml --strict +""" + +import argparse +import json +import re +import sys +from pathlib import Path + + +# --- Demo Compose File --- + +DEMO_COMPOSE = """ +version: '3.8' +services: + web: + build: . 
+ ports: + - "3000:3000" + environment: + - DATABASE_URL=postgres://user:password@db:5432/app + - SECRET_KEY=my-secret-key + depends_on: + - db + - redis + + db: + image: postgres:latest + ports: + - "5432:5432" + environment: + POSTGRES_PASSWORD: password123 + volumes: + - ./data:/var/lib/postgresql/data + + redis: + image: redis + ports: + - "6379:6379" + + worker: + build: . + command: python worker.py + environment: + - DATABASE_URL=postgres://user:password@db:5432/app +""" + + +def parse_yaml_simple(content): + """Simple YAML-like parser for docker-compose files (stdlib only). + + Handles the subset of YAML used in typical docker-compose files: + - Top-level keys + - Service definitions + - Lists (- items) + - Key-value pairs + - Nested indentation + """ + result = {"services": {}, "volumes": {}, "networks": {}} + current_section = None + current_service = None + current_key = None + indent_stack = [] + + for line in content.splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + + indent = len(line) - len(line.lstrip()) + + # Top-level keys + if indent == 0 and ":" in stripped: + key = stripped.split(":")[0].strip() + if key == "services": + current_section = "services" + elif key == "volumes": + current_section = "volumes" + elif key == "networks": + current_section = "networks" + elif key == "version": + val = stripped.split(":", 1)[1].strip().strip("'\"") + result["version"] = val + current_service = None + current_key = None + continue + + if current_section == "services": + # Service name (indent level 2) + if indent == 2 and ":" in stripped and not stripped.startswith("-"): + key = stripped.split(":")[0].strip() + val = stripped.split(":", 1)[1].strip() if ":" in stripped else "" + if val and not val.startswith("{"): + # Simple key:value inside a service + if current_service and current_service in result["services"]: + result["services"][current_service][key] = val + else: + current_service = key + 
result["services"][current_service] = {} + current_key = None + else: + current_service = key + result["services"][current_service] = {} + current_key = None + continue + + if current_service and current_service in result["services"]: + svc = result["services"][current_service] + + # Service-level keys (indent 4) + if indent == 4 and ":" in stripped and not stripped.startswith("-"): + key = stripped.split(":")[0].strip() + val = stripped.split(":", 1)[1].strip() + current_key = key + if val: + svc[key] = val.strip("'\"") + else: + svc[key] = [] + continue + + # List items (indent 6 or 8) + if stripped.startswith("-") and current_key: + item = stripped[1:].strip().strip("'\"") + if current_key in svc: + if isinstance(svc[current_key], list): + svc[current_key].append(item) + else: + svc[current_key] = [svc[current_key], item] + else: + svc[current_key] = [item] + continue + + # Nested key:value under current_key (e.g., healthcheck test) + if indent >= 6 and ":" in stripped and not stripped.startswith("-"): + key = stripped.split(":")[0].strip() + val = stripped.split(":", 1)[1].strip() + if current_key and current_key in svc: + if isinstance(svc[current_key], list): + svc[current_key] = {} + if isinstance(svc[current_key], dict): + svc[current_key][key] = val + + return result + + +def validate_compose(parsed, strict=False): + """Run validation rules on parsed compose file.""" + findings = [] + services = parsed.get("services", {}) + + # --- Version check --- + version = parsed.get("version", "") + if version: + findings.append({ + "severity": "low", + "category": "deprecation", + "message": f"'version: {version}' is deprecated in Compose V2 — remove it", + "service": "(top-level)", + }) + + # --- Per-service checks --- + all_ports = [] + + for name, svc in services.items(): + # Healthcheck + if "healthcheck" not in svc: + findings.append({ + "severity": "medium", + "category": "reliability", + "message": f"No healthcheck defined — orchestrator can't detect 
unhealthy state", + "service": name, + }) + + # Image tag + image = svc.get("image", "") + if image: + if ":latest" in image: + findings.append({ + "severity": "high", + "category": "reproducibility", + "message": f"Using :latest tag on '{image}' — pin to specific version", + "service": name, + }) + elif ":" not in image and "/" not in image: + findings.append({ + "severity": "high", + "category": "reproducibility", + "message": f"No tag on image '{image}' — defaults to :latest", + "service": name, + }) + + # Ports + ports = svc.get("ports", []) + if isinstance(ports, list): + for p in ports: + p_str = str(p) + # Extract host port + match = re.match(r"(\d+):\d+", p_str) + if match: + host_port = match.group(1) + all_ports.append((host_port, name)) + + # Environment secrets + env = svc.get("environment", []) + if isinstance(env, list): + for e in env: + e_str = str(e) + if re.search(r"(?:PASSWORD|SECRET|TOKEN|KEY)=\S+", e_str, re.IGNORECASE): + if "env_file" not in svc: + findings.append({ + "severity": "critical", + "category": "security", + "message": f"Inline secret in environment: {e_str[:40]}...", + "service": name, + }) + elif isinstance(env, dict): + for k, v in env.items(): + if re.search(r"(?:PASSWORD|SECRET|TOKEN|KEY)", k, re.IGNORECASE) and v: + findings.append({ + "severity": "critical", + "category": "security", + "message": f"Inline secret: {k}={str(v)[:20]}...", + "service": name, + }) + + # depends_on without condition + depends = svc.get("depends_on", []) + if isinstance(depends, list) and depends: + findings.append({ + "severity": "medium", + "category": "reliability", + "message": "depends_on without condition: service_healthy — race condition risk", + "service": name, + }) + + # Bind mounts (./path style) + volumes = svc.get("volumes", []) + if isinstance(volumes, list): + for v in volumes: + v_str = str(v) + if v_str.startswith("./") or v_str.startswith("/"): + if "/var/run/docker.sock" in v_str: + findings.append({ + "severity": "critical", + 
"category": "security", + "message": "Docker socket mounted — container has host Docker access", + "service": name, + }) + + # Restart policy + if "restart" not in svc and "build" not in svc: + findings.append({ + "severity": "low", + "category": "reliability", + "message": "No restart policy — container won't auto-restart on failure", + "service": name, + }) + + # Resource limits + if "mem_limit" not in svc and "deploy" not in svc: + findings.append({ + "severity": "low" if not strict else "medium", + "category": "resources", + "message": "No memory limit — container can consume all host memory", + "service": name, + }) + + # Port conflicts + port_map = {} + for port, svc_name in all_ports: + if port in port_map: + findings.append({ + "severity": "high", + "category": "networking", + "message": f"Port {port} conflict between '{port_map[port]}' and '{svc_name}'", + "service": svc_name, + }) + port_map[port] = svc_name + + # Network check + if "networks" not in parsed or not parsed["networks"]: + if len(services) > 1: + findings.append({ + "severity": "low", + "category": "networking", + "message": "No explicit networks — all services share default bridge network", + "service": "(top-level)", + }) + + # Sort by severity + severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3} + findings.sort(key=lambda f: severity_order.get(f["severity"], 4)) + + return findings + + +def generate_report(content, output_format="text", strict=False): + """Generate validation report.""" + parsed = parse_yaml_simple(content) + findings = validate_compose(parsed, strict) + services = parsed.get("services", {}) + + # Score + deductions = {"critical": 25, "high": 15, "medium": 5, "low": 2} + score = max(0, 100 - sum(deductions.get(f["severity"], 0) for f in findings)) + + counts = { + "critical": sum(1 for f in findings if f["severity"] == "critical"), + "high": sum(1 for f in findings if f["severity"] == "high"), + "medium": sum(1 for f in findings if f["severity"] == 
"medium"), + "low": sum(1 for f in findings if f["severity"] == "low"), + } + + result = { + "score": score, + "services": list(services.keys()), + "service_count": len(services), + "findings": findings, + "finding_counts": counts, + } + + if output_format == "json": + print(json.dumps(result, indent=2)) + return result + + # Text output + print(f"\n{'=' * 60}") + print(f" Docker Compose Validation Report") + print(f"{'=' * 60}") + print(f" Score: {score}/100") + print(f" Services: {', '.join(services.keys()) if services else 'none'}") + print() + print(f" Findings: {counts['critical']} critical | {counts['high']} high | {counts['medium']} medium | {counts['low']} low") + print(f"{'─' * 60}") + + for f in findings: + icon = {"critical": "!!!", "high": "!!", "medium": "!", "low": "~"}.get(f["severity"], "?") + print(f"\n {icon} {f['severity'].upper()} [{f['category']}] — {f['service']}") + print(f" {f['message']}") + + if not findings: + print("\n No issues found. Compose file looks good.") + + print(f"\n{'=' * 60}\n") + return result + + +def main(): + parser = argparse.ArgumentParser( + description="docker-development: Docker Compose validator" + ) + parser.add_argument("composefile", nargs="?", help="Path to docker-compose.yml (omit for demo)") + parser.add_argument( + "--output", "-o", + choices=["text", "json"], + default="text", + help="Output format (default: text)", + ) + parser.add_argument( + "--strict", + action="store_true", + help="Strict mode — elevate warnings to higher severity", + ) + args = parser.parse_args() + + if args.composefile: + path = Path(args.composefile) + if not path.exists(): + print(f"Error: File not found: {args.composefile}", file=sys.stderr) + sys.exit(1) + content = path.read_text(encoding="utf-8") + else: + print("No compose file provided. 
Running demo validation...\n") + content = DEMO_COMPOSE + + generate_report(content, args.output, args.strict) + + +if __name__ == "__main__": + main() diff --git a/engineering/docker-development/scripts/dockerfile_analyzer.py b/engineering/docker-development/scripts/dockerfile_analyzer.py new file mode 100644 index 0000000..0f21401 --- /dev/null +++ b/engineering/docker-development/scripts/dockerfile_analyzer.py @@ -0,0 +1,410 @@ +#!/usr/bin/env python3 +""" +docker-development: Dockerfile Analyzer + +Static analysis of Dockerfiles for optimization opportunities, anti-patterns, +and security issues. Reports layer count, base image analysis, and actionable +recommendations. + +Usage: + python scripts/dockerfile_analyzer.py Dockerfile + python scripts/dockerfile_analyzer.py Dockerfile --output json + python scripts/dockerfile_analyzer.py Dockerfile --security +""" + +import argparse +import json +import re +import sys +from pathlib import Path + + +# --- Analysis Rules --- + +ANTI_PATTERNS = [ + { + "id": "AP001", + "name": "latest_tag", + "severity": "high", + "pattern": r"^FROM\s+\S+:latest", + "message": "Using :latest tag — pin to a specific version for reproducibility", + "fix": "Use a specific tag like :3.12-slim or pin by digest", + }, + { + "id": "AP002", + "name": "no_tag", + "severity": "high", + "pattern": r"^FROM\s+([a-z][a-z0-9_.-]+)\s*$", + "message": "No tag specified on base image — defaults to :latest", + "fix": "Add a specific version tag", + }, + { + "id": "AP003", + "name": "run_apt_no_clean", + "severity": "medium", + "pattern": r"^RUN\s+.*apt-get\s+install(?!.*rm\s+-rf\s+/var/lib/apt/lists)", + "message": "apt-get install without cleanup in same layer — bloats image", + "fix": "Add && rm -rf /var/lib/apt/lists/* in the same RUN instruction", + }, + { + "id": "AP004", + "name": "run_apk_no_cache", + "severity": "medium", + "pattern": r"^RUN\s+.*apk\s+add(?!\s+--no-cache)", + "message": "apk add without --no-cache — retains package index", + 
"fix": "Use: apk add --no-cache ", + }, + { + "id": "AP005", + "name": "add_instead_of_copy", + "severity": "low", + "pattern": r"^ADD\s+(?!https?://)\S+", + "message": "Using ADD for local files — COPY is more explicit and predictable", + "fix": "Use COPY instead of ADD unless you need tar auto-extraction or URL fetching", + }, + { + "id": "AP006", + "name": "multiple_cmd", + "severity": "medium", + "pattern": None, # Custom check + "message": "Multiple CMD instructions — only the last one takes effect", + "fix": "Keep exactly one CMD instruction", + }, + { + "id": "AP007", + "name": "env_secrets", + "severity": "critical", + "pattern": r"^(?:ENV|ARG)\s+\S*(?:PASSWORD|SECRET|TOKEN|KEY|API_KEY)\s*=", + "message": "Secrets in ENV/ARG — baked into image layers and visible in history", + "fix": "Use BuildKit secrets: RUN --mount=type=secret,id=mytoken", + }, + { + "id": "AP008", + "name": "broad_copy", + "severity": "medium", + "pattern": r"^COPY\s+\.\s+\.", + "message": "COPY . . copies everything — may include secrets, git history, node_modules", + "fix": "Use .dockerignore and copy specific directories, or copy after dependency install", + }, + { + "id": "AP009", + "name": "no_user", + "severity": "critical", + "pattern": None, # Custom check + "message": "No USER instruction — container runs as root", + "fix": "Add USER nonroot or create a dedicated user", + }, + { + "id": "AP010", + "name": "pip_no_cache", + "severity": "low", + "pattern": r"^RUN\s+.*pip\s+install(?!\s+--no-cache-dir)", + "message": "pip install without --no-cache-dir — retains pip cache in layer", + "fix": "Use: pip install --no-cache-dir -r requirements.txt", + }, + { + "id": "AP011", + "name": "npm_install_dev", + "severity": "medium", + "pattern": r"^RUN\s+.*npm\s+install\s*$", + "message": "npm install includes devDependencies — use npm ci --omit=dev for production", + "fix": "Use: npm ci --omit=dev (or npm ci --production)", + }, + { + "id": "AP012", + "name": "expose_all", + "severity": 
"low", + "pattern": r"^EXPOSE\s+\d+(?:\s+\d+){3,}", + "message": "Exposing many ports — only expose what the application actually needs", + "fix": "Remove unnecessary EXPOSE directives", + }, + { + "id": "AP013", + "name": "curl_wget_without_cleanup", + "severity": "low", + "pattern": r"^RUN\s+.*(?:curl|wget)\s+.*(?!&&\s*rm)", + "message": "Download without cleanup — downloaded archives may remain in layer", + "fix": "Download, extract, and remove archive in the same RUN instruction", + }, + { + "id": "AP014", + "name": "no_healthcheck", + "severity": "medium", + "pattern": None, # Custom check + "message": "No HEALTHCHECK instruction — orchestrators can't determine container health", + "fix": "Add HEALTHCHECK CMD curl -f http://localhost:PORT/health || exit 1", + }, + { + "id": "AP015", + "name": "shell_form_cmd", + "severity": "low", + "pattern": r'^(?:CMD|ENTRYPOINT)\s+(?!\[)["\']?\w', + "message": "Using shell form for CMD/ENTRYPOINT — exec form is preferred for signal handling", + "fix": 'Use exec form: CMD ["executable", "arg1", "arg2"]', + }, +] + +# Approximate base image sizes (MB) +BASE_IMAGE_SIZES = { + "scratch": 0, + "alpine": 7, + "distroless/static": 2, + "distroless/base": 20, + "distroless/cc": 25, + "debian-slim": 80, + "debian": 120, + "ubuntu": 78, + "python-slim": 130, + "python-alpine": 50, + "python": 900, + "node-alpine": 130, + "node-slim": 200, + "node": 1000, + "golang-alpine": 250, + "golang": 800, + "rust-slim": 750, + "rust": 1400, + "nginx-alpine": 40, + "nginx": 140, +} + + +# --- Demo Dockerfile --- + +DEMO_DOCKERFILE = """FROM python:3.12 +WORKDIR /app +COPY . . 
+RUN pip install -r requirements.txt +ENV SECRET_KEY=mysecretkey123 +EXPOSE 8000 5432 6379 +CMD python manage.py runserver 0.0.0.0:8000 +""" + + +def parse_dockerfile(content): + """Parse Dockerfile into structured instructions.""" + instructions = [] + current = "" + + for line in content.splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + if stripped.endswith("\\"): + current += stripped[:-1] + " " + continue + current += stripped + # Parse instruction + match = re.match(r"^(\w+)\s+(.*)", current.strip()) + if match: + instructions.append({ + "instruction": match.group(1).upper(), + "args": match.group(2), + "raw": current.strip(), + }) + current = "" + + return instructions + + +def analyze_layers(instructions): + """Count and classify layers.""" + layer_instructions = {"FROM", "RUN", "COPY", "ADD"} + layers = [i for i in instructions if i["instruction"] in layer_instructions] + stages = [i for i in instructions if i["instruction"] == "FROM"] + return { + "total_layers": len(layers), + "stages": len(stages), + "is_multistage": len(stages) > 1, + "run_count": sum(1 for i in instructions if i["instruction"] == "RUN"), + "copy_count": sum(1 for i in instructions if i["instruction"] == "COPY"), + "add_count": sum(1 for i in instructions if i["instruction"] == "ADD"), + } + + +def analyze_base_image(instructions): + """Analyze base image choice.""" + from_instructions = [i for i in instructions if i["instruction"] == "FROM"] + if not from_instructions: + return {"image": "unknown", "tag": "unknown", "estimated_size_mb": 0} + + last_from = from_instructions[-1]["args"].split()[0] + parts = last_from.split(":") + image = parts[0] + tag = parts[1] if len(parts) > 1 else "latest" + + # Estimate size + size = 0 + image_base = image.split("/")[-1] + for key, val in BASE_IMAGE_SIZES.items(): + if key in f"{image_base}-{tag}" or key == image_base: + size = val + break + + return { + "image": image, + "tag": tag, + 
"estimated_size_mb": size, + "is_alpine": "alpine" in tag, + "is_slim": "slim" in tag, + "is_distroless": "distroless" in image, + } + + +def run_pattern_checks(content, instructions): + """Run anti-pattern checks.""" + findings = [] + + for rule in ANTI_PATTERNS: + if rule["pattern"] is not None: + for match in re.finditer(rule["pattern"], content, re.MULTILINE | re.IGNORECASE): + findings.append({ + "id": rule["id"], + "severity": rule["severity"], + "message": rule["message"], + "fix": rule["fix"], + "line": match.group(0).strip()[:80], + }) + + # Custom checks + # AP006: Multiple CMD + cmd_count = sum(1 for i in instructions if i["instruction"] == "CMD") + if cmd_count > 1: + r = next(r for r in ANTI_PATTERNS if r["id"] == "AP006") + findings.append({ + "id": r["id"], "severity": r["severity"], + "message": r["message"], "fix": r["fix"], + "line": f"{cmd_count} CMD instructions found", + }) + + # AP009: No USER + has_user = any(i["instruction"] == "USER" for i in instructions) + if not has_user and instructions: + r = next(r for r in ANTI_PATTERNS if r["id"] == "AP009") + findings.append({ + "id": r["id"], "severity": r["severity"], + "message": r["message"], "fix": r["fix"], + "line": "(no USER instruction found)", + }) + + # AP014: No HEALTHCHECK + has_healthcheck = any(i["instruction"] == "HEALTHCHECK" for i in instructions) + if not has_healthcheck and instructions: + r = next(r for r in ANTI_PATTERNS if r["id"] == "AP014") + findings.append({ + "id": r["id"], "severity": r["severity"], + "message": r["message"], "fix": r["fix"], + "line": "(no HEALTHCHECK instruction found)", + }) + + return findings + + +def generate_report(content, output_format="text", security_focus=False): + """Generate full analysis report.""" + instructions = parse_dockerfile(content) + layers = analyze_layers(instructions) + base = analyze_base_image(instructions) + findings = run_pattern_checks(content, instructions) + + if security_focus: + security_ids = {"AP007", "AP009", 
"AP008"} + security_severities = {"critical", "high"} + findings = [f for f in findings if f["id"] in security_ids or f["severity"] in security_severities] + + # Deduplicate findings by id + seen_ids = set() + unique_findings = [] + for f in findings: + key = (f["id"], f["line"]) + if key not in seen_ids: + seen_ids.add(key) + unique_findings.append(f) + findings = unique_findings + + # Sort by severity + severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3} + findings.sort(key=lambda f: severity_order.get(f["severity"], 4)) + + # Score (100 minus deductions) + deductions = {"critical": 25, "high": 15, "medium": 5, "low": 2} + score = max(0, 100 - sum(deductions.get(f["severity"], 0) for f in findings)) + + result = { + "score": score, + "base_image": base, + "layers": layers, + "findings": findings, + "finding_counts": { + "critical": sum(1 for f in findings if f["severity"] == "critical"), + "high": sum(1 for f in findings if f["severity"] == "high"), + "medium": sum(1 for f in findings if f["severity"] == "medium"), + "low": sum(1 for f in findings if f["severity"] == "low"), + }, + } + + if output_format == "json": + print(json.dumps(result, indent=2)) + return result + + # Text output + print(f"\n{'=' * 60}") + print(f" Dockerfile Analysis Report") + print(f"{'=' * 60}") + print(f" Score: {score}/100") + print(f" Base: {base['image']}:{base['tag']} (~{base['estimated_size_mb']}MB)") + print(f" Layers: {layers['total_layers']} | Stages: {layers['stages']} | Multi-stage: {'Yes' if layers['is_multistage'] else 'No'}") + print(f" RUN: {layers['run_count']} | COPY: {layers['copy_count']} | ADD: {layers['add_count']}") + print() + + counts = result["finding_counts"] + print(f" Findings: {counts['critical']} critical | {counts['high']} high | {counts['medium']} medium | {counts['low']} low") + print(f"{'─' * 60}") + + for f in findings: + icon = {"critical": "!!!", "high": "!!", "medium": "!", "low": "~"}.get(f["severity"], "?") + print(f"\n [{f['id']}] 
{icon} {f['severity'].upper()}") + print(f" {f['message']}") + print(f" Line: {f['line']}") + print(f" Fix: {f['fix']}") + + if not findings: + print("\n No issues found. Dockerfile looks good.") + + print(f"\n{'=' * 60}\n") + return result + + +def main(): + parser = argparse.ArgumentParser( + description="docker-development: Dockerfile static analyzer" + ) + parser.add_argument("dockerfile", nargs="?", help="Path to Dockerfile (omit for demo)") + parser.add_argument( + "--output", "-o", + choices=["text", "json"], + default="text", + help="Output format (default: text)", + ) + parser.add_argument( + "--security", + action="store_true", + help="Security-focused analysis only", + ) + args = parser.parse_args() + + if args.dockerfile: + path = Path(args.dockerfile) + if not path.exists(): + print(f"Error: File not found: {args.dockerfile}", file=sys.stderr) + sys.exit(1) + content = path.read_text(encoding="utf-8") + else: + print("No Dockerfile provided. Running demo analysis...\n") + content = DEMO_DOCKERFILE + + generate_report(content, args.output, args.security) + + +if __name__ == "__main__": + main() diff --git a/product-team/research-summarizer/.claude-plugin/plugin.json b/product-team/research-summarizer/.claude-plugin/plugin.json new file mode 100644 index 0000000..0e9e8de --- /dev/null +++ b/product-team/research-summarizer/.claude-plugin/plugin.json @@ -0,0 +1,13 @@ +{ + "name": "research-summarizer", + "description": "Structured research summarization agent skill and plugin for Claude Code, Codex, and Gemini CLI. 
Summarize academic papers, compare web articles, extract citations, and produce actionable research briefs.", + "version": "1.0.0", + "author": { + "name": "Alireza Rezvani", + "url": "https://alirezarezvani.com" + }, + "homepage": "https://github.com/alirezarezvani/claude-skills/tree/main/product-team/research-summarizer", + "repository": "https://github.com/alirezarezvani/claude-skills", + "license": "MIT", + "skills": "./" +} diff --git a/product-team/research-summarizer/SKILL.md b/product-team/research-summarizer/SKILL.md new file mode 100644 index 0000000..158fa53 --- /dev/null +++ b/product-team/research-summarizer/SKILL.md @@ -0,0 +1,274 @@ +--- +name: "research-summarizer" +description: "Structured research summarization agent skill for non-dev users. Handles academic papers, web articles, reports, and documentation. Extracts key findings, generates comparative analyses, and produces properly formatted citations. Use when: user wants to summarize a research paper, compare multiple sources, extract citations from documents, or create structured research briefs. Plugin for Claude Code, Codex, Gemini CLI, and OpenClaw." +license: MIT +metadata: + version: 1.0.0 + author: Alireza Rezvani + category: product + updated: 2026-03-16 +--- + +# Research Summarizer + +> Read less. Understand more. Cite correctly. + +Structured research summarization workflow that turns dense source material into actionable briefs. Built for product managers, analysts, founders, and anyone who reads more than they should have to. + +Not a generic "summarize this" — a repeatable framework that extracts what matters, compares across sources, and formats citations properly. 
+ +--- + +## Slash Commands + +| Command | What it does | +|---------|-------------| +| `/research:summarize` | Summarize a single source into a structured brief | +| `/research:compare` | Compare 2-5 sources side-by-side with synthesis | +| `/research:cite` | Extract and format all citations from a document | + +--- + +## When This Skill Activates + +Recognize these patterns from the user: + +- "Summarize this paper / article / report" +- "What are the key findings in this document?" +- "Compare these sources" +- "Extract citations from this PDF" +- "Give me a research brief on [topic]" +- "Break down this whitepaper" +- Any request involving: summarize, research brief, literature review, citation, source comparison + +If the user has a document and wants structured understanding → this skill applies. + +--- + +## Workflow + +### `/research:summarize` — Single Source Summary + +1. **Identify source type** + - Academic paper → use IMRAD structure (Introduction, Methods, Results, Analysis, Discussion) + - Web article → use claim-evidence-implication structure + - Technical report → use executive summary structure + - Documentation → use reference summary structure + +2. **Extract structured brief** + ``` + Title: [exact title] + Author(s): [names] + Date: [publication date] + Source Type: [paper | article | report | documentation] + + ## Key Thesis + [1-2 sentences: the central argument or finding] + + ## Key Findings + 1. [Finding with supporting evidence] + 2. [Finding with supporting evidence] + 3. [Finding with supporting evidence] + + ## Methodology + [How they arrived at these findings — data sources, sample size, approach] + + ## Limitations + - [What the source doesn't cover or gets wrong] + + ## Actionable Takeaways + - [What to do with this information] + + ## Notable Quotes + > "[Direct quote]" (p. X) + ``` + +3. 
**Assess quality** + - Source credibility (peer-reviewed, reputable outlet, primary vs secondary) + - Evidence strength (data-backed, anecdotal, theoretical) + - Recency (when published, still relevant?) + - Bias indicators (funding source, author affiliation, methodology gaps) + +### `/research:compare` — Multi-Source Comparison + +1. **Collect sources** (2-5 documents) +2. **Summarize each** using the single-source workflow above +3. **Build comparison matrix** + + ``` + | Dimension | Source A | Source B | Source C | + |------------------|-----------------|-----------------|-----------------| + | Central Thesis | ... | ... | ... | + | Methodology | ... | ... | ... | + | Key Finding | ... | ... | ... | + | Sample/Scope | ... | ... | ... | + | Credibility | High/Med/Low | High/Med/Low | High/Med/Low | + ``` + +4. **Synthesize** + - Where do sources agree? (convergent findings = stronger signal) + - Where do they disagree? (divergent findings = needs investigation) + - What gaps exist across all sources? + - What's the weight of evidence for each position? + +5. **Produce synthesis brief** + ``` + ## Consensus Findings + [What most sources agree on] + + ## Contested Points + [Where sources disagree, with strongest evidence for each side] + + ## Gaps + [What none of the sources address] + + ## Recommendation + [Based on weight of evidence, what should the reader believe/do?] + ``` + +### `/research:cite` — Citation Extraction + +1. **Scan document** for all references, footnotes, in-text citations +2. **Extract and format** using the requested style (APA 7 default) +3. **Classify citations** by type: + - Primary sources (original research, data) + - Secondary sources (reviews, meta-analyses, commentary) + - Tertiary sources (textbooks, encyclopedias) +4. 
**Output** sorted bibliography with classification tags + +Supported citation formats: +- **APA 7** (default) — social sciences, business +- **IEEE** — engineering, computer science +- **Chicago** — humanities, history +- **Harvard** — general academic +- **MLA 9** — arts, humanities + +--- + +## Tooling + +### `scripts/extract_citations.py` + +CLI utility for extracting and formatting citations from text. + +**Features:** +- Regex-based citation detection (DOI, URL, author-year, numbered references) +- Multiple output formats (APA, IEEE, Chicago, Harvard, MLA) +- JSON export for integration with reference managers +- Deduplication of repeated citations + +**Usage:** +```bash +# Extract citations from a file (APA format, default) +python3 scripts/extract_citations.py document.txt + +# Specify format +python3 scripts/extract_citations.py document.txt --format ieee + +# JSON output +python3 scripts/extract_citations.py document.txt --format apa --output json + +# From stdin +cat paper.txt | python3 scripts/extract_citations.py --stdin +``` + +### `scripts/format_summary.py` + +CLI utility for generating structured research summaries. 
+ +**Features:** +- Multiple summary templates (academic, article, report, executive) +- Configurable output length (brief, standard, detailed) +- Markdown and plain text output +- Key findings extraction with evidence tagging + +**Usage:** +```bash +# Generate structured summary template +python3 scripts/format_summary.py --template academic + +# Brief executive summary format +python3 scripts/format_summary.py --template executive --length brief + +# All templates listed +python3 scripts/format_summary.py --list-templates + +# JSON output +python3 scripts/format_summary.py --template article --output json +``` + +--- + +## Quality Assessment Framework + +Rate every source on four dimensions: + +| Dimension | High | Medium | Low | +|-----------|------|--------|-----| +| **Credibility** | Peer-reviewed, established author | Reputable outlet, known author | Blog, unknown author, no review | +| **Evidence** | Large sample, rigorous method | Moderate data, sound approach | Anecdotal, no data, opinion | +| **Recency** | Published within 2 years | 2-5 years old | 5+ years, may be outdated | +| **Objectivity** | No conflicts, balanced view | Minor affiliations disclosed | Funded by interested party, one-sided | + +**Overall Rating:** +- 4 Highs = Strong source — cite with confidence +- 2+ Mediums = Adequate source — cite with caveats +- 2+ Lows = Weak source — verify independently before citing + +--- + +## Summary Templates + +See `references/summary-templates.md` for: +- Academic paper summary template (IMRAD) +- Web article summary template (claim-evidence-implication) +- Technical report template (executive summary) +- Comparative analysis template (matrix + synthesis) +- Literature review template (thematic organization) + +See `references/citation-formats.md` for: +- APA 7 formatting rules and examples +- IEEE formatting rules and examples +- Chicago, Harvard, MLA quick reference + +--- + +## Proactive Triggers + +Flag these without being asked: + +- **Source has 
no date** → Note it. Undated sources lose credibility points. +- **Source contradicts other sources** → Highlight the contradiction explicitly. Don't paper over disagreements. +- **Source is behind a paywall** → Note limited access. Suggest alternatives if known. +- **User provides only one source for a compare** → Ask for at least one more. Comparison needs 2+. +- **Citations are incomplete** → Flag missing fields (year, author, title). Don't invent metadata. +- **Source is 5+ years old in a fast-moving field** → Warn about potential obsolescence. + +--- + +## Installation + +### One-liner (any tool) +```bash +git clone https://github.com/alirezarezvani/claude-skills.git +cp -r claude-skills/product-team/research-summarizer ~/.claude/skills/ +``` + +### Multi-tool install +```bash +./scripts/convert.sh --skill research-summarizer --tool codex|gemini|cursor|windsurf|openclaw +``` + +### OpenClaw +```bash +clawhub install cs-research-summarizer +``` + +--- + +## Related Skills + +- **product-analytics** — Quantitative analysis. Complementary — use research-summarizer for qualitative sources, product-analytics for metrics. +- **competitive-teardown** — Competitive research. Complementary — use research-summarizer for individual source analysis, competitive-teardown for market landscape. +- **content-production** — Content writing. Research-summarizer feeds content-production — summarize sources first, then write. +- **product-discovery** — Discovery frameworks. Complementary — research-summarizer for desk research, product-discovery for user research. 
diff --git a/product-team/research-summarizer/references/citation-formats.md b/product-team/research-summarizer/references/citation-formats.md new file mode 100644 index 0000000..2f07b77 --- /dev/null +++ b/product-team/research-summarizer/references/citation-formats.md @@ -0,0 +1,105 @@ +# Citation Formats Quick Reference + +## APA 7 (American Psychological Association) + +Default format for social sciences, business, and product research. + +### Journal Article +Author, A. A., & Author, B. B. (Year). Title of article. *Title of Periodical*, *volume*(issue), page–page. https://doi.org/xxxxx + +**Example:** +Smith, J., & Jones, K. (2023). Agile adoption in enterprise organizations. *Journal of Product Management*, *15*(2), 45–62. https://doi.org/10.1234/jpm.2023.001 + +### Book +Author, A. A. (Year). *Title of work: Capital letter also for subtitle*. Publisher. + +**Example:** +Cagan, M. (2018). *Inspired: How to create tech products customers love*. Wiley. + +### Web Page +Author, A. A. (Year, Month Day). *Title of page*. Site Name. URL + +**Example:** +Torres, T. (2024, January 15). *Continuous discovery in practice*. Product Talk. https://www.producttalk.org/discovery + +### In-Text Citation +- Parenthetical: (Smith & Jones, 2023) +- Narrative: Smith and Jones (2023) found that... +- 3+ authors: (Patel et al., 2022) + +--- + +## IEEE (Institute of Electrical and Electronics Engineers) + +Standard for engineering, computer science, and technical research. + +### Format +[N] A. Author, "Title of article," *Journal*, vol. X, no. Y, pp. Z–Z, Month Year, doi: 10.xxxx. + +### Journal Article +[1] J. Smith and K. Jones, "Agile adoption in enterprise organizations," *J. Prod. Mgmt.*, vol. 15, no. 2, pp. 45–62, Mar. 2023, doi: 10.1234/jpm.2023.001. + +### Conference Paper +[2] A. Patel, B. Chen, and C. Kumar, "Cross-functional team performance metrics," in *Proc. Int. Conf. Software Eng.*, 2022, pp. 112–119. + +### Book +[3] M. 
Cagan, *Inspired: How to Create Tech Products Customers Love*. Hoboken, NJ, USA: Wiley, 2018. + +### In-Text Citation +As shown in [1], agile adoption has increased... +Multiple: [1], [3], [5]–[7] + +--- + +## Chicago (Notes-Bibliography) + +Standard for humanities, history, and some business writing. + +### Footnote Format +1. First Name Last Name, *Title of Book* (Place: Publisher, Year), page. +2. First Name Last Name, "Title of Article," *Journal* Volume, no. Issue (Year): pages. + +### Bibliography Entry +Last Name, First Name. *Title of Book*. Place: Publisher, Year. +Last Name, First Name. "Title of Article." *Journal* Volume, no. Issue (Year): pages. + +--- + +## Harvard + +Common in UK and Australian academic writing. + +### Format +Author, A.A. (Year) *Title of book*. Edition. Place: Publisher. +Author, A.A. (Year) 'Title of article', *Journal*, Volume(Issue), pp. X–Y. + +### In-Text Citation +(Smith and Jones, 2023) +Smith and Jones (2023) argue that... + +--- + +## MLA 9 (Modern Language Association) + +Standard for arts and humanities. + +### Format +Last, First. *Title of Book*. Publisher, Year. +Last, First. "Title of Article." *Journal*, vol. X, no. Y, Year, pp. Z–Z. + +### In-Text Citation +(Smith and Jones 45) +Smith and Jones argue that "direct quote" (45). 
+ +--- + +## Quick Decision Guide + +| Field / Context | Recommended Format | +|----------------|-------------------| +| Social sciences, business, psychology | APA 7 | +| Engineering, computer science, technical | IEEE | +| Humanities, history, arts | Chicago or MLA | +| UK/Australian academic | Harvard | +| Internal business reports | APA 7 (most widely recognized) | +| Product research briefs | APA 7 | diff --git a/product-team/research-summarizer/references/summary-templates.md b/product-team/research-summarizer/references/summary-templates.md new file mode 100644 index 0000000..e2205db --- /dev/null +++ b/product-team/research-summarizer/references/summary-templates.md @@ -0,0 +1,120 @@ +# Summary Templates Reference + +## Academic Paper (IMRAD) + +Use for peer-reviewed journal articles, conference papers, and research studies. + +### Structure +1. **Introduction** — What problem does the paper address? Why does it matter? +2. **Methods** — How was the study conducted? What data, what approach? +3. **Results** — What did they find? Key numbers, key patterns. +4. **Analysis** — What do the results mean? How do they compare to prior work? +5. **Discussion** — What are the implications? Limitations? Future work? + +### Quality Signals +- Published in a peer-reviewed venue +- Clear methodology section with reproducible steps +- Statistical significance reported (p-values, confidence intervals) +- Limitations acknowledged openly +- Conflicts of interest disclosed + +### Red Flags +- No methodology section +- Claims without supporting data +- Funded by an entity that benefits from specific results +- Published in a predatory journal (check Beall's List) + +--- + +## Web Article (Claim-Evidence-Implication) + +Use for blog posts, news articles, opinion pieces, and online publications. + +### Structure +1. **Claim** — What is the author arguing or reporting? +2. **Evidence** — What data, examples, or sources support the claim? +3. **Implication** — So what? 
What should the reader do or think differently? + +### Quality Signals +- Author has relevant expertise or credentials +- Sources are linked and verifiable +- Multiple perspectives acknowledged +- Published on a reputable platform +- Date of publication is clear + +### Red Flags +- No author attribution +- No sources or citations +- Sensationalist headline vs. measured content +- Affiliate links or sponsored content without disclosure + +--- + +## Technical Report (Executive Summary) + +Use for industry reports, whitepapers, market research, and internal documents. + +### Structure +1. **Executive Summary** — Bottom line in 2-3 sentences +2. **Scope** — What does this report cover? +3. **Key Data** — Most important numbers and findings +4. **Methodology** — How was the data gathered? +5. **Recommendations** — What should be done based on findings? +6. **Relevance** — Why does this matter for our specific context? + +### Quality Signals +- Clear methodology for data collection +- Sample size and composition disclosed +- Published by a recognized research firm or organization +- Methodology section available (even if separate document) + +### Red Flags +- "Report" is actually a marketing piece for a product +- Data from a single, small, unrepresentative sample +- No methodology disclosure +- Conclusions far exceed what the data supports + +--- + +## Comparative Analysis (Matrix + Synthesis) + +Use when evaluating 2-5 sources on the same topic. + +### Comparison Dimensions +- **Central thesis** — What is each source's main argument? +- **Methodology** — How did each source arrive at its conclusions? +- **Key finding** — What is the headline result? +- **Sample/scope** — How broad or narrow is the evidence? +- **Credibility** — How trustworthy is the source? +- **Recency** — When was it published? + +### Synthesis Framework +1. **Convergent findings** — Where sources agree (stronger signal) +2. **Divergent findings** — Where sources disagree (investigate further) +3. 
**Gaps** — What no source addresses +4. **Weight of evidence** — Which position has stronger support? + +--- + +## Literature Review (Thematic) + +Use when synthesizing 5+ sources into a research overview. + +### Organization Approaches +- **Thematic** — Group by topic (preferred for most use cases) +- **Chronological** — Group by time period (good for showing evolution) +- **Methodological** — Group by research approach (good for methods papers) + +### Per-Theme Structure +1. Theme name and scope +2. Key sources that address this theme +3. What the sources say (points of agreement) +4. What the sources disagree on +5. Strength of evidence for each position + +### Synthesis Checklist +- [ ] All sources categorized into themes +- [ ] Gaps in literature identified +- [ ] Contradictions highlighted (not hidden) +- [ ] Overall state of knowledge summarized +- [ ] Future research directions suggested diff --git a/product-team/research-summarizer/scripts/extract_citations.py b/product-team/research-summarizer/scripts/extract_citations.py new file mode 100644 index 0000000..ee4de0d --- /dev/null +++ b/product-team/research-summarizer/scripts/extract_citations.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +""" +research-summarizer: Citation Extractor + +Extract and format citations from text documents. Detects DOIs, URLs, +author-year patterns, and numbered references. Outputs in APA, IEEE, +Chicago, Harvard, or MLA format. 
+
+Usage:
+    python scripts/extract_citations.py document.txt
+    python scripts/extract_citations.py document.txt --format ieee
+    python scripts/extract_citations.py document.txt --format apa --output json
+    python scripts/extract_citations.py --stdin < document.txt
+"""
+
+import argparse
+import json
+import re
+import sys
+from collections import OrderedDict
+
+
+# --- Citation Detection Patterns ---
+
+PATTERNS = {
+    "doi": re.compile(
+        r"(?:https?://doi\.org/|doi:\s*)(10\.\d{4,}/[^\s,;}\]]+)", re.IGNORECASE
+    ),
+    "url": re.compile(
+        r"https?://[^\s,;}\])\"'>]+", re.IGNORECASE
+    ),
+    "author_year": re.compile(
+        r"(?:^|\(|\s)([A-Z][a-z]+(?:\s(?:&|and)\s[A-Z][a-z]+)?(?:\set\sal\.?)?)\s*\((\d{4})\)",
+    ),
+    "numbered_ref": re.compile(
+        r"^\[(\d+)\]\s+(.+)$", re.MULTILINE
+    ),
+    "footnote": re.compile(
+        r"^\d+\.\s+([A-Z].+?(?:\d{4}).+)$", re.MULTILINE
+    ),
+}
+
+
+def extract_dois(text):
+    """Extract DOI references."""
+    citations = []
+    for match in PATTERNS["doi"].finditer(text):
+        doi = match.group(1).rstrip(".")
+        citations.append({
+            "type": "doi",
+            "doi": doi,
+            "raw": match.group(0).strip(),
+            "url": f"https://doi.org/{doi}",
+        })
+    return citations
+
+
+def extract_urls(text):
+    """Extract URL references (excluding DOI URLs already captured)."""
+    citations = []
+    for match in PATTERNS["url"].finditer(text):
+        url = match.group(0).rstrip(".,;)")
+        if "doi.org" in url:
+            continue
+        citations.append({
+            "type": "url",
+            "url": url,
+            "raw": url,
+        })
+    return citations
+
+
+def extract_author_year(text):
+    """Extract narrative author-year citations like Smith (2023) or Smith & Jones (2021); parenthetical "(Smith, 2023)" forms are not matched by the pattern."""
+    citations = []
+    for match in PATTERNS["author_year"].finditer(text):
+        author = match.group(1).strip()
+        year = match.group(2)
+        citations.append({
+            "type": "author_year",
+            "author": author,
+            "year": year,
+            "raw": f"{author} ({year})",
+        })
+    return citations
+
+
+def extract_numbered_refs(text):
+    """Extract numbered reference list entries like [1] Author. 
Title...""" + citations = [] + for match in PATTERNS["numbered_ref"].finditer(text): + num = match.group(1) + content = match.group(2).strip() + citations.append({ + "type": "numbered", + "number": int(num), + "content": content, + "raw": f"[{num}] {content}", + }) + return citations + + +def deduplicate(citations): + """Remove duplicate citations based on raw text.""" + seen = OrderedDict() + for c in citations: + key = c.get("doi") or c.get("url") or c.get("raw", "") + key = key.lower().strip() + if key and key not in seen: + seen[key] = c + return list(seen.values()) + + +def classify_source(citation): + """Classify citation as primary, secondary, or tertiary.""" + raw = citation.get("content", citation.get("raw", "")).lower() + if any(kw in raw for kw in ["meta-analysis", "systematic review", "literature review", "survey of"]): + return "secondary" + if any(kw in raw for kw in ["textbook", "encyclopedia", "handbook", "dictionary"]): + return "tertiary" + return "primary" + + +# --- Formatting --- + +def format_apa(citation): + """Format citation in APA 7 style.""" + if citation["type"] == "doi": + return f"https://doi.org/{citation['doi']}" + if citation["type"] == "url": + return f"Retrieved from {citation['url']}" + if citation["type"] == "author_year": + return f"{citation['author']} ({citation['year']})." + if citation["type"] == "numbered": + return citation["content"] + return citation.get("raw", "") + + +def format_ieee(citation): + """Format citation in IEEE style.""" + if citation["type"] == "doi": + return f"doi: {citation['doi']}" + if citation["type"] == "url": + return f"[Online]. Available: {citation['url']}" + if citation["type"] == "author_year": + return f"{citation['author']}, {citation['year']}." 
+ if citation["type"] == "numbered": + return f"[{citation['number']}] {citation['content']}" + return citation.get("raw", "") + + +def format_chicago(citation): + """Format citation in Chicago style.""" + if citation["type"] == "doi": + return f"https://doi.org/{citation['doi']}." + if citation["type"] == "url": + return f"{citation['url']}." + if citation["type"] == "author_year": + return f"{citation['author']}. {citation['year']}." + if citation["type"] == "numbered": + return citation["content"] + return citation.get("raw", "") + + +def format_harvard(citation): + """Format citation in Harvard style.""" + if citation["type"] == "doi": + return f"doi:{citation['doi']}" + if citation["type"] == "url": + return f"Available at: {citation['url']}" + if citation["type"] == "author_year": + return f"{citation['author']} ({citation['year']})" + if citation["type"] == "numbered": + return citation["content"] + return citation.get("raw", "") + + +def format_mla(citation): + """Format citation in MLA 9 style.""" + if citation["type"] == "doi": + return f"doi:{citation['doi']}." + if citation["type"] == "url": + return f"{citation['url']}." + if citation["type"] == "author_year": + return f"{citation['author']}. {citation['year']}." + if citation["type"] == "numbered": + return citation["content"] + return citation.get("raw", "") + + +FORMATTERS = { + "apa": format_apa, + "ieee": format_ieee, + "chicago": format_chicago, + "harvard": format_harvard, + "mla": format_mla, +} + + +# --- Demo Data --- + +DEMO_TEXT = """ +Recent studies in product management have shown significant shifts in methodology. +According to Smith & Jones (2023), agile adoption has increased by 47% since 2020. +Patel et al. (2022) found that cross-functional teams deliver 2.3x faster. + +Several frameworks have been proposed: +[1] Cagan, M. Inspired: How to Create Tech Products Customers Love. Wiley, 2018. +[2] Torres, T. Continuous Discovery Habits. Product Talk LLC, 2021. +[3] Gothelf, J. 
& Seiden, J. Lean UX. O'Reilly Media, 2021. doi: 10.1234/leanux.2021 + +For further reading, see https://www.svpg.com/articles/ and the meta-analysis +by Chen (2024) on product discovery effectiveness. + +Related work: doi: 10.1145/3544548.3581388 +""" + + +def run_extraction(text, fmt, output_mode): + """Run full extraction pipeline.""" + all_citations = [] + all_citations.extend(extract_dois(text)) + all_citations.extend(extract_author_year(text)) + all_citations.extend(extract_numbered_refs(text)) + all_citations.extend(extract_urls(text)) + + citations = deduplicate(all_citations) + + for c in citations: + c["classification"] = classify_source(c) + + formatter = FORMATTERS.get(fmt, format_apa) + + if output_mode == "json": + result = { + "format": fmt, + "total": len(citations), + "citations": [], + } + for i, c in enumerate(citations, 1): + result["citations"].append({ + "index": i, + "type": c["type"], + "classification": c["classification"], + "formatted": formatter(c), + "raw": c.get("raw", ""), + }) + print(json.dumps(result, indent=2)) + else: + print(f"Citations ({fmt.upper()}) — {len(citations)} found\n") + primary = [c for c in citations if c["classification"] == "primary"] + secondary = [c for c in citations if c["classification"] == "secondary"] + tertiary = [c for c in citations if c["classification"] == "tertiary"] + + for label, group in [("Primary Sources", primary), ("Secondary Sources", secondary), ("Tertiary Sources", tertiary)]: + if group: + print(f"### {label}") + for i, c in enumerate(group, 1): + print(f" {i}. 
{formatter(c)}") + print() + + return citations + + +def main(): + parser = argparse.ArgumentParser( + description="research-summarizer: Extract and format citations from text" + ) + parser.add_argument("file", nargs="?", help="Input text file (omit for demo)") + parser.add_argument( + "--format", "-f", + choices=["apa", "ieee", "chicago", "harvard", "mla"], + default="apa", + help="Citation format (default: apa)", + ) + parser.add_argument( + "--output", "-o", + choices=["text", "json"], + default="text", + help="Output mode (default: text)", + ) + parser.add_argument( + "--stdin", + action="store_true", + help="Read from stdin instead of file", + ) + args = parser.parse_args() + + if args.stdin: + text = sys.stdin.read() + elif args.file: + try: + with open(args.file, "r", encoding="utf-8") as f: + text = f.read() + except FileNotFoundError: + print(f"Error: File not found: {args.file}", file=sys.stderr) + sys.exit(1) + except IOError as e: + print(f"Error reading file: {e}", file=sys.stderr) + sys.exit(1) + else: + print("No input file provided. Running demo...\n") + text = DEMO_TEXT + + run_extraction(text, args.format, args.output) + + +if __name__ == "__main__": + main() diff --git a/product-team/research-summarizer/scripts/format_summary.py b/product-team/research-summarizer/scripts/format_summary.py new file mode 100644 index 0000000..8640a93 --- /dev/null +++ b/product-team/research-summarizer/scripts/format_summary.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 +""" +research-summarizer: Summary Formatter + +Generate structured research summary templates for different source types. +Produces fill-in-the-blank frameworks for academic papers, web articles, +technical reports, and executive briefs. 
+ +Usage: + python scripts/format_summary.py --template academic + python scripts/format_summary.py --template executive --length brief + python scripts/format_summary.py --list-templates + python scripts/format_summary.py --template article --output json +""" + +import argparse +import json +import sys +import textwrap +from datetime import datetime + + +# --- Templates --- + +TEMPLATES = { + "academic": { + "name": "Academic Paper Summary", + "description": "IMRAD structure for peer-reviewed papers and research studies", + "sections": [ + ("Title", "[Full paper title]"), + ("Author(s)", "[Author names, affiliations]"), + ("Publication", "[Journal/Conference, Year, DOI]"), + ("Source Type", "Academic Paper"), + ("Key Thesis", "[1-2 sentences: the central research question and answer]"), + ("Methodology", "[Study design, sample size, data sources, analytical approach]"), + ("Key Findings", "1. [Finding 1 with supporting data]\n2. [Finding 2 with supporting data]\n3. [Finding 3 with supporting data]"), + ("Statistical Significance", "[Key p-values, effect sizes, confidence intervals]"), + ("Limitations", "- [Limitation 1: scope, sample, methodology gap]\n- [Limitation 2]"), + ("Implications", "- [What this means for practice]\n- [What this means for future research]"), + ("Notable Quotes", '> "[Direct quote]" (p. X)'), + ("Quality Assessment", "Credibility: [High/Med/Low] | Evidence: [High/Med/Low] | Recency: [High/Med/Low] | Objectivity: [High/Med/Low]"), + ], + }, + "article": { + "name": "Web Article Summary", + "description": "Claim-evidence-implication structure for online articles and blog posts", + "sections": [ + ("Title", "[Article title]"), + ("Author", "[Author name]"), + ("Source", "[Publication/Website, Date, URL]"), + ("Source Type", "Web Article"), + ("Central Claim", "[1-2 sentences: main argument or thesis]"), + ("Supporting Evidence", "1. [Evidence point 1]\n2. [Evidence point 2]\n3. 
[Evidence point 3]"), + ("Counterarguments Addressed", "- [Counterargument and author's response]"), + ("Implications", "- [What this means for the reader]"), + ("Bias Check", "Author affiliation: [?] | Funding: [?] | Balanced perspective: [Yes/No]"), + ("Actionable Takeaways", "- [What to do with this information]\n- [Next step]"), + ("Quality Assessment", "Credibility: [High/Med/Low] | Evidence: [High/Med/Low] | Recency: [High/Med/Low] | Objectivity: [High/Med/Low]"), + ], + }, + "report": { + "name": "Technical Report Summary", + "description": "Structured summary for industry reports, whitepapers, and technical documentation", + "sections": [ + ("Title", "[Report title]"), + ("Organization", "[Publishing organization]"), + ("Date", "[Publication date]"), + ("Source Type", "Technical Report"), + ("Executive Summary", "[2-3 sentences: scope, key conclusion, recommendation]"), + ("Scope", "[What the report covers and what it excludes]"), + ("Key Data Points", "1. [Statistic or data point with context]\n2. [Statistic or data point with context]\n3. [Statistic or data point with context]"), + ("Methodology", "[How data was collected — survey, analysis, case study]"), + ("Recommendations", "1. [Recommendation with supporting rationale]\n2. [Recommendation with supporting rationale]"), + ("Limitations", "- [Sample bias, geographic scope, time period]"), + ("Relevance", "[Why this matters for our context — specific applicability]"), + ("Quality Assessment", "Credibility: [High/Med/Low] | Evidence: [High/Med/Low] | Recency: [High/Med/Low] | Objectivity: [High/Med/Low]"), + ], + }, + "executive": { + "name": "Executive Brief", + "description": "Condensed decision-focused summary for leadership consumption", + "sections": [ + ("Source", "[Title, Author, Date]"), + ("Bottom Line", "[1 sentence: the single most important takeaway]"), + ("Key Facts", "1. [Fact]\n2. [Fact]\n3. 
[Fact]"), + ("So What?", "[Why this matters for our business/product/strategy]"), + ("Action Required", "- [Specific next step with owner and timeline]"), + ("Confidence", "[High/Medium/Low] — based on source quality and evidence strength"), + ], + }, + "comparison": { + "name": "Comparative Analysis", + "description": "Side-by-side comparison matrix for 2-5 sources on the same topic", + "sections": [ + ("Topic", "[Research topic or question being compared]"), + ("Sources Compared", "1. [Source A — Author, Year]\n2. [Source B — Author, Year]\n3. [Source C — Author, Year]"), + ("Comparison Matrix", "| Dimension | Source A | Source B | Source C |\n|-----------|---------|---------|---------|" + "\n| Central Thesis | ... | ... | ... |" + "\n| Methodology | ... | ... | ... |" + "\n| Key Finding | ... | ... | ... |" + "\n| Sample/Scope | ... | ... | ... |" + "\n| Credibility | High/Med/Low | High/Med/Low | High/Med/Low |"), + ("Consensus Findings", "[What most sources agree on]"), + ("Contested Points", "[Where sources disagree — with strongest evidence for each side]"), + ("Gaps", "[What none of the sources address]"), + ("Synthesis", "[Weight-of-evidence recommendation: what to believe and do]"), + ], + }, + "literature": { + "name": "Literature Review", + "description": "Thematic organization of multiple sources for research synthesis", + "sections": [ + ("Research Question", "[The question this review addresses]"), + ("Search Scope", "[Databases, keywords, date range, inclusion/exclusion criteria]"), + ("Sources Reviewed", "[Total count, breakdown by type]"), + ("Theme 1: [Name]", "Summary: [Theme overview]\nKey Sources: [Author (Year), Author (Year)]\nFindings: [What sources say about this theme]"), + ("Theme 2: [Name]", "Summary: [Theme overview]\nKey Sources: [Author (Year), Author (Year)]\nFindings: [What sources say about this theme]"), + ("Theme 3: [Name]", "Summary: [Theme overview]\nKey Sources: [Author (Year), Author (Year)]\nFindings: [What sources say 
about this theme]"), + ("Gaps in Literature", "- [Under-researched area 1]\n- [Under-researched area 2]"), + ("Synthesis", "[Overall state of knowledge — what we know, what we don't, where to go next]"), + ], + }, +} + +LENGTH_CONFIGS = { + "brief": {"max_sections": 4, "label": "Brief (key points only)"}, + "standard": {"max_sections": 99, "label": "Standard (full template)"}, + "detailed": {"max_sections": 99, "label": "Detailed (full template with extended guidance)"}, +} + + +def render_template(template_key, length="standard", output_format="text"): + """Render a summary template.""" + template = TEMPLATES[template_key] + sections = template["sections"] + + if length == "brief": + # Keep only first 4 sections for brief output + sections = sections[:4] + + if output_format == "json": + result = { + "template": template_key, + "name": template["name"], + "description": template["description"], + "length": length, + "generated": datetime.now().strftime("%Y-%m-%d"), + "sections": [], + } + for title, content in sections: + result["sections"].append({ + "heading": title, + "placeholder": content, + }) + return json.dumps(result, indent=2) + + # Text/Markdown output + lines = [] + lines.append(f"# {template['name']}") + lines.append(f"_{template['description']}_\n") + lines.append(f"Length: {LENGTH_CONFIGS[length]['label']}") + lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d')}\n") + lines.append("---\n") + + for title, content in sections: + lines.append(f"## {title}\n") + # Indent content for readability + for line in content.split("\n"): + lines.append(line) + lines.append("") + + lines.append("---") + lines.append("_Template from research-summarizer skill_") + + return "\n".join(lines) + + +def list_templates(output_format="text"): + """List all available templates.""" + if output_format == "json": + result = [] + for key, tmpl in TEMPLATES.items(): + result.append({ + "key": key, + "name": tmpl["name"], + "description": tmpl["description"], + 
"sections": len(tmpl["sections"]), + }) + return json.dumps(result, indent=2) + + lines = [] + lines.append("Available Summary Templates\n") + lines.append(f"{'KEY':<15} {'NAME':<30} {'SECTIONS':>8} DESCRIPTION") + lines.append(f"{'─' * 90}") + for key, tmpl in TEMPLATES.items(): + lines.append( + f"{key:<15} {tmpl['name']:<30} {len(tmpl['sections']):>8} {tmpl['description'][:40]}" + ) + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser( + description="research-summarizer: Generate structured summary templates" + ) + parser.add_argument( + "--template", "-t", + choices=list(TEMPLATES.keys()), + help="Template type to generate", + ) + parser.add_argument( + "--length", "-l", + choices=["brief", "standard", "detailed"], + default="standard", + help="Output length (default: standard)", + ) + parser.add_argument( + "--output", "-o", + choices=["text", "json"], + default="text", + help="Output format (default: text)", + ) + parser.add_argument( + "--list-templates", + action="store_true", + help="List all available templates", + ) + args = parser.parse_args() + + if args.list_templates: + print(list_templates(args.output)) + return + + if not args.template: + print("No template specified. Available templates:\n") + print(list_templates(args.output)) + print("\nUsage: python scripts/format_summary.py --template academic") + return + + print(render_template(args.template, args.length, args.output)) + + +if __name__ == "__main__": + main()