From 28cd6bd8136f8589df6fac3f0b12b8f7085dcdb5 Mon Sep 17 00:00:00 2001 From: daymade Date: Sat, 4 Apr 2026 12:36:51 +0800 Subject: [PATCH] feat: add douban-skill + enhance skill-creator with development methodology MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New skill: douban-skill - Full export of Douban (豆瓣) book/movie/music/game collections via Frodo API - RSS incremental sync for daily updates - Python stdlib only, zero dependencies, cross-platform (macOS/Windows/Linux) - Documented 7 failed approaches (PoW anti-scraping) and why Frodo API is the only working solution - Pre-flight user validation, KeyboardInterrupt handling, pagination bug fix skill-creator enhancements: - Add development methodology reference (8-phase process with prior art research, counter review, and real failure case studies) - Sync upstream changes: improve_description.py now uses `claude -p` instead of Anthropic SDK (no ANTHROPIC_API_KEY needed), remove stale "extended thinking" ref - Add "Updating an existing skill" guidance to Claude.ai and Cowork sections - Restore test case heuristic guidance for objective vs subjective skills README updates: - Document fork advantages vs upstream with quality comparison table (65 vs 42) - Bilingual (EN + ZH-CN) with consistent content Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 4 +- README.md | 34 +- README.zh-CN.md | 34 +- douban-skill/SKILL.md | 131 +++++++ douban-skill/references/troubleshooting.md | 265 ++++++++++++++ douban-skill/scripts/douban-frodo-export.py | 329 ++++++++++++++++++ douban-skill/scripts/douban-rss-sync.mjs | 190 ++++++++++ skill-creator/SKILL.md | 16 +- .../skill-development-methodology.md | 149 ++++++++ skill-creator/scripts/improve_description.py | 103 +++--- skill-creator/scripts/run_loop.py | 4 - 11 files changed, 1186 insertions(+), 73 deletions(-) create mode 100644 douban-skill/SKILL.md create mode 100644 douban-skill/references/troubleshooting.md create 
mode 100644 douban-skill/scripts/douban-frodo-export.py create mode 100644 douban-skill/scripts/douban-rss-sync.mjs create mode 100644 skill-creator/references/skill-development-methodology.md diff --git a/.gitignore b/.gitignore index 16f07d7..5955780 100644 --- a/.gitignore +++ b/.gitignore @@ -90,5 +90,5 @@ recovered_deep_research/ # OpenCLI cache .opencli/ -# Work-in-progress skills -douban-skill/ +# Eval workspaces (contain test data with personal info) +douban-skill-workspace/ diff --git a/README.md b/README.md index 53a3e9f..ea13f11 100644 --- a/README.md +++ b/README.md @@ -38,13 +38,35 @@ Professional Claude Code skills marketplace featuring 43 production-ready skills The `skill-creator` is the **meta-skill** that enables you to build, validate, and package your own Claude Code skills. It's the most important tool in this marketplace because it empowers you to extend Claude Code with your own specialized workflows. -### Why skill-creator First? +### Why This skill-creator? -- **🎯 Foundation**: Learn how skills work by creating your own -- **🛠️ Complete Toolkit**: Initialization, validation, and packaging scripts included -- **📖 Best Practices**: Learn from production-ready examples -- **🚀 Quick Start**: Generate skill templates in seconds -- **✅ Quality Assurance**: Built-in validation ensures your skills meet standards +This is a **production-hardened fork** of [Anthropic's official skill-creator](https://github.com/anthropics/skills/tree/main/skills/skill-creator), born from building real skills and hitting every wall the official version doesn't warn you about. + +**The official skill-creator tells you _what_ to build. Ours also tells you _what not to try_ — and why.** + +| You're trying to... 
| Official | This Fork | +|---------------------|----------|-----------| +| Research before building | "Check available MCPs" (5 lines) | 8-channel search protocol with decision matrix: Adopt / Extend / Build | +| Create a skill interactively | Prose-based instructions | 9 structured AskUserQuestion checkpoints — user never loses context | +| Avoid common mistakes | No guidance | Cache edit warnings, prerequisite checks, security scan gate | +| Know the architecture options | Not mentioned | Inline vs Fork decision guide with examples (choosing wrong silently breaks your skill) | +| Validate before shipping | Basic YAML check | Expanded validator (all frontmatter fields, path reference integrity, whitespace issues) | +| Catch security issues | No tooling | `security_scan.py` with gitleaks integration — hard gate before packaging | +| Learn from real failures | No failure cases | Battle-tested methodology with documented failure patterns and gotchas | + +**Quality comparison** (independent audit, 8 dimensions; only 6 are listed below, so the rows do not add up to the totals): + +| Dimension | Official | This Fork | +|-----------|----------|-----------| +| Actionability | 7 | 9 | +| Error Prevention | 5 | 9 | +| Prior Art Research | 4 | 9 | +| Counter Review Process | 4 | 8 | +| Real-World Lessons | 3 | 8 | +| User Experience | 4 | 9 | +| **Total (out of 80)** | **42** | **65** | + +> Full methodology: [skill-creator/references/skill-development-methodology.md](./skill-creator/references/skill-development-methodology.md) ### Quick Install diff --git a/README.zh-CN.md b/README.zh-CN.md index f4b354f..29bfc1a 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -38,13 +38,35 @@ `skill-creator` 是一个**元技能**,它使你能够构建、验证和打包自己的 Claude Code 技能。它是这个市场中最重要的工具,因为它赋予你用自己的专业工作流扩展 Claude Code 的能力。 -### 为什么首选 skill-creator? +### 为什么选这个 skill-creator? 
-- **🎯 基础工具**:通过创建自己的技能来学习技能的工作原理 -- **🛠️ 完整工具包**:包含初始化、验证和打包脚本 -- **📖 最佳实践**:从生产就绪的示例中学习 -- **🚀 快速启动**:在几秒钟内生成技能模板 -- **✅ 质量保证**:内置验证确保你的技能符合标准 +这是 [Anthropic 官方 skill-creator](https://github.com/anthropics/skills/tree/main/skills/skill-creator) 的**生产强化版 fork**——从真实 skill 开发中踩过的坑里长出来的。 + +**官方告诉你"做什么"。我们还告诉你"别试什么"——以及为什么。** + +| 你想要... | 官方版 | 本 Fork | +|----------|--------|---------| +| 造之前先调研 | "Check available MCPs"(5 行) | 8 渠道搜索协议 + Adopt/Extend/Build 决策矩阵 | +| 交互式创建 skill | 纯文字指令 | 9 个结构化 AskUserQuestion 检查点——用户永远不丢上下文 | +| 避免常见错误 | 无指引 | 缓存编辑警告、前置依赖检查、安全扫描门禁 | +| 了解架构选项 | 未提及 | Inline vs Fork 决策指南(选错会静默破坏你的 skill) | +| 发布前验证 | 基本 YAML 检查 | 扩展验证器(全部 frontmatter 字段、路径引用完整性、空白字符问题) | +| 安全审查 | 无工具 | `security_scan.py` + gitleaks 集成——打包前硬门禁 | +| 从真实失败中学习 | 无失败案例 | 实战方法论 + 文档化的失败模式和踩坑记录 | + +**质量对比**(独立审计,8 个维度;下表仅列出其中 6 个,因此各行之和不等于总分): + +| 维度 | 官方版 | 本 Fork | +|------|--------|---------| +| 可操作性 | 7 | 9 | +| 错误预防 | 5 | 9 | +| 前置调研 | 4 | 9 | +| 对抗性审查 | 4 | 8 | +| 实战经验 | 3 | 8 | +| 用户体验 | 4 | 9 | +| **总分(/80)** | **42** | **65** | + +> 完整方法论:[skill-creator/references/skill-development-methodology.md](./skill-creator/references/skill-development-methodology.md) ### 快速安装 diff --git a/douban-skill/SKILL.md b/douban-skill/SKILL.md new file mode 100644 index 0000000..a1ce978 --- /dev/null +++ b/douban-skill/SKILL.md @@ -0,0 +1,131 @@ +--- +name: douban-skill +description: > + Export and sync Douban (豆瓣) book/movie/music/game collections to local CSV files via Frodo API. + Supports full export (all history) and RSS incremental sync (recent items). + Use when the user wants to export Douban reading/watching/listening/gaming history, + back up their Douban data, set up incremental sync, or mentions 豆瓣/douban collections. + Triggers on: 豆瓣, douban, 读书记录, 观影记录, 书影音, 导出豆瓣, export, backup, sync, collection. +--- + +# Douban Collection Export + +Export Douban user collections (books, movies, music, games) to CSV files. +Douban has no official data export; the official API shut down in 2018. 
+ +## What This Skill Can Do + +- Full export of all book/movie/music/game collections via Frodo API +- RSS incremental sync for daily updates (last ~10 items) +- CSV output with UTF-8 BOM (Excel-compatible), cross-platform (macOS/Windows/Linux) +- No login, no cookies, no browser required +- Pre-flight user ID validation (fail fast on wrong ID) + +## What This Skill Cannot Do + +- Cannot export reviews (长评), notes (读书笔记), or broadcasts (广播) +- Cannot filter by single category in one run (exports all 4 types together) +- Cannot access private profiles (returns 0 items silently) + +## Why Frodo API (Do NOT Use Web Scraping) + +Douban uses PoW (Proof of Work) challenges on web pages, blocking all HTTP scraping. +We tested 7 approaches — only the Frodo API works. **Do NOT attempt** web scraping, +`browser_cookie3`+`requests`, `curl` with cookies, or Jina Reader. + +See [references/troubleshooting.md](references/troubleshooting.md) for the complete +failure log of all 7 tested approaches and why each failed. + +## Security & Privacy + +The API key and HMAC secret in the script are Douban's **public mobile app credentials**, +extracted from the APK. They are shared by all Douban app users and do not identify you. +No personal credentials are used or stored. Data is fetched only from `frodo.douban.com`. + +## Full Export (Primary Method) + +```bash +DOUBAN_USER=<user_id> python3 scripts/douban-frodo-export.py +``` + +**Finding the user ID:** Profile URL `douban.com/people/<user_id>/` — the ID is after `/people/`. +If the user provides a full URL, the script auto-extracts the ID. 
+ +**Environment variables:** +- `DOUBAN_USER` (required): Douban user ID (alphanumeric or numeric, or full profile URL) +- `DOUBAN_OUTPUT_DIR` (optional): Override output directory + +**Default output** (auto-detected per platform): +- macOS: `~/Downloads/douban-sync/<user_id>/` +- Windows: `%USERPROFILE%\Downloads\douban-sync\<user_id>\` +- Linux: `~/Downloads/douban-sync/<user_id>/` + +**Dependencies:** Python 3.6+ standard library only (works with `python3` or `uv run`). + +**Example console output:** +``` +Douban Export for user: your_douban_id +Output directory: /Users/you/Downloads/douban-sync/your_douban_id + +=== 读过 (book) === + Total: 639 + Fetched 0-50 (50/639) + Fetched 50-100 (100/639) + ... + Fetched 597-639 (639/639) + Collected: 639 + +=== 在读 (book) === + Total: 75 + ... + +--- Writing CSV files --- + 书.csv: 996 rows + 影视.csv: 238 rows + 音乐.csv: 0 rows + 游戏.csv: 0 rows + +Done! 1234 total items exported to /Users/you/Downloads/douban-sync/your_douban_id +``` + +## RSS Incremental Sync (Complementary) + +```bash +DOUBAN_USER=<user_id> node scripts/douban-rss-sync.mjs +``` + +RSS returns only the latest ~10 items (no pagination). Use Full Export first, then RSS for daily updates. + +## Output Format + +Four CSV files per user: + +``` +Downloads/douban-sync/<user_id>/ +├── 书.csv (读过 + 在读 + 想读) +├── 影视.csv (看过 + 在看 + 想看) +├── 音乐.csv (听过 + 在听 + 想听) +└── 游戏.csv (玩过 + 在玩 + 想玩) +``` + +Columns: `title, url, date, rating, status, comment` +- `rating`: ★ to ★★★★★ (empty if unrated) +- `date`: YYYY-MM-DD (when the user marked it) +- Safe to run multiple times (overwrites with fresh data) +- Row counts may be slightly below Douban's displayed count due to delisted items + +## Workflow + +1. Ask for Douban user ID (from profile URL, or accept full URL) +2. Run: `DOUBAN_USER=<user_id> python3 scripts/douban-frodo-export.py` +3. Verify: row counts in console output should match the CSV files (each file has one extra header line); check with `wc -l <output_dir>/*.csv` +4. 
(Optional) Set up RSS sync for daily incremental updates + +## Troubleshooting + +See [references/troubleshooting.md](references/troubleshooting.md) for: +- Frodo API auth details (HMAC-SHA1 signature computation) +- Common errors (code 996 signature error, rate limits, pagination quirks) +- Complete failure log of all 7 tested approaches with root causes +- Alternative approaches (豆伴 extension, Tampermonkey script, browser console) +- API endpoint reference with response format diff --git a/douban-skill/references/troubleshooting.md b/douban-skill/references/troubleshooting.md new file mode 100644 index 0000000..501b115 --- /dev/null +++ b/douban-skill/references/troubleshooting.md @@ -0,0 +1,265 @@ +# Troubleshooting & Technical Reference + +## How Frodo API Auth Works + +The Frodo API is Douban's mobile app backend at `frodo.douban.com`. It uses HMAC-SHA1 signature +authentication instead of the PoW challenges used on web pages. + +**Signature computation:** +1. Build raw string: `GET` + `&` + URL-encoded(**path only**) + `&` + timestamp +2. HMAC-SHA1 with secret key `bf7dddc7c9cfe6f7` +3. Base64-encode the result → this is `_sig` + +**Critical:** Sign only the URL **path** (e.g., `/api/v2/user/xxx/interests`), never the +full URL with query parameters. This was our first signature error — code 996. 
+ +**Required query parameters:** +- `apiKey`: `0dad551ec0f84ed02907ff5c42e8ec70` (Douban mobile app's public API key) +- `_ts`: Unix timestamp in **seconds** (string) +- `_sig`: The computed HMAC-SHA1 signature +- `os_rom`: `android` + +**Required headers:** +- `User-Agent`: Must look like a Douban Android app client string + +**Python implementation:** +```python +import hmac, hashlib, base64, urllib.parse + +def compute_signature(url_path, timestamp): + raw = '&'.join(['GET', urllib.parse.quote(url_path, safe=''), timestamp]) + sig = hmac.new(b'bf7dddc7c9cfe6f7', raw.encode(), hashlib.sha1) + return base64.b64encode(sig.digest()).decode() +``` + +## Common Errors + +### Signature Error (code 996) + +```json +{"msg": "invalid_request_996", "code": 996} +``` + +**Cause:** The `_sig` parameter doesn't match the expected value. + +**Debug checklist:** +1. Are you signing only the **path**, not the full URL with query params? +2. Does `_ts` in the signature match `_ts` in the query params exactly? +3. Is `_ts` a string of Unix seconds (not milliseconds)? +4. Are you using `urllib.parse.quote(path, safe='')` (encoding `/` as `%2F`)? + +### Pagination Returns Fewer Items Than Expected + +Some pages return fewer than the requested `count` (e.g., 48 instead of 50). This happens +when items have been delisted from Douban's catalog but still count toward the total. + +**This was our biggest silent bug.** The first version of the export script used +`len(page_items) < count_per_page` as the stop condition. Result: only 499 out of 639 +books were exported, with no error message. 
The fix: + +```python +# WRONG: stops early when a page has fewer items due to delisted content +if len(interests) < count_per_page: + break + +# CORRECT: check against the total count reported by the API +if len(all_items) >= total: + break +start += len(interests) # advance by actual count, not page_size +``` + +### Rating Scale Confusion + +The Frodo API returns **two different ratings** per item: + +| Field | Scale | Meaning | +|-------|-------|---------| +| `interest.rating` | `{value: 1-5, max: 5}` | **User's personal rating** | +| `subject.rating` | `{value: 0-10, max: 10}` | Douban community average | + +Our first version divided all values by 2, which halved the user's rating (2 stars → 1 star). +The fix: check `max` field to determine scale. + +```python +# Correct conversion +if max_val <= 5: + stars = int(val) # value is already 1-5 +else: + stars = int(val / 2) # value is 2-10, convert to 1-5 +``` + +### HTTP 403 / Rate Limiting + +The Frodo API is generally tolerant, but excessive requests may trigger rate limiting. + +**Tested intervals:** +- 1.5s between pages + 2s between categories: 1234 items exported without issues +- 0s (no delay): Not tested, not recommended + +If you hit 403, increase delays to 3s/5s and retry after a few minutes. + +## Detailed Failure Log: All 7 Tested Approaches + +### Approach 1: `requests` + `browser_cookie3` (Python) + +**What we tried:** Extract Chrome cookies via `browser_cookie3`, use `requests` with those cookies. + +**What happened:** +1. First request succeeded — we saw "639 books" in the page title +2. Subsequent requests returned "禁止访问" (Forbidden) page +3. The HTML contained no items despite HTTP 200 status + +**Root cause:** Douban's PoW challenge. The first request sometimes passes (cached/grace period), +but subsequent requests trigger the PoW redirect to `sec.douban.com`. Python `requests` cannot +execute the SHA-512 proof-of-work JavaScript. 
+ +### Approach 2: `curl` with browser cookies + +**What we tried:** Export cookies from Chrome, use `curl` with full browser headers (User-Agent, +Accept, Referer, Accept-Language). + +**What happened:** HTTP 302 redirect to `https://www.douban.com/misc/sorry?original-url=...` + +**Root cause:** Same PoW issue. Even with `NO_PROXY` set to bypass local proxy, the IP was +already rate-limited from approach 1's requests. + +### Approach 3: Jina Reader (`r.jina.ai`) + +**What we tried:** `curl -s "https://r.jina.ai/https://book.douban.com/people/<user_id>/collect"` + +**What happened:** HTTP 200 but content was "403 Forbidden" — Jina's server got blocked. + +**Root cause:** Jina's scraping infrastructure also cannot solve Douban's PoW challenges. + +### Approach 4: Chrome DevTools MCP (Playwright browser) + +**What we tried:** Navigate to Douban pages in the Playwright browser via Chrome DevTools MCP. +Injected cookies via `document.cookie` in evaluate_script. + +**What happened:** +1. `mcp__chrome-devtools__navigate_page` → page title was "403 Forbidden" +2. After cookie injection → still redirected to `/misc/sorry` + +**Root cause:** The Chrome DevTools MCP connects to a Playwright browser instance, not the +user's actual Chrome. Even after injecting cookies, the IP was already banned from earlier +requests. Also, HttpOnly cookies (like `dbcl2`) can't be set via `document.cookie`. + +### Approach 5: `opencli douban marks` + +**What we tried:** `opencli douban marks --uid <user_id> --status all --limit 0 -f csv` + +**What happened:** **Partial success** — exported 24 movie records successfully. + +**Limitation:** `opencli douban` only implements `marks` (movies). No book/music/game support. +The `opencli generate` and `opencli cascade` commands failed to discover APIs for +`book.douban.com` because Douban books use server-rendered HTML with no discoverable API. + +### Approach 6: Agent Reach + +**What we tried:** Installed `agent-reach` (17-platform CLI tool). 
Checked for Douban support. + +**What happened:** Agent Reach has no Douban channel. Its web reader (Jina) also gets 403. + +### Approach 7: Node.js HTTP scraper (from douban-sync-skill) + +**What we tried:** The `douban-scraper.mjs` from the cosformula/douban-sync-skill. + +**Status:** User rejected the command before it ran — based on prior failures, it would hit +the same PoW blocking. The script uses `fetch()` with a fake User-Agent, which is exactly +what approaches 1-3 proved doesn't work. + +## Alternative Approaches (Not Blocked) + +These approaches work but have different tradeoffs compared to the Frodo API: + +### 豆伴 (Tofu) Chrome Extension (605 stars) + +- GitHub: `doufen-org/tofu` +- Uses Douban's **Rexxar API** (`m.douban.com/rexxar/api/v2/user/{uid}/interests`) +- Most comprehensive: backs up books, movies, music, games, reviews, notes, photos, etc. +- **Current status (April 2026):** Mainline v0.12.x is broken due to MV3 migration + anti-scraping. + PR #121 (v0.13.0) fixes both issues but is not yet merged. 
+- **Risk:** Makes many API calls as logged-in user → may trigger account lockout + +### Tampermonkey Userscript (bambooom/douban-backup, 162 stars) + +- Greasemonkey/Tampermonkey: `https://greasyfork.org/en/scripts/420999` +- Runs inside real browser → inherits PoW-solved session +- Adds "export" button on collection pages → auto-paginates → downloads CSV +- Suitable for one-time manual export + +### Browser Console Script (built into old skill) + +- Paste `fetch()`-based extraction script into browser DevTools console +- Zero blocking risk (same-origin request from authenticated session) +- Most manual approach — user must paste script and copy clipboard + +## API Endpoint Reference + +### User Interests (Collections) + +``` +GET https://frodo.douban.com/api/v2/user/{user_id}/interests + ?type={book|movie|music|game} + &status={done|doing|mark} + &start={offset} + &count={page_size, max 50} + &apiKey=0dad551ec0f84ed02907ff5c42e8ec70 + &_ts={unix_timestamp_seconds} + &_sig={hmac_sha1_signature} + &os_rom=android +``` + +**Response:** +```json +{ + "count": 50, + "start": 0, + "total": 639, + "interests": [ + { + "comment": "短评文本", + "rating": {"value": 4, "max": 5, "star_count": 4.0}, + "create_time": "2026-03-21 18:23:10", + "status": "done", + "id": 4799352304, + "subject": { + "id": "36116375", + "title": "书名", + "url": "https://book.douban.com/subject/36116375/", + "rating": {"value": 7.8, "max": 10, "count": 14} + } + } + ] +} +``` + +**Important distinctions:** +- `interest.rating` = user's personal rating (max 5) +- `subject.rating` = Douban community average (max 10) +- `interest.create_time` = when the user marked it (not the item's publish date) +- `status`: `done` = 读过/看过/听过/玩过, `doing` = 在读/在看/在听/在玩, `mark` = 想读/想看/想听/想玩 + +### Other Known Frodo Endpoints (Not Used by This Skill) + +| Endpoint | Returns | +|----------|---------| +| `/api/v2/book/{id}` | Book detail | +| `/api/v2/movie/{id}` | Movie detail | +| `/api/v2/group/{id}/topics` | Group 
discussion topics | +| `/api/v2/group/topic/{id}` | Single topic with comments | +| `/api/v2/subject_collection/{type}/items` | Douban curated lists | + +### Mouban Proxy Service (Third-Party) + +`mouban.mythsman.com` is a Go service that pre-crawls Douban data. If a user has been indexed, +it returns data instantly without hitting Douban directly. Endpoints: + +| Endpoint | Returns | +|----------|---------| +| `GET /guest/check_user?id={douban_id}` | User profile + counts | +| `GET /guest/user_book?id={id}&action={wish\|do\|collect}` | Book entries | +| `GET /guest/user_movie?id={id}&action=...` | Movie entries | + +**Caveat:** Data freshness depends on when the service last crawled the user. First request +for a new user triggers a background crawl (takes minutes to hours). Third-party dependency. diff --git a/douban-skill/scripts/douban-frodo-export.py b/douban-skill/scripts/douban-frodo-export.py new file mode 100644 index 0000000..2c6a003 --- /dev/null +++ b/douban-skill/scripts/douban-frodo-export.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python3 +""" +Douban Collection Full Export via Frodo API (Mobile App Backend) + +Exports all book/movie/music/game collections to CSV files. +No login or cookies required — uses HMAC-SHA1 signature auth. + +The API key and HMAC secret are Douban's mobile app credentials, extracted from +the public APK. They are the same for all users and do not identify you. No +personal credentials are used or stored. Data is fetched only from frodo.douban.com. 
+ +Usage: + DOUBAN_USER= python3 douban-frodo-export.py + DOUBAN_USER= DOUBAN_OUTPUT_DIR=/custom/path python3 douban-frodo-export.py + +Environment: + DOUBAN_USER (required): Douban user ID from profile URL + DOUBAN_OUTPUT_DIR (optional): Override output directory +""" + +import hmac +import hashlib +import base64 +import csv +import json +import os +import platform +import re +import socket +import sys +import time +import urllib.parse +import urllib.request +import urllib.error + +# --- Frodo API Auth --- +# Public credentials from the Douban Android APK, shared by all app users. +API_KEY = '0dad551ec0f84ed02907ff5c42e8ec70' +HMAC_SECRET = b'bf7dddc7c9cfe6f7' +BASE_URL = 'https://frodo.douban.com' +USER_AGENT = ( + 'api-client/1 com.douban.frodo/7.22.0.beta9(231) Android/23 ' + 'product/Mate40 vendor/HUAWEI model/Mate40 brand/HUAWEI ' + 'rom/android network/wifi platform/AndroidPad' +) + +# --- Rate Limiting --- +# 1.5s between pages, 2s between categories. Tested with 1200+ items. +PAGE_DELAY = 1.5 +CATEGORY_DELAY = 2.0 +ITEMS_PER_PAGE = 50 +MAX_PAGES_SAFETY = 500 # Guard against infinite pagination loops + +# --- Category Definitions --- +CATEGORIES = [ + ('book', 'done', '读过', '书.csv'), + ('book', 'doing', '在读', '书.csv'), + ('book', 'mark', '想读', '书.csv'), + ('movie', 'done', '看过', '影视.csv'), + ('movie', 'doing', '在看', '影视.csv'), + ('movie', 'mark', '想看', '影视.csv'), + ('music', 'done', '听过', '音乐.csv'), + ('music', 'doing', '在听', '音乐.csv'), + ('music', 'mark', '想听', '音乐.csv'), + ('game', 'done', '玩过', '游戏.csv'), + ('game', 'doing', '在玩', '游戏.csv'), + ('game', 'mark', '想玩', '游戏.csv'), +] + +URL_PREFIX = { + 'book': 'https://book.douban.com/subject/', + 'movie': 'https://movie.douban.com/subject/', + 'music': 'https://music.douban.com/subject/', + 'game': 'https://www.douban.com/game/', +} + +CSV_FIELDS = ['title', 'url', 'date', 'rating', 'status', 'comment'] + + +def get_download_dir(): + """Get the platform-appropriate Downloads directory.""" + system = 
platform.system() + if system == 'Darwin': + return os.path.expanduser('~/Downloads') + elif system == 'Windows': + return os.path.join(os.environ.get('USERPROFILE', os.path.expanduser('~')), 'Downloads') + else: + return os.path.expanduser('~/Downloads') + + +def get_output_dir(user_id): + """Determine output directory from env or platform default.""" + base = os.environ.get('DOUBAN_OUTPUT_DIR') + if not base: + base = os.path.join(get_download_dir(), 'douban-sync') + return os.path.join(base, user_id) + + +def compute_signature(url_path, timestamp): + """Compute Frodo API HMAC-SHA1 signature. + + Signs: METHOD & url_encoded_path & timestamp (path only, no query params). + """ + raw = '&'.join(['GET', urllib.parse.quote(url_path, safe=''), timestamp]) + sig = hmac.new(HMAC_SECRET, raw.encode(), hashlib.sha1) + return base64.b64encode(sig.digest()).decode() + + +def fetch_json(url, params): + """Make an authenticated GET request to the Frodo API. + + Returns (data_dict, status_code). Catches HTTP errors, network errors, + and timeouts — all return a synthetic error dict so the caller can retry. + """ + query = urllib.parse.urlencode(params) + full_url = f'{url}?{query}' + req = urllib.request.Request(full_url, headers={'User-Agent': USER_AGENT}) + try: + with urllib.request.urlopen(req, timeout=15) as resp: + return json.loads(resp.read().decode('utf-8')), resp.status + except urllib.error.HTTPError as e: + body = e.read().decode('utf-8', errors='replace')[:200] + return {'error': body, 'code': e.code}, e.code + except urllib.error.URLError as e: + return {'error': f'Network error: {e.reason}'}, 0 + except socket.timeout: + return {'error': 'Request timed out'}, 0 + except json.JSONDecodeError as e: + return {'error': f'Invalid JSON response: {e}'}, 0 + + +def preflight_check(user_id): + """Verify user exists by fetching one page of book interests. + + Returns True if the user has any data, False if the user ID appears invalid. 
+ Prints a warning and continues if the check itself fails (network issue). + """ + api_path = f'/api/v2/user/{user_id}/interests' + ts = str(int(time.time())) + sig = compute_signature(api_path, ts) + params = { + 'type': 'book', 'status': 'done', 'start': 0, 'count': 1, + 'apiKey': API_KEY, '_ts': ts, '_sig': sig, 'os_rom': 'android', + } + data, code = fetch_json(f'{BASE_URL}{api_path}', params) + if code == 0: + print(f'Warning: Could not verify user ID (network issue). Proceeding anyway.') + return True + if code != 200: + print(f'Error: API returned HTTP {code} for user "{user_id}".') + print(f' Check that the user ID is correct (from douban.com/people//).') + return False + total = data.get('total', -1) + if total == -1: + print(f'Warning: Unexpected API response. Proceeding anyway.') + return True + return True + + +def fetch_all_interests(user_id, type_name, status): + """Fetch all items for a given type+status combination. + + Paginates through the API, checking against the reported total + (not page size) to handle pages with fewer items due to delisted content. 
+ """ + api_path = f'/api/v2/user/{user_id}/interests' + all_items = [] + start = 0 + total = None + retries = 0 + max_retries = 3 + page_count = 0 + + while page_count < MAX_PAGES_SAFETY: + page_count += 1 + ts = str(int(time.time())) + sig = compute_signature(api_path, ts) + params = { + 'type': type_name, 'status': status, + 'start': start, 'count': ITEMS_PER_PAGE, + 'apiKey': API_KEY, '_ts': ts, '_sig': sig, 'os_rom': 'android', + } + + data, status_code = fetch_json(f'{BASE_URL}{api_path}', params) + + if status_code != 200: + retries += 1 + if retries > max_retries: + print(f' Error: HTTP {status_code} after {max_retries} retries, stopping.') + print(f' See references/troubleshooting.md for common errors.') + break + delay = 5 * (2 ** (retries - 1)) + print(f' HTTP {status_code}, retry {retries}/{max_retries}, waiting {delay}s...') + time.sleep(delay) + continue + + retries = 0 + + if total is None: + total = data.get('total', 0) + if total == 0: + return [] + print(f' Total: {total}') + + interests = data.get('interests', []) + if not interests: + break + + all_items.extend(interests) + print(f' Fetched {start}-{start + len(interests)} ({len(all_items)}/{total})') + + if len(all_items) >= total: + break + start += len(interests) + time.sleep(PAGE_DELAY) + + return all_items + + +def extract_rating(interest): + """Convert Frodo API rating to star string. + + Frodo returns {value: N, max: 5} where N is 1-5. + Some older entries may use max=10 scale (value 2-10). + API values are typically integers; round() handles any edge cases. 
+ """ + r = interest.get('rating') + if not r or not isinstance(r, dict): + return '' + val = r.get('value', 0) + max_val = r.get('max', 5) + if not val: + return '' + stars = round(val) if max_val <= 5 else round(val / 2) + return '★' * max(0, min(5, stars)) + + +def interest_to_row(interest, type_name, status_cn): + """Convert a single Frodo API interest object to a CSV row dict.""" + subject = interest.get('subject', {}) + sid = subject.get('id', '') + prefix = URL_PREFIX.get(type_name, 'https://www.douban.com/subject/') + url = f'{prefix}{sid}/' if sid else subject.get('url', '') + + date_raw = interest.get('create_time', '') or '' + date = date_raw[:10] if re.match(r'\d{4}-\d{2}-\d{2}', date_raw) else '' + + return { + 'title': subject.get('title', ''), + 'url': url, + 'date': date, + 'rating': extract_rating(interest), + 'status': status_cn, + 'comment': interest.get('comment', ''), + } + + +def write_csv(filepath, rows): + """Write rows to a CSV file with UTF-8 BOM for Excel compatibility.""" + with open(filepath, 'w', newline='', encoding='utf-8-sig') as f: + writer = csv.DictWriter(f, fieldnames=CSV_FIELDS) + writer.writeheader() + writer.writerows(rows) + + +def main(): + user_id = os.environ.get('DOUBAN_USER', '').strip() + if not user_id: + print('Error: DOUBAN_USER environment variable is required') + print('Usage: DOUBAN_USER= python3 douban-frodo-export.py') + sys.exit(1) + + # Extract ID from URL if user pasted a full profile URL + url_match = re.search(r'douban\.com/people/([A-Za-z0-9._-]+)', user_id) + if url_match: + user_id = url_match.group(1) + + if not re.match(r'^[A-Za-z0-9._-]+$', user_id): + print(f'Error: DOUBAN_USER contains invalid characters: {user_id}') + sys.exit(1) + + output_dir = get_output_dir(user_id) + os.makedirs(output_dir, exist_ok=True) + print(f'Douban Export for user: {user_id}') + print(f'Output directory: {output_dir}\n') + + # Pre-flight: verify user ID is valid before spending time on full export + if not 
preflight_check(user_id): + sys.exit(1) + + # Collect data grouped by output file, then write all at the end. + file_data = {} + grand_total = 0 + + for type_name, status, status_cn, outfile in CATEGORIES: + print(f'=== {status_cn} ({type_name}) ===') + items = fetch_all_interests(user_id, type_name, status) + + if outfile not in file_data: + file_data[outfile] = [] + + for item in items: + file_data[outfile].append(interest_to_row(item, type_name, status_cn)) + + count = len(items) + grand_total += count + if count > 0: + print(f' Collected: {count}\n') + else: + print(f' (empty)\n') + + time.sleep(CATEGORY_DELAY) + + # Write CSV files + print('--- Writing CSV files ---') + for filename, rows in file_data.items(): + filepath = os.path.join(output_dir, filename) + write_csv(filepath, rows) + print(f' {filename}: {len(rows)} rows') + + print(f'\nDone! {grand_total} total items exported to {output_dir}') + + +if __name__ == '__main__': + try: + main() + except KeyboardInterrupt: + print('\n\nExport interrupted by user.') + sys.exit(130) diff --git a/douban-skill/scripts/douban-rss-sync.mjs b/douban-skill/scripts/douban-rss-sync.mjs new file mode 100644 index 0000000..c90b85c --- /dev/null +++ b/douban-skill/scripts/douban-rss-sync.mjs @@ -0,0 +1,190 @@ +#!/usr/bin/env node +/** + * Douban RSS → CSV incremental sync + * + * Pulls the public RSS feed, parses new entries, appends to CSV files. + * No login required. Returns only the ~10 most recent items. + * Best used for daily sync after a full Frodo API export. 
/**
 * Douban RSS → CSV incremental sync.
 *
 * Usage:
 *   DOUBAN_USER=<your_id> node douban-rss-sync.mjs
 *
 * Environment:
 *   DOUBAN_USER (required): Douban user ID
 *   DOUBAN_OUTPUT_DIR (optional): Override output directory
 */

import https from 'node:https';
import http from 'node:http';
import fs from 'node:fs';
import path from 'node:path';
import os from 'node:os';

// Trim so a value with stray whitespace is handled like in the Python exporter.
let DOUBAN_USER = (process.env.DOUBAN_USER || '').trim();
if (!DOUBAN_USER) { console.error('Error: DOUBAN_USER env var is required'); process.exit(1); }
// Extract ID from full URL if provided (e.g., https://www.douban.com/people/foo/)
const urlMatch = DOUBAN_USER.match(/douban\.com\/people\/([A-Za-z0-9._-]+)/);
if (urlMatch) DOUBAN_USER = urlMatch[1];
if (!/^[A-Za-z0-9._-]+$/.test(DOUBAN_USER)) { console.error('Error: DOUBAN_USER contains invalid characters'); process.exit(1); }

/** Return the current user's Downloads directory. */
function getDownloadDir() {
  if (process.platform === 'win32') {
    return path.join(process.env.USERPROFILE || os.homedir(), 'Downloads');
  }
  return path.join(os.homedir(), 'Downloads');
}

const BASE_DIR = process.env.DOUBAN_OUTPUT_DIR || path.join(getDownloadDir(), 'douban-sync');
const DOUBAN_OUTPUT_DIR = path.join(BASE_DIR, DOUBAN_USER);
const STATE_FILE = path.join(DOUBAN_OUTPUT_DIR, '.douban-rss-state.json');
const RSS_URL = `https://www.douban.com/feed/people/${DOUBAN_USER}/interests`;

// Maps the leading status word of a feed item title to a CSV file and status.
const CATEGORY_MAP = [
  { pattern: /^读过/, file: '书.csv', status: '读过' },
  { pattern: /^(?:在读|最近在读)/, file: '书.csv', status: '在读' },
  { pattern: /^想读/, file: '书.csv', status: '想读' },
  { pattern: /^看过/, file: '影视.csv', status: '看过' },
  { pattern: /^(?:在看|最近在看)/, file: '影视.csv', status: '在看' },
  { pattern: /^想看/, file: '影视.csv', status: '想看' },
  { pattern: /^听过/, file: '音乐.csv', status: '听过' },
  { pattern: /^(?:在听|最近在听)/, file: '音乐.csv', status: '在听' },
  { pattern: /^想听/, file: '音乐.csv', status: '想听' },
  { pattern: /^玩过/, file: '游戏.csv', status: '玩过' },
  { pattern: /^(?:在玩|最近在玩)/, file: '游戏.csv', status: '在玩' },
  { pattern: /^想玩/, file: '游戏.csv', status: '想玩' },
];

const CSV_HEADER = '\ufefftitle,url,date,rating,status,comment\n';
const RATING_MAP = { '力荐': '★★★★★', '推荐': '★★★★', '还行': '★★★', '较差': '★★', '很差': '★' };
const XML_ENTITIES = { lt: '<', gt: '>', quot: '"', apos: "'", amp: '&' };

/**
 * GET a URL and resolve with the response body as a string.
 * Follows up to 5 redirects; rejects on HTTP status >= 400, network errors
 * and socket timeout (15 s).
 */
function httpGet(url, redirects = 0) {
  if (redirects > 5) return Promise.reject(new Error('Too many redirects'));
  return new Promise((resolve, reject) => {
    const mod = url.startsWith('https') ? https : http;
    const req = mod.get(url, { headers: { 'User-Agent': 'Mozilla/5.0' }, timeout: 15000 }, res => {
      const { statusCode, headers } = res;
      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        res.resume(); // drain the unused body so the socket is released
        httpGet(new URL(headers.location, url).href, redirects + 1).then(resolve, reject);
        return;
      }
      if (statusCode >= 400) {
        res.resume();
        reject(new Error(`HTTP ${statusCode} for ${url}`));
        return;
      }
      // Decode as a stream: concatenating raw Buffer chunks would corrupt any
      // multi-byte UTF-8 character that straddles a chunk boundary.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', chunk => { data += chunk; });
      res.on('end', () => resolve(data));
      res.on('error', reject);
    });
    req.on('error', reject);
    // Destroying with an error routes the timeout through the 'error' handler.
    req.on('timeout', () => { req.destroy(new Error('Request timeout')); });
  });
}

/** Quote a value for CSV when it contains a comma, quote, CR or LF. */
function csvEscape(str) {
  if (!str) return '';
  if (str.includes(',') || str.includes('"') || str.includes('\n') || str.includes('\r')) {
    return '"' + str.replace(/"/g, '""') + '"';
  }
  return str;
}

/** Decode the predefined XML entities and numeric character references. */
function decodeXmlEntities(text) {
  // Single pass, so "&amp;lt;" becomes "&lt;" and is not decoded twice.
  return text.replace(/&(#x[0-9a-fA-F]+|#\d+|lt|gt|quot|apos|amp);/g, (whole, ref) => {
    if (ref[0] !== '#') return XML_ENTITIES[ref];
    const code = ref[1] === 'x' ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
    try { return String.fromCodePoint(code); } catch { return whole; }
  });
}

/**
 * Extract feed entries from RSS XML.
 * Returns objects with title, link, guid, pubDate, rating (stars) and comment.
 */
function parseItems(xml) {
  const items = [];
  const itemRegex = /<item(?:\s[^>]*)?>([\s\S]*?)<\/item>/g;
  let match;
  while ((match = itemRegex.exec(xml)) !== null) {
    const block = match[1];
    // Element text is either one CDATA section (taken literally) or plain
    // character data (entity-decoded).
    const get = tag => {
      const m = block.match(new RegExp(
        `<${tag}(?:\\s[^>]*)?>\\s*(?:<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>|([\\s\\S]*?))\\s*<\\/${tag}>`
      ));
      if (!m) return '';
      return m[1] !== undefined ? m[1].trim() : decodeXmlEntities(m[2]).trim();
    };
    const title = get('title');
    const link = get('link');
    const guid = get('guid');
    const pubDate = get('pubDate');
    const desc = get('description');
    const ratingMatch = desc.match(/推荐:\s*(力荐|推荐|还行|较差|很差)/);
    const rating = ratingMatch ? RATING_MAP[ratingMatch[1]] || '' : '';
    const commentMatch = desc.match(/短评:\s*([^<]+)/);
    const comment = commentMatch ? commentMatch[1].trim() : '';
    items.push({ title, link, guid, pubDate, rating, comment });
  }
  return items;
}

/** Load the sync state; a missing, unreadable or malformed file yields a fresh state. */
function loadState() {
  try {
    const parsed = JSON.parse(fs.readFileSync(STATE_FILE, 'utf8'));
    if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) return parsed;
  } catch { /* fall through to the default state */ }
  return { lastSyncGuids: [] };
}

/** Persist the sync state, creating the output directory when needed. */
function saveState(state) {
  // The directory is otherwise only created when a CSV row is appended, so a
  // first run with no new entries would fail here with ENOENT.
  fs.mkdirSync(path.dirname(STATE_FILE), { recursive: true });
  fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2));
}

/** Strip the leading status word (e.g. 读过) from a feed item title. */
function extractName(title) {
  for (const { pattern } of CATEGORY_MAP) {
    if (pattern.test(title)) return title.replace(pattern, '');
  }
  return title;
}

/** Return true when `link` already appears as a complete field in the CSV file. */
function isAlreadyInFile(filePath, link) {
  try {
    const content = fs.readFileSync(filePath, 'utf8');
    // Exact URL match as CSV field — avoid false positives from substring matches
    // (e.g., /subject/1234/ matching /subject/12345/)
    return content.includes(',' + link + ',') ||
           content.includes(',' + link + '\n') ||
           content.includes(',' + link + '\r');
  } catch { return false; }
}

/** Convert a pubDate string to YYYY-MM-DD (shifted by +8 h); '' when unparseable. */
function formatDate(pubDateStr) {
  try {
    const direct = pubDateStr.match(/(\d{4}-\d{2}-\d{2})/);
    if (direct) return direct[1];
    const d = new Date(pubDateStr);
    const cst = new Date(d.getTime() + 8 * 3600000);
    // toISOString() throws RangeError for an invalid date; handled below.
    return cst.toISOString().split('T')[0];
  } catch { return ''; }
}

/** Create the CSV file (and its directory) with the BOM + header row if absent. */
function ensureCsvFile(filePath) {
  if (!fs.existsSync(filePath)) {
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    fs.writeFileSync(filePath, CSV_HEADER);
  }
}

/** Append one feed entry as a CSV row. */
function appendToCsv(filePath, entry, status) {
  ensureCsvFile(filePath);
  const name = extractName(entry.title);
  const date = formatDate(entry.pubDate);
  const line = [csvEscape(name), csvEscape(entry.link), csvEscape(date),
    csvEscape(entry.rating), csvEscape(status), csvEscape(entry.comment)].join(',') + '\n';
  fs.appendFileSync(filePath, line);
}

/** Fetch the feed, append entries not seen before, and record the feed's item keys. */
async function main() {
  console.log(`Douban RSS Sync for user: ${DOUBAN_USER}`);
  console.log(`Output: ${DOUBAN_OUTPUT_DIR}\n`);
  console.log('Fetching RSS feed...');
  const xml = await httpGet(RSS_URL);
  const items = parseItems(xml);
  console.log(`Found ${items.length} items in feed`);

  const state = loadState();
  const knownGuids = new Set(Array.isArray(state.lastSyncGuids) ? state.lastSyncGuids : []);
  // Fall back to the link so items without a guid do not all share the key ''.
  const itemKey = item => item.guid || item.link;
  let newCount = 0;

  for (const item of items) {
    if (knownGuids.has(itemKey(item))) continue;
    const cat = CATEGORY_MAP.find(c => c.pattern.test(item.title));
    if (!cat) { console.log(` Skip (unknown category): ${item.title}`); continue; }
    const filePath = path.join(DOUBAN_OUTPUT_DIR, cat.file);
    if (isAlreadyInFile(filePath, item.link)) { console.log(` Skip (exists): ${item.title}`); continue; }
    console.log(` + ${item.title} → ${cat.file}`);
    appendToCsv(filePath, item, cat.status);
    newCount++;
  }

  state.lastSyncGuids = items.map(itemKey);
  state.lastSync = new Date().toISOString();
  saveState(state);
  console.log(`\nDone. ${newCount} new entries added.`);
}

main().catch(err => { console.error('Error:', err.message); process.exit(1); });
(what user phrases/contexts) 3. What's the expected output format? -4. Should we set up test cases to verify the skill works? +4. Should we set up test cases to verify the skill works? Skills with objectively verifiable outputs (file transforms, data extraction, code generation, fixed workflow steps) benefit from test cases. Skills with subjective outputs (writing style, art) often don't need them. Suggest the appropriate default based on the skill type, but let the user decide. After extracting answers from conversation history (or asking questions 1-3), use **AskUserQuestion** to confirm the skill type and testing strategy: @@ -433,6 +433,10 @@ Filenames must be self-explanatory without reading contents. Anthropic has wrote skill authoring best practices, you SHOULD retrieve it before you create or update any skills, the link is https://platform.claude.com/docs/en/agents-and-tools/agent-skills/best-practices.md +#### Development Methodology Reference + +Also read [references/skill-development-methodology.md](references/skill-development-methodology.md) before starting — it covers the full 8-phase development process with prior art research, counter review, and real failure case studies. The two references are complementary: the Anthropic doc covers principles, the methodology covers process. + ### Test Cases After writing the skill draft, come up with 2-3 realistic test prompts — the kind of thing a real user would actually say. Present them via **AskUserQuestion**: @@ -750,7 +754,7 @@ Use the model ID from your system prompt (the one powering the current session) While it runs, periodically tail the output to give the user updates on which iteration it's on and what the scores look like. -This handles the full optimization loop automatically. 
It splits the eval set into 60% train and 40% held-out test, evaluates the current description (running each query 3 times to get a reliable trigger rate), then calls Claude with extended thinking to propose improvements based on what failed. It re-evaluates each new description on both train and test, iterating up to 5 times. When it's done, it opens an HTML report in the browser showing the results per iteration and returns JSON with `best_description` — selected by test score rather than train score to avoid overfitting. +This handles the full optimization loop automatically. It splits the eval set into 60% train and 40% held-out test, evaluates the current description (running each query 3 times to get a reliable trigger rate), then calls Claude to propose improvements based on what failed. It re-evaluates each new description on both train and test, iterating up to 5 times. When it's done, it opens an HTML report in the browser showing the results per iteration and returns JSON with `best_description` — selected by test score rather than train score to avoid overfitting. ### How skill triggering works @@ -1008,6 +1012,11 @@ In Claude.ai, the core workflow is the same (draft -> test -> review -> improve **Packaging**: The `package_skill.py` script works anywhere with Python and a filesystem. On Claude.ai, you can run it and the user can download the resulting `.skill` file. +- **Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. In this case: + - **Preserve the original name.** Note the skill's directory name and `name` frontmatter field — use them unchanged. E.g., if the installed skill is `research-helper`, output `research-helper.skill` (not `research-helper-v2`). + - **Copy to a writeable location before editing.** The installed skill path may be read-only. Copy to `/tmp/skill-name/`, edit there, and package from the copy. 
+ - **If packaging manually, stage in `/tmp/` first**, then copy to the output directory — direct writes may fail due to permissions. + --- ## Cowork-Specific Instructions @@ -1020,6 +1029,7 @@ If you're in Cowork, the main things to know are: - Feedback works differently: since there's no running server, the viewer's "Submit All Reviews" button will download `feedback.json` as a file. You can then read it from there (you may have to request access first). - Packaging works — `package_skill.py` just needs Python and a filesystem. - Description optimization (`run_loop.py` / `run_eval.py`) should work in Cowork just fine since it uses `claude -p` via subprocess, not a browser, but please save it until you've fully finished making the skill and the user agrees it's in good shape. +- **Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. Follow the update guidance in the claude.ai section above. --- diff --git a/skill-creator/references/skill-development-methodology.md b/skill-creator/references/skill-development-methodology.md new file mode 100644 index 0000000..0d93b07 --- /dev/null +++ b/skill-creator/references/skill-development-methodology.md @@ -0,0 +1,149 @@ +# Skill Development Methodology + +综合 Anthropic 官方最佳实践、skill-creator 工作流、社区经验和实战教训的完整方法论。 + +本文档只包含 SKILL.md 中**没有覆盖**的内容。SKILL.md 已经详细描述的流程(Prior Art 8 渠道表、决策矩阵、Inline vs Fork、测试用例格式、描述优化循环等)不在此重复——请直接参考 SKILL.md 对应章节。 + +## Phase 1: 先手动解决问题,不要上来就建 skill + +SKILL.md 的 "Capture Intent" 章节覆盖了意图收集的 4 个问题和 skill 类型分类。本节补充一个被忽略的前置步骤: + +**不要一开始就写 skill。** 先用 Claude Code 正常解决用户的问题,在过程中积累经验——哪些方案有效、哪些失败、最终的 working solution 是什么。如果你没有亲自失败过,你写不出能防止别人失败的 skill。 + +很多 skill 都是从"把我们刚做的变成一个 skill"中诞生的。先从对话历史中提取已验证的模式(SKILL.md "Capture Intent" 第三段已提及),然后才开始规划 skill 结构。 + +## Phase 2: 用 Agent Team 做并行调研 + +SKILL.md 的 "Prior Art Research" 章节覆盖了 8 个搜索渠道、clone-and-verify 检查清单、和 Adopt/Extend/Build 决策矩阵。本节补充 SKILL.md 未提及的**并行调研模式**: + 
+遇到不确定的技术方案时,不要串行尝试(太慢),也不要凭经验猜(太危险)。同时启动 3+ 个研究 agent,每个负责一个调研方向: + +| Agent | 职责 | 搜索范围 | +|-------|------|---------| +| 工具调研 | 找已有成熟工具 | GitHub stars、npm/PyPI、社区 skill 注册表 | +| API 调研 | 找可用 API 端点 | 官方文档、逆向工程、移动端 API | +| 约束调研 | 理解技术限制 | 反爬机制、认证要求、平台限制 | + +每个 agent 必须独立验证(读源码、确认 API 可达、检查最近提交日期),不能只看 README。 + +**案例**:开发一个数据导出 skill 时,3 个 agent 并行跑了 5-20 分钟,分别发现:一个关键工具当前版本 broken(605 stars 但 PR 待合并)、一个未公开的移动端 API(唯一可行方案)、目标平台升级了 PoW 反爬(所有 HTTP 抓取失效)。没有并行研究,这些信息需要串行试错 3+ 小时才能获得。 + +## Phase 3: 用真实数据验证原型 + +SKILL.md 的 Evaluation-Driven Development 流程覆盖了"先跑 baseline → 建 eval → 迭代"的过程。本节补充两个 SKILL.md 未强调的验证原则: + +### 3.1 数据完整性验证 + +"it runs without errors" ≠ "it exported all items correctly"。必须: +- 对比 API 报告的 total 和实际导出行数 +- 检查字段格式(评分、日期、编码是否符合预期) +- 用不同规模的数据测试(0 条、100 条、1000+ 条) + +**常见静默 bug**: +- 分页逻辑:某些页面返回的数据量少于请求值(如请求 50 条返回 48 条),被误判为最后一页导致提前终止。修复:检查 `total` 而非 `page_size` +- 数据转换:API 返回 `{value: 2, max: 5}` 表示 2/5 星,但代码按 `max: 10` 处理后变成 1 星。修复:检查 `max` 字段确定 scale + +### 3.2 记录失败 + +详细记录每个失败方案的方法、失败模式、根因。这些将成为 skill 中 "Do NOT attempt" 部分的内容——这是 skill 最独特的价值,防止未来的 agent 重走弯路。 + +失败记录的结构: + +| 方案 | 结果 | 根因 | +|------|------|------| +| 方案名称 | 具体失败表现(HTTP 状态码、错误信息) | 架构层面的原因分析 | + +## Phase 4: Skill 写作补充原则 + +SKILL.md 的 "Skill Writing Guide" 已覆盖 frontmatter、progressive disclosure、bundled resources、命名规范等。本节补充 SKILL.md 未提及的内容层面原则: + +### 4.1 写清楚 skill 不能做什么 + +防止 agent 尝试不可能的操作。例如: +- "Cannot export reviews (长评) — different API endpoint, not implemented" +- "Cannot filter by single category — exports all 4 types together" + +### 4.2 写清楚失败过什么 + +在 SKILL.md 或 references 中保留失败方案的摘要(详见 Phase 3.2),加上明确的"Do NOT attempt"警告。这比正面指令更有效——agent 看到 7 种方案的失败记录后,不会尝试第 8 种类似方案。 + +### 4.3 安全说明 + +如果脚本包含 API key、HMAC 密钥或其他凭据,必须解释来源和安全性。例如:"These are the app's public credentials extracted from the APK, shared by all users. No personal credentials are used." 
+ +### 4.4 Console output 示例 + +展示一次成功运行的完整控制台输出。让 agent 知道"正确运行"长什么样,方便验证(SKILL.md Phase 5 的 self-verification)。 + +### 4.5 脚本健壮性 + +SKILL.md 的 "Solve, don't punt" 覆盖了基本错误处理。补充实战中发现的常见遗漏: +- 只捕获 HTTPError,遗漏 URLError / socket.timeout / JSONDecodeError +- 无限分页循环(API 异常时)——需要 max-page 安全阀 +- CSV 中的换行符/回车符——`csvEscape` 必须处理 `\r` +- 用户输入是完整 URL 而非 ID——脚本应自动提取 + +## Phase 5: 测试迭代补充 + +SKILL.md 的测试流程非常详细(A/B 测试、断言、评分、viewer)。本节补充两个 SKILL.md 未覆盖的实操教训: + +### 5.1 删除竞争的旧 skill + +如果系统中存在旧版 skill(关键词冲突),eval agent 会被旧 skill 截胡,导致测试结果完全无效。必须在测试前删除旧 skill。 + +**信号**:eval agent 使用了不同于预期的脚本或方法 → 检查是否有同名/同领域的旧 skill 被加载。 + +### 5.2 量化迭代对比 + +SKILL.md 提到 timing.json 和 benchmark,但未给出具体应跟踪哪些指标。推荐: + +| 指标 | 为什么重要 | +|------|-----------| +| 数据完整性(实际/预期) | 核心正确性 | +| 执行时间 | 用户体验 | +| Token 消耗 | 成本 | +| 工具调用次数 | skill 引导效率——次数越少说明 skill 的指令越清晰 | +| 错误数 | 必须为 0 | + +**案例对比**:某 skill 迭代后,工具调用从 31 次降到 8 次(74% 减少)、Token 从 72K 降到 41K(43% 减少),说明 skill 的指令让 agent 不再需要自己摸索。 + +## Phase 6: Counter Review — 用 Agent Team 做对抗性审查 + +这是 SKILL.md 未覆盖的独立环节。SKILL.md 的 "Improving the skill" 章节关注用户反馈驱动的迭代,但没有系统化的多视角审查流程。 + +### 6.1 第一轮:3 个视角并行 + +用 Task 工具同时启动 3 个 review agent: + +| Reviewer | 视角 | 关注点 | +|----------|------|--------| +| Skill 质量 | 对标 Anthropic 最佳实践 | 描述质量、简洁性、progressive disclosure、可操作性、错误预防、示例、术语一致性 | +| 代码健壮性 | 高级工程师找 bug | 错误处理、安全性、跨平台、边界情况、依赖、幂等性 | +| 用户视角 | 首次使用者体验 | 首次成功率、输入容错、输出预期、隐私顾虑、失败恢复 | + +### 6.2 修复后 Final Gate + +修复所有 Critical 和 HIGH 问题后,再启动 final gate reviewers 验证修复正确性。评分 >= 8 才放行。 + +### 6.3 常见发现模式 + +根据实战经验,reviewer 经常发现的问题类型: +- **SKILL.md 和 references 内容重复**(每次都会犯,包括本文档自己) +- **异常类型遗漏**(只捕获 HTTPError,漏掉 URLError/socket.timeout) +- **substring 误匹配**(`content.includes(url)` 导致 `/1234/` 匹配 `/12345/`) +- **docstring 与实际行为不一致**(写了 "4.5 → 5" 但实际行为是 "4.5 → 4") +- **误导性注释**(注释说"每个分类写入后立即保存"但代码在最后才写入) +- **时间敏感数据**(特定日期的测试结果、版本号——下周就过时了) + +## Phase 7 & 8: Description Optimization + Packaging + +SKILL.md 已完整覆盖描述优化循环(20 个 eval query、60/40 train/test split、5 
def _call_claude(prompt: str, model: str | None, timeout: int = 300) -> str:
    """Run `claude -p` with the prompt on stdin and return the text response.

    Prompt goes over stdin (not argv) because it embeds the full SKILL.md
    body and can easily exceed comfortable argv length.

    Args:
        prompt: Full prompt text, written to the subprocess's stdin.
        model: Value passed via ``--model``; the flag is omitted when falsy.
        timeout: Seconds to wait for the subprocess before giving up.

    Returns:
        The subprocess's stdout.

    Raises:
        RuntimeError: If `claude -p` exits with a non-zero status.
        subprocess.TimeoutExpired: If the call exceeds *timeout* seconds.
    """
    cmd = ["claude", "-p", "--output-format", "text"]
    if model:
        cmd.extend(["--model", model])

    # Remove CLAUDECODE env var to allow nesting claude -p inside a
    # Claude Code session. The guard is for interactive terminal conflicts;
    # programmatic subprocess usage is safe. Same pattern as run_eval.py.
    env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}

    result = subprocess.run(
        cmd,
        input=prompt,
        capture_output=True,
        text=True,
        # Pin the pipe encoding: text=True alone uses the locale's preferred
        # encoding, which on non-UTF-8 locales cannot round-trip non-ASCII
        # prompt content. Assumes the CLI reads and writes UTF-8 — TODO confirm.
        encoding="utf-8",
        errors="replace",
        env=env,
        timeout=timeout,
    )
    if result.returncode != 0:
        raise RuntimeError(
            f"claude -p exited {result.returncode}\nstderr: {result.stderr}"
        )
    return result.stdout
Here are some tips that we've found to work well in writing these descriptions: - The skill should be phrased in the imperative -- "Use this skill for" rather than "this skill does" @@ -111,70 +141,41 @@ I'd encourage you to be creative and mix up the style in different iterations si Please respond with only the new description text in tags, nothing else.""" - response = client.messages.create( - model=model, - max_tokens=16000, - thinking={ - "type": "enabled", - "budget_tokens": 10000, - }, - messages=[{"role": "user", "content": prompt}], - ) + text = _call_claude(prompt, model) - # Extract thinking and text from response - thinking_text = "" - text = "" - for block in response.content: - if block.type == "thinking": - thinking_text = block.thinking - elif block.type == "text": - text = block.text - - # Parse out the tags match = re.search(r"(.*?)", text, re.DOTALL) description = match.group(1).strip().strip('"') if match else text.strip().strip('"') - # Log the transcript transcript: dict = { "iteration": iteration, "prompt": prompt, - "thinking": thinking_text, "response": text, "parsed_description": description, "char_count": len(description), "over_limit": len(description) > 1024, } - # If over 1024 chars, ask the model to shorten it + # Safety net: the prompt already states the 1024-char hard limit, but if + # the model blew past it anyway, make one fresh single-turn call that + # quotes the too-long version and asks for a shorter rewrite. (The old + # SDK path did this as a true multi-turn; `claude -p` is one-shot, so we + # inline the prior output into the new prompt instead.) if len(description) > 1024: - shorten_prompt = f"Your description is {len(description)} characters, which exceeds the hard 1024 character limit. Please rewrite it to be under 1024 characters while preserving the most important trigger words and intent coverage. Respond with only the new description in tags." 
- shorten_response = client.messages.create( - model=model, - max_tokens=16000, - thinking={ - "type": "enabled", - "budget_tokens": 10000, - }, - messages=[ - {"role": "user", "content": prompt}, - {"role": "assistant", "content": text}, - {"role": "user", "content": shorten_prompt}, - ], + shorten_prompt = ( + f"{prompt}\n\n" + f"---\n\n" + f"A previous attempt produced this description, which at " + f"{len(description)} characters is over the 1024-character hard limit:\n\n" + f'"{description}"\n\n' + f"Rewrite it to be under 1024 characters while keeping the most " + f"important trigger words and intent coverage. Respond with only " + f"the new description in tags." ) - - shorten_thinking = "" - shorten_text = "" - for block in shorten_response.content: - if block.type == "thinking": - shorten_thinking = block.thinking - elif block.type == "text": - shorten_text = block.text - + shorten_text = _call_claude(shorten_prompt, model) match = re.search(r"(.*?)", shorten_text, re.DOTALL) shortened = match.group(1).strip().strip('"') if match else shorten_text.strip().strip('"') transcript["rewrite_prompt"] = shorten_prompt - transcript["rewrite_thinking"] = shorten_thinking transcript["rewrite_response"] = shorten_text transcript["rewrite_description"] = shortened transcript["rewrite_char_count"] = len(shortened) @@ -216,9 +217,7 @@ def main(): print(f"Current: {current_description}", file=sys.stderr) print(f"Score: {eval_results['summary']['passed']}/{eval_results['summary']['total']}", file=sys.stderr) - client = anthropic.Anthropic() new_description = improve_description( - client=client, skill_name=name, skill_content=content, current_description=current_description, diff --git a/skill-creator/scripts/run_loop.py b/skill-creator/scripts/run_loop.py index 36f9b4e..30a263d 100755 --- a/skill-creator/scripts/run_loop.py +++ b/skill-creator/scripts/run_loop.py @@ -15,8 +15,6 @@ import time import webbrowser from pathlib import Path -import anthropic - from 
scripts.generate_report import generate_html from scripts.improve_description import improve_description from scripts.run_eval import find_project_root, run_eval @@ -75,7 +73,6 @@ def run_loop( train_set = eval_set test_set = [] - client = anthropic.Anthropic() history = [] exit_reason = "unknown" @@ -200,7 +197,6 @@ def run_loop( for h in history ] new_description = improve_description( - client=client, skill_name=name, skill_content=content, current_description=current_description,