feat: add douban-skill + enhance skill-creator with development methodology
New skill: douban-skill - Full export of Douban (豆瓣) book/movie/music/game collections via Frodo API - RSS incremental sync for daily updates - Python stdlib only, zero dependencies, cross-platform (macOS/Windows/Linux) - Documented 7 failed approaches (PoW anti-scraping) and why Frodo API is the only working solution - Pre-flight user validation, KeyboardInterrupt handling, pagination bug fix skill-creator enhancements: - Add development methodology reference (8-phase process with prior art research, counter review, and real failure case studies) - Sync upstream changes: improve_description.py now uses `claude -p` instead of Anthropic SDK (no ANTHROPIC_API_KEY needed), remove stale "extended thinking" ref - Add "Updating an existing skill" guidance to Claude.ai and Cowork sections - Restore test case heuristic guidance for objective vs subjective skills README updates: - Document fork advantages vs upstream with quality comparison table (65 vs 42) - Bilingual (EN + ZH-CN) with consistent content Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
329
douban-skill/scripts/douban-frodo-export.py
Normal file
329
douban-skill/scripts/douban-frodo-export.py
Normal file
@@ -0,0 +1,329 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Douban Collection Full Export via Frodo API (Mobile App Backend)
|
||||
|
||||
Exports all book/movie/music/game collections to CSV files.
|
||||
No login or cookies required — uses HMAC-SHA1 signature auth.
|
||||
|
||||
The API key and HMAC secret are Douban's mobile app credentials, extracted from
|
||||
the public APK. They are the same for all users and do not identify you. No
|
||||
personal credentials are used or stored. Data is fetched only from frodo.douban.com.
|
||||
|
||||
Usage:
|
||||
DOUBAN_USER=<user_id> python3 douban-frodo-export.py
|
||||
DOUBAN_USER=<user_id> DOUBAN_OUTPUT_DIR=/custom/path python3 douban-frodo-export.py
|
||||
|
||||
Environment:
|
||||
DOUBAN_USER (required): Douban user ID from profile URL
|
||||
DOUBAN_OUTPUT_DIR (optional): Override output directory
|
||||
"""
|
||||
|
||||
import hmac
|
||||
import hashlib
|
||||
import base64
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
# --- Frodo API Auth ---
# Public credentials from the Douban Android APK, shared by all app users.
API_KEY = '0dad551ec0f84ed02907ff5c42e8ec70'
HMAC_SECRET = b'bf7dddc7c9cfe6f7'
BASE_URL = 'https://frodo.douban.com'
# User-Agent string of the official Android client build 7.22.0.beta9(231),
# sent with every request (see fetch_json).
USER_AGENT = (
    'api-client/1 com.douban.frodo/7.22.0.beta9(231) Android/23 '
    'product/Mate40 vendor/HUAWEI model/Mate40 brand/HUAWEI '
    'rom/android network/wifi platform/AndroidPad'
)

# --- Rate Limiting ---
# 1.5s between pages, 2s between categories. Tested with 1200+ items.
PAGE_DELAY = 1.5  # seconds between page requests within one category
CATEGORY_DELAY = 2.0  # seconds between category passes
ITEMS_PER_PAGE = 50  # page size requested from the API
MAX_PAGES_SAFETY = 500  # Guard against infinite pagination loops

# --- Category Definitions ---
# Each entry: (API type, API status, Chinese status label, output CSV file).
# Several statuses share one CSV file; their rows are merged in main().
CATEGORIES = [
    ('book', 'done', '读过', '书.csv'),
    ('book', 'doing', '在读', '书.csv'),
    ('book', 'mark', '想读', '书.csv'),
    ('movie', 'done', '看过', '影视.csv'),
    ('movie', 'doing', '在看', '影视.csv'),
    ('movie', 'mark', '想看', '影视.csv'),
    ('music', 'done', '听过', '音乐.csv'),
    ('music', 'doing', '在听', '音乐.csv'),
    ('music', 'mark', '想听', '音乐.csv'),
    ('game', 'done', '玩过', '游戏.csv'),
    ('game', 'doing', '在玩', '游戏.csv'),
    ('game', 'mark', '想玩', '游戏.csv'),
]

# Desktop-site URL prefix per media type, used by interest_to_row() to
# rebuild a canonical subject URL from the numeric subject id.
URL_PREFIX = {
    'book': 'https://book.douban.com/subject/',
    'movie': 'https://movie.douban.com/subject/',
    'music': 'https://music.douban.com/subject/',
    'game': 'https://www.douban.com/game/',
}

# Column order for every exported CSV file.
CSV_FIELDS = ['title', 'url', 'date', 'rating', 'status', 'comment']
|
||||
|
||||
|
||||
def get_download_dir():
    """Return the user's Downloads directory for the current platform.

    Windows uses %USERPROFILE%/Downloads (falling back to the home
    directory); macOS ('Darwin') and Linux both use ~/Downloads.
    """
    if platform.system() == 'Windows':
        home = os.environ.get('USERPROFILE', os.path.expanduser('~'))
        return os.path.join(home, 'Downloads')
    # macOS and Linux share the same convention.
    return os.path.expanduser('~/Downloads')
|
||||
|
||||
|
||||
def get_output_dir(user_id):
    """Resolve the per-user output directory.

    DOUBAN_OUTPUT_DIR overrides the base location; otherwise a
    'douban-sync' folder under the platform Downloads directory is used.
    The user id is always appended as the final path component.
    """
    base = os.environ.get('DOUBAN_OUTPUT_DIR') or os.path.join(
        get_download_dir(), 'douban-sync')
    return os.path.join(base, user_id)
|
||||
|
||||
|
||||
def compute_signature(url_path, timestamp):
    """Return the base64 HMAC-SHA1 signature expected by the Frodo API.

    The signed message is 'GET&<percent-encoded path>&<timestamp>';
    only the URL path is signed, never the query string.
    """
    encoded_path = urllib.parse.quote(url_path, safe='')
    message = f'GET&{encoded_path}&{timestamp}'
    digest = hmac.new(HMAC_SECRET, message.encode(), hashlib.sha1).digest()
    return base64.b64encode(digest).decode()
|
||||
|
||||
|
||||
def fetch_json(url, params):
    """Make an authenticated GET request to the Frodo API.

    Returns a (payload, status_code) tuple. On failure a synthetic
    {'error': ...} dict is returned instead of raising — HTTP errors
    carry their status code, while network errors, timeouts, and JSON
    parse failures use status 0 — so callers can retry uniformly.
    """
    request = urllib.request.Request(
        f'{url}?{urllib.parse.urlencode(params)}',
        headers={'User-Agent': USER_AGENT},
    )
    try:
        with urllib.request.urlopen(request, timeout=15) as resp:
            payload = json.loads(resp.read().decode('utf-8'))
            return payload, resp.status
    except urllib.error.HTTPError as err:
        # Keep a short excerpt of the error body for diagnostics.
        snippet = err.read().decode('utf-8', errors='replace')[:200]
        return {'error': snippet, 'code': err.code}, err.code
    except urllib.error.URLError as err:
        return {'error': f'Network error: {err.reason}'}, 0
    except socket.timeout:
        return {'error': 'Request timed out'}, 0
    except json.JSONDecodeError as err:
        return {'error': f'Invalid JSON response: {err}'}, 0
|
||||
|
||||
|
||||
def preflight_check(user_id):
    """Verify the user exists by fetching one page of book interests.

    Returns True when the export should proceed (the user looks valid,
    or the check itself failed for network reasons), False when the API
    rejects the user id. Never raises.
    """
    api_path = f'/api/v2/user/{user_id}/interests'
    ts = str(int(time.time()))
    sig = compute_signature(api_path, ts)
    # Smallest possible probe: a single 'done' book interest.
    params = {
        'type': 'book', 'status': 'done', 'start': 0, 'count': 1,
        'apiKey': API_KEY, '_ts': ts, '_sig': sig, 'os_rom': 'android',
    }
    data, code = fetch_json(f'{BASE_URL}{api_path}', params)
    if code == 0:
        # Network-level failure: do not block the export on a flaky check.
        print('Warning: Could not verify user ID (network issue). Proceeding anyway.')
        return True
    if code != 200:
        print(f'Error: API returned HTTP {code} for user "{user_id}".')
        print(' Check that the user ID is correct (from douban.com/people/<ID>/).')
        return False
    if 'total' not in data:
        # 200 but unexpected payload shape: warn, let the full export try anyway.
        print('Warning: Unexpected API response. Proceeding anyway.')
    return True
|
||||
|
||||
|
||||
def fetch_all_interests(user_id, type_name, status):
    """Fetch all items for a given type+status combination.

    Paginates through the API, checking against the reported total
    (not page size) to handle pages with fewer items due to delisted content.

    Returns a list of raw "interest" dicts; the list may be partial if
    retries are exhausted part-way through.
    """
    api_path = f'/api/v2/user/{user_id}/interests'
    all_items = []
    start = 0  # offset of the next page to request
    total = None  # server-reported grand total; learned from the first page
    retries = 0  # consecutive failures for the current page only
    max_retries = 3
    page_count = 0

    while page_count < MAX_PAGES_SAFETY:
        page_count += 1
        # Every request needs a fresh timestamp + matching signature.
        ts = str(int(time.time()))
        sig = compute_signature(api_path, ts)
        params = {
            'type': type_name, 'status': status,
            'start': start, 'count': ITEMS_PER_PAGE,
            'apiKey': API_KEY, '_ts': ts, '_sig': sig, 'os_rom': 'android',
        }

        data, status_code = fetch_json(f'{BASE_URL}{api_path}', params)

        if status_code != 200:
            # Exponential backoff: 5s, 10s, 20s before giving up entirely.
            retries += 1
            if retries > max_retries:
                print(f' Error: HTTP {status_code} after {max_retries} retries, stopping.')
                print(f' See references/troubleshooting.md for common errors.')
                break
            delay = 5 * (2 ** (retries - 1))
            print(f' HTTP {status_code}, retry {retries}/{max_retries}, waiting {delay}s...')
            time.sleep(delay)
            continue

        # A successful page resets the per-page retry budget.
        retries = 0

        if total is None:
            total = data.get('total', 0)
            if total == 0:
                return []
            print(f' Total: {total}')

        interests = data.get('interests', [])
        if not interests:
            # Defensive stop: empty page before 'total' was reached
            # (e.g. delisted content shrank the collection mid-export).
            break

        all_items.extend(interests)
        print(f' Fetched {start}-{start + len(interests)} ({len(all_items)}/{total})')

        if len(all_items) >= total:
            break
        # Advance by the actual page size, not ITEMS_PER_PAGE, so short
        # pages do not cause entries to be skipped.
        start += len(interests)
        time.sleep(PAGE_DELAY)

    return all_items
|
||||
|
||||
|
||||
def extract_rating(interest):
    """Convert a Frodo API rating object to a star string.

    Frodo normally returns {value: N, max: 5}; some older entries use a
    max=10 scale, which is halved. Missing, non-dict, or zero ratings
    yield the empty string. API values are typically integers; round()
    handles any edge cases.
    """
    rating = interest.get('rating')
    if not isinstance(rating, dict) or not rating:
        return ''
    value = rating.get('value', 0)
    if not value:
        return ''
    scale_max = rating.get('max', 5)
    star_count = round(value) if scale_max <= 5 else round(value / 2)
    # Clamp to the displayable 0-5 range before rendering.
    return '★' * max(0, min(5, star_count))
|
||||
|
||||
|
||||
def interest_to_row(interest, type_name, status_cn):
    """Flatten one Frodo API interest object into a CSV row dict."""
    subject = interest.get('subject', {})
    subject_id = subject.get('id', '')

    # Rebuild a canonical desktop URL from the subject id; fall back to
    # whatever URL the API supplied when the id is missing.
    prefix = URL_PREFIX.get(type_name, 'https://www.douban.com/subject/')
    if subject_id:
        url = f'{prefix}{subject_id}/'
    else:
        url = subject.get('url', '')

    # Keep only the YYYY-MM-DD prefix of the timestamp, if well-formed.
    raw_date = interest.get('create_time', '') or ''
    date = raw_date[:10] if re.match(r'\d{4}-\d{2}-\d{2}', raw_date) else ''

    return {
        'title': subject.get('title', ''),
        'url': url,
        'date': date,
        'rating': extract_rating(interest),
        'status': status_cn,
        'comment': interest.get('comment', ''),
    }
|
||||
|
||||
|
||||
def write_csv(filepath, rows):
    """Write rows (list of dicts keyed by CSV_FIELDS) to a CSV file.

    'utf-8-sig' prepends a BOM so Excel auto-detects the encoding.
    """
    with open(filepath, 'w', newline='', encoding='utf-8-sig') as out:
        writer = csv.DictWriter(out, fieldnames=CSV_FIELDS)
        writer.writeheader()
        writer.writerows(rows)
|
||||
|
||||
|
||||
def main():
    """Entry point: validate DOUBAN_USER, export all categories to CSV.

    Exits with status 1 on missing/invalid user id or failed pre-flight
    check. Rows are collected per output file and written at the end.
    """
    user_id = os.environ.get('DOUBAN_USER', '').strip()
    if not user_id:
        print('Error: DOUBAN_USER environment variable is required')
        print('Usage: DOUBAN_USER=<your_douban_id> python3 douban-frodo-export.py')
        sys.exit(1)

    # Extract ID from URL if user pasted a full profile URL
    url_match = re.search(r'douban\.com/people/([A-Za-z0-9._-]+)', user_id)
    if url_match:
        user_id = url_match.group(1)

    # Reject anything that could not be a Douban id; the id is also used
    # to build a filesystem path below.
    if not re.match(r'^[A-Za-z0-9._-]+$', user_id):
        print(f'Error: DOUBAN_USER contains invalid characters: {user_id}')
        sys.exit(1)

    output_dir = get_output_dir(user_id)
    os.makedirs(output_dir, exist_ok=True)
    print(f'Douban Export for user: {user_id}')
    print(f'Output directory: {output_dir}\n')

    # Pre-flight: verify user ID is valid before spending time on full export
    if not preflight_check(user_id):
        sys.exit(1)

    # Collect data grouped by output file, then write all at the end.
    file_data = {}
    grand_total = 0

    for type_name, status, status_cn, outfile in CATEGORIES:
        print(f'=== {status_cn} ({type_name}) ===')
        items = fetch_all_interests(user_id, type_name, status)

        # setdefault merges the statuses that share one CSV file.
        rows = file_data.setdefault(outfile, [])
        for item in items:
            rows.append(interest_to_row(item, type_name, status_cn))

        count = len(items)
        grand_total += count
        if count > 0:
            print(f' Collected: {count}\n')
        else:
            print(' (empty)\n')

        time.sleep(CATEGORY_DELAY)

    # Write CSV files
    print('--- Writing CSV files ---')
    for filename, rows in file_data.items():
        filepath = os.path.join(output_dir, filename)
        write_csv(filepath, rows)
        # Fix: previously printed the literal '(unknown)' instead of the
        # actual output filename.
        print(f' {filename}: {len(rows)} rows')

    print(f'\nDone! {grand_total} total items exported to {output_dir}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C: exit with the conventional 128 + SIGINT(2) status.
        print('\n\nExport interrupted by user.')
        sys.exit(130)
|
||||
190
douban-skill/scripts/douban-rss-sync.mjs
Normal file
190
douban-skill/scripts/douban-rss-sync.mjs
Normal file
@@ -0,0 +1,190 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Douban RSS → CSV incremental sync
|
||||
*
|
||||
* Pulls the public RSS feed, parses new entries, appends to CSV files.
|
||||
* No login required. Returns only the ~10 most recent items.
|
||||
* Best used for daily sync after a full Frodo API export.
|
||||
*
|
||||
* Usage:
|
||||
* DOUBAN_USER=<user_id> node douban-rss-sync.mjs
|
||||
*
|
||||
* Environment:
|
||||
* DOUBAN_USER (required): Douban user ID
|
||||
* DOUBAN_OUTPUT_DIR (optional): Override output directory
|
||||
*/
|
||||
|
||||
import https from 'node:https';
|
||||
import http from 'node:http';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import os from 'node:os';
|
||||
|
||||
// Required: the Douban user whose public feed will be synced.
let DOUBAN_USER = process.env.DOUBAN_USER;
if (!DOUBAN_USER) { console.error('Error: DOUBAN_USER env var is required'); process.exit(1); }
// Extract ID from full URL if provided (e.g., https://www.douban.com/people/foo/)
const urlMatch = DOUBAN_USER.match(/douban\.com\/people\/([A-Za-z0-9._-]+)/);
if (urlMatch) DOUBAN_USER = urlMatch[1];
// Reject anything that could not be a Douban id; it is used in file paths below.
if (!/^[A-Za-z0-9._-]+$/.test(DOUBAN_USER)) { console.error('Error: DOUBAN_USER contains invalid characters'); process.exit(1); }
|
||||
|
||||
/**
 * Return the user's Downloads directory for the current platform.
 * Windows prefers %USERPROFILE%; everything else uses the home dir.
 */
function getDownloadDir() {
  const base = process.platform === 'win32'
    ? (process.env.USERPROFILE || os.homedir())
    : os.homedir();
  return path.join(base, 'Downloads');
}
|
||||
|
||||
// Base output directory: env override, else <Downloads>/douban-sync.
const BASE_DIR = process.env.DOUBAN_OUTPUT_DIR || path.join(getDownloadDir(), 'douban-sync');
// Per-user subdirectory keeps multiple accounts separate.
const DOUBAN_OUTPUT_DIR = path.join(BASE_DIR, DOUBAN_USER);
// Hidden state file remembering which RSS guids were already synced.
const STATE_FILE = path.join(DOUBAN_OUTPUT_DIR, '.douban-rss-state.json');
const RSS_URL = `https://www.douban.com/feed/people/${DOUBAN_USER}/interests`;
|
||||
|
||||
// Maps an RSS title prefix (e.g. "读过<name>") to the target CSV file and
// the Chinese status label written into the row. The "最近在..." variants
// are folded into the same "在..." status.
const CATEGORY_MAP = [
  { pattern: /^读过/, file: '书.csv', status: '读过' },
  { pattern: /^(?:在读|最近在读)/, file: '书.csv', status: '在读' },
  { pattern: /^想读/, file: '书.csv', status: '想读' },
  { pattern: /^看过/, file: '影视.csv', status: '看过' },
  { pattern: /^(?:在看|最近在看)/, file: '影视.csv', status: '在看' },
  { pattern: /^想看/, file: '影视.csv', status: '想看' },
  { pattern: /^听过/, file: '音乐.csv', status: '听过' },
  { pattern: /^(?:在听|最近在听)/, file: '音乐.csv', status: '在听' },
  { pattern: /^想听/, file: '音乐.csv', status: '想听' },
  { pattern: /^玩过/, file: '游戏.csv', status: '玩过' },
  { pattern: /^(?:在玩|最近在玩)/, file: '游戏.csv', status: '在玩' },
  { pattern: /^想玩/, file: '游戏.csv', status: '想玩' },
];

// Matches the column layout of the full-export CSVs; BOM for Excel.
const CSV_HEADER = '\ufefftitle,url,date,rating,status,comment\n';
// Douban's 5-level Chinese rating words → star strings.
const RATING_MAP = { '力荐': '★★★★★', '推荐': '★★★★', '还行': '★★★', '较差': '★★', '很差': '★' };
|
||||
|
||||
/**
 * GET a URL and resolve with the response body as a string.
 * Follows up to 5 redirects, rejects on HTTP >= 400, socket errors,
 * or a 15s timeout. Supports both http and https URLs.
 */
function httpGet(url, redirects = 0) {
  if (redirects > 5) return Promise.reject(new Error('Too many redirects'));
  return new Promise((resolve, reject) => {
    const mod = url.startsWith('https') ? https : http;
    const req = mod.get(url, { headers: { 'User-Agent': 'Mozilla/5.0' }, timeout: 15000 }, res => {
      if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
        // Resolve possibly-relative Location headers against the current URL.
        return httpGet(new URL(res.headers.location, url).href, redirects + 1).then(resolve, reject);
      }
      if (res.statusCode >= 400) return reject(new Error(`HTTP ${res.statusCode} for ${url}`));
      let data = '';
      res.on('data', c => data += c);
      res.on('end', () => resolve(data));
    });
    req.on('error', reject);
    req.on('timeout', () => { req.destroy(); reject(new Error('Request timeout')); });
  });
}
|
||||
|
||||
/**
 * Quote a CSV field per RFC 4180: wrap in double quotes when it contains
 * a comma, quote, or line break, doubling any embedded quotes.
 * Falsy input (null/undefined/'') becomes the empty string.
 */
function csvEscape(str) {
  if (!str) return '';
  const needsQuoting = /[",\n\r]/.test(str);
  if (!needsQuoting) return str;
  return `"${str.replace(/"/g, '""')}"`;
}
|
||||
|
||||
/**
 * Minimal RSS parser: pull each <item> block out of the feed XML and
 * extract the fields the sync needs. CDATA wrappers are stripped.
 * Returns [{title, link, guid, pubDate, rating, comment}, ...].
 */
function parseItems(xml) {
  const itemRegex = /<item>([\s\S]*?)<\/item>/g;
  const items = [];
  for (let match = itemRegex.exec(xml); match !== null; match = itemRegex.exec(xml)) {
    const block = match[1];
    // Trimmed inner text of the first <tag>...</tag> within this item.
    const get = tag => {
      const m = block.match(new RegExp(`<${tag}[^>]*>(?:<!\\[CDATA\\[)?([\\s\\S]*?)(?:\\]\\]>)?<\\/${tag}>`));
      return m ? m[1].trim() : '';
    };
    const desc = get('description');
    // The rating appears in the description as "推荐: <Chinese word>".
    const ratingMatch = desc.match(/推荐:\s*(力荐|推荐|还行|较差|很差)/);
    // A short review appears as "短评: <text>" up to the next tag.
    const commentMatch = desc.match(/短评:\s*([^<]+)/);
    items.push({
      title: get('title'),
      link: get('link'),
      guid: get('guid'),
      pubDate: get('pubDate'),
      rating: ratingMatch ? RATING_MAP[ratingMatch[1]] || '' : '',
      comment: commentMatch ? commentMatch[1].trim() : '',
    });
  }
  return items;
}
|
||||
|
||||
/** Load sync state from disk; a missing/corrupt file yields fresh state. */
function loadState() {
  try {
    return JSON.parse(fs.readFileSync(STATE_FILE, 'utf8'));
  } catch {
    return { lastSyncGuids: [] };
  }
}
|
||||
|
||||
// Persist sync state, pretty-printed for easy manual inspection.
function saveState(state) { fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2)); }
|
||||
|
||||
/**
 * Strip the leading status prefix ("读过", "最近在看", ...) from an RSS
 * item title, leaving the work's name. Unrecognized titles pass through.
 */
function extractName(title) {
  const entry = CATEGORY_MAP.find(({ pattern }) => pattern.test(title));
  return entry ? title.replace(entry.pattern, '') : title;
}
|
||||
|
||||
/**
 * Check whether a CSV file already contains a row for the given link.
 * The URL is matched as a complete CSV field (delimited by a comma or a
 * line ending) so /subject/1234/ never matches /subject/12345/.
 * A missing or unreadable file counts as "not present".
 */
function isAlreadyInFile(filePath, link) {
  try {
    const content = fs.readFileSync(filePath, 'utf8');
    const field = ',' + link;
    return content.includes(field + ',') ||
           content.includes(field + '\n') ||
           content.includes(field + '\r');
  } catch { return false; }
}
|
||||
|
||||
/**
 * Normalize an RSS pubDate to YYYY-MM-DD in China Standard Time (UTC+8).
 * A date already embedded in the string is returned verbatim; otherwise
 * the string is parsed and shifted by +8h. Unparseable input yields ''.
 */
function formatDate(pubDateStr) {
  try {
    const embedded = pubDateStr.match(/(\d{4}-\d{2}-\d{2})/);
    if (embedded) return embedded[1];
    const parsed = new Date(pubDateStr);
    const shifted = new Date(parsed.getTime() + 8 * 3600000);
    return shifted.toISOString().split('T')[0];
  } catch { return ''; }
}
|
||||
|
||||
/** Create the CSV file (with header and parent directories) if absent. */
function ensureCsvFile(filePath) {
  if (fs.existsSync(filePath)) return;
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  fs.writeFileSync(filePath, CSV_HEADER);
}
|
||||
|
||||
/** Append one RSS entry to its CSV file, creating the file on demand. */
function appendToCsv(filePath, entry, status) {
  ensureCsvFile(filePath);
  const fields = [
    extractName(entry.title),
    entry.link,
    formatDate(entry.pubDate),
    entry.rating,
    status,
    entry.comment,
  ];
  fs.appendFileSync(filePath, fields.map(csvEscape).join(',') + '\n');
}
|
||||
|
||||
/**
 * Fetch the RSS feed, append entries that are neither in the guid state
 * nor already present in the target CSV, then persist the new state.
 */
async function main() {
  console.log(`Douban RSS Sync for user: ${DOUBAN_USER}`);
  console.log(`Output: ${DOUBAN_OUTPUT_DIR}\n`);
  console.log('Fetching RSS feed...');
  const xml = await httpGet(RSS_URL);
  const items = parseItems(xml);
  console.log(`Found ${items.length} items in feed`);

  const state = loadState();
  const knownGuids = new Set(state.lastSyncGuids || []);
  let newCount = 0;

  for (const item of items) {
    // Fast path: guid already seen in the previous sync run.
    if (knownGuids.has(item.guid)) continue;
    const cat = CATEGORY_MAP.find(c => c.pattern.test(item.title));
    if (!cat) { console.log(` Skip (unknown category): ${item.title}`); continue; }
    const filePath = path.join(DOUBAN_OUTPUT_DIR, cat.file);
    // Second line of defense: dedupe against the CSV itself (covers the
    // case where the state file was lost but the CSVs were kept).
    if (isAlreadyInFile(filePath, item.link)) { console.log(` Skip (exists): ${item.title}`); continue; }
    console.log(` + ${item.title} → ${cat.file}`);
    appendToCsv(filePath, item, cat.status);
    newCount++;
  }

  // Remember exactly the guids seen this run (the feed is a sliding window).
  state.lastSyncGuids = items.map(i => i.guid);
  state.lastSync = new Date().toISOString();
  saveState(state);
  console.log(`\nDone. ${newCount} new entries added.`);
}
|
||||
|
||||
// Top-level error boundary: report the message and exit non-zero.
main().catch(err => { console.error('Error:', err.message); process.exit(1); });
|
||||
Reference in New Issue
Block a user