#!/usr/bin/env python3
"""Douban Collection Full Export via Frodo API (Mobile App Backend).

Exports all book/movie/music/game collections to CSV files.
No login or cookies required — uses HMAC-SHA1 signature auth.

The API key and HMAC secret are Douban's mobile app credentials, extracted
from the public APK. They are the same for all users and do not identify you.
No personal credentials are used or stored. Data is fetched only from
frodo.douban.com.

Usage:
    DOUBAN_USER=<user_id> python3 douban-frodo-export.py
    DOUBAN_USER=<user_id> DOUBAN_OUTPUT_DIR=/custom/path python3 douban-frodo-export.py

Environment:
    DOUBAN_USER (required): Douban user ID from profile URL
    DOUBAN_OUTPUT_DIR (optional): Override output directory
"""

import base64
import csv
import hashlib
import hmac
import http.client
import json
import math
import os
import platform
import re
import socket
import sys
import time
import urllib.error
import urllib.parse
import urllib.request

# --- Frodo API Auth ---
# Public credentials from the Douban Android APK, shared by all app users.
API_KEY = '0dad551ec0f84ed02907ff5c42e8ec70'
HMAC_SECRET = b'bf7dddc7c9cfe6f7'
BASE_URL = 'https://frodo.douban.com'
USER_AGENT = (
    'api-client/1 com.douban.frodo/7.22.0.beta9(231) Android/23 '
    'product/Mate40 vendor/HUAWEI model/Mate40 brand/HUAWEI '
    'rom/android network/wifi platform/AndroidPad'
)

# --- Rate Limiting ---
# 1.5s between pages, 2s between categories. Tested with 1200+ items.
PAGE_DELAY = 1.5
CATEGORY_DELAY = 2.0
ITEMS_PER_PAGE = 50
MAX_PAGES_SAFETY = 500  # Guard against infinite pagination loops
REQUEST_TIMEOUT = 15  # Seconds allowed for a single HTTP request
MAX_RETRIES = 3  # Consecutive failed requests tolerated per page
RETRY_BASE_DELAY = 5  # Seconds; doubled on every consecutive retry

# --- Category Definitions ---
# (API type, API status, status label written to the CSV, output file name)
CATEGORIES = [
    ('book', 'done', '读过', '书.csv'),
    ('book', 'doing', '在读', '书.csv'),
    ('book', 'mark', '想读', '书.csv'),
    ('movie', 'done', '看过', '影视.csv'),
    ('movie', 'doing', '在看', '影视.csv'),
    ('movie', 'mark', '想看', '影视.csv'),
    ('music', 'done', '听过', '音乐.csv'),
    ('music', 'doing', '在听', '音乐.csv'),
    ('music', 'mark', '想听', '音乐.csv'),
    ('game', 'done', '玩过', '游戏.csv'),
    ('game', 'doing', '在玩', '游戏.csv'),
    ('game', 'mark', '想玩', '游戏.csv'),
]

URL_PREFIX = {
    'book': 'https://book.douban.com/subject/',
    'movie': 'https://movie.douban.com/subject/',
    'music': 'https://music.douban.com/subject/',
    'game': 'https://www.douban.com/game/',
}

CSV_FIELDS = ['title', 'url', 'date', 'rating', 'status', 'comment']

# Characters accepted in a user ID, and the profile-URL form it may arrive in.
_USER_ID_RE = re.compile(r'[A-Za-z0-9._-]+')
_PROFILE_URL_RE = re.compile(r'douban\.com/people/([A-Za-z0-9._-]+)')
# Leading YYYY-MM-DD of a create_time value.
_DATE_PREFIX_RE = re.compile(r'\d{4}-\d{2}-\d{2}')


def get_download_dir():
    """Get the platform-appropriate Downloads directory.

    On Windows the directory is located under %USERPROFILE% (falling back to
    the expanded home directory when that variable is unset or empty); on
    every other platform it is ~/Downloads.
    """
    if platform.system() == 'Windows':
        home = os.environ.get('USERPROFILE') or os.path.expanduser('~')
        return os.path.join(home, 'Downloads')
    return os.path.expanduser('~/Downloads')


def get_output_dir(user_id):
    """Determine the output directory from env or platform default.

    Returns ``<base>/<user_id>`` where ``<base>`` is DOUBAN_OUTPUT_DIR when
    set and non-empty, otherwise ``<Downloads>/douban-sync``.
    """
    base = os.environ.get('DOUBAN_OUTPUT_DIR') or os.path.join(
        get_download_dir(), 'douban-sync'
    )
    return os.path.join(base, user_id)


def compute_signature(url_path, timestamp):
    """Compute the Frodo API HMAC-SHA1 signature.

    Signs ``METHOD & url_encoded_path & timestamp`` (path only, no query
    params) with HMAC_SECRET and returns the digest base64-encoded.

    Args:
        url_path: Request path, e.g. ``/api/v2/user/<id>/interests``.
        timestamp: Unix timestamp; a str, or anything ``str()`` can render.
    """
    raw = '&'.join(['GET', urllib.parse.quote(url_path, safe=''), str(timestamp)])
    digest = hmac.new(HMAC_SECRET, raw.encode(), hashlib.sha1).digest()
    return base64.b64encode(digest).decode()


def _signed_params(api_path, type_name, status, start, count):
    """Build the signed query parameters for one interests request."""
    ts = str(int(time.time()))
    return {
        'type': type_name,
        'status': status,
        'start': start,
        'count': count,
        'apiKey': API_KEY,
        '_ts': ts,
        '_sig': compute_signature(api_path, ts),
        'os_rom': 'android',
    }


def fetch_json(url, params):
    """Make an authenticated GET request to the Frodo API.

    Returns ``(data_dict, status_code)``. HTTP errors return the HTTP status
    code; network errors, timeouts, undecodable bodies and JSON that is not
    an object all return status code 0. In every failure case ``data_dict``
    is a synthetic ``{'error': ...}`` dict, so the caller can always use
    ``.get()`` on it and decide whether to retry.
    """
    full_url = f'{url}?{urllib.parse.urlencode(params)}'
    req = urllib.request.Request(full_url, headers={'User-Agent': USER_AGENT})
    try:
        with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp:
            status_code = resp.status
            payload = resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError is a URLError subclass, so it must be handled first.
        try:
            body = e.read().decode('utf-8', errors='replace')[:200]
        except (OSError, http.client.HTTPException):
            body = ''
        return {'error': body, 'code': e.code}, e.code
    except urllib.error.URLError as e:
        return {'error': f'Network error: {e.reason}'}, 0
    except socket.timeout:
        return {'error': 'Request timed out'}, 0
    except (OSError, http.client.HTTPException) as e:
        # Failures while reading the body (connection reset, truncated
        # response, ...) are not wrapped in URLError by urllib.
        return {'error': f'Network error: {e}'}, 0

    try:
        data = json.loads(payload.decode('utf-8'))
    except ValueError as e:
        # Covers both json.JSONDecodeError and UnicodeDecodeError.
        return {'error': f'Invalid JSON response: {e}'}, 0
    if not isinstance(data, dict):
        return {'error': 'Invalid JSON response: expected a JSON object'}, 0
    return data, status_code


def preflight_check(user_id):
    """Verify the user ID by fetching one item of the "book / done" list.

    Returns False only when the API answers with a non-200 HTTP status, which
    is treated as an invalid user ID. Returns True when the API answers 200,
    and also (after printing a warning) when the check itself could not be
    completed because of a network problem or an unexpected response body.
    """
    api_path = f'/api/v2/user/{user_id}/interests'
    params = _signed_params(api_path, 'book', 'done', 0, 1)
    data, code = fetch_json(f'{BASE_URL}{api_path}', params)

    if code == 0:
        print('Warning: Could not verify user ID (network issue). Proceeding anyway.')
        return True
    if code != 200:
        print(f'Error: API returned HTTP {code} for user "{user_id}".', file=sys.stderr)
        print(
            ' Check that the user ID is correct (from douban.com/people/<ID>/).',
            file=sys.stderr,
        )
        return False
    if data.get('total', -1) == -1:
        print('Warning: Unexpected API response. Proceeding anyway.')
    return True


def fetch_all_interests(user_id, type_name, status):
    """Fetch all items for a given type+status combination.

    Paginates through the API, checking against the reported total (not the
    page size) so that short pages do not end the loop early. A failed
    request is retried up to MAX_RETRIES times in a row with exponential
    backoff; the retry counter resets after every successful page.

    Returns the list of interest objects collected. The list may be partial
    when retries are exhausted or the MAX_PAGES_SAFETY request limit is hit;
    both situations are reported on stdout.
    """
    api_path = f'/api/v2/user/{user_id}/interests'
    url = f'{BASE_URL}{api_path}'
    all_items = []
    start = 0
    total = None
    retries = 0

    # Every request, including retries, counts against the safety limit.
    for _ in range(MAX_PAGES_SAFETY):
        params = _signed_params(api_path, type_name, status, start, ITEMS_PER_PAGE)
        data, status_code = fetch_json(url, params)

        if status_code != 200:
            retries += 1
            if retries > MAX_RETRIES:
                print(f' Error: HTTP {status_code} after {MAX_RETRIES} retries, stopping.')
                print(' See references/troubleshooting.md for common errors.')
                if all_items:
                    print(f' Warning: keeping {len(all_items)} item(s) fetched before the failure.')
                break
            delay = RETRY_BASE_DELAY * (2 ** (retries - 1))
            print(f' HTTP {status_code}, retry {retries}/{MAX_RETRIES}, waiting {delay}s...')
            time.sleep(delay)
            continue

        retries = 0

        if total is None:
            # A missing, null or non-numeric total is treated as "no items".
            try:
                total = int(data.get('total') or 0)
            except (TypeError, ValueError):
                total = 0
            if total == 0:
                return []
            print(f' Total: {total}')

        interests = data.get('interests') or []
        if not interests:
            break

        all_items.extend(interests)
        print(f' Fetched {start}-{start + len(interests)} ({len(all_items)}/{total})')

        if len(all_items) >= total:
            break

        start += len(interests)
        time.sleep(PAGE_DELAY)
    else:
        print(
            f' Warning: stopped after {MAX_PAGES_SAFETY} requests '
            f'(safety limit); collected {len(all_items)} item(s).'
        )

    return all_items


def _to_number(raw):
    """Return raw as a finite float, or None when it is not numeric."""
    if isinstance(raw, bool):
        return None
    try:
        number = float(raw)
    except (TypeError, ValueError):
        return None
    return number if math.isfinite(number) else None


def extract_rating(interest):
    """Convert a Frodo API rating to a star string.

    The rating is expected as ``{'value': N, 'max': M}``. When ``max`` is at
    most 5 (or missing/invalid) the value is used as the star count; larger
    scales are treated as 10-point and halved. Fractions are rounded half-up
    (``round()`` would round half-to-even and map 5/10 to two stars but 7/10
    to four), and any positive rating yields at least one star so it cannot
    be mistaken for "unrated".

    Returns '' when there is no rating or the value is missing, non-numeric
    or not positive; otherwise one to five '★' characters.
    """
    rating = interest.get('rating')
    if not isinstance(rating, dict):
        return ''

    value = _to_number(rating.get('value'))
    if value is None or value <= 0:
        return ''

    scale = _to_number(rating.get('max'))
    if scale is None or scale <= 0:
        scale = 5

    scaled = value if scale <= 5 else value / 2
    stars = math.floor(scaled + 0.5)
    return '★' * max(1, min(5, stars))


def interest_to_row(interest, type_name, status_cn):
    """Convert a single Frodo API interest object to a CSV row dict.

    Tolerates a missing or null ``subject`` and null/non-string text fields,
    emitting empty strings instead. The URL is built from the subject ID and
    the per-type prefix, falling back to the subject's own ``url`` when no ID
    is present. The date is the leading ``YYYY-MM-DD`` of ``create_time``.
    """
    subject = interest.get('subject')
    if not isinstance(subject, dict):
        subject = {}

    sid = subject.get('id') or ''
    if sid:
        prefix = URL_PREFIX.get(type_name, 'https://www.douban.com/subject/')
        url = f'{prefix}{sid}/'
    else:
        url = subject.get('url') or ''

    created = interest.get('create_time')
    if isinstance(created, str) and _DATE_PREFIX_RE.match(created):
        date = created[:10]
    else:
        date = ''

    return {
        'title': subject.get('title') or '',
        'url': url,
        'date': date,
        'rating': extract_rating(interest),
        'status': status_cn,
        'comment': interest.get('comment') or '',
    }


def write_csv(filepath, rows):
    """Write rows to a CSV file with UTF-8 BOM for Excel compatibility."""
    with open(filepath, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=CSV_FIELDS)
        writer.writeheader()
        writer.writerows(rows)


def _extract_user_id(raw):
    """Return the ID embedded in a pasted profile URL, else raw unchanged."""
    url_match = _PROFILE_URL_RE.search(raw)
    return url_match.group(1) if url_match else raw


def main():
    """Run the export: validate input, fetch every category, write CSVs.

    Exits with status 1 when DOUBAN_USER is missing or invalid, when the
    output directory cannot be created, or when the pre-flight check rejects
    the user ID.
    """
    raw_user = os.environ.get('DOUBAN_USER', '').strip()
    if not raw_user:
        print('Error: DOUBAN_USER environment variable is required', file=sys.stderr)
        print('Usage: DOUBAN_USER=<user_id> python3 douban-frodo-export.py', file=sys.stderr)
        sys.exit(1)

    # Extract ID from URL if user pasted a full profile URL
    user_id = _extract_user_id(raw_user)

    if not _USER_ID_RE.fullmatch(user_id):
        print(f'Error: DOUBAN_USER contains invalid characters: {user_id}', file=sys.stderr)
        sys.exit(1)
    if not user_id.strip('.'):
        # "." and ".." pass the character check but would make the output
        # directory resolve outside of its base directory.
        print(f'Error: DOUBAN_USER is not a valid user ID: {user_id}', file=sys.stderr)
        sys.exit(1)

    output_dir = get_output_dir(user_id)
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f'Error: cannot create output directory {output_dir}: {e}', file=sys.stderr)
        sys.exit(1)

    print(f'Douban Export for user: {user_id}')
    print(f'Output directory: {output_dir}\n')

    # Pre-flight: verify user ID is valid before spending time on full export
    if not preflight_check(user_id):
        sys.exit(1)

    # Collect data grouped by output file, then write all at the end.
    file_data = {}
    grand_total = 0
    last_index = len(CATEGORIES) - 1

    for index, (type_name, status, status_cn, outfile) in enumerate(CATEGORIES):
        print(f'=== {status_cn} ({type_name}) ===')
        items = fetch_all_interests(user_id, type_name, status)

        rows = file_data.setdefault(outfile, [])
        rows.extend(interest_to_row(item, type_name, status_cn) for item in items)

        count = len(items)
        grand_total += count
        if count > 0:
            print(f' Collected: {count}\n')
        else:
            print(' (empty)\n')

        # No request follows the last category, so there is nothing to pace.
        if index < last_index:
            time.sleep(CATEGORY_DELAY)

    # Write CSV files
    print('--- Writing CSV files ---')
    for filename, rows in file_data.items():
        write_csv(os.path.join(output_dir, filename), rows)
        print(f' {filename}: {len(rows)} rows')

    print(f'\nDone! {grand_total} total items exported to {output_dir}')


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print('\n\nExport interrupted by user.')
        sys.exit(130)