Files
skill-seekers-reference/scripts/translate_doc.py
yusyus ba9a8ff8b5 docs: complete documentation overhaul with v3.1.0 release notes and zh-CN translations
Documentation restructure:
- New docs/getting-started/ guide (4 files: install, quick-start, first-skill, next-steps)
- New docs/user-guide/ section (6 files: core concepts through troubleshooting)
- New docs/reference/ section (CLI_REFERENCE, CONFIG_FORMAT, ENVIRONMENT_VARIABLES, MCP_REFERENCE)
- New docs/advanced/ section (custom-workflows, mcp-server, multi-source)
- New docs/ARCHITECTURE.md - system architecture overview
- Archived legacy files (QUICKSTART.md, QUICK_REFERENCE.md, docs/guides/USAGE.md) to docs/archive/legacy/

Chinese (zh-CN) translations:
- Full zh-CN mirror of all user-facing docs (getting-started, user-guide, reference, advanced)
- GitHub Actions workflow for translation sync (.github/workflows/translate-docs.yml)
- Translation sync checker script (scripts/check_translation_sync.sh)
- Translation helper script (scripts/translate_doc.py)

Content updates:
- CHANGELOG.md: [Unreleased] → [3.1.0] - 2026-02-22
- README.md: updated with new doc structure links
- AGENTS.md: updated agent documentation
- docs/features/UNIFIED_SCRAPING.md: updated for unified scraper workflow JSON config

Analysis/planning artifacts (kept for reference):
- DOCUMENTATION_OVERHAUL_PLAN.md, DOCUMENTATION_OVERHAUL_SUMMARY.md
- FEATURE_GAP_ANALYSIS.md, IMPLEMENTATION_GAPS_ANALYSIS.md, CREATE_COMMAND_COVERAGE_ANALYSIS.md
- CHINESE_TRANSLATION_IMPLEMENTATION_SUMMARY.md, ISSUE_260_UPDATE.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-22 01:01:51 +03:00

247 lines
7.7 KiB
Python

#!/usr/bin/env python3
"""
Translate Skill Seekers documentation to Chinese.
Usage:
python scripts/translate_doc.py <file> --target-lang zh-CN
python scripts/translate_doc.py docs/getting-started/02-quick-start.md
"""
import argparse
import os
import re
from pathlib import Path
from datetime import datetime
def get_version() -> str:
    """Return the Skill Seekers version string used in translation headers.

    The version is read from the ``skill_seekers`` package. When the package
    cannot be imported, the hard-coded release number is returned instead.
    """
    fallback_version = "3.1.0"
    try:
        from skill_seekers import __version__ as package_version
    except ImportError:
        return fallback_version
    return package_version
def translate_with_anthropic(
    content: str,
    api_key: str,
    model: str = "claude-3-5-sonnet-20241022",
    max_tokens: int = 8000,
) -> "str | None":
    """Translate Markdown content to Simplified Chinese via the Anthropic API.

    Args:
        content: English documentation text to translate.
        api_key: Anthropic API key used to create the client.
        model: Model identifier sent with the request.
            NOTE(review): confirm the default model ID is still served by the
            API; callers can override it through this parameter.
        max_tokens: Output token budget for the reply.

    Returns:
        The translated text, or ``None`` when the ``anthropic`` package is not
        installed, the request raises, the reply contains no text, or the reply
        stopped because it hit ``max_tokens``. A truncated reply is treated as
        a failure because it would silently drop the end of the document.
    """
    try:
        import anthropic
    except ImportError:
        print("Translation API error: the 'anthropic' package is not installed")
        return None

    system_prompt = """You are a professional technical translator translating Skill Seekers documentation from English to Simplified Chinese.
Translation rules:
1. Keep technical terms in English: CLI, API, JSON, YAML, MCP, URL, HTTP, etc.
2. Keep code examples, commands, and file paths in English
3. Keep proper nouns (product names, company names) in English
4. Use Simplified Chinese (简体中文)
5. Maintain all Markdown formatting
6. Translate link text but keep link targets (will be handled separately)
7. Use professional, technical Chinese appropriate for developers
8. Preserve all code blocks, they should remain exactly the same
Output ONLY the translated content, no explanations."""

    try:
        client = anthropic.Anthropic(api_key=api_key)
        message = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            temperature=0.1,
            system=system_prompt,
            messages=[
                {
                    "role": "user",
                    "content": f"Translate this technical documentation to Simplified Chinese:\n\n{content}",
                }
            ],
        )
    except Exception as e:
        # Best-effort boundary: any client/network failure is reported and
        # signalled to the caller as None rather than aborting a batch run.
        print(f"Translation API error: {e}")
        return None

    # Collect every text block instead of assuming the first block exists and
    # is text.
    translated = "".join(
        block.text
        for block in (message.content or [])
        if getattr(block, "type", None) == "text"
    )
    if not translated.strip():
        print("Translation API error: response contained no text")
        return None

    if getattr(message, "stop_reason", None) == "max_tokens":
        print(
            f"Translation API error: output truncated at max_tokens={max_tokens}"
        )
        return None

    return translated
def add_translation_header(content: str, original_file: Path, target_lang: str) -> str:
    """Prepend the bilingual translation notice to ``content``.

    The notice links back to the English original, and records the translation
    date and the version returned by ``get_version()``.

    Args:
        content: Translated document body.
        original_file: Path of the English source file (absolute or relative
            to the current working directory).
        target_lang: Language directory under ``docs/`` that receives the
            translated file. It is used only to compute the link back to the
            original; the notice text itself is fixed Chinese/English.

    Returns:
        The notice followed by ``content``.
    """
    version = get_version()
    date = datetime.now().strftime("%Y-%m-%d")
    source = Path(original_file).resolve()
    original_name = source.name

    # The translated file is written to docs/<target_lang>/<path below docs/>
    # (or directly into docs/<target_lang>/ for sources outside docs/), so the
    # link has to be expressed relative to that directory, not to the source.
    docs_root = Path("docs").resolve()
    try:
        mirrored_parent = source.parent.relative_to(docs_root)
    except ValueError:
        mirrored_parent = Path()
    translated_dir = docs_root / target_lang / mirrored_parent

    try:
        original_link = Path(os.path.relpath(source, translated_dir)).as_posix()
    except ValueError:
        # os.path.relpath cannot relate paths on different Windows drives.
        original_link = f"../{original_name}"

    # The trailing "---" followed by a blank line is the separator that
    # translate_file() looks for when stripping an existing notice.
    header = f"""> **注意:** 本文档是 [{original_name}]({original_link}) 的中文翻译。
>
> - **最后翻译日期:** {date}
> - **英文原文版本:** {version}
> - **翻译状态:** ⚠️ 待审阅
>
> 如果本文档与英文版本有冲突,请以英文版本为准。
>
> ---
>
> **Note:** This document is a Chinese translation of [{original_name}]({original_link}).
>
> - **Last translated:** {date}
> - **Original version:** {version}
> - **Translation status:** ⚠️ Pending review
>
> If there are conflicts, the English version takes precedence.

---

"""
    return header + content
def fix_links(content: str, original_file: Path, target_lang: str = "zh-CN") -> str:
    """Rewrite relative ``.md`` links so they resolve from the translated file.

    The translated copy of ``docs/<path>`` lives at
    ``docs/<target_lang>/<path>``. Each relative Markdown link is resolved
    against the original file's directory and then re-expressed relative to
    the translated file's directory:

    * targets inside ``docs/`` are pointed at their mirrored copy under
      ``docs/<target_lang>/``;
    * targets outside ``docs/``, under an ``archive`` directory, or already
      under ``docs/<target_lang>/`` keep pointing at the existing file.

    External links, ``mailto:`` links, pure anchors, absolute paths and links
    that do not end in ``.md`` (ignoring a ``#fragment``) are left untouched.

    Args:
        content: Markdown text whose links should be adjusted.
        original_file: Path of the English source file (absolute or relative
            to the current working directory).
        target_lang: Language directory under ``docs/`` (default ``zh-CN``).

    Returns:
        ``content`` with the affected link targets rewritten.
    """
    docs_root = Path("docs").resolve()
    source_dir = Path(original_file).resolve().parent
    try:
        mirrored_parent = source_dir.relative_to(docs_root)
    except ValueError:
        # Sources outside docs/ are written straight into docs/<target_lang>/.
        mirrored_parent = Path()
    translated_dir = docs_root / target_lang / mirrored_parent

    def replace_link(match):
        text = match.group(1)
        path = match.group(2)

        # External links, anchors and absolute paths are not ours to rewrite.
        if path.startswith(("http://", "https://", "mailto:", "#", "/")):
            return match.group(0)

        # Keep an optional "#fragment" and re-attach it afterwards.
        file_part, hash_sep, fragment = path.partition("#")
        if not file_part.endswith(".md"):
            return match.group(0)

        # normpath collapses "./" and "../" without touching the filesystem.
        target = Path(os.path.normpath(source_dir / file_part))
        try:
            below_docs = target.relative_to(docs_root)
        except ValueError:
            below_docs = None

        if (
            below_docs is not None
            and below_docs.parts[0] != target_lang
            and "archive" not in below_docs.parts
        ):
            target = docs_root / target_lang / below_docs

        try:
            new_path = Path(os.path.relpath(target, translated_dir)).as_posix()
        except ValueError:
            # os.path.relpath cannot relate paths on different Windows drives.
            return match.group(0)

        return f"[{text}]({new_path}{hash_sep}{fragment})"

    return re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_link, content)
def _strip_translation_header(content: str) -> str:
    """Remove a previously generated translation notice from the top of ``content``."""
    if '> **注意:**' not in content[:500]:
        return content

    lines = content.splitlines(keepends=True)
    total = len(lines)

    index = 0
    while index < total and not lines[index].strip():
        index += 1
    # Only strip when the document really starts with the blockquote notice;
    # otherwise leave the text alone rather than risk deleting real content.
    if index == total or not lines[index].startswith(">"):
        return content
    while index < total and lines[index].startswith(">"):
        index += 1

    # The notice is closed by a "---" rule, optionally surrounded by blank lines.
    probe = index
    while probe < total and not lines[probe].strip():
        probe += 1
    if probe < total and lines[probe].strip() == "---":
        probe += 1
        while probe < total and not lines[probe].strip():
            probe += 1
        index = probe

    return "".join(lines[index:])


def translate_file(input_path: str, target_lang: str = "zh-CN") -> bool:
    """Translate one documentation file and write it under ``docs/<target_lang>/``.

    The file is read, any existing translation notice is removed, the text is
    translated with ``translate_with_anthropic``, links are adjusted with
    ``fix_links`` and the notice from ``add_translation_header`` is prepended.
    Files below ``docs/`` keep their relative path in the output tree; other
    files are written as ``docs/<target_lang>/<filename>``.

    Args:
        input_path: Path of the file to translate.
        target_lang: Language directory name under ``docs/``.

    Returns:
        ``True`` when a translated file was written. ``False`` when the input
        is not an existing file, ``ANTHROPIC_API_KEY`` is not set, or the
        translation failed. Nothing is written in those cases, so an earlier
        translation is never overwritten with untranslated text.
    """
    input_file = Path(input_path).resolve()
    if not input_file.is_file():
        print(f"❌ File not found: {input_file}")
        return False

    content = input_file.read_text(encoding='utf-8')

    # Remove existing translation header if present (for re-translation).
    content = _strip_translation_header(content)

    api_key = os.environ.get('ANTHROPIC_API_KEY')
    if not api_key:
        print(f"⚠️ No ANTHROPIC_API_KEY, skipping translation for: {input_file.name}")
        return False

    print(f"🤖 Translating with Claude API: {input_file.name}")
    translated = translate_with_anthropic(content, api_key)
    if not translated:
        print(f"⚠️ Translation failed, skipping: {input_file.name}")
        return False

    content = fix_links(translated, input_file)
    content = add_translation_header(content, input_file, target_lang)

    # Mirror the path below docs/; fall back to the bare filename otherwise.
    try:
        relative_path = input_file.relative_to(Path("docs").resolve())
    except ValueError:
        relative_path = Path(input_file.name)
    output_file = Path("docs") / target_lang / relative_path
    output_file.parent.mkdir(parents=True, exist_ok=True)

    output_file.write_text(content, encoding='utf-8')
    print(f"✅ Created: {output_file}")
    return True
def main():
    """Command-line entry point.

    Without ``--batch`` a single file is translated; the positional ``file``
    argument is then required and its absence is reported as a usage error.
    With ``--batch`` every ``*.md`` file below ``docs/`` is translated, except
    files whose path contains ``zh-CN`` or ``archive`` and files located in
    the ``--target-lang`` output directory.
    """
    parser = argparse.ArgumentParser(
        description="Translate Skill Seekers documentation to Chinese"
    )
    parser.add_argument(
        "file",
        nargs='?',
        help="Path to the documentation file to translate (not needed with --batch)"
    )
    parser.add_argument(
        "--target-lang",
        default="zh-CN",
        help="Target language code (default: zh-CN)"
    )
    parser.add_argument(
        "--batch",
        action="store_true",
        help="Translate all documentation files"
    )
    args = parser.parse_args()

    if not args.batch:
        if not args.file:
            # Exits with a usage message instead of passing None downstream.
            parser.error("a file path is required unless --batch is given")
        translate_file(args.file, args.target_lang)
        return

    docs_dir = Path("docs")

    def is_source_doc(path):
        text = str(path)
        # Skip already translated files and the archive; also skip the output
        # directory of a non-default target language so reruns do not
        # translate their own output.
        return (
            "zh-CN" not in text
            and "archive" not in text
            and args.target_lang not in path.parts
        )

    # Sorted so batch runs process files in a stable order.
    files_to_translate = sorted(
        path for path in docs_dir.glob("**/*.md") if is_source_doc(path)
    )

    print(f"🔄 Batch translating {len(files_to_translate)} files...")
    success_count = sum(
        1 for path in files_to_translate
        if translate_file(str(path), args.target_lang)
    )
    print(f"\n✅ Successfully translated {success_count}/{len(files_to_translate)} files")


if __name__ == "__main__":
    main()