fix: prevent dictionary false positives + add tunnel-doctor WSL/Go findings

transcript-fixer:
- Add common_words.py safety system (blocks common Chinese words from dictionary)
- Add --audit command to scan existing dictionary for risky rules
- Add --force flag to override safety checks explicitly
- Fix substring corruption (产线数据→产线束据, 现金流→现现金流)
- Unified position-aware replacement with _already_corrected() check
- 69 tests covering all production false positive scenarios

tunnel-doctor:
- Add Step 5A: Tailscale SSH proxy silent failure on WSL
- Add Step 5B: App Store vs Standalone Tailscale on macOS
- Add Go net/http NO_PROXY CIDR incompatibility warning
- Add utun interface identification (MTU 1280=Tailscale, 4064=Shadowrocket)
- Fix "Four→Five Conflict Layers" inconsistency in reference doc
- Add complete working Shadowrocket config reference

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
daymade
2026-03-21 15:56:38 +08:00
parent d4634cb00b
commit a496c91cae
12 changed files with 1596 additions and 44 deletions

View File

@@ -12,6 +12,7 @@ from __future__ import annotations
import re
import os
import sys
import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple
@@ -23,6 +24,10 @@ from .correction_repository import (
DatabaseError
)
# Import safety check for common words
sys.path.insert(0, str(Path(__file__).parent.parent))
from utils.common_words import check_correction_safety, audit_corrections, SafetyWarning
logger = logging.getLogger(__name__)
@@ -178,10 +183,15 @@ class CorrectionService:
domain: str = "general",
source: str = "manual",
confidence: float = 1.0,
notes: Optional[str] = None
notes: Optional[str] = None,
force: bool = False,
) -> int:
"""
Add a correction with full validation.
Add a correction with full validation and safety checks.
Safety checks detect common Chinese words and substring collision
risks that would cause false positives. Pass force=True to bypass
(errors become warnings printed to stderr).
Args:
from_text: Original (incorrect) text
@@ -190,12 +200,13 @@ class CorrectionService:
source: Origin of correction
confidence: Confidence score
notes: Optional notes
force: If True, downgrade safety errors to warnings
Returns:
ID of inserted correction
Raises:
ValidationError: If validation fails
ValidationError: If validation or safety check fails
"""
# Comprehensive validation
self.validate_correction_text(from_text, "from_text")
@@ -210,6 +221,34 @@ class CorrectionService:
f"from_text and to_text are identical: '{from_text}'"
)
# Safety check: detect common words and substring collisions
safety_warnings = check_correction_safety(from_text, to_text, strict=True)
if safety_warnings:
errors = [w for w in safety_warnings if w.level == "error"]
warns = [w for w in safety_warnings if w.level == "warning"]
if errors and not force:
# Block the addition
msg_parts = []
for w in errors:
msg_parts.append(f"[{w.category}] {w.message}")
msg_parts.append(f" Suggestion: {w.suggestion}")
raise ValidationError(
f"Safety check BLOCKED adding '{from_text}' -> '{to_text}':\n"
+ "\n".join(msg_parts)
+ "\n\nUse --force to override (at your own risk)."
)
# Print warnings (errors downgraded by --force, or genuine warnings)
all_to_print = errors + warns if force else warns
if all_to_print:
for w in all_to_print:
prefix = "FORCED" if w.level == "error" else "WARNING"
logger.warning(
f"[{prefix}] [{w.category}] {w.message} | {w.suggestion}"
)
# Get current user
added_by = os.getenv("USER") or os.getenv("USERNAME") or "unknown"
@@ -431,6 +470,31 @@ class CorrectionService:
return stats
# ==================== Audit Operations ====================
def audit_dictionary(
self,
domain: Optional[str] = None,
) -> Dict[str, List[SafetyWarning]]:
"""
Audit all active corrections for safety issues.
Scans every rule and flags:
- from_text that is a common Chinese word (false positive risk)
- from_text that is <= 2 characters (high collision risk)
- from_text that appears as substring in common words (collateral damage)
- Both from_text and to_text being common words (bidirectional risk)
Args:
domain: Optional domain filter (None = all domains)
Returns:
Dict mapping from_text to list of SafetyWarnings.
Only entries with issues are included.
"""
corrections = self.get_corrections(domain)
return audit_corrections(corrections)
# ==================== Helper Methods ====================
def _detect_conflicts(