Files
skill-seekers-reference/tests/test_multilang_support.py
yusyus 51787e57bc style: Fix 411 ruff lint issues (Kimi's issue #4)
Auto-fixed lint issues with ruff --fix and --unsafe-fixes:

Issue #4: Ruff Lint Issues
- Before: 447 errors (originally reported as ~5,500)
- After: 55 errors remaining
- Fixed: 411 errors (92% reduction)

Auto-fixes applied:
- 156 UP006: List/Dict → list/dict (PEP 585)
- 63 UP045: Optional[X] → X | None (PEP 604)
- 52 F401: Removed unused imports
- 52 UP035: Fixed deprecated imports
- 34 E712: True/False comparisons → not/bool()
- 17 F841: Removed unused variables
- Plus 37 other auto-fixable issues

Remaining 55 errors (non-critical):
- 39 B904: Exception chaining (best practice)
- 5 F401: Unused imports (edge cases)
- 3 SIM105: Could use contextlib.suppress
- 8 other minor style issues

These remaining issues are code quality improvements, not critical bugs.

Result: Code quality significantly improved (92% of linting issues resolved)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-08 12:46:38 +03:00

304 lines
8.5 KiB
Python

#!/usr/bin/env python3
"""
Tests for multi-language documentation support.
Validates:
- Language detection (content and filename)
- Multi-language organization
- Translation status tracking
- Language filtering
- Export by language
"""
import pytest
from pathlib import Path
import sys
import tempfile
import json
# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from skill_seekers.cli.multilang_support import (
LanguageDetector,
MultiLanguageManager
)
def test_detect_english():
    """Detect English from a short English passage.

    Checks the reported language code and name, and that the detector
    reports a strictly positive confidence for the sample.
    """
    sample = "This is an English document. It contains common English words."
    result = LanguageDetector().detect(sample)

    assert (result.code, result.name) == ("en", "English")
    assert result.confidence > 0.0
def test_detect_spanish():
    """Detect Spanish from a short Spanish passage (code and name)."""
    sample = "Este es un documento en español. Contiene palabras comunes en español."
    result = LanguageDetector().detect(sample)

    assert (result.code, result.name) == ("es", "Spanish")
def test_detect_french():
    """Detect French from a short French passage (code and name)."""
    sample = "Ceci est un document en français. Il contient des mots français communs."
    result = LanguageDetector().detect(sample)

    assert (result.code, result.name) == ("fr", "French")
def test_detect_german():
    """Detect German from a short German passage (code and name)."""
    sample = "Dies ist ein deutsches Dokument. Es enthält übliche deutsche Wörter."
    result = LanguageDetector().detect(sample)

    assert (result.code, result.name) == ("de", "German")
def test_detect_chinese():
    """Detect Chinese from a short passage of Chinese characters (code and name)."""
    sample = "这是一个中文文档。它包含常见的中文字符。"
    result = LanguageDetector().detect(sample)

    assert (result.code, result.name) == ("zh", "Chinese")
def test_detect_from_filename_dot_pattern():
    """Detect the language code from ``file.<lang>.md`` style filenames."""
    expected_by_filename = {
        "README.en.md": "en",
        "guide.es.md": "es",
        "doc.fr.md": "fr",
    }
    detector = LanguageDetector()

    for filename, expected_code in expected_by_filename.items():
        assert detector.detect_from_filename(filename) == expected_code
def test_detect_from_filename_underscore_pattern():
    """Detect the language code from ``file_<lang>.md`` style filenames."""
    cases = (
        ("README_en.md", "en"),
        ("guide_es.md", "es"),
    )
    detector = LanguageDetector()

    for filename, expected_code in cases:
        assert detector.detect_from_filename(filename) == expected_code
def test_detect_from_filename_dash_pattern():
    """Detect the language code from ``file-<lang>.md`` style filenames."""
    cases = (
        ("README-en.md", "en"),
        ("guide-es.md", "es"),
    )
    detector = LanguageDetector()

    for filename, expected_code in cases:
        assert detector.detect_from_filename(filename) == expected_code
def test_detect_from_filename_no_match():
    """Expect ``None`` for filenames that carry no language marker."""
    detector = LanguageDetector()

    for filename in ("README.md", "guide.txt"):
        assert detector.detect_from_filename(filename) is None
def test_add_document_single_language():
    """Add one English document and check it is filed under ``en`` only."""
    filename = "README.md"
    content = "This is an English document."
    metadata = {"category": "overview"}

    manager = MultiLanguageManager()
    manager.add_document(filename, content, metadata)

    # Exactly one language is known, it is English, and it holds one document.
    assert len(manager.get_languages()) == 1
    assert "en" in manager.get_languages()
    assert manager.get_document_count("en") == 1
def test_add_document_multiple_languages():
    """Add English, Spanish and French documents and expect three languages."""
    documents = (
        ("README.md", "This is English."),
        ("README.es.md", "Esto es español."),
        ("README.fr.md", "Ceci est français."),
    )

    manager = MultiLanguageManager()
    for filename, content in documents:
        manager.add_document(filename, content, {})

    assert len(manager.get_languages()) == 3
    for code in ("en", "es", "fr"):
        assert code in manager.get_languages()
def test_force_language():
    """Check that ``force_language`` overrides what the content suggests."""
    manager = MultiLanguageManager()

    # The text is English, but the caller explicitly asks for Spanish.
    english_text = "This is actually English content."
    manager.add_document("file.md", english_text, {}, force_language="es")

    assert "es" in manager.get_languages()
    assert manager.get_document_count("es") == 1
def test_filename_language_priority():
    """Check that a filename language marker wins over content detection."""
    manager = MultiLanguageManager()

    # ".es." in the filename says Spanish while the body text is English.
    spanish_named_file = "guide.es.md"
    english_text = "This is English content."
    manager.add_document(spanish_named_file, english_text, {})

    # The document is expected to be registered under the filename's language.
    assert "es" in manager.get_languages()
def test_document_count_all():
    """Check the overall and the per-language document counts."""
    documents = (
        ("file1.md", "English doc 1"),
        ("file2.md", "English doc 2"),
        ("file3.es.md", "Spanish doc"),
    )

    manager = MultiLanguageManager()
    for filename, content in documents:
        manager.add_document(filename, content, {})

    # No argument -> total across languages; with a code -> that language only.
    assert manager.get_document_count() == 3
    assert manager.get_document_count("en") == 2
    assert manager.get_document_count("es") == 1
def test_primary_language():
    """Check that the primary language is that of the first added document."""
    manager = MultiLanguageManager()
    for filename, content in (
        ("file1.md", "First English doc"),
        ("file2.es.md", "Spanish doc"),
    ):
        manager.add_document(filename, content, {})

    # The English document went in first, so English is expected as primary.
    assert manager.primary_language == "en"
def test_translation_status():
    """Check source and translated languages reported by the translation status."""
    manager = MultiLanguageManager()
    for filename, content in (
        ("README.md", "English doc"),
        ("README.es.md", "Spanish doc"),
        ("README.fr.md", "French doc"),
    ):
        manager.add_document(filename, content, {})

    report = manager.get_translation_status()

    assert report.source_language == "en"
    for code in ("es", "fr"):
        assert code in report.translated_languages
    assert len(report.translated_languages) == 2
def test_export_by_language():
    """Export documents per language and inspect the resulting files.

    Everything that touches the exported files runs inside the temporary
    directory context, because the directory is removed when it exits.
    """
    manager = MultiLanguageManager()
    for filename, content in (
        ("file1.md", "English content"),
        ("file2.es.md", "Spanish content"),
    ):
        manager.add_document(filename, content, {})

    with tempfile.TemporaryDirectory() as workdir:
        exported = manager.export_by_language(Path(workdir))

        # One export entry per language.
        assert len(exported) == 2
        assert "en" in exported
        assert "es" in exported

        # Each entry points at a file that was actually written.
        assert exported["en"].exists()
        assert exported["es"].exists()

        # The English export is JSON carrying its language and document count.
        raw_english = exported["en"].read_text()
        english_payload = json.loads(raw_english)
        assert english_payload["language"] == "en"
        assert english_payload["document_count"] == 1
def test_translation_report_generation():
    """Check that the generated report text contains the expected fragments."""
    manager = MultiLanguageManager()
    for filename, content in (
        ("file1.md", "English doc"),
        ("file2.es.md", "Spanish doc"),
    ):
        manager.add_document(filename, content, {})

    report_text = manager.generate_translation_report()

    expected_fragments = (
        "MULTI-LANGUAGE DOCUMENTATION REPORT",
        "Languages: 2",
        "English (en)",
        "Spanish (es)",
    )
    for fragment in expected_fragments:
        assert fragment in report_text
def test_empty_manager():
    """Check a freshly created manager: no languages, no documents, no primary."""
    fresh = MultiLanguageManager()

    assert len(fresh.get_languages()) == 0
    assert fresh.get_document_count() == 0
    assert fresh.primary_language is None
def test_script_detection():
    """Check the writing script reported alongside the detected language."""
    expected_script_by_text = (
        ("This is English", "Latin"),  # English text -> Latin script
        ("这是中文", "Han"),  # Chinese text -> Han script
    )
    detector = LanguageDetector()

    for text, expected_script in expected_script_by_text:
        assert detector.detect(text).script == expected_script
def test_confidence_scoring():
    """Check that clearly English text is detected with confidence above 0.3."""
    clearly_english = (
        "The quick brown fox jumps over the lazy dog. This is clearly English."
    )
    result = LanguageDetector().detect(clearly_english)

    assert result.code == "en"
    # A strong English signal should yield a decent confidence score.
    assert result.confidence > 0.3
def test_metadata_preservation():
    """Check that metadata passed to ``add_document`` is stored with the document."""
    supplied_metadata = {"category": "guide", "version": "1.0"}

    manager = MultiLanguageManager()
    manager.add_document("file.md", "English content", supplied_metadata)

    english_docs = manager.documents["en"]
    assert len(english_docs) == 1
    assert english_docs[0]["metadata"] == supplied_metadata
if __name__ == "__main__":
    # Allow running this file directly. pytest.main() returns the run's exit
    # code; pass it to sys.exit() so a failing run produces a non-zero process
    # status instead of always exiting with 0.
    sys.exit(pytest.main([__file__, "-v"]))