feat: Add comprehensive Swift language detection support (#223)
* feat: Add comprehensive Swift language detection support

  Add Swift language detection with 40+ patterns covering syntax, stdlib, frameworks, and idioms. Implement fork-friendly architecture with separate swift_patterns.py module and graceful import fallback.

  Key changes:
  - New swift_patterns.py: 40+ Swift detection patterns (SwiftUI, Combine, async/await, property wrappers, etc.)
  - Enhanced language_detector.py: Graceful import handling, robust pattern compilation with error recovery
  - Comprehensive test suite: 19 tests covering syntax, frameworks, edge cases, and error handling
  - Updated .gitignore: Exclude Claude-specific config files

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

* fix: Fix Swift pattern false positives and add comprehensive error handling

  Critical Fixes (Priority 0):
  - Fix 'some' and 'any' keyword false positives by requiring capitalized type names
  - Use (?-i:[A-Z]) to enforce case-sensitivity despite global IGNORECASE flag
  - Prevents "some random" from being detected as Swift code

  Error Handling (Priority 1):
  - Wrap pattern validation in try/except to prevent module import crashes
  - Add SWIFT_PATTERNS verification with logging after import
  - Gracefully degrade to empty dict on validation errors
  - Add 7 comprehensive error handling tests

  Improvements (Priority 2):
  - Remove fragile line number references in comments
  - Add 5 new tests for previously untested patterns:
    * Property observers (willSet/didSet)
    * Memory management (weak var, unowned, [weak self])
    * String interpolation

  Test Results:
  - All 92 tests passing (72 Swift + 20 language detection)
  - Fixed regression: test_detect_unknown now passes
  - 12 new tests added (7 error handling + 5 feature coverage)

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -9,8 +9,45 @@ Author: Skill Seekers Project
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from typing import Optional, Tuple, Dict, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import Swift patterns from separate module (fork-friendly architecture)
|
||||
# Load the Swift pattern table from its own module. Any failure to load it
# leaves SWIFT_PATTERNS as an empty dict so this module still imports; a
# missing module is reported as a warning, any other failure as an error.
try:
    from skill_seekers.cli.swift_patterns import SWIFT_PATTERNS
except ImportError as import_err:
    SWIFT_PATTERNS: Dict[str, List[Tuple[str, int]]] = {}
    logger.warning(
        "Swift language detection patterns unavailable. "
        "Swift code detection will be disabled. Error: %s",
        import_err,
    )
except Exception as load_err:
    SWIFT_PATTERNS: Dict[str, List[Tuple[str, int]]] = {}
    logger.error(
        "Failed to load Swift patterns due to unexpected error: %s. "
        "Swift detection disabled.",
        load_err,
    )
|
||||
|
||||
# Verify Swift patterns were loaded correctly
|
||||
# Report the outcome of the Swift pattern load: success (with the number of
# patterns under the 'swift' key), a non-empty table lacking that key, or an
# empty table.
if SWIFT_PATTERNS and 'swift' in SWIFT_PATTERNS:
    logger.info(
        "Swift patterns loaded successfully: %d patterns for language detection",
        len(SWIFT_PATTERNS.get('swift', [])),
    )
elif SWIFT_PATTERNS:
    logger.error(
        "Swift patterns loaded but 'swift' key is missing. "
        "Swift detection is broken. Please file a bug report."
    )
else:
    logger.warning(
        "Swift pattern dictionary is empty. Swift detection is disabled. "
        "This may indicate swift_patterns.py has no patterns defined."
    )
|
||||
|
||||
# Comprehensive language patterns with weighted confidence scoring
|
||||
# Weight 5: Unique identifiers (highly specific)
|
||||
@@ -371,6 +408,9 @@ LANGUAGE_PATTERNS: Dict[str, List[Tuple[str, int]]] = {
|
||||
],
|
||||
}
|
||||
|
||||
# Merge Swift patterns (fork-friendly: patterns defined in swift_patterns.py)
|
||||
# dict.update() replaces existing keys: a language key present in both dicts
# ends up with the value from SWIFT_PATTERNS. When the Swift import fell back
# to an empty dict, this call changes nothing.
LANGUAGE_PATTERNS.update(SWIFT_PATTERNS)
|
||||
|
||||
|
||||
# Known language list for CSS class detection
|
||||
KNOWN_LANGUAGES = [
|
||||
@@ -418,10 +458,32 @@ class LanguageDetector:
|
||||
def _compile_patterns(self) -> None:
    """Compile each language's regex patterns and cache the results.

    Walks ``LANGUAGE_PATTERNS`` and stores, per language, a list of
    ``(compiled_regex, weight)`` tuples in ``self._pattern_cache``. Every
    pattern is compiled with ``re.IGNORECASE | re.MULTILINE``.

    A bad entry never aborts compilation of the remaining patterns: it is
    logged and skipped. Three kinds of bad entry are handled:

    * an entry that does not unpack into a ``(pattern, weight)`` pair,
    * a pattern that is not valid regex syntax (``re.error``),
    * a pattern object ``re.compile`` rejects (``TypeError`` for
      non-string input, ``ValueError`` for an already-compiled pattern
      passed together with flags).

    A language for which no pattern compiles gets no cache entry written
    here, and a warning is logged.
    """
    flags = re.IGNORECASE | re.MULTILINE

    for lang, patterns in LANGUAGE_PATTERNS.items():
        # NOTE: patterns are compiled only inside the guarded loop below.
        # An unguarded up-front comprehension would raise on the first
        # invalid regex and make the error recovery unreachable.
        compiled_patterns = []

        for i, entry in enumerate(patterns):
            # Unpack separately so a malformed entry is reported as such
            # instead of escaping as an unhandled exception.
            try:
                pattern, weight = entry
            except (TypeError, ValueError):
                logger.error(
                    "Malformed pattern entry for language '%s' at index %d: "
                    "expected a (pattern, weight) pair. Entry skipped.",
                    lang, i
                )
                continue

            try:
                compiled = re.compile(pattern, flags)
            except re.error as e:
                logger.error(
                    "Invalid regex pattern for language '%s' at index %d: '%s'. "
                    "Error: %s. Pattern skipped.",
                    lang, i, pattern[:50], e
                )
            except (TypeError, ValueError):
                logger.error(
                    "Pattern for language '%s' at index %d is not a string: %s. "
                    "Pattern skipped.",
                    lang, i, type(pattern).__name__
                )
            else:
                compiled_patterns.append((compiled, weight))

        if compiled_patterns:
            self._pattern_cache[lang] = compiled_patterns
        else:
            logger.warning(
                "No valid patterns compiled for language '%s'. "
                "Detection for this language is disabled.",
                lang
            )
|
||||
|
||||
def detect_from_html(self, elem, code: str) -> Tuple[str, float]:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user