fix(skill): restructure tech-stack-evaluator with Progressive Disclosure (#64) (#120)

Restructure skill to follow Progressive Disclosure Architecture:

Structure Changes:
- Move Python scripts to scripts/ directory
- Move sample JSON files to assets/ directory
- Create references/ directory with extracted content
- Remove redundant HOW_TO_USE.md and README.md

New Reference Files:
- references/metrics.md: Detailed scoring algorithms and formulas
- references/examples.md: Concrete input/output examples
- references/workflows.md: Step-by-step evaluation workflows

SKILL.md Improvements:
- Reduced from 430 lines to ~180 lines
- Added table of contents
- Added trigger phrases in description
- Consistent imperative voice
- Points to references for details

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 06:28:42 +01:00
parent 829a197c2b
commit a10a4f2c4b
17 changed files with 1114 additions and 1266 deletions
--- a/engineering-team/tech-stack-evaluator/scripts/migration_analyzer.py
+++ b/engineering-team/tech-stack-evaluator/scripts/migration_analyzer.py
@@ -0,0 +1,587 @@
+"""
+Migration Path Analyzer.
+
+Analyzes migration complexity, risks, timelines, and strategies for moving
+from legacy technology stacks to modern alternatives.
+"""
+
+from typing import Dict, List, Any, Optional, Tuple
+
+
+class MigrationAnalyzer:
+    """
+    Analyze migration paths and complexity for technology stack changes.
+
+    All inputs come from the ``migration_data`` dictionary given to the
+    constructor. Every key is optional; each scoring method documents the
+    default it falls back to when a key is absent.
+    """
+
+    # Migration complexity factors
+    COMPLEXITY_FACTORS = [
+        'code_volume',
+        'architecture_changes',
+        'data_migration',
+        'api_compatibility',
+        'dependency_changes',
+        'testing_requirements'
+    ]
+
+    # Weight of each factor in the overall complexity score (weights sum to 1.0)
+    COMPLEXITY_WEIGHTS = {
+        'code_volume': 0.20,
+        'architecture_changes': 0.25,
+        'data_migration': 0.20,
+        'api_compatibility': 0.15,
+        'dependency_changes': 0.10,
+        'testing_requirements': 0.10
+    }
+
+    def __init__(self, migration_data: Dict[str, Any]):
+        """
+        Initialize migration analyzer with migration parameters.
+
+        Args:
+            migration_data: Dictionary containing source/target technologies and constraints.
+                Recognized keys: 'source_technology', 'target_technology',
+                'codebase_stats', 'constraints', 'team'.
+        """
+        self.source_tech = migration_data.get('source_technology', 'Unknown')
+        self.target_tech = migration_data.get('target_technology', 'Unknown')
+        # `or {}` also covers a section that is present but null/None, which
+        # would otherwise fail on the first `.get()` in the scoring methods.
+        self.codebase_stats = migration_data.get('codebase_stats') or {}
+        self.constraints = migration_data.get('constraints') or {}
+        self.team_info = migration_data.get('team') or {}
+
+    def calculate_complexity_score(self) -> Dict[str, Any]:
+        """
+        Calculate overall migration complexity (1-10 scale).
+
+        Returns:
+            Dictionary with one score per complexity factor plus
+            'overall_complexity', the weighted average of those scores
+            using COMPLEXITY_WEIGHTS.
+        """
+        scores = {
+            'code_volume': self._score_code_volume(),
+            'architecture_changes': self._score_architecture_changes(),
+            'data_migration': self._score_data_migration(),
+            'api_compatibility': self._score_api_compatibility(),
+            'dependency_changes': self._score_dependency_changes(),
+            'testing_requirements': self._score_testing_requirements()
+        }
+
+        # Compute the weighted average before adding the aggregate key, so the
+        # aggregate itself is never part of the sum.
+        weights = self.COMPLEXITY_WEIGHTS
+        overall = sum(scores[k] * weights[k] for k in scores)
+        scores['overall_complexity'] = overall
+
+        return scores
+
+    def _score_code_volume(self) -> float:
+        """
+        Score complexity based on codebase size.
+
+        Reads 'lines_of_code' (default 10000) and 'num_components'
+        (default 50) from the codebase stats.
+
+        Returns:
+            Code volume complexity score (capped at 10)
+        """
+        lines_of_code = self.codebase_stats.get('lines_of_code', 10000)
+        num_components = self.codebase_stats.get('num_components', 50)
+
+        # Score based on lines of code (primary factor)
+        if lines_of_code < 5000:
+            base_score = 2
+        elif lines_of_code < 20000:
+            base_score = 4
+        elif lines_of_code < 50000:
+            base_score = 6
+        elif lines_of_code < 100000:
+            base_score = 8
+        else:
+            base_score = 10
+
+        # Adjust for component count. The larger threshold must be tested
+        # first: checking "> 200" first would make the "> 500" branch
+        # unreachable.
+        if num_components > 500:
+            base_score = min(10, base_score + 2)
+        elif num_components > 200:
+            base_score = min(10, base_score + 1)
+
+        return float(base_score)
+
+    def _score_architecture_changes(self) -> float:
+        """
+        Score complexity based on architectural changes.
+
+        Reads 'architecture_change_level' (default 'moderate'); an
+        unrecognized level scores 5, the same as 'moderate'.
+
+        Returns:
+            Architecture complexity score (1-10)
+        """
+        arch_change_level = self.codebase_stats.get('architecture_change_level', 'moderate')
+
+        scores = {
+            'minimal': 2,      # Same patterns, just different framework
+            'moderate': 5,     # Some pattern changes, similar concepts
+            'significant': 7,  # Different patterns, major refactoring
+            'complete': 10     # Complete rewrite, different paradigm
+        }
+
+        return float(scores.get(arch_change_level, 5))
+
+    def _score_data_migration(self) -> float:
+        """
+        Score complexity based on data migration requirements.
+
+        Reads 'has_database' (default True), 'database_size_gb' (default 10),
+        'schema_changes_required' (default 'minimal') and
+        'data_transformation_required' (default False).
+
+        Returns:
+            Data migration complexity score (1-10); 1.0 when there is no database
+        """
+        has_database = self.codebase_stats.get('has_database', True)
+        if not has_database:
+            return 1.0
+
+        database_size_gb = self.codebase_stats.get('database_size_gb', 10)
+        schema_changes = self.codebase_stats.get('schema_changes_required', 'minimal')
+        data_transformation = self.codebase_stats.get('data_transformation_required', False)
+
+        # Base score from database size
+        if database_size_gb < 1:
+            score = 2
+        elif database_size_gb < 10:
+            score = 3
+        elif database_size_gb < 100:
+            score = 5
+        elif database_size_gb < 1000:
+            score = 7
+        else:
+            score = 9
+
+        # Adjust for schema changes (unrecognized values count as 'minimal')
+        schema_adjustments = {
+            'none': 0,
+            'minimal': 1,
+            'moderate': 2,
+            'significant': 3
+        }
+        score += schema_adjustments.get(schema_changes, 1)
+
+        # Adjust for data transformation
+        if data_transformation:
+            score += 2
+
+        # The adjustments can push the total past the top of the scale
+        return min(10.0, float(score))
+
+    def _score_api_compatibility(self) -> float:
+        """
+        Score complexity based on API compatibility.
+
+        Reads 'breaking_api_changes' (default 'some'); an unrecognized
+        value scores 5, the same as 'some'.
+
+        Returns:
+            API compatibility complexity score (1-10)
+        """
+        breaking_api_changes = self.codebase_stats.get('breaking_api_changes', 'some')
+
+        scores = {
+            'none': 1,         # Fully compatible
+            'minimal': 3,      # Few breaking changes
+            'some': 5,         # Moderate breaking changes
+            'many': 7,         # Significant breaking changes
+            'complete': 10     # Complete API rewrite
+        }
+
+        return float(scores.get(breaking_api_changes, 5))
+
+    def _score_dependency_changes(self) -> float:
+        """
+        Score complexity based on dependency changes.
+
+        Reads 'num_dependencies' (default 20) and 'dependencies_to_replace'
+        (default 5) and scores the percentage of dependencies to replace.
+
+        Returns:
+            Dependency complexity score (1-10); 1.0 when there are no dependencies
+        """
+        num_dependencies = self.codebase_stats.get('num_dependencies', 20)
+        dependencies_to_replace = self.codebase_stats.get('dependencies_to_replace', 5)
+
+        # No dependencies: nothing to replace, and avoids dividing by zero
+        if num_dependencies == 0:
+            return 1.0
+
+        # Score based on replacement percentage
+        replacement_pct = (dependencies_to_replace / num_dependencies) * 100
+
+        if replacement_pct < 10:
+            return 2.0
+        elif replacement_pct < 25:
+            return 4.0
+        elif replacement_pct < 50:
+            return 6.0
+        elif replacement_pct < 75:
+            return 8.0
+        else:
+            return 10.0
+
+    def _score_testing_requirements(self) -> float:
+        """
+        Score complexity based on testing requirements.
+
+        Reads 'current_test_coverage' (default 0.5, compared against
+        fractional thresholds) and 'num_tests' (default 100).
+
+        Returns:
+            Testing complexity score (1-10)
+        """
+        test_coverage = self.codebase_stats.get('current_test_coverage', 0.5)  # 0-1 scale
+        num_tests = self.codebase_stats.get('num_tests', 100)
+
+        # If good test coverage, easier migration (can verify)
+        if test_coverage >= 0.8:
+            base_score = 3
+        elif test_coverage >= 0.6:
+            base_score = 5
+        elif test_coverage >= 0.4:
+            base_score = 7
+        else:
+            base_score = 9  # Poor coverage = hard to verify migration
+
+        # Large test suites need updates
+        if num_tests > 500:
+            base_score = min(10, base_score + 1)
+
+        return float(base_score)
+
+    def estimate_effort(self) -> Dict[str, Any]:
+        """
+        Estimate migration effort in person-hours and timeline.
+
+        Reads 'lines_of_code' (default 10000) from the codebase stats and
+        'team_size' (default 3) and 'hours_per_week' (default 30) from the
+        team info.
+
+        Returns:
+            Dictionary with total hours, person-months, per-phase breakdown,
+            estimated timeline and the team assumptions used
+
+        Raises:
+            ZeroDivisionError: If 'team_size' or 'hours_per_week' is 0.
+        """
+        complexity = self.calculate_complexity_score()
+        overall_complexity = complexity['overall_complexity']
+
+        # Base hours estimation
+        lines_of_code = self.codebase_stats.get('lines_of_code', 10000)
+        base_hours = lines_of_code / 50  # 50 lines per hour baseline
+
+        # Complexity multiplier: overall score divided by 10, added to 1
+        complexity_multiplier = 1 + (overall_complexity / 10)
+        estimated_hours = base_hours * complexity_multiplier
+
+        # Break down by phase
+        phases = self._calculate_phase_breakdown(estimated_hours)
+
+        # Calculate timeline
+        team_size = self.team_info.get('team_size', 3)
+        hours_per_week_per_dev = self.team_info.get('hours_per_week', 30)  # Account for other work
+
+        total_dev_weeks = estimated_hours / (team_size * hours_per_week_per_dev)
+        total_calendar_weeks = total_dev_weeks * 1.2  # Buffer for blockers
+
+        return {
+            'total_hours': estimated_hours,
+            'total_person_months': estimated_hours / 160,  # 160 hours per person-month
+            'phases': phases,
+            'estimated_timeline': {
+                'dev_weeks': total_dev_weeks,
+                'calendar_weeks': total_calendar_weeks,
+                'calendar_months': total_calendar_weeks / 4.33
+            },
+            'team_assumptions': {
+                'team_size': team_size,
+                'hours_per_week_per_dev': hours_per_week_per_dev
+            }
+        }
+
+    def _calculate_phase_breakdown(self, total_hours: float) -> Dict[str, Dict[str, Any]]:
+        """
+        Calculate effort breakdown by migration phase.
+
+        Args:
+            total_hours: Total estimated hours
+
+        Returns:
+            Mapping of phase name to its 'hours', 'person_weeks' (hours / 40)
+            and 'percentage' (formatted string such as "15%")
+        """
+        # Standard phase percentages
+        phase_percentages = {
+            'planning_and_prototyping': 0.15,
+            'core_migration': 0.45,
+            'testing_and_validation': 0.25,
+            'deployment_and_monitoring': 0.10,
+            'buffer_and_contingency': 0.05
+        }
+
+        phases = {}
+        for phase, percentage in phase_percentages.items():
+            hours = total_hours * percentage
+            phases[phase] = {
+                'hours': hours,
+                'person_weeks': hours / 40,
+                'percentage': f"{percentage * 100:.0f}%"
+            }
+
+        return phases
+
+    def assess_risks(self) -> Dict[str, List[Dict[str, str]]]:
+        """
+        Identify and assess migration risks.
+
+        Returns:
+            Risks grouped under 'technical_risks', 'business_risks' and
+            'team_risks'; each risk has 'risk', 'severity' and 'mitigation'
+        """
+        complexity = self.calculate_complexity_score()
+
+        return {
+            'technical_risks': self._identify_technical_risks(complexity),
+            'business_risks': self._identify_business_risks(),
+            'team_risks': self._identify_team_risks()
+        }
+
+    def _identify_technical_risks(self, complexity: Dict[str, float]) -> List[Dict[str, str]]:
+        """
+        Identify technical risks.
+
+        Args:
+            complexity: Complexity scores as returned by calculate_complexity_score()
+
+        Returns:
+            List of technical risks with mitigations; a single low-severity
+            placeholder entry when no factor crosses its threshold
+        """
+        risks = []
+
+        # API compatibility risks
+        if complexity['api_compatibility'] >= 7:
+            risks.append({
+                'risk': 'Breaking API changes may cause integration failures',
+                'severity': 'High',
+                'mitigation': 'Create compatibility layer; implement feature flags for gradual rollout'
+            })
+
+        # Data migration risks
+        if complexity['data_migration'] >= 7:
+            risks.append({
+                'risk': 'Data migration could cause data loss or corruption',
+                'severity': 'Critical',
+                'mitigation': 'Implement robust backup strategy; run parallel systems during migration; extensive validation'
+            })
+
+        # Architecture risks
+        if complexity['architecture_changes'] >= 8:
+            risks.append({
+                'risk': 'Major architectural changes increase risk of performance regression',
+                'severity': 'High',
+                'mitigation': 'Extensive performance testing; staged rollout; monitoring and alerting'
+            })
+
+        # Testing risks
+        if complexity['testing_requirements'] >= 7:
+            risks.append({
+                'risk': 'Inadequate test coverage may miss critical bugs',
+                'severity': 'Medium',
+                'mitigation': 'Improve test coverage before migration; automated regression testing; user acceptance testing'
+            })
+
+        # Always report at least one entry so the category is never empty
+        if not risks:
+            risks.append({
+                'risk': 'Standard technical risks (bugs, edge cases)',
+                'severity': 'Low',
+                'mitigation': 'Standard QA processes and staged rollout'
+            })
+
+        return risks
+
+    def _identify_business_risks(self) -> List[Dict[str, str]]:
+        """
+        Identify business risks.
+
+        Reads 'downtime_tolerance' (default 'low') from the constraints.
+        The feature-parity and timeline risks are always included.
+
+        Returns:
+            List of business risks with mitigations
+        """
+        risks = []
+
+        # Downtime risk
+        downtime_tolerance = self.constraints.get('downtime_tolerance', 'low')
+        if downtime_tolerance == 'none':
+            risks.append({
+                'risk': 'Zero-downtime migration increases complexity and risk',
+                'severity': 'High',
+                'mitigation': 'Blue-green deployment; feature flags; gradual traffic migration'
+            })
+
+        # Feature parity risk
+        risks.append({
+            'risk': 'New implementation may lack feature parity',
+            'severity': 'Medium',
+            'mitigation': 'Comprehensive feature audit; prioritized feature list; clear communication'
+        })
+
+        # Timeline risk
+        risks.append({
+            'risk': 'Migration may take longer than estimated',
+            'severity': 'Medium',
+            'mitigation': 'Build in 20% buffer; regular progress reviews; scope management'
+        })
+
+        return risks
+
+    def _identify_team_risks(self) -> List[Dict[str, str]]:
+        """
+        Identify team-related risks.
+
+        Reads 'target_tech_experience' (default 'low') and 'team_size'
+        (default 3) from the team info. The knowledge-retention risk is
+        always included.
+
+        Returns:
+            List of team risks with mitigations
+        """
+        risks = []
+
+        # Learning curve
+        team_experience = self.team_info.get('target_tech_experience', 'low')
+        if team_experience in ('low', 'none'):
+            risks.append({
+                'risk': 'Team lacks experience with target technology',
+                'severity': 'High',
+                'mitigation': 'Training program; hire experienced developers; external consulting'
+            })
+
+        # Team size
+        team_size = self.team_info.get('team_size', 3)
+        if team_size < 3:
+            risks.append({
+                'risk': 'Small team size may extend timeline',
+                'severity': 'Medium',
+                'mitigation': 'Consider augmenting team; reduce scope; extend timeline'
+            })
+
+        # Knowledge retention
+        risks.append({
+            'risk': 'Loss of institutional knowledge during migration',
+            'severity': 'Medium',
+            'mitigation': 'Comprehensive documentation; knowledge sharing sessions; pair programming'
+        })
+
+        return risks
+
+    def generate_migration_plan(self) -> Dict[str, Any]:
+        """
+        Generate comprehensive migration plan.
+
+        Returns:
+            Complete migration plan combining the complexity analysis, effort
+            estimation, risk assessment, recommended approach, overall
+            recommendation and success criteria
+        """
+        complexity = self.calculate_complexity_score()
+        effort = self.estimate_effort()
+        risks = self.assess_risks()
+
+        # Generate phased approach
+        approach = self._recommend_migration_approach(complexity['overall_complexity'])
+
+        # Generate recommendation
+        recommendation = self._generate_migration_recommendation(complexity, effort, risks)
+
+        return {
+            'source_technology': self.source_tech,
+            'target_technology': self.target_tech,
+            'complexity_analysis': complexity,
+            'effort_estimation': effort,
+            'risk_assessment': risks,
+            'recommended_approach': approach,
+            'overall_recommendation': recommendation,
+            'success_criteria': self._define_success_criteria()
+        }
+
+    def _recommend_migration_approach(self, complexity_score: float) -> Dict[str, Any]:
+        """
+        Recommend migration approach based on complexity.
+
+        Args:
+            complexity_score: Overall complexity score
+
+        Returns:
+            Recommended approach details: 'approach', 'description',
+            'timeline_multiplier' and the list of 'phases'
+        """
+        if complexity_score <= 3:
+            approach = 'direct_migration'
+            description = 'Direct migration - low complexity allows straightforward migration'
+            timeline_multiplier = 1.0
+        elif complexity_score <= 6:
+            approach = 'phased_migration'
+            description = 'Phased migration - migrate components incrementally to manage risk'
+            timeline_multiplier = 1.3
+        else:
+            approach = 'strangler_pattern'
+            description = 'Strangler pattern - gradually replace old system while running in parallel'
+            timeline_multiplier = 1.5
+
+        return {
+            'approach': approach,
+            'description': description,
+            'timeline_multiplier': timeline_multiplier,
+            'phases': self._generate_approach_phases(approach)
+        }
+
+    def _generate_approach_phases(self, approach: str) -> List[str]:
+        """
+        Generate phase descriptions for migration approach.
+
+        Args:
+            approach: Migration approach type
+
+        Returns:
+            List of phase descriptions; an unknown approach falls back to
+            the 'phased_migration' phases
+        """
+        phases = {
+            'direct_migration': [
+                'Phase 1: Set up target environment and migrate configuration',
+                'Phase 2: Migrate codebase and dependencies',
+                'Phase 3: Migrate data with validation',
+                'Phase 4: Comprehensive testing',
+                'Phase 5: Cutover and monitoring'
+            ],
+            'phased_migration': [
+                'Phase 1: Identify and prioritize components for migration',
+                'Phase 2: Migrate non-critical components first',
+                'Phase 3: Migrate core components with parallel running',
+                'Phase 4: Migrate critical components with rollback plan',
+                'Phase 5: Decommission old system'
+            ],
+            'strangler_pattern': [
+                'Phase 1: Set up routing layer between old and new systems',
+                'Phase 2: Implement new features in target technology only',
+                'Phase 3: Gradually migrate existing features (lowest risk first)',
+                'Phase 4: Migrate high-risk components last with extensive testing',
+                'Phase 5: Complete migration and remove routing layer'
+            ]
+        }
+
+        return phases.get(approach, phases['phased_migration'])
+
+    def _generate_migration_recommendation(
+        self,
+        complexity: Dict[str, float],
+        effort: Dict[str, Any],
+        risks: Dict[str, List[Dict[str, str]]]
+    ) -> str:
+        """
+        Generate overall migration recommendation.
+
+        Args:
+            complexity: Complexity analysis
+            effort: Effort estimation
+            risks: Risk assessment
+
+        Returns:
+            Recommendation string chosen from the overall complexity and the
+            number of High/Critical risks, including the estimated months
+        """
+        overall_complexity = complexity['overall_complexity']
+        timeline_months = effort['estimated_timeline']['calendar_months']
+
+        # Count high/critical severity risks across every category
+        high_risk_count = sum(
+            1 for risk_list in risks.values()
+            for risk in risk_list
+            if risk['severity'] in ('High', 'Critical')
+        )
+
+        if overall_complexity <= 4 and high_risk_count <= 2:
+            return f"Recommended - Low complexity migration achievable in {timeline_months:.1f} months with manageable risks"
+        elif overall_complexity <= 7 and high_risk_count <= 4:
+            return f"Proceed with caution - Moderate complexity migration requiring {timeline_months:.1f} months and careful risk management"
+        else:
+            return f"High risk - Complex migration requiring {timeline_months:.1f} months. Consider: incremental approach, additional resources, or alternative solutions"
+
+    def _define_success_criteria(self) -> List[str]:
+        """
+        Define success criteria for migration.
+
+        Returns:
+            Fixed list of success criteria (independent of the input data)
+        """
+        return [
+            'Feature parity with current system',
+            'Performance equal or better than current system',
+            'Zero data loss or corruption',
+            'All tests passing (unit, integration, E2E)',
+            'Successful production deployment with <1% error rate',
+            'Team trained and comfortable with new technology',
+            'Documentation complete and up-to-date'
+        ]