#!/usr/bin/env python3
"""
Learning Engine - Pattern Detection from Correction History

SINGLE RESPONSIBILITY: Analyze history and suggest new corrections

Features:
- Analyze correction history for patterns
- Detect frequently occurring corrections
- Calculate confidence scores
- Generate suggestions for user review
- Track rejected suggestions to avoid re-suggesting
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import List, Dict
from dataclasses import dataclass, asdict
from collections import defaultdict


@dataclass
class Suggestion:
    """Represents a learned correction suggestion"""
    from_text: str
    to_text: str
    frequency: int
    confidence: float
    examples: List[Dict]  # List of {file, line, context}
    first_seen: str
    last_seen: str
    status: str  # "pending", "approved", "rejected"


class LearningEngine:
    """
    Analyzes correction history to suggest new corrections

    Algorithm:
    1. Load all history files
    2. Extract stage2 (AI) changes
    3. Group by pattern (from_text → to_text)
    4. Calculate frequency and confidence
    5. Filter by thresholds
    6. Save suggestions for user review
    """

    # Thresholds for suggesting corrections
    MIN_FREQUENCY = 3  # Must appear at least 3 times
    MIN_CONFIDENCE = 0.8  # Must have 80%+ confidence

    def __init__(self, history_dir: Path, learned_dir: Path):
        """
        Initialize learning engine

        Args:
            history_dir: Directory containing correction history
            learned_dir: Directory for learned suggestions
        """
        self.history_dir = history_dir
        self.learned_dir = learned_dir
        self.pending_file = learned_dir / "pending_review.json"
        self.rejected_file = learned_dir / "rejected.json"

    def analyze_and_suggest(self) -> List[Suggestion]:
        """
        Analyze history and generate suggestions

        Returns:
            List of new suggestions for user review
        """
        # Load all history
        patterns = self._extract_patterns()

        # Filter rejected patterns
        rejected = self._load_rejected()
        patterns = {k: v for k, v in patterns.items()
                   if k not in rejected}

        # Generate suggestions
        suggestions = []
        for (from_text, to_text), occurrences in patterns.items():
            frequency = len(occurrences)

            if frequency < self.MIN_FREQUENCY:
                continue

            confidence = self._calculate_confidence(occurrences)

            if confidence < self.MIN_CONFIDENCE:
                continue

            suggestion = Suggestion(
                from_text=from_text,
                to_text=to_text,
                frequency=frequency,
                confidence=confidence,
                examples=occurrences[:5],  # Top 5 examples
                first_seen=occurrences[0]["timestamp"],
                last_seen=occurrences[-1]["timestamp"],
                status="pending"
            )

            suggestions.append(suggestion)

        # Save new suggestions
        if suggestions:
            self._save_pending_suggestions(suggestions)

        return suggestions

    def approve_suggestion(self, from_text: str) -> bool:
        """
        Approve a suggestion (remove from pending)

        Returns:
            True if approved, False if not found
        """
        pending = self._load_pending_suggestions()

        for suggestion in pending:
            if suggestion["from_text"] == from_text:
                pending.remove(suggestion)
                self._save_suggestions(pending, self.pending_file)
                return True

        return False

    def reject_suggestion(self, from_text: str, to_text: str) -> None:
        """
        Reject a suggestion (move to rejected list)
        """
        # Remove from pending
        pending = self._load_pending_suggestions()
        pending = [s for s in pending
                  if not (s["from_text"] == from_text and s["to_text"] == to_text)]
        self._save_suggestions(pending, self.pending_file)

        # Add to rejected
        rejected = self._load_rejected()
        rejected.add((from_text, to_text))
        self._save_rejected(rejected)

    def list_pending(self) -> List[Dict]:
        """List all pending suggestions"""
        return self._load_pending_suggestions()

    def _extract_patterns(self) -> Dict[tuple, List[Dict]]:
        """Extract all correction patterns from history"""
        patterns = defaultdict(list)

        if not self.history_dir.exists():
            return patterns

        for history_file in self.history_dir.glob("*.json"):
            with open(history_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Extract stage2 changes (AI corrections)
            if "stages" in data and "stage2" in data["stages"]:
                changes = data["stages"]["stage2"].get("changes", [])

                for change in changes:
                    key = (change["from"], change["to"])
                    patterns[key].append({
                        "file": data["filename"],
                        "line": change.get("line", 0),
                        "context": change.get("context", ""),
                        "timestamp": data["timestamp"]
                    })

        return patterns

    def _calculate_confidence(self, occurrences: List[Dict]) -> float:
        """
        Calculate confidence score for a pattern

        Factors:
        - Frequency (more = higher)
        - Consistency (always same correction = higher)
        - Recency (recent occurrences = higher)
        """
        # Base confidence from frequency
        frequency_score = min(len(occurrences) / 10.0, 1.0)

        # Consistency: always the same from→to mapping
        consistency_score = 1.0  # Already consistent by grouping

        # Recency: more recent = higher
        # (Simplified: assume chronological order)
        recency_score = 0.9 if len(occurrences) > 1 else 0.8

        # Weighted average
        confidence = (
            0.5 * frequency_score +
            0.3 * consistency_score +
            0.2 * recency_score
        )

        return confidence

    def _load_pending_suggestions(self) -> List[Dict]:
        """Load pending suggestions from file"""
        if not self.pending_file.exists():
            return []

        with open(self.pending_file, 'r', encoding='utf-8') as f:
            content = f.read().strip()
            if not content:
                return []
            return json.loads(content).get("suggestions", [])

    def _save_pending_suggestions(self, suggestions: List[Suggestion]) -> None:
        """Save pending suggestions to file"""
        existing = self._load_pending_suggestions()

        # Convert to dict and append
        new_suggestions = [asdict(s) for s in suggestions]
        all_suggestions = existing + new_suggestions

        self._save_suggestions(all_suggestions, self.pending_file)

    def _save_suggestions(self, suggestions: List[Dict], filepath: Path) -> None:
        """Save suggestions to file"""
        data = {"suggestions": suggestions}
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    def _load_rejected(self) -> set:
        """Load rejected patterns"""
        if not self.rejected_file.exists():
            return set()

        with open(self.rejected_file, 'r', encoding='utf-8') as f:
            content = f.read().strip()
            if not content:
                return set()
            data = json.loads(content)
            return {(r["from"], r["to"]) for r in data.get("rejected", [])}

    def _save_rejected(self, rejected: set) -> None:
        """Save rejected patterns"""
        data = {
            "rejected": [
                {"from": from_text, "to": to_text}
                for from_text, to_text in rejected
            ]
        }
        with open(self.rejected_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)