claude-skills-reference/marketing-skill/app-store-optimization/review_analyzer.py

"""
Review analysis module for App Store Optimization.
Analyzes user reviews for sentiment, issues, and feature requests.
"""

from typing import Dict, List, Any, Optional, Tuple
from collections import Counter
import re


class ReviewAnalyzer:
    """Analyzes user reviews for actionable insights."""

    # Sentiment keywords
    POSITIVE_KEYWORDS = [
        'great', 'awesome', 'excellent', 'amazing', 'love', 'best', 'perfect',
        'fantastic', 'wonderful', 'brilliant', 'outstanding', 'superb'
    ]

    NEGATIVE_KEYWORDS = [
        'bad', 'terrible', 'awful', 'horrible', 'hate', 'worst', 'useless',
        'broken', 'crash', 'bug', 'slow', 'disappointing', 'frustrating'
    ]

    # Issue indicators
    ISSUE_KEYWORDS = [
        'crash', 'bug', 'error', 'broken', 'not working', 'doesnt work',
        'freezes', 'slow', 'laggy', 'glitch', 'problem', 'issue', 'fail'
    ]

    # Feature request indicators
    FEATURE_REQUEST_KEYWORDS = [
        'wish', 'would be nice', 'should add', 'need', 'want', 'hope',
        'please add', 'missing', 'lacks', 'feature request'
    ]

    def __init__(self, app_name: str):
        """
        Initialize review analyzer.

        Args:
            app_name: Name of the app
        """
        self.app_name = app_name
        self.reviews = []
        self.analysis_cache = {}

    def analyze_sentiment(
        self,
        reviews: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Analyze sentiment across reviews.

        Args:
            reviews: List of review dicts with 'text', 'rating', 'date'

        Returns:
            Sentiment analysis summary
        """
        self.reviews = reviews

        sentiment_counts = {
            'positive': 0,
            'neutral': 0,
            'negative': 0
        }

        detailed_sentiments = []

        for review in reviews:
            text = review.get('text', '').lower()
            rating = review.get('rating', 3)

            # Calculate sentiment score
            sentiment_score = self._calculate_sentiment_score(text, rating)
            sentiment_category = self._categorize_sentiment(sentiment_score)

            sentiment_counts[sentiment_category] += 1

            detailed_sentiments.append({
                'review_id': review.get('id', ''),
                'rating': rating,
                'sentiment_score': sentiment_score,
                'sentiment': sentiment_category,
                'text_preview': text[:100] + '...' if len(text) > 100 else text
            })

        # Calculate percentages
        total = len(reviews)
        sentiment_distribution = {
            'positive': round((sentiment_counts['positive'] / total) * 100, 1) if total > 0 else 0,
            'neutral': round((sentiment_counts['neutral'] / total) * 100, 1) if total > 0 else 0,
            'negative': round((sentiment_counts['negative'] / total) * 100, 1) if total > 0 else 0
        }

        # Calculate average rating
        avg_rating = sum(r.get('rating', 0) for r in reviews) / total if total > 0 else 0

        return {
            'total_reviews_analyzed': total,
            'average_rating': round(avg_rating, 2),
            'sentiment_distribution': sentiment_distribution,
            'sentiment_counts': sentiment_counts,
            'sentiment_trend': self._assess_sentiment_trend(sentiment_distribution),
            'detailed_sentiments': detailed_sentiments[:50]  # Limit output
        }

    def extract_common_themes(
        self,
        reviews: List[Dict[str, Any]],
        min_mentions: int = 3
    ) -> Dict[str, Any]:
        """
        Extract frequently mentioned themes and topics.

        Args:
            reviews: List of review dicts
            min_mentions: Minimum mentions to be considered common

        Returns:
            Common themes analysis
        """
        # Extract all words from reviews
        all_words = []
        all_phrases = []

        for review in reviews:
            text = review.get('text', '').lower()
            # Clean text
            text = re.sub(r'[^\w\s]', ' ', text)
            words = text.split()

            # Filter out common words
            stop_words = {
                'the', 'and', 'for', 'with', 'this', 'that', 'from', 'have',
                'app', 'apps', 'very', 'really', 'just', 'but', 'not', 'you'
            }
            words = [w for w in words if w not in stop_words and len(w) > 3]

            all_words.extend(words)

            # Extract 2-3 word phrases
            for i in range(len(words) - 1):
                phrase = f"{words[i]} {words[i+1]}"
                all_phrases.append(phrase)

        # Count frequency
        word_freq = Counter(all_words)
        phrase_freq = Counter(all_phrases)

        # Filter by min_mentions
        common_words = [
            {'word': word, 'mentions': count}
            for word, count in word_freq.most_common(30)
            if count >= min_mentions
        ]

        common_phrases = [
            {'phrase': phrase, 'mentions': count}
            for phrase, count in phrase_freq.most_common(20)
            if count >= min_mentions
        ]

        # Categorize themes
        themes = self._categorize_themes(common_words, common_phrases)

        return {
            'common_words': common_words,
            'common_phrases': common_phrases,
            'identified_themes': themes,
            'insights': self._generate_theme_insights(themes)
        }

    def identify_issues(
        self,
        reviews: List[Dict[str, Any]],
        rating_threshold: int = 3
    ) -> Dict[str, Any]:
        """
        Identify bugs, crashes, and other issues from reviews.

        Args:
            reviews: List of review dicts
            rating_threshold: Only analyze reviews at or below this rating

        Returns:
            Issue identification report
        """
        issues = []

        for review in reviews:
            rating = review.get('rating', 5)
            if rating > rating_threshold:
                continue

            text = review.get('text', '').lower()

            # Check for issue keywords
            mentioned_issues = []
            for keyword in self.ISSUE_KEYWORDS:
                if keyword in text:
                    mentioned_issues.append(keyword)

            if mentioned_issues:
                issues.append({
                    'review_id': review.get('id', ''),
                    'rating': rating,
                    'date': review.get('date', ''),
                    'issue_keywords': mentioned_issues,
                    'text': text[:200] + '...' if len(text) > 200 else text
                })

        # Group by issue type
        issue_frequency = Counter()
        for issue in issues:
            for keyword in issue['issue_keywords']:
                issue_frequency[keyword] += 1

        # Categorize issues
        categorized_issues = self._categorize_issues(issues)

        # Calculate issue severity
        severity_scores = self._calculate_issue_severity(
            categorized_issues,
            len(reviews)
        )

        return {
            'total_issues_found': len(issues),
            'issue_frequency': dict(issue_frequency.most_common(15)),
            'categorized_issues': categorized_issues,
            'severity_scores': severity_scores,
            'top_issues': self._rank_issues_by_severity(severity_scores),
            'recommendations': self._generate_issue_recommendations(
                categorized_issues,
                severity_scores
            )
        }

    def find_feature_requests(
        self,
        reviews: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Extract feature requests and desired improvements.

        Args:
            reviews: List of review dicts

        Returns:
            Feature request analysis
        """
        feature_requests = []

        for review in reviews:
            text = review.get('text', '').lower()
            rating = review.get('rating', 3)

            # Check for feature request indicators
            is_feature_request = any(
                keyword in text
                for keyword in self.FEATURE_REQUEST_KEYWORDS
            )

            if is_feature_request:
                # Extract the specific request
                request_text = self._extract_feature_request_text(text)

                feature_requests.append({
                    'review_id': review.get('id', ''),
                    'rating': rating,
                    'date': review.get('date', ''),
                    'request_text': request_text,
                    'full_review': text[:200] + '...' if len(text) > 200 else text
                })

        # Cluster similar requests
        clustered_requests = self._cluster_feature_requests(feature_requests)

        # Prioritize based on frequency and rating context
        prioritized_requests = self._prioritize_feature_requests(clustered_requests)

        return {
            'total_feature_requests': len(feature_requests),
            'clustered_requests': clustered_requests,
            'prioritized_requests': prioritized_requests,
            'implementation_recommendations': self._generate_feature_recommendations(
                prioritized_requests
            )
        }

    def track_sentiment_trends(
        self,
        reviews_by_period: Dict[str, List[Dict[str, Any]]]
    ) -> Dict[str, Any]:
        """
        Track sentiment changes over time.

        Args:
            reviews_by_period: Dict of period_name: reviews

        Returns:
            Trend analysis
        """
        trends = []

        for period, reviews in reviews_by_period.items():
            sentiment = self.analyze_sentiment(reviews)

            trends.append({
                'period': period,
                'total_reviews': len(reviews),
                'average_rating': sentiment['average_rating'],
                'positive_percentage': sentiment['sentiment_distribution']['positive'],
                'negative_percentage': sentiment['sentiment_distribution']['negative']
            })

        # Calculate trend direction
        if len(trends) >= 2:
            first_period = trends[0]
            last_period = trends[-1]

            rating_change = last_period['average_rating'] - first_period['average_rating']
            sentiment_change = last_period['positive_percentage'] - first_period['positive_percentage']

            trend_direction = self._determine_trend_direction(
                rating_change,
                sentiment_change
            )
        else:
            trend_direction = 'insufficient_data'

        return {
            'periods_analyzed': len(trends),
            'trend_data': trends,
            'trend_direction': trend_direction,
            'insights': self._generate_trend_insights(trends, trend_direction)
        }

    def generate_response_templates(
        self,
        issue_category: str
    ) -> List[Dict[str, str]]:
        """
        Generate response templates for common review scenarios.

        Args:
            issue_category: Category of issue ('crash', 'feature_request', 'positive', etc.)

        Returns:
            Response templates
        """
        templates = {
            'crash': [
                {
                    'scenario': 'App crash reported',
                    'template': "Thank you for bringing this to our attention. We're sorry you experienced a crash. "
                               "Our team is investigating this issue. Could you please share more details about when "
                               "this occurred (device model, iOS/Android version) by contacting support@[company].com? "
                               "We're committed to fixing this quickly."
                },
                {
                    'scenario': 'Crash already fixed',
                    'template': "Thank you for your feedback. We've identified and fixed this crash issue in version [X.X]. "
                               "Please update to the latest version. If the problem persists, please reach out to "
                               "support@[company].com and we'll help you directly."
                }
            ],
            'bug': [
                {
                    'scenario': 'Bug reported',
                    'template': "Thanks for reporting this bug. We take these issues seriously. Our team is looking into it "
                               "and we'll have a fix in an upcoming update. We appreciate your patience and will notify you "
                               "when it's resolved."
                }
            ],
            'feature_request': [
                {
                    'scenario': 'Feature request received',
                    'template': "Thank you for this suggestion! We're always looking to improve [app_name]. We've added your "
                               "request to our roadmap and will consider it for a future update. Follow us @[social] for "
                               "updates on new features."
                },
                {
                    'scenario': 'Feature already planned',
                    'template': "Great news! This feature is already on our roadmap and we're working on it. Stay tuned for "
                               "updates in the coming months. Thanks for your feedback!"
                }
            ],
            'positive': [
                {
                    'scenario': 'Positive review',
                    'template': "Thank you so much for your kind words! We're thrilled that you're enjoying [app_name]. "
                               "Reviews like yours motivate our team to keep improving. If you ever have suggestions, "
                               "we'd love to hear them!"
                }
            ],
            'negative_general': [
                {
                    'scenario': 'General complaint',
                    'template': "We're sorry to hear you're not satisfied with your experience. We'd like to make this right. "
                               "Please contact us at support@[company].com so we can understand the issue better and help "
                               "you directly. Thank you for giving us a chance to improve."
                }
            ]
        }

        return templates.get(issue_category, templates['negative_general'])

    def _calculate_sentiment_score(self, text: str, rating: int) -> float:
        """Calculate sentiment score (-1 to 1)."""
        # Start with rating-based score
        rating_score = (rating - 3) / 2  # Convert 1-5 to -1 to 1

        # Adjust based on text sentiment
        positive_count = sum(1 for keyword in self.POSITIVE_KEYWORDS if keyword in text)
        negative_count = sum(1 for keyword in self.NEGATIVE_KEYWORDS if keyword in text)

        text_score = (positive_count - negative_count) / 10  # Normalize

        # Weighted average (60% rating, 40% text)
        final_score = (rating_score * 0.6) + (text_score * 0.4)

        return max(min(final_score, 1.0), -1.0)

    def _categorize_sentiment(self, score: float) -> str:
        """Categorize sentiment score."""
        if score > 0.3:
            return 'positive'
        elif score < -0.3:
            return 'negative'
        else:
            return 'neutral'

    def _assess_sentiment_trend(self, distribution: Dict[str, float]) -> str:
        """Assess overall sentiment trend."""
        positive = distribution['positive']
        negative = distribution['negative']

        if positive > 70:
            return 'very_positive'
        elif positive > 50:
            return 'positive'
        elif negative > 30:
            return 'concerning'
        elif negative > 50:
            return 'critical'
        else:
            return 'mixed'

    def _categorize_themes(
        self,
        common_words: List[Dict[str, Any]],
        common_phrases: List[Dict[str, Any]]
    ) -> Dict[str, List[str]]:
        """Categorize themes from words and phrases."""
        themes = {
            'features': [],
            'performance': [],
            'usability': [],
            'support': [],
            'pricing': []
        }

        # Keywords for each category
        feature_keywords = {'feature', 'functionality', 'option', 'tool'}
        performance_keywords = {'fast', 'slow', 'crash', 'lag', 'speed', 'performance'}
        usability_keywords = {'easy', 'difficult', 'intuitive', 'confusing', 'interface', 'design'}
        support_keywords = {'support', 'help', 'customer', 'service', 'response'}
        pricing_keywords = {'price', 'cost', 'expensive', 'cheap', 'subscription', 'free'}

        for word_data in common_words:
            word = word_data['word']
            if any(kw in word for kw in feature_keywords):
                themes['features'].append(word)
            elif any(kw in word for kw in performance_keywords):
                themes['performance'].append(word)
            elif any(kw in word for kw in usability_keywords):
                themes['usability'].append(word)
            elif any(kw in word for kw in support_keywords):
                themes['support'].append(word)
            elif any(kw in word for kw in pricing_keywords):
                themes['pricing'].append(word)

        return {k: v for k, v in themes.items() if v}  # Remove empty categories

    def _generate_theme_insights(self, themes: Dict[str, List[str]]) -> List[str]:
        """Generate insights from themes."""
        insights = []

        for category, keywords in themes.items():
            if keywords:
                insights.append(
                    f"{category.title()}: Users frequently mention {', '.join(keywords[:3])}"
                )

        return insights[:5]

    def _categorize_issues(self, issues: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
        """Categorize issues by type."""
        categories = {
            'crashes': [],
            'bugs': [],
            'performance': [],
            'compatibility': []
        }

        for issue in issues:
            keywords = issue['issue_keywords']

            if 'crash' in keywords or 'freezes' in keywords:
                categories['crashes'].append(issue)
            elif 'bug' in keywords or 'error' in keywords or 'broken' in keywords:
                categories['bugs'].append(issue)
            elif 'slow' in keywords or 'laggy' in keywords:
                categories['performance'].append(issue)
            else:
                categories['compatibility'].append(issue)

        return {k: v for k, v in categories.items() if v}

    def _calculate_issue_severity(
        self,
        categorized_issues: Dict[str, List[Dict[str, Any]]],
        total_reviews: int
    ) -> Dict[str, Dict[str, Any]]:
        """Calculate severity scores for each issue category."""
        severity_scores = {}

        for category, issues in categorized_issues.items():
            count = len(issues)
            percentage = (count / total_reviews) * 100 if total_reviews > 0 else 0

            # Calculate average rating of affected reviews
            avg_rating = sum(i['rating'] for i in issues) / count if count > 0 else 0

            # Severity score (0-100)
            severity = min((percentage * 10) + ((5 - avg_rating) * 10), 100)

            severity_scores[category] = {
                'count': count,
                'percentage': round(percentage, 2),
                'average_rating': round(avg_rating, 2),
                'severity_score': round(severity, 1),
                'priority': 'critical' if severity > 70 else ('high' if severity > 40 else 'medium')
            }

        return severity_scores

    def _rank_issues_by_severity(
        self,
        severity_scores: Dict[str, Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Rank issues by severity score."""
        ranked = sorted(
            [{'category': cat, **data} for cat, data in severity_scores.items()],
            key=lambda x: x['severity_score'],
            reverse=True
        )
        return ranked

    def _generate_issue_recommendations(
        self,
        categorized_issues: Dict[str, List[Dict[str, Any]]],
        severity_scores: Dict[str, Dict[str, Any]]
    ) -> List[str]:
        """Generate recommendations for addressing issues."""
        recommendations = []

        for category, score_data in severity_scores.items():
            if score_data['priority'] == 'critical':
                recommendations.append(
                    f"URGENT: Address {category} issues immediately - affecting {score_data['percentage']}% of reviews"
                )
            elif score_data['priority'] == 'high':
                recommendations.append(
                    f"HIGH PRIORITY: Focus on {category} issues in next update"
                )

        return recommendations

    def _extract_feature_request_text(self, text: str) -> str:
        """Extract the specific feature request from review text."""
        # Simple extraction - find sentence with feature request keywords
        sentences = text.split('.')
        for sentence in sentences:
            if any(keyword in sentence for keyword in self.FEATURE_REQUEST_KEYWORDS):
                return sentence.strip()
        return text[:100]  # Fallback

    def _cluster_feature_requests(
        self,
        feature_requests: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Cluster similar feature requests."""
        # Simplified clustering - group by common keywords
        clusters = {}

        for request in feature_requests:
            text = request['request_text'].lower()
            # Extract key words
            words = [w for w in text.split() if len(w) > 4]

            # Try to find matching cluster
            matched = False
            for cluster_key in clusters:
                if any(word in cluster_key for word in words[:3]):
                    clusters[cluster_key].append(request)
                    matched = True
                    break

            if not matched and words:
                cluster_key = ' '.join(words[:2])
                clusters[cluster_key] = [request]

        return [
            {'feature_theme': theme, 'request_count': len(requests), 'examples': requests[:3]}
            for theme, requests in clusters.items()
        ]

    def _prioritize_feature_requests(
        self,
        clustered_requests: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Prioritize feature requests by frequency."""
        return sorted(
            clustered_requests,
            key=lambda x: x['request_count'],
            reverse=True
        )[:10]

    def _generate_feature_recommendations(
        self,
        prioritized_requests: List[Dict[str, Any]]
    ) -> List[str]:
        """Generate recommendations for feature requests."""
        recommendations = []

        if prioritized_requests:
            top_request = prioritized_requests[0]
            recommendations.append(
                f"Most requested feature: {top_request['feature_theme']} "
                f"({top_request['request_count']} mentions) - consider for next major release"
            )

        if len(prioritized_requests) > 1:
            recommendations.append(
                f"Also consider: {prioritized_requests[1]['feature_theme']}"
            )

        return recommendations

    def _determine_trend_direction(
        self,
        rating_change: float,
        sentiment_change: float
    ) -> str:
        """Determine overall trend direction."""
        if rating_change > 0.2 and sentiment_change > 5:
            return 'improving'
        elif rating_change < -0.2 and sentiment_change < -5:
            return 'declining'
        else:
            return 'stable'

    def _generate_trend_insights(
        self,
        trends: List[Dict[str, Any]],
        trend_direction: str
    ) -> List[str]:
        """Generate insights from trend analysis."""
        insights = []

        if trend_direction == 'improving':
            insights.append("Positive trend: User satisfaction is increasing over time")
        elif trend_direction == 'declining':
            insights.append("WARNING: User satisfaction is declining - immediate action needed")
        else:
            insights.append("Sentiment is stable - maintain current quality")

        # Review velocity insight
        if len(trends) >= 2:
            recent_reviews = trends[-1]['total_reviews']
            previous_reviews = trends[-2]['total_reviews']

            if recent_reviews > previous_reviews * 1.5:
                insights.append("Review volume increasing - growing user base or recent controversy")

        return insights


def analyze_reviews(
    app_name: str,
    reviews: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """
    Convenience function to perform comprehensive review analysis.

    Args:
        app_name: App name
        reviews: List of review dictionaries

    Returns:
        Complete review analysis
    """
    analyzer = ReviewAnalyzer(app_name)

    return {
        'sentiment_analysis': analyzer.analyze_sentiment(reviews),
        'common_themes': analyzer.extract_common_themes(reviews),
        'issues_identified': analyzer.identify_issues(reviews),
        'feature_requests': analyzer.find_feature_requests(reviews)
    }