""" Review analysis module for App Store Optimization. Analyzes user reviews for sentiment, issues, and feature requests. """ from typing import Dict, List, Any, Optional, Tuple from collections import Counter import re class ReviewAnalyzer: """Analyzes user reviews for actionable insights.""" # Sentiment keywords POSITIVE_KEYWORDS = [ 'great', 'awesome', 'excellent', 'amazing', 'love', 'best', 'perfect', 'fantastic', 'wonderful', 'brilliant', 'outstanding', 'superb' ] NEGATIVE_KEYWORDS = [ 'bad', 'terrible', 'awful', 'horrible', 'hate', 'worst', 'useless', 'broken', 'crash', 'bug', 'slow', 'disappointing', 'frustrating' ] # Issue indicators ISSUE_KEYWORDS = [ 'crash', 'bug', 'error', 'broken', 'not working', 'doesnt work', 'freezes', 'slow', 'laggy', 'glitch', 'problem', 'issue', 'fail' ] # Feature request indicators FEATURE_REQUEST_KEYWORDS = [ 'wish', 'would be nice', 'should add', 'need', 'want', 'hope', 'please add', 'missing', 'lacks', 'feature request' ] def __init__(self, app_name: str): """ Initialize review analyzer. Args: app_name: Name of the app """ self.app_name = app_name self.reviews = [] self.analysis_cache = {} def analyze_sentiment( self, reviews: List[Dict[str, Any]] ) -> Dict[str, Any]: """ Analyze sentiment across reviews. Args: reviews: List of review dicts with 'text', 'rating', 'date' Returns: Sentiment analysis summary """ self.reviews = reviews sentiment_counts = { 'positive': 0, 'neutral': 0, 'negative': 0 } detailed_sentiments = [] for review in reviews: text = review.get('text', '').lower() rating = review.get('rating', 3) # Calculate sentiment score sentiment_score = self._calculate_sentiment_score(text, rating) sentiment_category = self._categorize_sentiment(sentiment_score) sentiment_counts[sentiment_category] += 1 detailed_sentiments.append({ 'review_id': review.get('id', ''), 'rating': rating, 'sentiment_score': sentiment_score, 'sentiment': sentiment_category, 'text_preview': text[:100] + '...' if len(text) > 100 else text }) # Calculate percentages total = len(reviews) sentiment_distribution = { 'positive': round((sentiment_counts['positive'] / total) * 100, 1) if total > 0 else 0, 'neutral': round((sentiment_counts['neutral'] / total) * 100, 1) if total > 0 else 0, 'negative': round((sentiment_counts['negative'] / total) * 100, 1) if total > 0 else 0 } # Calculate average rating avg_rating = sum(r.get('rating', 0) for r in reviews) / total if total > 0 else 0 return { 'total_reviews_analyzed': total, 'average_rating': round(avg_rating, 2), 'sentiment_distribution': sentiment_distribution, 'sentiment_counts': sentiment_counts, 'sentiment_trend': self._assess_sentiment_trend(sentiment_distribution), 'detailed_sentiments': detailed_sentiments[:50] # Limit output } def extract_common_themes( self, reviews: List[Dict[str, Any]], min_mentions: int = 3 ) -> Dict[str, Any]: """ Extract frequently mentioned themes and topics. Args: reviews: List of review dicts min_mentions: Minimum mentions to be considered common Returns: Common themes analysis """ # Extract all words from reviews all_words = [] all_phrases = [] for review in reviews: text = review.get('text', '').lower() # Clean text text = re.sub(r'[^\w\s]', ' ', text) words = text.split() # Filter out common words stop_words = { 'the', 'and', 'for', 'with', 'this', 'that', 'from', 'have', 'app', 'apps', 'very', 'really', 'just', 'but', 'not', 'you' } words = [w for w in words if w not in stop_words and len(w) > 3] all_words.extend(words) # Extract 2-3 word phrases for i in range(len(words) - 1): phrase = f"{words[i]} {words[i+1]}" all_phrases.append(phrase) # Count frequency word_freq = Counter(all_words) phrase_freq = Counter(all_phrases) # Filter by min_mentions common_words = [ {'word': word, 'mentions': count} for word, count in word_freq.most_common(30) if count >= min_mentions ] common_phrases = [ {'phrase': phrase, 'mentions': count} for phrase, count in phrase_freq.most_common(20) if count >= min_mentions ] # Categorize themes themes = self._categorize_themes(common_words, common_phrases) return { 'common_words': common_words, 'common_phrases': common_phrases, 'identified_themes': themes, 'insights': self._generate_theme_insights(themes) } def identify_issues( self, reviews: List[Dict[str, Any]], rating_threshold: int = 3 ) -> Dict[str, Any]: """ Identify bugs, crashes, and other issues from reviews. Args: reviews: List of review dicts rating_threshold: Only analyze reviews at or below this rating Returns: Issue identification report """ issues = [] for review in reviews: rating = review.get('rating', 5) if rating > rating_threshold: continue text = review.get('text', '').lower() # Check for issue keywords mentioned_issues = [] for keyword in self.ISSUE_KEYWORDS: if keyword in text: mentioned_issues.append(keyword) if mentioned_issues: issues.append({ 'review_id': review.get('id', ''), 'rating': rating, 'date': review.get('date', ''), 'issue_keywords': mentioned_issues, 'text': text[:200] + '...' if len(text) > 200 else text }) # Group by issue type issue_frequency = Counter() for issue in issues: for keyword in issue['issue_keywords']: issue_frequency[keyword] += 1 # Categorize issues categorized_issues = self._categorize_issues(issues) # Calculate issue severity severity_scores = self._calculate_issue_severity( categorized_issues, len(reviews) ) return { 'total_issues_found': len(issues), 'issue_frequency': dict(issue_frequency.most_common(15)), 'categorized_issues': categorized_issues, 'severity_scores': severity_scores, 'top_issues': self._rank_issues_by_severity(severity_scores), 'recommendations': self._generate_issue_recommendations( categorized_issues, severity_scores ) } def find_feature_requests( self, reviews: List[Dict[str, Any]] ) -> Dict[str, Any]: """ Extract feature requests and desired improvements. Args: reviews: List of review dicts Returns: Feature request analysis """ feature_requests = [] for review in reviews: text = review.get('text', '').lower() rating = review.get('rating', 3) # Check for feature request indicators is_feature_request = any( keyword in text for keyword in self.FEATURE_REQUEST_KEYWORDS ) if is_feature_request: # Extract the specific request request_text = self._extract_feature_request_text(text) feature_requests.append({ 'review_id': review.get('id', ''), 'rating': rating, 'date': review.get('date', ''), 'request_text': request_text, 'full_review': text[:200] + '...' if len(text) > 200 else text }) # Cluster similar requests clustered_requests = self._cluster_feature_requests(feature_requests) # Prioritize based on frequency and rating context prioritized_requests = self._prioritize_feature_requests(clustered_requests) return { 'total_feature_requests': len(feature_requests), 'clustered_requests': clustered_requests, 'prioritized_requests': prioritized_requests, 'implementation_recommendations': self._generate_feature_recommendations( prioritized_requests ) } def track_sentiment_trends( self, reviews_by_period: Dict[str, List[Dict[str, Any]]] ) -> Dict[str, Any]: """ Track sentiment changes over time. Args: reviews_by_period: Dict of period_name: reviews Returns: Trend analysis """ trends = [] for period, reviews in reviews_by_period.items(): sentiment = self.analyze_sentiment(reviews) trends.append({ 'period': period, 'total_reviews': len(reviews), 'average_rating': sentiment['average_rating'], 'positive_percentage': sentiment['sentiment_distribution']['positive'], 'negative_percentage': sentiment['sentiment_distribution']['negative'] }) # Calculate trend direction if len(trends) >= 2: first_period = trends[0] last_period = trends[-1] rating_change = last_period['average_rating'] - first_period['average_rating'] sentiment_change = last_period['positive_percentage'] - first_period['positive_percentage'] trend_direction = self._determine_trend_direction( rating_change, sentiment_change ) else: trend_direction = 'insufficient_data' return { 'periods_analyzed': len(trends), 'trend_data': trends, 'trend_direction': trend_direction, 'insights': self._generate_trend_insights(trends, trend_direction) } def generate_response_templates( self, issue_category: str ) -> List[Dict[str, str]]: """ Generate response templates for common review scenarios. Args: issue_category: Category of issue ('crash', 'feature_request', 'positive', etc.) Returns: Response templates """ templates = { 'crash': [ { 'scenario': 'App crash reported', 'template': "Thank you for bringing this to our attention. We're sorry you experienced a crash. " "Our team is investigating this issue. Could you please share more details about when " "this occurred (device model, iOS/Android version) by contacting support@[company].com? " "We're committed to fixing this quickly." }, { 'scenario': 'Crash already fixed', 'template': "Thank you for your feedback. We've identified and fixed this crash issue in version [X.X]. " "Please update to the latest version. If the problem persists, please reach out to " "support@[company].com and we'll help you directly." } ], 'bug': [ { 'scenario': 'Bug reported', 'template': "Thanks for reporting this bug. We take these issues seriously. Our team is looking into it " "and we'll have a fix in an upcoming update. We appreciate your patience and will notify you " "when it's resolved." } ], 'feature_request': [ { 'scenario': 'Feature request received', 'template': "Thank you for this suggestion! We're always looking to improve [app_name]. We've added your " "request to our roadmap and will consider it for a future update. Follow us @[social] for " "updates on new features." }, { 'scenario': 'Feature already planned', 'template': "Great news! This feature is already on our roadmap and we're working on it. Stay tuned for " "updates in the coming months. Thanks for your feedback!" } ], 'positive': [ { 'scenario': 'Positive review', 'template': "Thank you so much for your kind words! We're thrilled that you're enjoying [app_name]. " "Reviews like yours motivate our team to keep improving. If you ever have suggestions, " "we'd love to hear them!" } ], 'negative_general': [ { 'scenario': 'General complaint', 'template': "We're sorry to hear you're not satisfied with your experience. We'd like to make this right. " "Please contact us at support@[company].com so we can understand the issue better and help " "you directly. Thank you for giving us a chance to improve." } ] } return templates.get(issue_category, templates['negative_general']) def _calculate_sentiment_score(self, text: str, rating: int) -> float: """Calculate sentiment score (-1 to 1).""" # Start with rating-based score rating_score = (rating - 3) / 2 # Convert 1-5 to -1 to 1 # Adjust based on text sentiment positive_count = sum(1 for keyword in self.POSITIVE_KEYWORDS if keyword in text) negative_count = sum(1 for keyword in self.NEGATIVE_KEYWORDS if keyword in text) text_score = (positive_count - negative_count) / 10 # Normalize # Weighted average (60% rating, 40% text) final_score = (rating_score * 0.6) + (text_score * 0.4) return max(min(final_score, 1.0), -1.0) def _categorize_sentiment(self, score: float) -> str: """Categorize sentiment score.""" if score > 0.3: return 'positive' elif score < -0.3: return 'negative' else: return 'neutral' def _assess_sentiment_trend(self, distribution: Dict[str, float]) -> str: """Assess overall sentiment trend.""" positive = distribution['positive'] negative = distribution['negative'] if positive > 70: return 'very_positive' elif positive > 50: return 'positive' elif negative > 30: return 'concerning' elif negative > 50: return 'critical' else: return 'mixed' def _categorize_themes( self, common_words: List[Dict[str, Any]], common_phrases: List[Dict[str, Any]] ) -> Dict[str, List[str]]: """Categorize themes from words and phrases.""" themes = { 'features': [], 'performance': [], 'usability': [], 'support': [], 'pricing': [] } # Keywords for each category feature_keywords = {'feature', 'functionality', 'option', 'tool'} performance_keywords = {'fast', 'slow', 'crash', 'lag', 'speed', 'performance'} usability_keywords = {'easy', 'difficult', 'intuitive', 'confusing', 'interface', 'design'} support_keywords = {'support', 'help', 'customer', 'service', 'response'} pricing_keywords = {'price', 'cost', 'expensive', 'cheap', 'subscription', 'free'} for word_data in common_words: word = word_data['word'] if any(kw in word for kw in feature_keywords): themes['features'].append(word) elif any(kw in word for kw in performance_keywords): themes['performance'].append(word) elif any(kw in word for kw in usability_keywords): themes['usability'].append(word) elif any(kw in word for kw in support_keywords): themes['support'].append(word) elif any(kw in word for kw in pricing_keywords): themes['pricing'].append(word) return {k: v for k, v in themes.items() if v} # Remove empty categories def _generate_theme_insights(self, themes: Dict[str, List[str]]) -> List[str]: """Generate insights from themes.""" insights = [] for category, keywords in themes.items(): if keywords: insights.append( f"{category.title()}: Users frequently mention {', '.join(keywords[:3])}" ) return insights[:5] def _categorize_issues(self, issues: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]: """Categorize issues by type.""" categories = { 'crashes': [], 'bugs': [], 'performance': [], 'compatibility': [] } for issue in issues: keywords = issue['issue_keywords'] if 'crash' in keywords or 'freezes' in keywords: categories['crashes'].append(issue) elif 'bug' in keywords or 'error' in keywords or 'broken' in keywords: categories['bugs'].append(issue) elif 'slow' in keywords or 'laggy' in keywords: categories['performance'].append(issue) else: categories['compatibility'].append(issue) return {k: v for k, v in categories.items() if v} def _calculate_issue_severity( self, categorized_issues: Dict[str, List[Dict[str, Any]]], total_reviews: int ) -> Dict[str, Dict[str, Any]]: """Calculate severity scores for each issue category.""" severity_scores = {} for category, issues in categorized_issues.items(): count = len(issues) percentage = (count / total_reviews) * 100 if total_reviews > 0 else 0 # Calculate average rating of affected reviews avg_rating = sum(i['rating'] for i in issues) / count if count > 0 else 0 # Severity score (0-100) severity = min((percentage * 10) + ((5 - avg_rating) * 10), 100) severity_scores[category] = { 'count': count, 'percentage': round(percentage, 2), 'average_rating': round(avg_rating, 2), 'severity_score': round(severity, 1), 'priority': 'critical' if severity > 70 else ('high' if severity > 40 else 'medium') } return severity_scores def _rank_issues_by_severity( self, severity_scores: Dict[str, Dict[str, Any]] ) -> List[Dict[str, Any]]: """Rank issues by severity score.""" ranked = sorted( [{'category': cat, **data} for cat, data in severity_scores.items()], key=lambda x: x['severity_score'], reverse=True ) return ranked def _generate_issue_recommendations( self, categorized_issues: Dict[str, List[Dict[str, Any]]], severity_scores: Dict[str, Dict[str, Any]] ) -> List[str]: """Generate recommendations for addressing issues.""" recommendations = [] for category, score_data in severity_scores.items(): if score_data['priority'] == 'critical': recommendations.append( f"URGENT: Address {category} issues immediately - affecting {score_data['percentage']}% of reviews" ) elif score_data['priority'] == 'high': recommendations.append( f"HIGH PRIORITY: Focus on {category} issues in next update" ) return recommendations def _extract_feature_request_text(self, text: str) -> str: """Extract the specific feature request from review text.""" # Simple extraction - find sentence with feature request keywords sentences = text.split('.') for sentence in sentences: if any(keyword in sentence for keyword in self.FEATURE_REQUEST_KEYWORDS): return sentence.strip() return text[:100] # Fallback def _cluster_feature_requests( self, feature_requests: List[Dict[str, Any]] ) -> List[Dict[str, Any]]: """Cluster similar feature requests.""" # Simplified clustering - group by common keywords clusters = {} for request in feature_requests: text = request['request_text'].lower() # Extract key words words = [w for w in text.split() if len(w) > 4] # Try to find matching cluster matched = False for cluster_key in clusters: if any(word in cluster_key for word in words[:3]): clusters[cluster_key].append(request) matched = True break if not matched and words: cluster_key = ' '.join(words[:2]) clusters[cluster_key] = [request] return [ {'feature_theme': theme, 'request_count': len(requests), 'examples': requests[:3]} for theme, requests in clusters.items() ] def _prioritize_feature_requests( self, clustered_requests: List[Dict[str, Any]] ) -> List[Dict[str, Any]]: """Prioritize feature requests by frequency.""" return sorted( clustered_requests, key=lambda x: x['request_count'], reverse=True )[:10] def _generate_feature_recommendations( self, prioritized_requests: List[Dict[str, Any]] ) -> List[str]: """Generate recommendations for feature requests.""" recommendations = [] if prioritized_requests: top_request = prioritized_requests[0] recommendations.append( f"Most requested feature: {top_request['feature_theme']} " f"({top_request['request_count']} mentions) - consider for next major release" ) if len(prioritized_requests) > 1: recommendations.append( f"Also consider: {prioritized_requests[1]['feature_theme']}" ) return recommendations def _determine_trend_direction( self, rating_change: float, sentiment_change: float ) -> str: """Determine overall trend direction.""" if rating_change > 0.2 and sentiment_change > 5: return 'improving' elif rating_change < -0.2 and sentiment_change < -5: return 'declining' else: return 'stable' def _generate_trend_insights( self, trends: List[Dict[str, Any]], trend_direction: str ) -> List[str]: """Generate insights from trend analysis.""" insights = [] if trend_direction == 'improving': insights.append("Positive trend: User satisfaction is increasing over time") elif trend_direction == 'declining': insights.append("WARNING: User satisfaction is declining - immediate action needed") else: insights.append("Sentiment is stable - maintain current quality") # Review velocity insight if len(trends) >= 2: recent_reviews = trends[-1]['total_reviews'] previous_reviews = trends[-2]['total_reviews'] if recent_reviews > previous_reviews * 1.5: insights.append("Review volume increasing - growing user base or recent controversy") return insights def analyze_reviews( app_name: str, reviews: List[Dict[str, Any]] ) -> Dict[str, Any]: """ Convenience function to perform comprehensive review analysis. Args: app_name: App name reviews: List of review dictionaries Returns: Complete review analysis """ analyzer = ReviewAnalyzer(app_name) return { 'sentiment_analysis': analyzer.analyze_sentiment(reviews), 'common_themes': analyzer.extract_common_themes(reviews), 'issues_identified': analyzer.identify_issues(reviews), 'feature_requests': analyzer.find_feature_requests(reviews) }