#!/usr/bin/env python3
"""SEO Content Optimizer - Analyzes and optimizes content for SEO."""

import argparse
import re
from collections import Counter
from typing import Dict, List


class SEOOptimizer:
    """Scores markdown-style text against common on-page SEO heuristics."""

    def __init__(self):
        # Common stop words excluded from LSI keyword extraction.
        self.stop_words = {
            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
            'for', 'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are',
            'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do',
            'does', 'did', 'will', 'would', 'could', 'should', 'may',
            'might', 'must', 'can', 'shall',
        }
        # Reference ranges for SEO best practices (some are informational
        # only and not yet enforced by the scorer).
        self.best_practices = {
            'title_length': (50, 60),
            'meta_description_length': (150, 160),
            'url_length': (50, 60),
            'paragraph_length': (40, 150),
            'heading_keyword_placement': True,
            'keyword_density': (0.01, 0.03),  # 1-3%
        }

    def analyze(self, content: str, target_keyword: str = None,
                secondary_keywords: List[str] = None) -> Dict:
        """Analyze content for SEO optimization.

        Args:
            content: Text/markdown content to analyze.
            target_keyword: Optional primary keyword to evaluate.
            secondary_keywords: Optional list of secondary keywords.

        Returns:
            Dict with keyword/structure/readability analysis, meta tag
            suggestions, an overall 0-100 score, and recommendations.
        """
        analysis = {
            'content_length': len(content.split()),
            'keyword_analysis': {},
            'structure_analysis': self._analyze_structure(content),
            'readability': self._analyze_readability(content),
            'meta_suggestions': {},
            'optimization_score': 0,
            'recommendations': [],
        }

        if target_keyword:
            analysis['keyword_analysis'] = self._analyze_keywords(
                content, target_keyword, secondary_keywords or []
            )

        analysis['meta_suggestions'] = self._generate_meta_suggestions(
            content, target_keyword
        )
        analysis['optimization_score'] = self._calculate_seo_score(analysis)
        analysis['recommendations'] = self._generate_recommendations(analysis)
        return analysis

    @staticmethod
    def _count_occurrences(text_lower: str, keyword: str) -> int:
        """Count whole-word occurrences of *keyword* in lowercased text.

        Uses word boundaries so e.g. 'cat' does not match inside
        'category' (a plain substring count over-reports density).
        """
        pattern = r'\b' + re.escape(keyword.lower()) + r'\b'
        return len(re.findall(pattern, text_lower))

    def _analyze_keywords(self, content: str, primary: str,
                          secondary: List[str]) -> Dict:
        """Analyze keyword usage, density, and placement."""
        content_lower = content.lower()
        word_count = len(content.split())

        results = {
            'primary_keyword': {
                'keyword': primary,
                'count': self._count_occurrences(content_lower, primary),
                'density': 0,
                'in_title': False,
                'in_headings': False,
                'in_first_paragraph': False,
            },
            'secondary_keywords': [],
            'lsi_keywords': [],
        }

        if word_count > 0:
            results['primary_keyword']['density'] = (
                results['primary_keyword']['count'] / word_count
            )

        # Placement: first paragraph, or first 200 chars when the content
        # has no blank-line paragraph breaks.
        first_para = (content.split('\n\n')[0] if '\n\n' in content
                      else content[:200])
        results['primary_keyword']['in_first_paragraph'] = (
            primary.lower() in first_para.lower()
        )

        # Placement: any markdown heading line. (Previously this flag was
        # never computed even though the scorer awarded points for it.)
        heading_lines = [line for line in content.split('\n')
                         if line.startswith('#')]
        results['primary_keyword']['in_headings'] = any(
            primary.lower() in line.lower() for line in heading_lines
        )

        for keyword in secondary:
            count = self._count_occurrences(content_lower, keyword)
            results['secondary_keywords'].append({
                'keyword': keyword,
                'count': count,
                'density': count / word_count if word_count > 0 else 0,
            })

        results['lsi_keywords'] = self._extract_lsi_keywords(content, primary)
        return results

    def _analyze_structure(self, content: str) -> Dict:
        """Analyze structure: headings, paragraphs, lists, images, links."""
        lines = content.split('\n')
        structure = {
            'headings': {'h1': 0, 'h2': 0, 'h3': 0, 'total': 0},
            'paragraphs': 0,
            'lists': 0,
            'images': 0,
            'links': {'internal': 0, 'external': 0},
            'avg_paragraph_length': 0,
        }

        paragraphs = []
        current_para = []

        for line in lines:
            stripped = line.strip()

            # Markdown headings (h1-h3 only are tracked individually).
            if line.startswith('# '):
                structure['headings']['h1'] += 1
                structure['headings']['total'] += 1
            elif line.startswith('## '):
                structure['headings']['h2'] += 1
                structure['headings']['total'] += 1
            elif line.startswith('### '):
                structure['headings']['h3'] += 1
                structure['headings']['total'] += 1

            # List items; any '<digits>. ' prefix counts as an ordered item
            # (the old check only caught items literally starting '1. ').
            if stripped.startswith(('- ', '* ')) or re.match(r'\d+\.\s', stripped):
                structure['lists'] += 1

            # Markdown images (this counter was previously never updated).
            structure['images'] += len(re.findall(r'!\[.*?\]\(.*?\)', line))

            # Markdown links: internal targets start with '/', external
            # targets with http(s)://.
            structure['links']['internal'] += len(
                re.findall(r'\[.*?\]\(/.*?\)', line))
            structure['links']['external'] += len(
                re.findall(r'\[.*?\]\(https?://.*?\)', line))

            # Accumulate paragraph text; a blank line ends the paragraph.
            if stripped and not line.startswith('#'):
                current_para.append(line)
            elif current_para:
                paragraphs.append(' '.join(current_para))
                current_para = []

        if current_para:
            paragraphs.append(' '.join(current_para))

        structure['paragraphs'] = len(paragraphs)
        if paragraphs:
            avg_length = (sum(len(p.split()) for p in paragraphs)
                          / len(paragraphs))
            structure['avg_paragraph_length'] = round(avg_length, 1)

        return structure

    def _analyze_readability(self, content: str) -> Dict:
        """Estimate readability from average sentence length."""
        # Drop empty fragments: re.split leaves '' after trailing
        # punctuation, which previously inflated the sentence count.
        sentences = [s for s in re.split(r'[.!?]+', content) if s.strip()]
        words = content.split()

        if not sentences or not words:
            # avg_sentence_length must always be present: the
            # recommendation step reads it unconditionally (empty content
            # used to raise KeyError here).
            return {'score': 0, 'level': 'Unknown', 'avg_sentence_length': 0}

        avg_sentence_length = len(words) / len(sentences)

        # Simple banded scoring by average sentence length.
        if avg_sentence_length < 15:
            level, score = 'Easy', 90
        elif avg_sentence_length < 20:
            level, score = 'Moderate', 70
        elif avg_sentence_length < 25:
            level, score = 'Difficult', 50
        else:
            level, score = 'Very Difficult', 30

        return {
            'score': score,
            'level': level,
            'avg_sentence_length': round(avg_sentence_length, 1),
        }

    def _extract_lsi_keywords(self, content: str,
                              primary_keyword: str) -> List[str]:
        """Return up to 10 frequent content words as candidate LSI keywords.

        Words must be longer than 3 chars, not stop words, occur more than
        once, and differ from the primary keyword.
        """
        words = re.findall(r'\b[a-z]+\b', content.lower())
        word_freq = Counter(
            w for w in words if w not in self.stop_words and len(w) > 3
        )

        primary_lower = primary_keyword.lower()
        lsi_keywords = []
        for word, count in word_freq.most_common():
            if word != primary_lower and count > 1:
                lsi_keywords.append(word)
                if len(lsi_keywords) >= 10:
                    break
        return lsi_keywords

    def _generate_meta_suggestions(self, content: str,
                                   keyword: str = None) -> Dict:
        """Generate SEO meta tag suggestions (all empty without a keyword)."""
        # First sentence seeds the meta description.
        sentences = re.split(r'[.!?]+', content)
        first_sentence = sentences[0].strip() if sentences else content[:160]

        suggestions = {
            'title': '',
            'meta_description': '',
            'url_slug': '',
            'og_title': '',
            'og_description': '',
        }

        if keyword:
            # Title: keep within the ~60 char SERP display limit.
            suggestions['title'] = f"{keyword.title()} - Complete Guide"
            if len(suggestions['title']) > 60:
                suggestions['title'] = keyword.title()[:57] + "..."

            # Meta description: capped at 160 chars.
            desc_base = f"Learn everything about {keyword}. {first_sentence}"
            if len(desc_base) > 160:
                desc_base = desc_base[:157] + "..."
            suggestions['meta_description'] = desc_base

            # Slug: lowercase, non-alphanumeric runs collapsed to '-'.
            suggestions['url_slug'] = re.sub(
                r'[^a-z0-9-]+', '-', keyword.lower()).strip('-')

            # Open Graph tags mirror the standard meta tags.
            suggestions['og_title'] = suggestions['title']
            suggestions['og_description'] = suggestions['meta_description']

        return suggestions

    def _calculate_seo_score(self, analysis: Dict) -> int:
        """Calculate the overall SEO optimization score (0-100)."""
        score = 0
        max_score = 100

        # Content length (max 20 points).
        if 300 <= analysis['content_length'] <= 2500:
            score += 20
        elif 200 <= analysis['content_length'] < 300:
            score += 10
        elif analysis['content_length'] > 2500:
            score += 15

        # Keyword optimization (max 30 points).
        if analysis['keyword_analysis']:
            kw_data = analysis['keyword_analysis']['primary_keyword']

            # Density in the 1-3% sweet spot.
            if 0.01 <= kw_data['density'] <= 0.03:
                score += 15
            elif 0.005 <= kw_data['density'] < 0.01:
                score += 8

            # Placement bonuses.
            if kw_data['in_first_paragraph']:
                score += 10
            if kw_data.get('in_headings'):
                score += 5

        # Structure (max 25 points).
        struct = analysis['structure_analysis']
        if struct['headings']['total'] > 0:
            score += 10
        if struct['paragraphs'] >= 3:
            score += 10
        if struct['links']['internal'] > 0 or struct['links']['external'] > 0:
            score += 5

        # Readability (max 25 points, scaled from the 0-100 score).
        score += int(analysis['readability']['score'] * 0.25)

        return min(score, max_score)

    def _generate_recommendations(self, analysis: Dict) -> List[str]:
        """Generate actionable SEO improvement recommendations."""
        recommendations = []

        # Content length.
        if analysis['content_length'] < 300:
            recommendations.append(
                f"Increase content length to at least 300 words "
                f"(currently {analysis['content_length']})"
            )
        elif analysis['content_length'] > 3000:
            recommendations.append(
                "Consider breaking long content into multiple pages "
                "or adding a table of contents"
            )

        # Keyword usage.
        if analysis['keyword_analysis']:
            kw_data = analysis['keyword_analysis']['primary_keyword']

            if kw_data['density'] < 0.01:
                recommendations.append(
                    f"Increase keyword density for '{kw_data['keyword']}' "
                    f"(currently {kw_data['density']:.2%})"
                )
            elif kw_data['density'] > 0.03:
                recommendations.append(
                    f"Reduce keyword density to avoid over-optimization "
                    f"(currently {kw_data['density']:.2%})"
                )

            if not kw_data['in_first_paragraph']:
                recommendations.append(
                    "Include primary keyword in the first paragraph"
                )
            if not kw_data.get('in_headings'):
                recommendations.append(
                    "Include primary keyword in at least one heading"
                )

        # Structure.
        struct = analysis['structure_analysis']
        if struct['headings']['total'] == 0:
            recommendations.append(
                "Add headings (H1, H2, H3) to improve content structure")
        if struct['links']['internal'] == 0:
            recommendations.append("Add internal links to related content")
        if struct['avg_paragraph_length'] > 150:
            recommendations.append(
                "Break up long paragraphs for better readability")

        # Readability.
        if analysis['readability']['avg_sentence_length'] > 20:
            recommendations.append("Simplify sentences for better readability")

        return recommendations


def optimize_content(content: str, keyword: str = None,
                     secondary_keywords: List[str] = None) -> str:
    """Analyze *content* and return a formatted SEO report string.

    *secondary_keywords* may be a list or a comma-separated string.
    """
    optimizer = SEOOptimizer()

    # Accept a comma-separated string for convenience (e.g. from the CLI).
    if secondary_keywords and isinstance(secondary_keywords, str):
        secondary_keywords = [kw.strip()
                              for kw in secondary_keywords.split(',')]

    results = optimizer.analyze(content, keyword, secondary_keywords)

    output = [
        "=== SEO Content Analysis ===",
        f"Overall SEO Score: {results['optimization_score']}/100",
        f"Content Length: {results['content_length']} words",
        "",
        "Content Structure:",
        f"  Headings: {results['structure_analysis']['headings']['total']}",
        f"  Paragraphs: {results['structure_analysis']['paragraphs']}",
        f"  Avg Paragraph Length: "
        f"{results['structure_analysis']['avg_paragraph_length']} words",
        f"  Internal Links: "
        f"{results['structure_analysis']['links']['internal']}",
        f"  External Links: "
        f"{results['structure_analysis']['links']['external']}",
        "",
        f"Readability: {results['readability']['level']} "
        f"(Score: {results['readability']['score']})",
        "",
    ]

    if results['keyword_analysis']:
        kw = results['keyword_analysis']['primary_keyword']
        output.extend([
            "Keyword Analysis:",
            f"  Primary Keyword: {kw['keyword']}",
            f"  Count: {kw['count']}",
            f"  Density: {kw['density']:.2%}",
            f"  In First Paragraph: "
            f"{'Yes' if kw['in_first_paragraph'] else 'No'}",
            "",
        ])

        if results['keyword_analysis']['lsi_keywords']:
            output.append("  Related Keywords Found:")
            for lsi in results['keyword_analysis']['lsi_keywords'][:5]:
                output.append(f"    • {lsi}")
            output.append("")

    if results['meta_suggestions']:
        output.extend([
            "Meta Tag Suggestions:",
            f"  Title: {results['meta_suggestions']['title']}",
            f"  Description: "
            f"{results['meta_suggestions']['meta_description']}",
            f"  URL Slug: {results['meta_suggestions']['url_slug']}",
            "",
        ])

    output.append("Recommendations:")
    for rec in results['recommendations']:
        output.append(f"  • {rec}")

    return '\n'.join(output)


def _main() -> None:
    """CLI entry point: parse arguments, read the file, print the report."""
    parser = argparse.ArgumentParser(
        description="SEO Content Optimizer - Analyzes and optimizes "
                    "content for SEO"
    )
    parser.add_argument("file", nargs="?", default=None,
                        help="Text file to analyze")
    parser.add_argument("--keyword", "-k", default=None,
                        help="Primary keyword to optimize for")
    parser.add_argument("--secondary", "-s", default=None,
                        help="Comma-separated secondary keywords")
    args = parser.parse_args()

    if args.file:
        with open(args.file, 'r', encoding='utf-8') as f:
            content = f.read()
        print(optimize_content(content, args.keyword, args.secondary))
    else:
        print("Usage: python seo_optimizer.py [--keyword primary] "
              "[--secondary kw1,kw2]")


if __name__ == "__main__":
    _main()