- Fix Python 3.10+ syntax (float | None → Optional[float]) in 2 scripts
- Add argparse CLI handling to 9 marketing scripts using raw sys.argv
- Fix 10 scripts crashing at module level (wrap in __main__, add argparse)
- Make yaml/prefect/mcp imports conditional with stdlib fallbacks (4 scripts)
- Fix f-string backslash syntax in project_bootstrapper.py
- Fix -h flag conflict in pr_analyzer.py
- Fix tech-debt.md description (score → prioritize)

All 237 scripts now pass python3 --help verification.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
434 lines
16 KiB
Python
434 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
SEO Content Optimizer - Analyzes and optimizes content for SEO
|
|
"""
|
|
|
|
import json
import re
from collections import Counter
from typing import Dict, List, Optional, Set
|
|
|
|
class SEOOptimizer:
    """Heuristic SEO analyzer for Markdown-style text.

    The analyzer counts keywords, inspects document structure (``#``
    headings, list items, Markdown links and images), estimates readability
    from the average sentence length, proposes meta tags and combines the
    findings into a 0-100 score plus a list of recommendations.
    """

    # Patterns are compiled once because several of them run once per line.
    _SENTENCE_END = re.compile(r'[.!?]+')
    _WORD = re.compile(r'\b[a-z]+\b')
    _HEADING = re.compile(r'(#{1,6}) +(.*)')
    _LIST_ITEM = re.compile(r'(?:[-*]|\d+\.) ')
    _INTERNAL_LINK = re.compile(r'\[.*?\]\(/.*?\)')
    _EXTERNAL_LINK = re.compile(r'\[.*?\]\(https?://.*?\)')
    _IMAGE = re.compile(r'!\[.*?\]\(.*?\)')

    def __init__(self):
        # Common stop words to filter out of related-keyword extraction
        self.stop_words = {
            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
            'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are', 'were', 'be',
            'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
            'would', 'could', 'should', 'may', 'might', 'must', 'can', 'shall'
        }

        # SEO best practices; only 'keyword_density' is read by this class
        self.best_practices = {
            'title_length': (50, 60),
            'meta_description_length': (150, 160),
            'url_length': (50, 60),
            'paragraph_length': (40, 150),
            'heading_keyword_placement': True,
            'keyword_density': (0.01, 0.03)  # 1-3%
        }

    def analyze(self, content: str, target_keyword: Optional[str] = None,
                secondary_keywords: Optional[List[str]] = None) -> Dict:
        """Analyze content for SEO optimization.

        Args:
            content: Text to analyze; Markdown headings/links are recognized.
            target_keyword: Primary keyword. When falsy, keyword analysis is
                skipped and ``keyword_analysis`` stays an empty dict.
            secondary_keywords: Additional keywords to count.

        Returns:
            Dict with the keys ``content_length``, ``keyword_analysis``,
            ``structure_analysis``, ``readability``, ``meta_suggestions``,
            ``optimization_score`` and ``recommendations``.
        """
        analysis = {
            'content_length': len(content.split()),
            'keyword_analysis': {},
            'structure_analysis': self._analyze_structure(content),
            'readability': self._analyze_readability(content),
            'meta_suggestions': {},
            'optimization_score': 0,
            'recommendations': []
        }

        if target_keyword:
            analysis['keyword_analysis'] = self._analyze_keywords(
                content, target_keyword, secondary_keywords or []
            )

        analysis['meta_suggestions'] = self._generate_meta_suggestions(
            content, target_keyword
        )

        # Score and recommendations are derived from the sections built above.
        analysis['optimization_score'] = self._calculate_seo_score(analysis)
        analysis['recommendations'] = self._generate_recommendations(analysis)

        return analysis

    @staticmethod
    def _first_paragraph(content: str) -> str:
        """Return the first body paragraph, skipping Markdown heading lines.

        Content without a blank line falls back to its first 200 characters.
        """
        if '\n\n' not in content:
            return content[:200]
        for block in content.split('\n\n'):
            body = '\n'.join(
                line for line in block.split('\n') if not line.startswith('#')
            ).strip()
            if body:
                return body
        return ''

    def _analyze_keywords(self, content: str, primary: str,
                          secondary: List[str]) -> Dict:
        """Analyze keyword usage, density and placement.

        Counts are case-insensitive substring counts; density is the count
        divided by the number of whitespace-separated words.
        """
        content_lower = content.lower()
        primary_lower = primary.lower()
        word_count = len(content.split())
        primary_count = content_lower.count(primary_lower)

        # Look for the keyword inside heading text; an H1 counts as the title.
        in_title = False
        in_headings = False
        for line in content_lower.split('\n'):
            heading = self._HEADING.match(line)
            if heading and primary_lower in heading.group(2):
                in_headings = True
                if len(heading.group(1)) == 1:
                    in_title = True

        first_para = self._first_paragraph(content)

        return {
            'primary_keyword': {
                'keyword': primary,
                'count': primary_count,
                'density': primary_count / word_count if word_count > 0 else 0,
                'in_title': in_title,
                'in_headings': in_headings,
                'in_first_paragraph': primary_lower in first_para.lower()
            },
            'secondary_keywords': [
                {
                    'keyword': keyword,
                    'count': content_lower.count(keyword.lower()),
                    'density': (content_lower.count(keyword.lower()) / word_count
                                if word_count > 0 else 0)
                }
                for keyword in secondary
            ],
            'lsi_keywords': self._extract_lsi_keywords(content, primary)
        }

    def _analyze_structure(self, content: str) -> Dict:
        """Count headings, paragraphs, list items, images and links."""
        headings = {'h1': 0, 'h2': 0, 'h3': 0, 'total': 0}
        links = {'internal': 0, 'external': 0}
        list_items = 0
        images = 0
        paragraphs = []
        current_para = []

        for line in content.split('\n'):
            # Only H1-H3 are tallied; deeper headings are ignored here.
            for prefix, level in (('# ', 'h1'), ('## ', 'h2'), ('### ', 'h3')):
                if line.startswith(prefix):
                    headings[level] += 1
                    headings['total'] += 1
                    break

            # Bullets and any numbered item ("2. ", "10. "), not just "1. ".
            if self._LIST_ITEM.match(line.strip()):
                list_items += 1

            images += len(self._IMAGE.findall(line))
            links['internal'] += len(self._INTERNAL_LINK.findall(line))
            links['external'] += len(self._EXTERNAL_LINK.findall(line))

            # Consecutive non-blank, non-heading lines form one paragraph.
            if line.strip() and not line.startswith('#'):
                current_para.append(line)
            elif current_para:
                paragraphs.append(' '.join(current_para))
                current_para = []

        if current_para:
            paragraphs.append(' '.join(current_para))

        avg_paragraph_length = 0
        if paragraphs:
            total_words = sum(len(p.split()) for p in paragraphs)
            avg_paragraph_length = round(total_words / len(paragraphs), 1)

        return {
            'headings': headings,
            'paragraphs': len(paragraphs),
            'lists': list_items,
            'images': images,
            'links': links,
            'avg_paragraph_length': avg_paragraph_length
        }

    def _analyze_readability(self, content: str) -> Dict:
        """Estimate readability from the average sentence length.

        Returns:
            Dict with ``score``, ``level`` and ``avg_sentence_length``. All
            three keys are always present, including for empty content.
        """
        # Drop the empty fragments re.split leaves after terminal punctuation;
        # counting them as sentences would understate the average length.
        sentences = [s for s in self._SENTENCE_END.split(content) if s.strip()]
        words = content.split()

        if not sentences or not words:
            return {'score': 0, 'level': 'Unknown', 'avg_sentence_length': 0}

        avg_sentence_length = len(words) / len(sentences)

        if avg_sentence_length < 15:
            level, score = 'Easy', 90
        elif avg_sentence_length < 20:
            level, score = 'Moderate', 70
        elif avg_sentence_length < 25:
            level, score = 'Difficult', 50
        else:
            level, score = 'Very Difficult', 30

        return {
            'score': score,
            'level': level,
            'avg_sentence_length': round(avg_sentence_length, 1)
        }

    def _extract_lsi_keywords(self, content: str, primary_keyword: str) -> List[str]:
        """Extract up to 10 frequently repeated words as related keywords.

        Stop words, words of three letters or fewer, words that occur only
        once, and the words of the primary keyword itself are excluded.
        """
        primary_lower = primary_keyword.lower()
        # Exclude each word of a multi-word keyword, not only the whole phrase.
        excluded = set(self._WORD.findall(primary_lower))
        excluded.add(primary_lower)

        frequencies = Counter(
            word for word in self._WORD.findall(content.lower())
            if len(word) > 3 and word not in self.stop_words
        )

        related = [
            word for word, count in frequencies.most_common()
            if count > 1 and word not in excluded
        ]
        return related[:10]

    def _generate_meta_suggestions(self, content: str,
                                   keyword: Optional[str] = None) -> Dict:
        """Generate SEO meta tag suggestions.

        Without a keyword every suggestion is an empty string.
        """
        suggestions = {
            'title': '',
            'meta_description': '',
            'url_slug': '',
            'og_title': '',
            'og_description': ''
        }
        if not keyword:
            return suggestions

        # Use the first sentence of the body text, ignoring heading lines and
        # collapsing whitespace so the description stays on one line.
        body_text = '\n'.join(
            line for line in content.split('\n') if not line.startswith('#')
        )
        first_sentence = next(
            (' '.join(s.split()) for s in self._SENTENCE_END.split(body_text)
             if s.strip()),
            ''
        )

        title = f"{keyword.title()} - Complete Guide"
        if len(title) > 60:
            title = keyword.title()[:57] + "..."

        description = f"Learn everything about {keyword}. {first_sentence}".strip()
        if len(description) > 160:
            description = description[:157] + "..."

        suggestions['title'] = title
        suggestions['meta_description'] = description
        suggestions['url_slug'] = re.sub(
            r'[^a-z0-9-]+', '-', keyword.lower()
        ).strip('-')
        # Open Graph tags mirror the regular title and description.
        suggestions['og_title'] = title
        suggestions['og_description'] = description

        return suggestions

    def _calculate_seo_score(self, analysis: Dict) -> int:
        """Calculate the overall SEO optimization score, capped at 100."""
        score = 0
        max_score = 100

        # Content length scoring (20 points)
        length = analysis['content_length']
        if 300 <= length <= 2500:
            score += 20
        elif 200 <= length < 300:
            score += 10
        elif length > 2500:
            score += 15

        # Keyword optimization (30 points)
        if analysis['keyword_analysis']:
            kw_data = analysis['keyword_analysis']['primary_keyword']
            low, high = self.best_practices['keyword_density']

            if low <= kw_data['density'] <= high:
                score += 15
            elif low / 2 <= kw_data['density'] < low:
                # Partial credit when density is at least half the minimum.
                score += 8

            if kw_data['in_first_paragraph']:
                score += 10
            if kw_data.get('in_headings'):
                score += 5

        # Structure scoring (25 points)
        struct = analysis['structure_analysis']
        if struct['headings']['total'] > 0:
            score += 10
        if struct['paragraphs'] >= 3:
            score += 10
        if struct['links']['internal'] > 0 or struct['links']['external'] > 0:
            score += 5

        # Readability scoring (25 points)
        score += int(analysis['readability']['score'] * 0.25)

        return min(score, max_score)

    def _generate_recommendations(self, analysis: Dict) -> List[str]:
        """Generate SEO improvement recommendations from an analysis dict."""
        recommendations = []

        # Content length recommendations
        if analysis['content_length'] < 300:
            recommendations.append(
                f"Increase content length to at least 300 words (currently {analysis['content_length']})"
            )
        elif analysis['content_length'] > 3000:
            recommendations.append(
                "Consider breaking long content into multiple pages or adding a table of contents"
            )

        # Keyword recommendations
        if analysis['keyword_analysis']:
            kw_data = analysis['keyword_analysis']['primary_keyword']
            low, high = self.best_practices['keyword_density']

            if kw_data['density'] < low:
                recommendations.append(
                    f"Increase keyword density for '{kw_data['keyword']}' (currently {kw_data['density']:.2%})"
                )
            elif kw_data['density'] > high:
                recommendations.append(
                    f"Reduce keyword density to avoid over-optimization (currently {kw_data['density']:.2%})"
                )

            if not kw_data['in_first_paragraph']:
                recommendations.append(
                    "Include primary keyword in the first paragraph"
                )

        # Structure recommendations
        struct = analysis['structure_analysis']
        if struct['headings']['total'] == 0:
            recommendations.append("Add headings (H1, H2, H3) to improve content structure")
        if struct['links']['internal'] == 0:
            recommendations.append("Add internal links to related content")
        if struct['avg_paragraph_length'] > 150:
            recommendations.append("Break up long paragraphs for better readability")

        # Readability recommendations; .get() tolerates a readability dict
        # that lacks the average.
        if analysis['readability'].get('avg_sentence_length', 0) > 20:
            recommendations.append("Simplify sentences for better readability")

        return recommendations
|
|
|
|
def optimize_content(content: str, keyword: Optional[str] = None,
                     secondary_keywords: Optional[List[str]] = None) -> str:
    """Analyze content and return a formatted, human-readable SEO report.

    Args:
        content: Text to analyze.
        keyword: Primary keyword; when omitted the keyword and meta tag
            sections are left out of the report.
        secondary_keywords: List of secondary keywords. A comma-separated
            string (as passed by the CLI) is also accepted and split here.

    Returns:
        The report as a single newline-joined string.
    """
    optimizer = SEOOptimizer()

    # Parse secondary keywords from a comma-separated string if provided;
    # blank entries (e.g. from "a,,b") are dropped because an empty keyword
    # would match at every position of the content.
    if isinstance(secondary_keywords, str):
        secondary_keywords = [
            kw.strip() for kw in secondary_keywords.split(',') if kw.strip()
        ]

    results = optimizer.analyze(content, keyword, secondary_keywords)
    structure = results['structure_analysis']
    readability = results['readability']

    output = [
        "=== SEO Content Analysis ===",
        f"Overall SEO Score: {results['optimization_score']}/100",
        f"Content Length: {results['content_length']} words",
        "",
        "Content Structure:",
        f" Headings: {structure['headings']['total']}",
        f" Paragraphs: {structure['paragraphs']}",
        f" Avg Paragraph Length: {structure['avg_paragraph_length']} words",
        f" Internal Links: {structure['links']['internal']}",
        f" External Links: {structure['links']['external']}",
        "",
        f"Readability: {readability['level']} (Score: {readability['score']})",
        ""
    ]

    keyword_analysis = results['keyword_analysis']
    if keyword_analysis:
        kw = keyword_analysis['primary_keyword']
        output.extend([
            "Keyword Analysis:",
            f" Primary Keyword: {kw['keyword']}",
            f" Count: {kw['count']}",
            f" Density: {kw['density']:.2%}",
            f" In First Paragraph: {'Yes' if kw['in_first_paragraph'] else 'No'}",
            ""
        ])

        # Report secondary keyword counts so they are not silently discarded.
        if keyword_analysis['secondary_keywords']:
            output.append(" Secondary Keywords:")
            for entry in keyword_analysis['secondary_keywords']:
                output.append(
                    f" • {entry['keyword']}: {entry['count']} ({entry['density']:.2%})"
                )
            output.append("")

        if keyword_analysis['lsi_keywords']:
            output.append(" Related Keywords Found:")
            for lsi in keyword_analysis['lsi_keywords'][:5]:
                output.append(f" • {lsi}")
            output.append("")

    # The suggestions dict always has its keys, so test the values: without a
    # keyword they are all empty and the section would only show blanks.
    meta = results['meta_suggestions']
    if any(meta.values()):
        output.extend([
            "Meta Tag Suggestions:",
            f" Title: {meta['title']}",
            f" Description: {meta['meta_description']}",
            f" URL Slug: {meta['url_slug']}",
            ""
        ])

    output.append("Recommendations:")
    output.extend(f" • {rec}" for rec in results['recommendations'])

    return '\n'.join(output)
|
|
|
|
def main(argv=None) -> int:
    """Command-line entry point: analyze a text file and print the report.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        Process exit status: 0 on success or when only the usage hint is
        printed, 1 when the input file cannot be read.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="SEO Content Optimizer - Analyzes and optimizes content for SEO"
    )
    parser.add_argument(
        "file", nargs="?", default=None,
        help="Text file to analyze"
    )
    parser.add_argument(
        "--keyword", "-k", default=None,
        help="Primary keyword to optimize for"
    )
    parser.add_argument(
        "--secondary", "-s", default=None,
        help="Comma-separated secondary keywords"
    )
    args = parser.parse_args(argv)

    if not args.file:
        print("Usage: python seo_optimizer.py <file> [--keyword primary] [--secondary kw1,kw2]")
        return 0

    try:
        # Explicit encoding so results do not depend on the platform locale.
        with open(args.file, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as exc:
        print(f"Error: cannot read {args.file}: {exc}", file=sys.stderr)
        return 1

    print(optimize_content(content, args.keyword, args.secondary))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|