186 lines
6.5 KiB
Python
186 lines
6.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Brand Voice Analyzer - Analyzes content to establish and maintain brand voice consistency
|
|
"""
|
|
|
|
import re
|
|
from typing import Dict, List, Tuple
|
|
import json
|
|
|
|
class BrandVoiceAnalyzer:
|
|
def __init__(self):
|
|
self.voice_dimensions = {
|
|
'formality': {
|
|
'formal': ['hereby', 'therefore', 'furthermore', 'pursuant', 'regarding'],
|
|
'casual': ['hey', 'cool', 'awesome', 'stuff', 'yeah', 'gonna']
|
|
},
|
|
'tone': {
|
|
'professional': ['expertise', 'solution', 'optimize', 'leverage', 'strategic'],
|
|
'friendly': ['happy', 'excited', 'love', 'enjoy', 'together', 'share']
|
|
},
|
|
'perspective': {
|
|
'authoritative': ['proven', 'research shows', 'experts agree', 'data indicates'],
|
|
'conversational': ['you might', 'let\'s explore', 'we think', 'imagine if']
|
|
}
|
|
}
|
|
|
|
def analyze_text(self, text: str) -> Dict:
|
|
"""Analyze text for brand voice characteristics"""
|
|
text_lower = text.lower()
|
|
word_count = len(text.split())
|
|
|
|
results = {
|
|
'word_count': word_count,
|
|
'readability_score': self._calculate_readability(text),
|
|
'voice_profile': {},
|
|
'sentence_analysis': self._analyze_sentences(text),
|
|
'recommendations': []
|
|
}
|
|
|
|
# Analyze voice dimensions
|
|
for dimension, categories in self.voice_dimensions.items():
|
|
dim_scores = {}
|
|
for category, keywords in categories.items():
|
|
score = sum(1 for keyword in keywords if keyword in text_lower)
|
|
dim_scores[category] = score
|
|
|
|
# Determine dominant voice
|
|
if sum(dim_scores.values()) > 0:
|
|
dominant = max(dim_scores, key=dim_scores.get)
|
|
results['voice_profile'][dimension] = {
|
|
'dominant': dominant,
|
|
'scores': dim_scores
|
|
}
|
|
|
|
# Generate recommendations
|
|
results['recommendations'] = self._generate_recommendations(results)
|
|
|
|
return results
|
|
|
|
def _calculate_readability(self, text: str) -> float:
|
|
"""Calculate Flesch Reading Ease score"""
|
|
sentences = re.split(r'[.!?]+', text)
|
|
words = text.split()
|
|
syllables = sum(self._count_syllables(word) for word in words)
|
|
|
|
if len(sentences) == 0 or len(words) == 0:
|
|
return 0
|
|
|
|
avg_sentence_length = len(words) / len(sentences)
|
|
avg_syllables_per_word = syllables / len(words)
|
|
|
|
# Flesch Reading Ease formula
|
|
score = 206.835 - 1.015 * avg_sentence_length - 84.6 * avg_syllables_per_word
|
|
return max(0, min(100, score))
|
|
|
|
def _count_syllables(self, word: str) -> int:
|
|
"""Count syllables in a word (simplified)"""
|
|
word = word.lower()
|
|
vowels = 'aeiou'
|
|
syllable_count = 0
|
|
previous_was_vowel = False
|
|
|
|
for char in word:
|
|
is_vowel = char in vowels
|
|
if is_vowel and not previous_was_vowel:
|
|
syllable_count += 1
|
|
previous_was_vowel = is_vowel
|
|
|
|
# Adjust for silent e
|
|
if word.endswith('e'):
|
|
syllable_count -= 1
|
|
|
|
return max(1, syllable_count)
|
|
|
|
def _analyze_sentences(self, text: str) -> Dict:
|
|
"""Analyze sentence structure"""
|
|
sentences = re.split(r'[.!?]+', text)
|
|
sentences = [s.strip() for s in sentences if s.strip()]
|
|
|
|
if not sentences:
|
|
return {'average_length': 0, 'variety': 'low'}
|
|
|
|
lengths = [len(s.split()) for s in sentences]
|
|
avg_length = sum(lengths) / len(lengths) if lengths else 0
|
|
|
|
# Calculate variety
|
|
if len(set(lengths)) < 3:
|
|
variety = 'low'
|
|
elif len(set(lengths)) < 5:
|
|
variety = 'medium'
|
|
else:
|
|
variety = 'high'
|
|
|
|
return {
|
|
'average_length': round(avg_length, 1),
|
|
'variety': variety,
|
|
'count': len(sentences)
|
|
}
|
|
|
|
def _generate_recommendations(self, analysis: Dict) -> List[str]:
|
|
"""Generate recommendations based on analysis"""
|
|
recommendations = []
|
|
|
|
# Readability recommendations
|
|
if analysis['readability_score'] < 30:
|
|
recommendations.append("Consider simplifying language for better readability")
|
|
elif analysis['readability_score'] > 70:
|
|
recommendations.append("Content is very easy to read - consider if this matches your audience")
|
|
|
|
# Sentence variety
|
|
if analysis['sentence_analysis']['variety'] == 'low':
|
|
recommendations.append("Vary sentence length for better flow and engagement")
|
|
|
|
# Voice consistency
|
|
if analysis['voice_profile']:
|
|
recommendations.append("Maintain consistent voice across all content")
|
|
|
|
return recommendations
|
|
|
|
def analyze_content(content: str, output_format: str = 'json') -> str:
|
|
"""Main function to analyze content"""
|
|
analyzer = BrandVoiceAnalyzer()
|
|
results = analyzer.analyze_text(content)
|
|
|
|
if output_format == 'json':
|
|
return json.dumps(results, indent=2)
|
|
else:
|
|
# Human-readable format
|
|
output = [
|
|
f"=== Brand Voice Analysis ===",
|
|
f"Word Count: {results['word_count']}",
|
|
f"Readability Score: {results['readability_score']:.1f}/100",
|
|
f"",
|
|
f"Voice Profile:"
|
|
]
|
|
|
|
for dimension, profile in results['voice_profile'].items():
|
|
output.append(f" {dimension.title()}: {profile['dominant']}")
|
|
|
|
output.extend([
|
|
f"",
|
|
f"Sentence Analysis:",
|
|
f" Average Length: {results['sentence_analysis']['average_length']} words",
|
|
f" Variety: {results['sentence_analysis']['variety']}",
|
|
f" Total Sentences: {results['sentence_analysis']['count']}",
|
|
f"",
|
|
f"Recommendations:"
|
|
])
|
|
|
|
for rec in results['recommendations']:
|
|
output.append(f" • {rec}")
|
|
|
|
return '\n'.join(output)
|
|
|
|
if __name__ == "__main__":
|
|
import sys
|
|
|
|
if len(sys.argv) > 1:
|
|
with open(sys.argv[1], 'r') as f:
|
|
content = f.read()
|
|
|
|
output_format = sys.argv[2] if len(sys.argv) > 2 else 'text'
|
|
print(analyze_content(content, output_format))
|
|
else:
|
|
print("Usage: python brand_voice_analyzer.py <file> [json|text]")
|