#!/usr/bin/env python3 """ sequence_analyzer.py — Email sequence quality analyzer Usage: python3 sequence_analyzer.py --file sequence.json python3 sequence_analyzer.py --json python3 sequence_analyzer.py # demo mode Input JSON format: [ {"subject": "...", "body": "...", "delay_days": 0}, {"subject": "...", "body": "...", "delay_days": 2}, ... ] """ import argparse import json import re import sys # --------------------------------------------------------------------------- # Word/pattern lists # --------------------------------------------------------------------------- SPAM_TRIGGER_WORDS = [ "free", "guarantee", "guaranteed", "winner", "won", "prize", "congratulations", "cash", "earn money", "make money", "extra income", "100% free", "no cost", "risk free", "act now", "limited time", "click here", "buy now", "order now", "get it now", "as seen on", "dear friend", "you have been selected", "this isn't spam", "not spam", "no credit card required", "special promotion", "special offer", "amazing offer", "!!!", "!!!", "$$$", "£££", "increase your", "increase sales", "double your", "lose weight", "weight loss", "diet", "viagra", "casino", ] CTA_PATTERNS = re.compile( r"\b(click|tap|reply|download|sign up|register|buy|purchase|get started|" r"learn more|read more|visit|go to|check out|schedule|book|claim|try|" r"subscribe|join|start|access|watch|see|grab|discover)\b", re.IGNORECASE, ) PERSONALIZATION_TOKENS = re.compile( r"\{\{?\s*\w+\s*\}?\}|%\w+%|\[FIRST_NAME\]|\[NAME\]|\[COMPANY\]|\[FIRSTNAME\]", re.IGNORECASE, ) # --------------------------------------------------------------------------- # Per-email analysis # --------------------------------------------------------------------------- def analyze_email(email: dict, index: int) -> dict: subject = email.get("subject", "") body = email.get("body", "") delay = email.get("delay_days", 0) # Subject analysis subject_len = len(subject) subject_word_count = len(subject.split()) subject_ok = 30 <= subject_len <= 60 
subject_has_number = bool(re.search(r"\d", subject)) subject_question = subject.strip().endswith("?") subject_all_caps = subject == subject.upper() and len(subject) > 3 # Body analysis body_words = re.findall(r"\b\w+\b", body) body_word_count = len(body_words) # CTA detection cta_matches = CTA_PATTERNS.findall(body) has_cta = len(cta_matches) > 0 # Personalization tokens tokens_in_subject = PERSONALIZATION_TOKENS.findall(subject) tokens_in_body = PERSONALIZATION_TOKENS.findall(body) total_tokens = len(tokens_in_subject) + len(tokens_in_body) # Spam triggers combined = (subject + " " + body).lower() spam_found = [w for w in SPAM_TRIGGER_WORDS if w.lower() in combined] # Spam score (0-100, higher = more spammy) spam_score = min(100, len(spam_found) * 10) return { "email_index": index + 1, "delay_days": delay, "subject": { "text": subject, "length": subject_len, "word_count": subject_word_count, "length_ok": subject_ok, "has_number": subject_has_number, "is_question": subject_question, "all_caps_warning": subject_all_caps, "personalized": len(tokens_in_subject) > 0, }, "body": { "word_count": body_word_count, "length_verdict": _body_length_verdict(body_word_count), "has_cta": has_cta, "cta_phrases": list(set(cta_matches))[:5], "personalization_tokens": total_tokens, }, "spam": { "trigger_words_found": spam_found[:8], "trigger_count": len(spam_found), "spam_risk_score": spam_score, "risk_level": "High" if spam_score >= 40 else "Medium" if spam_score >= 20 else "Low", }, } def _body_length_verdict(word_count: int) -> str: if word_count < 50: return "Too short (<50 words)" if word_count <= 150: return "Short/punchy — good for re-engagement" if word_count <= 300: return "Optimal (150-300 words)" if word_count <= 500: return "Long — ensure high value throughout" return "Very long (500+ words) — consider trimming" # --------------------------------------------------------------------------- # Sequence-level analysis # 
# ---------------------------------------------------------------------------
# Sequence-level analysis
# ---------------------------------------------------------------------------

def analyze_pacing(emails: list) -> dict:
    """Assess the spacing between emails from their "delay_days" values.

    "delay_days" is read as a day offset for each email; the gap for an email
    is its offset minus the previous email's offset. A missing or null value
    counts as 0.

    Returns:
        A dict that always carries "email_count", "total_duration_days",
        "avg_gap_days", "cadence_type", "gaps" and "issues". For fewer than
        two emails it also carries a "note" and neutral values, so callers
        can read the same keys for any sequence length.
    """
    if len(emails) <= 1:
        note = (
            "Single email — no pacing to analyze"
            if emails
            else "No emails — no pacing to analyze"
        )
        return {
            "email_count": len(emails),
            "total_duration_days": 0,
            "avg_gap_days": 0,
            "cadence_type": "N/A",
            "gaps": [],
            "issues": [],
            "note": note,
        }

    delays = [e.get("delay_days") or 0 for e in emails]
    gaps = [later - earlier for earlier, later in zip(delays, delays[1:])]

    issues = []
    # gaps[0] belongs to the second email, hence numbering from 2.
    for number, gap in enumerate(gaps, start=2):
        if gap <= 0:
            issues.append(f"Email {number}: same-day or before previous — check delay_days")
        elif gap == 1:
            issues.append(f"Email {number}: only 1-day gap — may feel aggressive")
        elif gap > 14:
            issues.append(f"Email {number}: {gap}-day gap — momentum may drop")

    # Assess overall cadence
    avg_gap = sum(gaps) / len(gaps)
    if avg_gap <= 2:
        cadence = "Aggressive (avg <2 days)"
    elif avg_gap <= 5:
        cadence = "High-frequency (avg 2-5 days)"
    elif avg_gap <= 10:
        cadence = "Standard (avg 5-10 days)"
    else:
        cadence = "Low-frequency (avg 10+ days)"

    return {
        "email_count": len(emails),
        "total_duration_days": max(delays) - min(delays),
        "avg_gap_days": round(avg_gap, 1),
        "cadence_type": cadence,
        "gaps": gaps,
        "issues": issues,
    }


# ---------------------------------------------------------------------------
# Scoring
# ---------------------------------------------------------------------------

def compute_sequence_score(email_analyses: list, pacing: dict) -> dict:
    """Combine per-email analyses and pacing into a weighted 0-100 score.

    Args:
        email_analyses: Dicts shaped like the per-email analysis results
            (needs subject.length_ok, body.has_cta,
            body.personalization_tokens, body.length_verdict and
            spam.spam_risk_score).
        pacing: Pacing dict; only its "issues" list is read.

    Returns:
        {"overall": int, "grade": "A".."F", "breakdown": {...}}. An empty
        sequence scores 0 / "F" with an empty breakdown, so the result has
        the same keys for any input.
    """
    if not email_analyses:
        return {"overall": 0, "grade": "F", "breakdown": {}}

    total = len(email_analyses)

    def percent(count: int) -> int:
        """Share of emails, as a rounded 0-100 value."""
        return round(count / total * 100)

    # Subject score: share of subjects within the recommended length
    subject_score = percent(sum(1 for e in email_analyses if e["subject"]["length_ok"]))

    # CTA score: % of emails with CTA
    cta_score = percent(sum(1 for e in email_analyses if e["body"]["has_cta"]))

    # Personalization score
    personalization_score = percent(
        sum(1 for e in email_analyses if e["body"]["personalization_tokens"] > 0)
    )

    # Spam score (inverted — low spam = high score)
    avg_spam = sum(e["spam"]["spam_risk_score"] for e in email_analyses) / total
    spam_score = max(0, 100 - int(avg_spam))

    # Pacing score: 20 points off per pacing issue
    pacing_score = max(0, 100 - len(pacing.get("issues", [])) * 20)

    # Body length score
    length_score = percent(
        sum(
            1
            for e in email_analyses
            if "Optimal" in e["body"]["length_verdict"]
            or "punchy" in e["body"]["length_verdict"]
        )
    )

    weights = {
        "subject_quality": 0.20,
        "cta_presence": 0.20,
        "spam_safety": 0.25,
        "personalization": 0.15,
        "pacing": 0.10,
        "body_length": 0.10,
    }
    scores = {
        "subject_quality": subject_score,
        "cta_presence": cta_score,
        "spam_safety": spam_score,
        "personalization": personalization_score,
        "pacing": pacing_score,
        "body_length": length_score,
    }

    overall = round(sum(scores[k] * weights[k] for k in weights))
    if overall >= 85:
        grade = "A"
    elif overall >= 70:
        grade = "B"
    elif overall >= 55:
        grade = "C"
    elif overall >= 40:
        grade = "D"
    else:
        grade = "F"

    return {
        "overall": overall,
        "grade": grade,
        "breakdown": {
            # round(), not int(): a product such as 0.29 * 100 lands just
            # below the integer and would be truncated to the wrong label.
            k: {"score": v, "weight": f"{round(weights[k] * 100)}%"}
            for k, v in scores.items()
        },
    }


# ---------------------------------------------------------------------------
# Demo data
# ---------------------------------------------------------------------------

DEMO_SEQUENCE = [
    {
        "subject": "{{first_name}}, your free marketing audit is ready",
        "body": (
            "Hi {{first_name}},\n\n"
            "We analyzed 500 campaigns like yours and found three quick wins that could double your ROAS in 30 days.\n\n"
            "I've put together a custom audit for {{company}}. It's free and takes 10 minutes to review.\n\n"
            "→ Click here to see your results: [LINK]\n\n"
            "Best,\nSarah"
        ),
        "delay_days": 0,
    },
    {
        "subject": "Did you see this, {{first_name}}?",
        "body": (
            "Quick follow-up.\n\n"
            "Most marketers we talk to are sitting on 2-3 easy optimizations that could add 20-40% more revenue from the same ad spend.\n\n"
            "Here's the #1 thing we see: landing pages that don't match the ad promise.\n\n"
            "Worth 5 minutes? → [Review your audit]\n\n"
            "Sarah"
        ),
        "delay_days": 3,
    },
    {
        "subject": "The $50,000 mistake (and how to avoid it)",
        "body": (
            "True story.\n\n"
            "One of our clients was spending $8,500/month on Google Ads with a 1.8x ROAS. Technically above break-even, but barely.\n\n"
            "We found that 60% of their budget was going to one keyword that had zero purchase intent.\n\n"
            "After fixing it: same spend, 4.2x ROAS.\n\n"
            "That's the kind of thing our audit catches. Have you looked at yours yet?\n\n"
            "→ [Open your free audit]\n\n"
            "Sarah\n\n"
            "P.S. This offer expires Friday."
        ),
        "delay_days": 5,
    },
    {
        "subject": "Last call — your audit expires tonight",
        "body": (
            "{{first_name}}, this is the last reminder.\n\n"
            "Your personalized audit expires at midnight tonight.\n\n"
            "If growing your ROAS is a priority this quarter, take 10 minutes now.\n\n"
            "→ [Claim your audit before it expires]\n\n"
            "Sarah"
        ),
        "delay_days": 7,
    },
    {
        "subject": "New case study: {{company}}-style win",
        "body": (
            "Since you didn't grab the audit, I wanted to send you something valuable anyway.\n\n"
            "Here's a 3-minute case study showing how we helped a B2B SaaS company go from 1.9x to 5.4x ROAS in 45 days.\n\n"
            "No audit required — just solid tactics you can steal.\n\n"
            "→ [Read the case study]\n\n"
            "Hope it helps,\nSarah"
        ),
        "delay_days": 14,
    },
]


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def _load_sequence(path: str) -> list:
    """Read *path* as JSON and return it if it is an array of objects.

    Exits with a one-line error message (status 1) instead of a traceback
    when the file cannot be read, is not valid JSON, or has the wrong shape.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except OSError as exc:
        sys.exit(f"Error: cannot read {path}: {exc}")
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        sys.exit(f"Error: {path} is not valid JSON: {exc}")
    if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
        sys.exit(f"Error: {path} must contain a JSON array of email objects")
    return data


def _print_report(email_analyses: list, pacing: dict, scoring: dict) -> None:
    """Print the human-readable report for an analyzed sequence."""
    overall = scoring["overall"]
    grade = scoring.get("grade", "F")

    print("=" * 64)
    print(f" EMAIL SEQUENCE ANALYSIS Score: {overall}/100 Grade: {grade}")
    print("=" * 64)

    # Pacing summary
    print("\n 📅 SEQUENCE PACING")
    print(f" Emails: {pacing.get('email_count', len(email_analyses))}")
    print(f" Duration: {pacing.get('total_duration_days', 0)} days")
    print(f" Avg gap: {pacing.get('avg_gap_days', 0)} days")
    print(f" Cadence: {pacing.get('cadence_type', 'N/A')}")
    if pacing.get("note"):
        print(f" {pacing['note']}")
    for issue in pacing.get("issues", []):
        print(f" ⚠️ {issue}")

    print("\n 📧 PER-EMAIL BREAKDOWN")
    print(f" {'#':<3} {'Subject':<40} {'Words':<6} {'CTA':<4} {'Tokens':<7} {'Spam'}")
    print(" " + "─" * 60)
    for e in email_analyses:
        subj = e["subject"]["text"][:38]
        if not e["subject"]["length_ok"]:
            subj += "⚠️"
        words = e["body"]["word_count"]
        cta = "✅" if e["body"]["has_cta"] else "❌"
        tokens = e["body"]["personalization_tokens"]
        spam_lvl = e["spam"]["risk_level"]
        spam_icon = "✅" if spam_lvl == "Low" else ("⚠️ " if spam_lvl == "Medium" else "❌")
        spam_str = f"{spam_icon}{spam_lvl}"
        print(f" {e['email_index']:<3} {subj:<40} {words:<6} {cta:<4} {tokens:<7} {spam_str}")

    flagged = [e for e in email_analyses if e["spam"]["trigger_words_found"]]
    if flagged:
        print("\n ⚠️ SPAM TRIGGER WORDS DETECTED")
        for e in flagged:
            triggers = ", ".join(e["spam"]["trigger_words_found"])
            print(f" Email {e['email_index']}: {triggers}")

    print("\n SCORE BREAKDOWN")
    for k, v in scoring.get("breakdown", {}).items():
        label = k.replace("_", " ").title()
        bar_len = round(v["score"] / 10)
        bar = "█" * bar_len + "░" * (10 - bar_len)
        print(f" {label:<22} [{bar}] {v['score']:>3}/100 (weight {v['weight']})")

    print()
    print("=" * 64)
    print(f" Overall: {overall}/100 Grade: {grade}")
    print("=" * 64)


def main():
    """CLI entry point: analyze --file (or the demo sequence) and report.

    With --json the full analysis is printed as JSON; otherwise a
    human-readable report is printed.
    """
    parser = argparse.ArgumentParser(
        description="Email sequence analyzer — scores sequence quality 0-100."
    )
    parser.add_argument("--file", help="JSON file with email sequence array")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    if args.file:
        emails = _load_sequence(args.file)
    else:
        emails = DEMO_SEQUENCE
        if not args.json:
            print("No input provided — running in demo mode (5-email nurture sequence).\n")

    email_analyses = [analyze_email(e, i) for i, e in enumerate(emails)]
    pacing = analyze_pacing(emails)
    scoring = compute_sequence_score(email_analyses, pacing)

    if args.json:
        output = {
            "sequence_score": scoring,
            "pacing": pacing,
            "emails": email_analyses,
        }
        print(json.dumps(output, indent=2))
        return

    # Human-readable
    _print_report(email_analyses, pacing, scoring)


if __name__ == "__main__":
    main()