Build campaign-analytics, financial-analyst, customer-success-manager, sales-engineer, and revenue-operations skills using the Claude Skills Factory workflow. Each skill includes SKILL.md, Python CLI tools, reference guides, and asset templates. All 16 Python scripts use standard library only with --format json/text support. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
348 lines
12 KiB
Python
348 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Attribution Analyzer - Multi-touch attribution modeling for marketing campaigns.
|
|
|
|
Implements 5 attribution models:
|
|
- first-touch: 100% credit to first interaction
|
|
- last-touch: 100% credit to last interaction
|
|
- linear: Equal credit across all touchpoints
|
|
- time-decay: Exponential decay favoring recent touchpoints
|
|
- position-based: 40% first, 40% last, 20% split among middle
|
|
|
|
Usage:
|
|
python attribution_analyzer.py data.json
|
|
python attribution_analyzer.py data.json --model time-decay
|
|
python attribution_analyzer.py data.json --model time-decay --half-life 14
|
|
python attribution_analyzer.py data.json --format json
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from datetime import datetime
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
# Names of the supported attribution models. Used as the allowed values for
# the --model CLI option, as the list of models to run when --model is not
# given, and in the "Unknown model" error message raised by run_model().
MODELS = ["first-touch", "last-touch", "linear", "time-decay", "position-based"]
|
|
|
|
|
|
def safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
    """Return ``numerator / denominator``, or ``default`` for a zero denominator.

    Args:
        numerator: Value to be divided.
        denominator: Value to divide by.
        default: Result handed back instead of dividing when ``denominator``
            compares equal to zero.

    Returns:
        The quotient, or ``default`` when the denominator is zero.
    """
    return default if denominator == 0 else numerator / denominator
|
|
|
|
|
|
def parse_timestamp(ts: str) -> datetime:
    """Parse an ISO-8601-style timestamp string into a naive ``datetime``.

    Accepted layouts:

    * ``YYYY-MM-DD``
    * ``YYYY-MM-DDTHH:MM:SS`` and ``YYYY-MM-DD HH:MM:SS``, each optionally
      followed by fractional seconds (``.ffffff``) and/or a UTC designator
      (``Z``) or numeric offset (``+0200``; ``+02:00`` on Python 3.7+).

    Leading and trailing whitespace is ignored.

    Timestamps that carry an offset are converted to UTC and returned without
    ``tzinfo``; timestamps without an offset are returned exactly as written.
    Every value returned is therefore naive, so results can always be compared
    and subtracted without a naive/aware ``TypeError``.

    Args:
        ts: The timestamp text.

    Returns:
        The parsed, timezone-naive ``datetime``.

    Raises:
        TypeError: If ``ts`` is not a string.
        ValueError: If ``ts`` matches none of the accepted layouts.
    """
    # Function-scope import: only ``datetime`` is imported at module level.
    from datetime import timezone

    if not isinstance(ts, str):
        raise TypeError(f"Timestamp must be a string, got {type(ts).__name__}")

    text = ts.strip()
    # strptime's %z understands "+0000" on every Python version, whereas the
    # literal "Z" designator is only accepted by newer ones, so rewrite it.
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+0000"

    formats = (
        # The three original layouts are tried first, in their original order.
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%d",
        # Fractional seconds.
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%d %H:%M:%S.%f",
        # UTC offset, with or without fractional seconds.
        "%Y-%m-%dT%H:%M:%S%z",
        "%Y-%m-%d %H:%M:%S%z",
        "%Y-%m-%dT%H:%M:%S.%f%z",
        "%Y-%m-%d %H:%M:%S.%f%z",
    )
    for fmt in formats:
        try:
            parsed = datetime.strptime(text, fmt)
        except ValueError:
            continue
        if parsed.tzinfo is not None:
            # Normalise to naive UTC so aware and naive inputs never mix.
            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
        return parsed

    raise ValueError(f"Cannot parse timestamp: {ts}")
|
|
|
|
|
|
def first_touch_attribution(journeys: List[Dict]) -> Dict[str, float]:
    """Give each converted journey's whole value to its earliest touchpoint.

    Args:
        journeys: Journey dicts. A journey is skipped unless its "converted"
            entry is truthy and its "touchpoints" list is non-empty. Each
            touchpoint must provide "timestamp" and "channel".

    Returns:
        Mapping of channel name to accumulated credit. A journey contributes
        its "revenue" value, or 1.0 when that key is absent.
    """
    totals: Dict[str, float] = {}
    converted_journeys = (j for j in journeys if j.get("converted", False))
    for record in converted_journeys:
        events = record.get("touchpoints", [])
        if events:
            # min() returns the first of several equally-early touchpoints,
            # which is the element a stable sort would place at index 0.
            earliest = min(events, key=lambda ev: parse_timestamp(ev["timestamp"]))
            name = earliest["channel"]
            totals[name] = totals.get(name, 0.0) + record.get("revenue", 1.0)
    return totals
|
|
|
|
|
|
def last_touch_attribution(journeys: List[Dict]) -> Dict[str, float]:
    """Give each converted journey's whole value to its latest touchpoint.

    Args:
        journeys: Journey dicts. A journey is skipped unless its "converted"
            entry is truthy and its "touchpoints" list is non-empty. Each
            touchpoint must provide "timestamp" and "channel".

    Returns:
        Mapping of channel name to accumulated credit. A journey contributes
        its "revenue" value, or 1.0 when that key is absent.
    """
    totals: Dict[str, float] = {}
    for record in journeys:
        if record.get("converted", False):
            events = record.get("touchpoints", [])
            if events:
                # Sort a copy chronologically; the sort is stable, so among
                # equal timestamps the one listed last ends up at the end.
                chronological = list(events)
                chronological.sort(key=lambda ev: parse_timestamp(ev["timestamp"]))
                closer = chronological[-1]["channel"]
                totals[closer] = totals.get(closer, 0.0) + record.get("revenue", 1.0)
    return totals
|
|
|
|
|
|
def linear_attribution(journeys: List[Dict]) -> Dict[str, float]:
    """Split each converted journey's value evenly over all its touchpoints.

    Touchpoint order is irrelevant here, so timestamps are never read.

    Args:
        journeys: Journey dicts. A journey is skipped unless its "converted"
            entry is truthy and its "touchpoints" list is non-empty. Each
            touchpoint must provide "channel".

    Returns:
        Mapping of channel name to accumulated credit. A channel that appears
        several times in one journey receives one share per appearance.
    """
    totals: Dict[str, float] = {}
    for record in journeys:
        if not record.get("converted", False):
            continue
        events = record.get("touchpoints", [])
        if not events:
            continue
        # Missing revenue counts as one unit of credit per conversion.
        per_touch = safe_divide(record.get("revenue", 1.0), len(events))
        for name in (ev["channel"] for ev in events):
            totals[name] = totals.get(name, 0.0) + per_touch
    return totals
|
|
|
|
|
|
def time_decay_attribution(journeys: List[Dict], half_life_days: float = 7.0) -> Dict[str, float]:
    """Time-decay: exponentially more credit for touchpoints nearer conversion.

    The latest touchpoint of a journey is treated as the moment of conversion.
    A touchpoint ``d`` days before it gets the raw weight
    ``exp(-ln(2) / half_life_days * d)``, i.e. its weight halves for every
    ``half_life_days`` of distance. Raw weights are normalised per journey so
    that the journey's full revenue is distributed.

    Args:
        journeys: Journey dicts. A journey is skipped unless its "converted"
            entry is truthy and its "touchpoints" list is non-empty. Each
            touchpoint must provide "timestamp" and "channel".
        half_life_days: Number of days over which a touchpoint's weight
            halves. Must be greater than zero.

    Returns:
        Mapping of channel name to accumulated credit. A journey distributes
        its "revenue" value, or 1.0 when that key is absent.

    Raises:
        ValueError: If ``half_life_days`` is not a positive number. Zero would
            divide by zero, and a negative value would invert the decay so
            that older touchpoints earn more credit.
    """
    import math

    # "not x > 0" rather than "x <= 0" so that NaN is rejected as well.
    if not half_life_days > 0:
        raise ValueError(f"half_life_days must be positive, got {half_life_days}")

    credits: Dict[str, float] = {}
    decay_rate = math.log(2) / half_life_days

    for journey in journeys:
        if not journey.get("converted", False):
            continue
        touchpoints = journey.get("touchpoints", [])
        if not touchpoints:
            continue

        revenue = journey.get("revenue", 1.0)

        # Parse every timestamp exactly once, then order chronologically.
        # The sort is stable, so equal timestamps keep their input order.
        timed = [(parse_timestamp(tp["timestamp"]), tp) for tp in touchpoints]
        timed.sort(key=lambda pair: pair[0])
        conversion_time = timed[-1][0]

        weights: List[float] = []
        for tp_time, _ in timed:
            days_before = (conversion_time - tp_time).total_seconds() / 86400.0
            weights.append(math.exp(-decay_rate * days_before))

        # The latest touchpoint has days_before == 0 and therefore weight 1,
        # so the total is never zero and needs no zero-division guard.
        total_weight = sum(weights)

        for weight, (_, tp) in zip(weights, timed):
            channel = tp["channel"]
            share = weight / total_weight * revenue
            credits[channel] = credits.get(channel, 0.0) + share

    return credits
|
|
|
|
|
|
def position_based_attribution(journeys: List[Dict]) -> Dict[str, float]:
    """Credit journeys by touchpoint position (40% first, 40% last, 20% middle).

    Per converted journey, after ordering touchpoints by timestamp:

    * one touchpoint: it receives the full revenue;
    * two touchpoints: each receives half;
    * three or more: the first and last receive 40% each and the remaining
      20% is divided equally among the touchpoints in between.

    Args:
        journeys: Journey dicts. A journey is skipped unless its "converted"
            entry is truthy and its "touchpoints" list is non-empty. Each
            touchpoint must provide "timestamp" and "channel".

    Returns:
        Mapping of channel name to accumulated credit. A journey distributes
        its "revenue" value, or 1.0 when that key is absent.
    """
    totals: Dict[str, float] = {}
    for record in journeys:
        if not record.get("converted", False):
            continue
        events = record.get("touchpoints", [])
        if not events:
            continue

        amount = record.get("revenue", 1.0)
        ordered = sorted(events, key=lambda ev: parse_timestamp(ev["timestamp"]))
        count = len(ordered)

        # Build (channel, credit) pairs in the order they must be applied:
        # first, then last, then the middle touchpoints.
        if count == 1:
            allocations = [(ordered[0]["channel"], amount)]
        elif count == 2:
            half = amount * 0.5
            allocations = [(ordered[0]["channel"], half), (ordered[-1]["channel"], half)]
        else:
            edge = amount * 0.4
            allocations = [(ordered[0]["channel"], edge), (ordered[-1]["channel"], edge)]
            inner = safe_divide(amount * 0.2, count - 2)
            allocations.extend((ev["channel"], inner) for ev in ordered[1:-1])

        for name, credit in allocations:
            totals[name] = totals.get(name, 0.0) + credit

    return totals
|
|
|
|
|
|
def run_model(model_name: str, journeys: List[Dict], half_life: float = 7.0) -> Dict[str, float]:
    """Run the attribution model identified by ``model_name``.

    Args:
        model_name: One of "first-touch", "last-touch", "linear",
            "time-decay" or "position-based".
        journeys: Journey dicts handed unchanged to the selected model.
        half_life: Half-life in days; only forwarded to the time-decay model.

    Returns:
        The channel-to-credit mapping produced by the selected model.

    Raises:
        ValueError: If ``model_name`` is not one of the names above.
    """
    # Each entry defers its call so only the selected model is evaluated.
    dispatch = (
        ("first-touch", lambda: first_touch_attribution(journeys)),
        ("last-touch", lambda: last_touch_attribution(journeys)),
        ("linear", lambda: linear_attribution(journeys)),
        ("time-decay", lambda: time_decay_attribution(journeys, half_life)),
        ("position-based", lambda: position_based_attribution(journeys)),
    )
    for known_name, runner in dispatch:
        if model_name == known_name:
            return runner()
    raise ValueError(f"Unknown model: {model_name}. Choose from: {', '.join(MODELS)}")
|
|
|
|
|
|
def compute_summary(journeys: List[Dict]) -> Dict[str, Any]:
    """Summarise the journey data set.

    Args:
        journeys: Journey dicts; "converted", "revenue" and "touchpoints" are
            all optional. Every touchpoint must provide "channel".

    Returns:
        A dict with these keys:

        * "total_journeys": number of journeys;
        * "converted_journeys": journeys whose "converted" entry is truthy;
        * "conversion_rate": converted share as a percentage, rounded to two
          decimals (0.0 when there are no journeys);
        * "total_revenue": summed "revenue" of converted journeys, rounded to
          two decimals. A converted journey without "revenue" adds 0.0 here,
          whereas the attribution models in this file credit such a journey
          as 1.0, so the two totals can differ;
        * "channels_observed": sorted list of every channel seen in any
          journey, converted or not.
    """
    journey_count = len(journeys)
    winners = [j for j in journeys if j.get("converted", False)]
    revenue_sum = sum(j.get("revenue", 0.0) for j in winners)
    channels = {tp["channel"] for j in journeys for tp in j.get("touchpoints", [])}

    rate_pct = safe_divide(len(winners), journey_count) * 100
    return {
        "total_journeys": journey_count,
        "converted_journeys": len(winners),
        "conversion_rate": round(rate_pct, 2),
        "total_revenue": round(revenue_sum, 2),
        "channels_observed": sorted(channels),
    }
|
|
|
|
|
|
def format_text(results: Dict[str, Any]) -> str:
|
|
"""Format results as human-readable text."""
|
|
lines: List[str] = []
|
|
lines.append("=" * 70)
|
|
lines.append("MULTI-TOUCH ATTRIBUTION ANALYSIS")
|
|
lines.append("=" * 70)
|
|
|
|
summary = results["summary"]
|
|
lines.append("")
|
|
lines.append("SUMMARY")
|
|
lines.append(f" Total Journeys: {summary['total_journeys']}")
|
|
lines.append(f" Converted: {summary['converted_journeys']}")
|
|
lines.append(f" Conversion Rate: {summary['conversion_rate']}%")
|
|
lines.append(f" Total Revenue: ${summary['total_revenue']:,.2f}")
|
|
lines.append(f" Channels Observed: {', '.join(summary['channels_observed'])}")
|
|
|
|
for model_name, credits in results["models"].items():
|
|
lines.append("")
|
|
lines.append("-" * 70)
|
|
lines.append(f"MODEL: {model_name.upper()}")
|
|
lines.append("-" * 70)
|
|
|
|
if not credits:
|
|
lines.append(" No conversions to attribute.")
|
|
continue
|
|
|
|
total_credit = sum(credits.values())
|
|
sorted_channels = sorted(credits.items(), key=lambda x: x[1], reverse=True)
|
|
|
|
lines.append(f" {'Channel':<25} {'Revenue Credit':>15} {'Share':>10}")
|
|
lines.append(f" {'-'*25} {'-'*15} {'-'*10}")
|
|
|
|
for channel, credit in sorted_channels:
|
|
pct = safe_divide(credit, total_credit) * 100
|
|
lines.append(f" {channel:<25} ${credit:>13,.2f} {pct:>8.1f}%")
|
|
|
|
lines.append(f" {'TOTAL':<25} ${total_credit:>13,.2f} {'100.0%':>10}")
|
|
|
|
# Comparison table
|
|
if len(results["models"]) > 1:
|
|
lines.append("")
|
|
lines.append("=" * 70)
|
|
lines.append("CROSS-MODEL COMPARISON")
|
|
lines.append("=" * 70)
|
|
|
|
all_channels = set()
|
|
for credits in results["models"].values():
|
|
all_channels.update(credits.keys())
|
|
all_channels_sorted = sorted(all_channels)
|
|
|
|
model_names = list(results["models"].keys())
|
|
header = f" {'Channel':<20}"
|
|
for mn in model_names:
|
|
short = mn.replace("-", " ").title()
|
|
header += f" {short:>14}"
|
|
lines.append(header)
|
|
lines.append(f" {'-'*20}" + f" {'-'*14}" * len(model_names))
|
|
|
|
for ch in all_channels_sorted:
|
|
row = f" {ch:<20}"
|
|
for mn in model_names:
|
|
val = results["models"][mn].get(ch, 0.0)
|
|
row += f" ${val:>12,.2f}"
|
|
lines.append(row)
|
|
|
|
lines.append("")
|
|
return "\n".join(lines)
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point for the attribution analyzer.

    Parses the arguments, loads the journey JSON file, runs either the
    requested attribution model or all of them, and prints the result to
    stdout as JSON or as a text report.

    Exits with status 1 (message on stderr) when the input file cannot be
    read, is not valid JSON, has no non-empty "journeys" list, or contains
    journey records the models cannot process. An invalid --half-life is
    reported through argparse, which exits with status 2.
    """
    parser = argparse.ArgumentParser(
        description="Multi-touch attribution analyzer for marketing campaigns.",
        epilog="Example: python attribution_analyzer.py data.json --model linear --format json",
    )
    parser.add_argument(
        "input_file",
        help="Path to JSON file containing journey/touchpoint data",
    )
    parser.add_argument(
        "--model",
        choices=MODELS,
        default=None,
        help="Run a specific attribution model (default: run all 5 models)",
    )
    parser.add_argument(
        "--half-life",
        type=float,
        default=7.0,
        help="Half-life in days for time-decay model (default: 7)",
    )
    parser.add_argument(
        "--format",
        choices=["json", "text"],
        default="text",
        dest="output_format",
        help="Output format (default: text)",
    )

    args = parser.parse_args()

    # Determine which models to run
    models_to_run = [args.model] if args.model else MODELS

    # The decay rate is ln(2) / half-life: zero divides by zero and a negative
    # value inverts the decay. Only checked when time-decay will actually run.
    if "time-decay" in models_to_run and not args.half_life > 0:
        parser.error("--half-life must be a positive number of days")

    # Load input data. JSON text is UTF-8, so do not rely on the platform's
    # default encoding.
    try:
        with open(args.input_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found: {args.input_file}", file=sys.stderr)
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON in {args.input_file}: {e}", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        # Permission problems, directories, undecodable bytes, and so on.
        print(f"Error: Cannot read {args.input_file}: {e}", file=sys.stderr)
        sys.exit(1)

    # The top-level JSON value may be something other than an object, and
    # "journeys" may be something other than a list; both count as missing.
    journeys = data.get("journeys", []) if isinstance(data, dict) else []
    if not isinstance(journeys, list) or not journeys:
        print("Error: No 'journeys' array found in input data.", file=sys.stderr)
        sys.exit(1)

    # Run models and build output. Records with missing keys, unparsable
    # timestamps or wrong value types are reported as one clean error rather
    # than a traceback.
    try:
        model_results: Dict[str, Dict[str, float]] = {}
        for model_name in models_to_run:
            credits = run_model(model_name, journeys, args.half_life)
            model_results[model_name] = {ch: round(v, 2) for ch, v in credits.items()}

        results: Dict[str, Any] = {
            "summary": compute_summary(journeys),
            "models": model_results,
        }
    except (AttributeError, KeyError, TypeError, ValueError) as e:
        print(f"Error: Malformed journey data ({type(e).__name__}): {e}", file=sys.stderr)
        sys.exit(1)

    if args.output_format == "json":
        print(json.dumps(results, indent=2))
    else:
        print(format_text(results))
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|