Files
claude-skills-reference/product-team/product-analytics/scripts/metrics_calculator.py

156 lines
5.4 KiB
Python
Executable File

#!/usr/bin/env python3
"""Product metrics calculator: retention, cohort matrix, and funnel conversion."""
import argparse
import csv
import datetime as dt
from collections import defaultdict
def parse_date(value: str) -> dt.date:
    """Return the calendar date found at the start of *value*.

    Surrounding whitespace is removed first, then only the first ten
    characters (the ``YYYY-MM-DD`` part) are parsed, so a full ISO timestamp
    such as ``2024-03-05T12:30:00`` is accepted and its time part ignored.

    Raises:
        ValueError: If the leading ten characters are not an ISO date.
    """
    trimmed = value.strip()
    date_part = trimmed[:10]  # drop any trailing time-of-day component
    return dt.date.fromisoformat(date_part)
def load_csv(path: str) -> list:
    """Read the CSV file at *path* and return its rows as a list of dicts.

    The first line of the file is used as the header; each following line
    becomes one dict keyed by those header names (``csv.DictReader``).

    Args:
        path: Location of the CSV file to read.

    Returns:
        A list with one dict per data row, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # "utf-8-sig" decodes plain UTF-8 unchanged but also strips a leading
    # byte-order mark (common in spreadsheet exports). With plain "utf-8" the
    # BOM ends up glued to the first header name ("\ufeffuser_id"), and every
    # lookup of that column then fails with KeyError.
    with open(path, "r", encoding="utf-8-sig", newline="") as handle:
        return list(csv.DictReader(handle))
def retention(args: argparse.Namespace) -> int:
    """Print day-by-day retention for every user in the input CSV.

    Columns are selected by ``args.user_column``, ``args.cohort_column`` and
    ``args.activity_column``. A user's cohort date is the earliest cohort date
    found on any of their rows; period ``N`` collects the users with an
    activity row exactly ``N`` days after their cohort date. Activity dated
    before the cohort date is ignored.

    Output (stdout) is a title line, a CSV header, then one line per period
    from ``0`` through ``args.max_period`` inclusive.

    Args:
        args: Parsed CLI namespace providing ``input``, ``user_column``,
            ``cohort_column``, ``activity_column`` and ``max_period``.

    Returns:
        ``0`` on success, ``1`` when the input contains no users.
    """
    rows = load_csv(args.input)

    # Pass 1: settle each user's final (earliest) cohort date before any
    # period is computed. Computing the delta while the minimum is still
    # being discovered makes the result depend on row order: rows seen before
    # the user's earliest cohort date would be measured from a later date.
    cohorts = {}
    events = []
    for row in rows:
        user = row[args.user_column].strip()
        cohort_date = parse_date(row[args.cohort_column])
        activity_date = parse_date(row[args.activity_column])
        known = cohorts.get(user)
        if known is None or cohort_date < known:
            cohorts[user] = cohort_date
        events.append((user, activity_date))

    base_users = len(cohorts)
    if base_users == 0:
        print("No users found.")
        return 1

    # Pass 2: bucket users by days elapsed since their settled cohort date.
    # Sets de-duplicate users with several activity rows in the same period.
    activity = defaultdict(set)
    for user, activity_date in events:
        delta = (activity_date - cohorts[user]).days
        if delta >= 0:
            activity[delta].add(user)

    print("Retention by period")
    print("period,active_users,retention_rate")
    for period in range(args.max_period + 1):
        # .get() avoids inserting empty sets into the defaultdict.
        users = len(activity.get(period, ()))
        rate = users / base_users
        print(f"{period},{users},{rate:.4f}")
    return 0
def _cohort_key(cohort_date: dt.date, grain: str) -> str:
    """Return the cohort label for *cohort_date*: ``YYYY-MM`` or ISO ``YYYY-Www``."""
    if grain == "month":
        return cohort_date.strftime("%Y-%m")
    # Index the isocalendar() result instead of using .year/.week: the named
    # attributes only exist on Python 3.9+, tuple indexing works everywhere.
    iso = cohort_date.isocalendar()
    return f"{iso[0]}-W{iso[1]:02d}"


def cohort(args: argparse.Namespace) -> int:
    """Print cohort retention rows (one per cohort and age in days) as CSV.

    Columns are selected by ``args.user_column``, ``args.cohort_column`` and
    ``args.activity_column``. Each user belongs to exactly one cohort, derived
    from the earliest cohort date on any of their rows and labelled by ISO
    week or calendar month according to ``args.cohort_grain``. Age is the
    number of days between an activity date and that user's cohort date;
    activity dated before the cohort date is ignored.

    Output (stdout) is a CSV header followed, for every cohort in sorted
    label order, by one line per age from ``0`` through ``args.max_period``.

    Args:
        args: Parsed CLI namespace providing ``input``, ``user_column``,
            ``cohort_column``, ``activity_column``, ``cohort_grain`` and
            ``max_period``.

    Returns:
        Always ``0``.
    """
    rows = load_csv(args.input)

    # Pass 1: fix one cohort date per user. Bucketing activity by each row's
    # own cohort date while membership comes from a different row lets a user
    # count as active in a cohort whose size does not include them, which can
    # push retention_rate above 1.
    cohort_dates = {}
    events = []
    for row in rows:
        user = row[args.user_column].strip()
        cohort_date = parse_date(row[args.cohort_column])
        activity_date = parse_date(row[args.activity_column])
        known = cohort_dates.get(user)
        if known is None or cohort_date < known:
            cohort_dates[user] = cohort_date
        events.append((user, activity_date))

    user_keys = {
        user: _cohort_key(day, args.cohort_grain)
        for user, day in cohort_dates.items()
    }

    cohort_sizes = defaultdict(int)
    for cohort_key in user_keys.values():
        cohort_sizes[cohort_key] += 1

    # Pass 2: attribute every activity row to the user's single cohort.
    activity = defaultdict(set)
    for user, activity_date in events:
        age = (activity_date - cohort_dates[user]).days
        if age >= 0:
            activity[(user_keys[user], age)].add(user)

    print("cohort,age_days,active_users,cohort_size,retention_rate")
    for cohort_key in sorted(cohort_sizes):
        size = cohort_sizes[cohort_key]
        for age in range(args.max_period + 1):
            active_users = len(activity.get((cohort_key, age), ()))
            rate = (active_users / size) if size else 0
            print(f"{cohort_key},{age},{active_users},{size},{rate:.4f}")
    return 0
def funnel(args: argparse.Namespace) -> int:
    """Print per-stage unique-user counts and conversion rates as CSV.

    ``args.stages`` is a comma-separated, ordered list of stage names. Rows
    whose ``args.stage_column`` value is not in that list are ignored. Users
    are counted per stage independently: a user is not required to appear in
    an earlier stage to be counted in a later one.

    Output (stdout) is a CSV header followed by one line per stage with the
    unique user count, the conversion from the previous stage and the
    conversion from the first stage.

    Args:
        args: Parsed CLI namespace providing ``input``, ``user_column``,
            ``stage_column`` and ``stages``.

    Returns:
        ``0`` on success, ``1`` when ``args.stages`` names no stages.
    """
    rows = load_csv(args.input)
    stages = [item.strip() for item in args.stages.split(",") if item.strip()]
    if not stages:
        print("No stages provided.")
        return 1

    stage_users = {stage: set() for stage in stages}
    for row in rows:
        user = row[args.user_column].strip()
        stage = row[args.stage_column].strip()
        if stage in stage_users:
            stage_users[stage].add(user)

    print("stage,users,conversion_from_previous,conversion_from_first")
    previous_count = None
    first_count = None
    for stage in stages:
        count = len(stage_users[stage])
        if first_count is None:
            first_count = count
        # Distinguish "no previous stage" (None) from "previous stage had no
        # users" (0). A plain truthiness test treats both the same and would
        # report 100% conversion out of an empty stage.
        if previous_count is None:
            conv_prev = 1.0  # first stage: nothing to convert from
        elif previous_count == 0:
            conv_prev = 0.0  # undefined ratio; reported as 0 like conv_first
        else:
            conv_prev = count / previous_count
        conv_first = (count / first_count) if first_count else 0.0
        print(f"{stage},{count},{conv_prev:.4f},{conv_first:.4f}")
        previous_count = count
    return 0
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with its three sub-commands.

    The sub-commands are ``retention``, ``cohort`` and ``funnel``. Each takes
    a positional CSV ``input`` path and a ``--user-column`` option, and stores
    its handler function under ``func`` in the parsed namespace.

    Returns:
        The configured ``argparse.ArgumentParser``; a sub-command is required.
    """
    root = argparse.ArgumentParser(
        description="Calculate retention, cohort, and funnel metrics from CSV data."
    )
    commands = root.add_subparsers(dest="command", required=True)

    def new_command(name, summary, handler):
        # Everything the three sub-commands have in common, in the same
        # order the options are meant to appear in --help output.
        sub = commands.add_parser(name, help=summary)
        sub.add_argument("input", help="CSV input path")
        sub.add_argument("--user-column", default="user_id")
        sub.set_defaults(func=handler)
        return sub

    by_day = new_command("retention", "Calculate retention by day.", retention)
    by_day.add_argument("--cohort-column", default="cohort_date")
    by_day.add_argument("--activity-column", default="activity_date")
    by_day.add_argument("--max-period", type=int, default=30)

    matrix = new_command("cohort", "Build cohort retention matrix rows.", cohort)
    matrix.add_argument("--cohort-column", default="cohort_date")
    matrix.add_argument("--activity-column", default="activity_date")
    matrix.add_argument("--cohort-grain", choices=["week", "month"], default="week")
    matrix.add_argument("--max-period", type=int, default=30)

    steps = new_command("funnel", "Calculate funnel conversion by stage.", funnel)
    steps.add_argument("--stage-column", default="stage")
    steps.add_argument("--stages", required=True)

    return root
def main(argv=None) -> int:
    """Parse command-line arguments and run the selected sub-command.

    Args:
        argv: Optional list of argument strings, without the program name.
            When ``None`` (the default) argparse falls back to
            ``sys.argv[1:]``, so existing ``main()`` callers are unaffected.
            Passing a list lets the CLI be driven programmatically.

    Returns:
        The integer status returned by the sub-command handler stored under
        ``func`` in the parsed namespace.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    return args.func(args)
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)