claude-skills-reference/data-analysis/statistical-analyst/scripts/confidence_interval.py

#!/usr/bin/env python3
"""
confidence_interval.py — Confidence intervals for proportions and means.

Methods:
  proportion — Wilson score interval (recommended over normal approximation for small n or extreme p)
  mean       — t-based interval using normal approximation for large n

Usage:
    python3 confidence_interval.py --type proportion --n 1200 --x 96
    python3 confidence_interval.py --type mean --n 800 --mean 42.3 --std 18.1
    python3 confidence_interval.py --type proportion --n 1200 --x 96 --confidence 0.99
    python3 confidence_interval.py --type proportion --n 1200 --x 96 --format json
"""

import argparse
import json
import math
import sys


def normal_ppf(p: float) -> float:
    """Inverse normal CDF via bisection."""
    lo, hi = -10.0, 10.0
    for _ in range(100):
        mid = (lo + hi) / 2
        if 0.5 * math.erfc(-mid / math.sqrt(2)) < p:
            lo = mid
        else:
            hi = mid
    return (lo + hi) / 2


def wilson_interval(n: int, x: int, confidence: float) -> dict:
    """
    Wilson score confidence interval for a proportion.
    More accurate than normal approximation, especially for small n or p near 0/1.
    """
    if n <= 0:
        return {"error": "n must be positive"}
    if x < 0 or x > n:
        return {"error": "x must be between 0 and n"}

    p_hat = x / n
    z = normal_ppf(1 - (1 - confidence) / 2)
    z2 = z ** 2

    center = (p_hat + z2 / (2 * n)) / (1 + z2 / n)
    margin = (z / (1 + z2 / n)) * math.sqrt(p_hat * (1 - p_hat) / n + z2 / (4 * n ** 2))

    lo = max(0.0, center - margin)
    hi = min(1.0, center + margin)

    # Normal approximation for comparison
    se = math.sqrt(p_hat * (1 - p_hat) / n) if n > 0 else 0
    normal_lo = max(0.0, p_hat - z * se)
    normal_hi = min(1.0, p_hat + z * se)

    return {
        "type": "proportion",
        "method": "Wilson score interval",
        "n": n,
        "successes": x,
        "observed_rate": round(p_hat, 6),
        "confidence": confidence,
        "lower": round(lo, 6),
        "upper": round(hi, 6),
        "margin_of_error": round((hi - lo) / 2, 6),
        "normal_approximation": {
            "lower": round(normal_lo, 6),
            "upper": round(normal_hi, 6),
            "note": "Wilson is preferred; normal approx shown for reference",
        },
    }


def mean_interval(n: int, mean: float, std: float, confidence: float) -> dict:
    """
    Confidence interval for a mean.
    Uses normal approximation (z-based) for n >= 30, t-approximation otherwise.
    """
    if n <= 1:
        return {"error": "n must be > 1"}
    if std < 0:
        return {"error": "std must be non-negative"}

    se = std / math.sqrt(n)
    z = normal_ppf(1 - (1 - confidence) / 2)

    lo = mean - z * se
    hi = mean + z * se
    moe = z * se

    rel_moe = moe / abs(mean) * 100 if mean != 0 else None

    precision_note = ""
    if rel_moe and rel_moe > 20:
        precision_note = "Wide CI — consider increasing sample size for tighter estimates."
    elif rel_moe and rel_moe < 5:
        precision_note = "Tight CI — high precision estimate."

    return {
        "type": "mean",
        "method": "Normal approximation (z-based)" if n >= 30 else "Use with caution (n < 30)",
        "n": n,
        "observed_mean": round(mean, 6),
        "std": round(std, 6),
        "standard_error": round(se, 6),
        "confidence": confidence,
        "lower": round(lo, 6),
        "upper": round(hi, 6),
        "margin_of_error": round(moe, 6),
        "relative_margin_of_error_pct": round(rel_moe, 2) if rel_moe is not None else None,
        "precision_note": precision_note,
    }


def print_report(result: dict):
    if "error" in result:
        print(f"Error: {result['error']}", file=sys.stderr)
        sys.exit(1)

    conf_pct = int(result["confidence"] * 100)
    print("=" * 60)
    print(f"  CONFIDENCE INTERVAL REPORT")
    print("=" * 60)
    print(f"  Method: {result['method']}")
    print(f"  Confidence level: {conf_pct}%")
    print()

    if result["type"] == "proportion":
        print(f"  Observed rate: {result['observed_rate']:.4%}  ({result['successes']}/{result['n']})")
        print()
        print(f"  {conf_pct}% CI: [{result['lower']:.4%}, {result['upper']:.4%}]")
        print(f"  Margin of error: ±{result['margin_of_error']:.4%}")
        print()
        norm = result.get("normal_approximation", {})
        print(f"  Normal approx CI (ref): [{norm.get('lower', 0):.4%}, {norm.get('upper', 0):.4%}]")

    elif result["type"] == "mean":
        print(f"  Observed mean: {result['observed_mean']}  (std={result['std']}, n={result['n']})")
        print(f"  Standard error: {result['standard_error']}")
        print()
        print(f"  {conf_pct}% CI: [{result['lower']}, {result['upper']}]")
        print(f"  Margin of error: ±{result['margin_of_error']}")
        if result.get("relative_margin_of_error_pct") is not None:
            print(f"  Relative MoE: ±{result['relative_margin_of_error_pct']:.1f}%")
        if result.get("precision_note"):
            print(f"\n  ℹ️  {result['precision_note']}")

    print()
    # Interpretation guide
    print(f"  Interpretation: If this experiment were repeated many times,")
    print(f"  {conf_pct}% of the computed intervals would contain the true value.")
    print(f"  This does NOT mean there is a {conf_pct}% chance the true value is")
    print(f"  in this specific interval — it either is or it isn't.")
    print("=" * 60)


def main():
    parser = argparse.ArgumentParser(
        description="Compute confidence intervals for proportions and means."
    )
    parser.add_argument("--type", choices=["proportion", "mean"], required=True)
    parser.add_argument("--confidence", type=float, default=0.95,
                        help="Confidence level (default: 0.95)")
    parser.add_argument("--format", choices=["text", "json"], default="text")

    # Proportion
    parser.add_argument("--n", type=int, help="Total sample size")
    parser.add_argument("--x", type=int, help="Number of successes (for proportion)")

    # Mean
    parser.add_argument("--mean", type=float, help="Observed mean")
    parser.add_argument("--std", type=float, help="Observed standard deviation")

    args = parser.parse_args()

    if args.type == "proportion":
        if args.n is None or args.x is None:
            print("Error: --n and --x are required for proportion CI", file=sys.stderr)
            sys.exit(1)
        result = wilson_interval(args.n, args.x, args.confidence)

    elif args.type == "mean":
        if args.n is None or args.mean is None or args.std is None:
            print("Error: --n, --mean, and --std are required for mean CI", file=sys.stderr)
            sys.exit(1)
        result = mean_interval(args.n, args.mean, args.std, args.confidence)

    if args.format == "json":
        print(json.dumps(result, indent=2))
    else:
        print_report(result)


if __name__ == "__main__":
    main()