Files
claude-skills-reference/data-analysis/statistical-analyst/scripts/confidence_interval.py
amitdhanda48 c3693f9be1 feat(data-analysis): statistical-analyst
Adds statistical-analyst skill — fills a gap in the repo (no hypothesis
testing or experiment analysis tooling exists; only ab-test-setup for
instrumentation, but zero analysis capability).

Three stdlib-only Python scripts:
- hypothesis_tester.py: Z-test (proportions), Welch's t-test (means),
  Chi-square (categorical) with p-value, CI, Cohen's d/h, Cramér's V
- sample_size_calculator.py: required n per variant for proportion and
  mean tests, with power/MDE tradeoff table and duration estimates
- confidence_interval.py: Wilson score interval (proportions) and
  z-based interval (means) with margin of error and precision notes

Validator: 86.4/100 (GOOD). Security audit: PASS (0 critical/high).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 21:54:01 -07:00

199 lines
6.8 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
confidence_interval.py — Confidence intervals for proportions and means.
Methods:
proportion — Wilson score interval (recommended over normal approximation for small n or extreme p)
mean — z-based interval (normal approximation; most reliable for large n)
Usage:
python3 confidence_interval.py --type proportion --n 1200 --x 96
python3 confidence_interval.py --type mean --n 800 --mean 42.3 --std 18.1
python3 confidence_interval.py --type proportion --n 1200 --x 96 --confidence 0.99
python3 confidence_interval.py --type proportion --n 1200 --x 96 --format json
"""
import argparse
import json
import math
import sys
def normal_ppf(p: float) -> float:
    """
    Approximate the standard normal quantile (inverse CDF) at probability p.

    The normal CDF is evaluated as 0.5 * erfc(-x / sqrt(2)) and inverted by
    bisecting the bracket [-10, 10] one hundred times; the midpoint of the
    final bracket is returned. No range check is applied to p.
    """
    def _cdf(x: float) -> float:
        # Standard normal CDF written via the complementary error function.
        return 0.5 * math.erfc(-x / math.sqrt(2))

    lower, upper = -10.0, 10.0
    steps_left = 100
    while steps_left:
        midpoint = (lower + upper) / 2
        # Keep the half of the bracket that still straddles the target.
        if _cdf(midpoint) < p:
            lower = midpoint
        else:
            upper = midpoint
        steps_left -= 1
    return (lower + upper) / 2
def wilson_interval(n: int, x: int, confidence: float) -> dict:
    """
    Wilson score confidence interval for a binomial proportion.

    Args:
        n: Total number of trials; must be positive.
        x: Number of successes; must satisfy 0 <= x <= n.
        confidence: Two-sided confidence level, strictly between 0 and 1
            (e.g. 0.95).

    Returns:
        On success, a dict holding the observed rate, the Wilson bounds
        (clamped to [0, 1]), the half-width of the clamped interval as
        "margin_of_error", and the plain normal-approximation bounds under
        "normal_approximation" for comparison. Numeric results are rounded
        to 6 decimals. On invalid input, a dict with a single "error" key.
    """
    if n <= 0:
        return {"error": "n must be positive"}
    if x < 0 or x > n:
        return {"error": "x must be between 0 and n"}
    # normal_ppf() does not range-check its argument, so a level such as 95
    # (meant as 0.95) or 1.0 would otherwise produce a meaningless interval.
    # The negated chained comparison also rejects NaN.
    if not (0.0 < confidence < 1.0):
        return {"error": "confidence must be between 0 and 1 (exclusive)"}

    p_hat = x / n
    z = normal_ppf(1 - (1 - confidence) / 2)
    z2 = z ** 2

    # Wilson score interval: shrink the centre toward 0.5 and widen the
    # margin by a z^2 / n dependent correction.
    denom = 1 + z2 / n
    center = (p_hat + z2 / (2 * n)) / denom
    margin = (z / denom) * math.sqrt(p_hat * (1 - p_hat) / n + z2 / (4 * n ** 2))
    lo = max(0.0, center - margin)
    hi = min(1.0, center + margin)

    # Normal approximation for comparison (n > 0 is guaranteed above).
    se = math.sqrt(p_hat * (1 - p_hat) / n)
    normal_lo = max(0.0, p_hat - z * se)
    normal_hi = min(1.0, p_hat + z * se)

    return {
        "type": "proportion",
        "method": "Wilson score interval",
        "n": n,
        "successes": x,
        "observed_rate": round(p_hat, 6),
        "confidence": confidence,
        "lower": round(lo, 6),
        "upper": round(hi, 6),
        "margin_of_error": round((hi - lo) / 2, 6),
        "normal_approximation": {
            "lower": round(normal_lo, 6),
            "upper": round(normal_hi, 6),
            "note": "Wilson is preferred; normal approx shown for reference",
        },
    }
def mean_interval(n: int, mean: float, std: float, confidence: float) -> dict:
    """
    Confidence interval for a mean, computed from summary statistics.

    The interval is always z-based (normal critical value from
    normal_ppf()); no Student-t correction is applied for any n. When
    n < 30 the "method" field carries a caution label instead of the
    method name.

    Args:
        n: Sample size; must be greater than 1.
        mean: Observed sample mean.
        std: Observed standard deviation; must be non-negative.
        confidence: Two-sided confidence level, strictly between 0 and 1
            (e.g. 0.95).

    Returns:
        On success, a dict with the standard error, the interval bounds,
        the margin of error, the margin relative to |mean| in percent
        (None when mean is 0) and a short precision note (empty string
        when the relative margin is between 5% and 20% or undefined).
        On invalid input, a dict with a single "error" key.
    """
    if n <= 1:
        return {"error": "n must be > 1"}
    if std < 0:
        return {"error": "std must be non-negative"}
    # normal_ppf() does not range-check its argument, so a level such as 95
    # (meant as 0.95) or 1.0 would otherwise produce a meaningless interval.
    # The negated chained comparison also rejects NaN.
    if not (0.0 < confidence < 1.0):
        return {"error": "confidence must be between 0 and 1 (exclusive)"}

    se = std / math.sqrt(n)
    z = normal_ppf(1 - (1 - confidence) / 2)
    moe = z * se
    lo = mean - moe
    hi = mean + moe

    # Relative margin is undefined for a zero mean.
    rel_moe = moe / abs(mean) * 100 if mean != 0 else None

    precision_note = ""
    # Compare against None explicitly: a relative margin of exactly 0.0
    # (std == 0) is a valid, maximally tight result, not a missing value.
    if rel_moe is not None:
        if rel_moe > 20:
            precision_note = "Wide CI — consider increasing sample size for tighter estimates."
        elif rel_moe < 5:
            precision_note = "Tight CI — high precision estimate."

    return {
        "type": "mean",
        "method": "Normal approximation (z-based)" if n >= 30 else "Use with caution (n < 30)",
        "n": n,
        "observed_mean": round(mean, 6),
        "std": round(std, 6),
        "standard_error": round(se, 6),
        "confidence": confidence,
        "lower": round(lo, 6),
        "upper": round(hi, 6),
        "margin_of_error": round(moe, 6),
        "relative_margin_of_error_pct": round(rel_moe, 2) if rel_moe is not None else None,
        "precision_note": precision_note,
    }
def print_report(result: dict) -> None:
    """
    Print a human-readable confidence interval report to stdout.

    Args:
        result: A result dict as returned by wilson_interval() or
            mean_interval(). If it contains an "error" key, the message is
            written to stderr and the process exits with status 1.
    """
    if "error" in result:
        print(f"Error: {result['error']}", file=sys.stderr)
        sys.exit(1)

    # Format the level from a rounded value instead of truncating with
    # int(): 0.999 must read "99.9", and 0.57 * 100 evaluates to
    # 56.99999999999999, which int() would turn into 56.
    conf_pct = f"{round(result['confidence'] * 100, 6):.8g}"
    rule = "=" * 60

    print(rule)
    print(" CONFIDENCE INTERVAL REPORT")
    print(rule)
    print(f" Method: {result['method']}")
    print(f" Confidence level: {conf_pct}%")
    print()

    if result["type"] == "proportion":
        print(f" Observed rate: {result['observed_rate']:.4%} ({result['successes']}/{result['n']})")
        print()
        print(f" {conf_pct}% CI: [{result['lower']:.4%}, {result['upper']:.4%}]")
        print(f" Margin of error: ±{result['margin_of_error']:.4%}")
        print()
        norm = result.get("normal_approximation", {})
        print(f" Normal approx CI (ref): [{norm.get('lower', 0):.4%}, {norm.get('upper', 0):.4%}]")
    elif result["type"] == "mean":
        print(f" Observed mean: {result['observed_mean']} (std={result['std']}, n={result['n']})")
        print(f" Standard error: {result['standard_error']}")
        print()
        print(f" {conf_pct}% CI: [{result['lower']}, {result['upper']}]")
        print(f" Margin of error: ±{result['margin_of_error']}")
        if result.get("relative_margin_of_error_pct") is not None:
            print(f" Relative MoE: ±{result['relative_margin_of_error_pct']:.1f}%")
        if result.get("precision_note"):
            print(f"\n {result['precision_note']}")

    print()
    # Interpretation guide
    print(" Interpretation: If this experiment were repeated many times,")
    print(f" {conf_pct}% of the computed intervals would contain the true value.")
    print(f" This does NOT mean there is a {conf_pct}% chance the true value is")
    print(" in this specific interval — it either is or it isn't.")
    print(rule)
def main(argv=None):
    """
    Command-line entry point: parse arguments, compute the interval, print it.

    Args:
        argv: Optional list of argument strings (without the program name).
            When None, argparse reads sys.argv[1:], so calling main() with
            no arguments behaves as before.

    Exits with status 1 when required options for the chosen --type are
    missing or when the computation reports an error, in both text and
    JSON output modes.
    """
    parser = argparse.ArgumentParser(
        description="Compute confidence intervals for proportions and means."
    )
    parser.add_argument("--type", choices=["proportion", "mean"], required=True)
    parser.add_argument("--confidence", type=float, default=0.95,
                        help="Confidence level (default: 0.95)")
    parser.add_argument("--format", choices=["text", "json"], default="text")
    # Proportion
    parser.add_argument("--n", type=int, help="Total sample size")
    parser.add_argument("--x", type=int, help="Number of successes (for proportion)")
    # Mean
    parser.add_argument("--mean", type=float, help="Observed mean")
    parser.add_argument("--std", type=float, help="Observed standard deviation")
    args = parser.parse_args(argv)

    if args.type == "proportion":
        if args.n is None or args.x is None:
            print("Error: --n and --x are required for proportion CI", file=sys.stderr)
            sys.exit(1)
        result = wilson_interval(args.n, args.x, args.confidence)
    elif args.type == "mean":
        if args.n is None or args.mean is None or args.std is None:
            print("Error: --n, --mean, and --std are required for mean CI", file=sys.stderr)
            sys.exit(1)
        result = mean_interval(args.n, args.mean, args.std, args.confidence)

    if args.format == "json":
        print(json.dumps(result, indent=2))
        # Still emit the JSON so consumers can read the message, but do not
        # report success to the calling shell when the result is an error;
        # this matches the exit status of the text-mode path.
        if "error" in result:
            sys.exit(1)
    else:
        print_report(result)


if __name__ == "__main__":
    main()