Files
claude-skills-reference/ra-qm-team/soc2-compliance/scripts/evidence_tracker.py
Reza Rezvani 87f3a007c9 feat(engineering,ra-qm): add secrets-vault-manager, sql-database-assistant, gcp-cloud-architect, soc2-compliance
secrets-vault-manager (403-line SKILL.md, 3 scripts, 3 references):
- HashiCorp Vault, AWS SM, Azure KV, GCP SM integration
- Secret rotation, dynamic secrets, audit logging, emergency procedures

sql-database-assistant (457-line SKILL.md, 3 scripts, 3 references):
- Query optimization, migration generation, schema exploration
- Multi-DB support (PostgreSQL, MySQL, SQLite, SQL Server)
- ORM patterns (Prisma, Drizzle, TypeORM, SQLAlchemy)

gcp-cloud-architect (418-line SKILL.md, 3 scripts, 3 references):
- 6-step workflow mirroring aws-solution-architect for GCP
- Cloud Run, GKE, BigQuery, Cloud Functions, cost optimization
- Completes cloud trifecta (AWS + Azure + GCP)

soc2-compliance (417-line SKILL.md, 3 scripts, 3 references):
- SOC 2 Type I & II preparation, Trust Service Criteria mapping
- Control matrix generation, evidence tracking, gap analysis
- First SOC 2 skill in ra-qm-team (joins GDPR, ISO 27001, ISO 13485)

All 12 scripts pass --help. Docs generated, mkdocs.yml nav updated.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-25 14:05:11 +01:00

241 lines
7.8 KiB
Python

#!/usr/bin/env python3
"""
SOC 2 Evidence Tracker
Tracks evidence collection status per control in a SOC 2 control matrix.
Reads a JSON control matrix (from control_matrix_builder.py) and reports
collection completeness, overdue items, and readiness scoring.
Usage:
python evidence_tracker.py --matrix controls.json --status
python evidence_tracker.py --matrix controls.json --status --json
"""
import argparse
import json
import sys
from datetime import datetime
from typing import Dict, List, Any
# Canonical evidence statuses mapped to their human-readable descriptions.
# Insertion order is the order in which statuses are listed in reports.
EVIDENCE_STATUSES = {
    "collected": "Evidence gathered and verified",
    "pending": "Evidence identified but not yet collected",
    "overdue": "Evidence past its collection deadline",
    "not_started": "No evidence collection initiated",
    "not_applicable": "Control not applicable to the environment",
}

# Fields a control entry must carry (and have non-empty) to count as well-formed.
REQUIRED_FIELDS = [
    "control_id",
    "tsc_criteria",
    "description",
    "evidence_required",
]
def load_matrix(filepath: str) -> List[Dict[str, Any]]:
    """Load a control matrix from a JSON file.

    Two layouts are accepted: an object with a ``controls`` array, or a
    plain top-level array. Every entry must itself be a JSON object.

    Args:
        filepath: Path to the JSON control matrix file.

    Returns:
        The list of control entries, each one a dict.

    Raises:
        SystemExit: With exit code 1 (after printing a message to stderr)
            when the file is missing or unreadable, is not valid JSON, or
            does not have one of the accepted layouts.
    """
    try:
        # Explicit encoding so the result does not depend on the platform default.
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found: {filepath}", file=sys.stderr)
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON in {filepath}: {e}", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        # Directory path, permission denied, non-UTF-8 bytes, etc.
        print(f"Error: Cannot read {filepath}: {e}", file=sys.stderr)
        sys.exit(1)

    # Accept both {"controls": [...]} and plain [...]
    if isinstance(data, dict) and "controls" in data:
        controls = data["controls"]
    elif isinstance(data, list):
        controls = data
    else:
        controls = None

    # Also rejects {"controls": <non-list>}, which would otherwise be returned as-is.
    if not isinstance(controls, list):
        print(
            "Error: Expected JSON with 'controls' array or a plain array.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Downstream code calls .get() on every entry, so fail early with a clear
    # message instead of an AttributeError traceback later.
    for index, entry in enumerate(controls):
        if not isinstance(entry, dict):
            print(
                f"Error: Control entry at index {index} is not a JSON object.",
                file=sys.stderr,
            )
            sys.exit(1)

    return controls
def classify_evidence_status(control: Dict[str, Any]) -> str:
    """Classify the evidence collection status for a control.

    The control's ``status`` value is matched case-insensitively (and with
    surrounding whitespace ignored) against known aliases. A pending control
    whose ``evidence_date`` (``YYYY-MM-DD``) lies before today is reported
    as overdue.

    Args:
        control: A control entry; only ``status`` and ``evidence_date`` are read.

    Returns:
        One of ``"not_applicable"``, ``"collected"``, ``"pending"``,
        ``"overdue"`` or ``"not_started"``. Anything unrecognised, missing,
        or not a string maps to ``"not_started"``.
    """
    raw_status = control.get("status")
    # A null or non-string status would raise AttributeError on .lower();
    # treat it like an unrecognised value instead.
    status = raw_status.lower().strip() if isinstance(raw_status, str) else ""

    if status in ("not_applicable", "n/a", "not applicable"):
        return "not_applicable"
    if status in ("collected", "complete", "done"):
        return "collected"
    if status in ("pending", "in progress", "in_progress"):
        evidence_date = control.get("evidence_date", "")
        if evidence_date:
            try:
                due = datetime.strptime(evidence_date, "%Y-%m-%d").date()
            except (TypeError, ValueError):
                # Best effort: a malformed or non-string date cannot prove
                # the item is overdue, so it stays pending.
                return "pending"
            # Compare calendar dates, not timestamps: an item due today is
            # not overdue until the day has passed.
            if due < datetime.now().date():
                return "overdue"
        return "pending"
    if status in ("overdue", "late"):
        return "overdue"
    return "not_started"
def generate_status_report(controls: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build the evidence collection status report for a list of controls.

    Args:
        controls: Control entries to evaluate.

    Returns:
        A dict with three keys:
          * ``summary``: total count, per-status counts, readiness score
            (percentage of applicable controls that are collected),
            readiness rating, and the report date.
          * ``by_category``: per-category status counts; controls without
            a ``category`` are grouped under ``"Unknown"``.
          * ``issues``: one entry per overdue or not-started control, plus
            one per control with missing or empty required fields.
    """
    total = len(controls)
    tally = {name: 0 for name in EVIDENCE_STATUSES}
    per_category: Dict[str, Dict[str, int]] = {}
    problems: List[Dict[str, str]] = []

    for entry in controls:
        state = classify_evidence_status(entry)
        tally[state] = tally.get(state, 0) + 1

        bucket = per_category.setdefault(
            entry.get("category", "Unknown"),
            {name: 0 for name in EVIDENCE_STATUSES},
        )
        bucket[state] += 1

        # Overdue and not-started controls share the same identifying fields.
        if state in ("overdue", "not_started"):
            finding = {
                "control_id": entry.get("control_id", "N/A"),
                "tsc_criteria": entry.get("tsc_criteria", "N/A"),
                "description": entry.get("description", "N/A"),
            }
            if state == "overdue":
                finding["issue"] = "Evidence collection overdue"
                finding["evidence_date"] = entry.get("evidence_date", "N/A")
            else:
                finding["issue"] = "Evidence collection not started"
            problems.append(finding)

        # A required field counts as missing when absent or falsy.
        absent = [name for name in REQUIRED_FIELDS if not entry.get(name)]
        if absent:
            problems.append(
                {
                    "control_id": entry.get("control_id", "N/A"),
                    "issue": f"Missing fields: {', '.join(absent)}",
                }
            )

    # Readiness is measured against applicable controls only.
    in_scope = total - tally.get("not_applicable", 0)
    done = tally.get("collected", 0)
    if in_scope > 0:
        readiness_pct = round((done / in_scope * 100), 1)
    else:
        readiness_pct = 0.0

    # First threshold met (highest first) decides the rating.
    readiness_rating = "Not Ready"
    for floor, label in (
        (90, "Audit Ready"),
        (75, "Minor Gaps"),
        (50, "Significant Gaps"),
    ):
        if readiness_pct >= floor:
            readiness_rating = label
            break

    summary = {
        "total_controls": total,
        "status_breakdown": tally,
        "readiness_score": readiness_pct,
        "readiness_rating": readiness_rating,
        "report_date": datetime.now().strftime("%Y-%m-%d"),
    }
    return {
        "summary": summary,
        "by_category": per_category,
        "issues": problems,
    }
def format_status_text(report: Dict[str, Any]) -> str:
    """Render a status report as human-readable text.

    Args:
        report: A report dict with ``summary``, ``by_category`` and
            ``issues`` keys, as produced by ``generate_status_report``.

    Returns:
        The report as newline-joined text: a header, the summary, the
        status breakdown, per-category collection rates, and the issues.
    """
    summary = report["summary"]
    rule = "=" * 60
    out = [
        rule,
        "SOC 2 Evidence Collection Status Report",
        rule,
        "",
        f"Report Date: {summary['report_date']}",
        f"Total Controls: {summary['total_controls']}",
        f"Readiness Score: {summary['readiness_score']}% ({summary['readiness_rating']})",
        "",
    ]

    # Per-status counts, each with its description when one is known.
    out.append("--- Status Breakdown ---")
    for status, count in summary["status_breakdown"].items():
        label = EVIDENCE_STATUSES.get(status, status)
        out.append(f" {status:15s}: {count:3d} ({label})")
    out.append("")

    # Collected-versus-total ratio for each category.
    out.append("--- By Category ---")
    for cat, statuses in report["by_category"].items():
        cat_total = sum(statuses.values())
        cat_collected = statuses.get("collected", 0)
        if cat_total > 0:
            cat_pct = round(cat_collected / cat_total * 100, 1)
        else:
            cat_pct = 0
        out.append(f" {cat}: {cat_collected}/{cat_total} collected ({cat_pct}%)")
    out.append("")

    # Issues list, or an explicit "none" marker.
    findings = report["issues"]
    if not findings:
        out.append("--- No Issues Found ---")
    else:
        out.append(f"--- Issues ({len(findings)}) ---")
        for finding in findings:
            ctrl_id = finding.get("control_id", "N/A")
            desc = finding.get("issue", "Unknown issue")
            out.append(f" [{ctrl_id}] {desc}")
    out.append("")

    return "\n".join(out)
def main():
    """Command-line entry point.

    Parses ``--matrix``, ``--status`` and ``--json``; loads the control
    matrix; builds the status report; and prints it as JSON or as text.
    Exits with code 1 (after printing the help text) when ``--status`` is
    not given.
    """
    cli = argparse.ArgumentParser(
        description="SOC 2 Evidence Tracker — tracks evidence collection status per control."
    )
    cli.add_argument(
        "--matrix",
        type=str,
        required=True,
        help="Path to JSON control matrix file (from control_matrix_builder.py)",
    )
    cli.add_argument(
        "--status",
        action="store_true",
        help="Generate evidence collection status report",
    )
    cli.add_argument(
        "--json",
        action="store_true",
        help="Output in JSON format",
    )
    opts = cli.parse_args()

    # --status is the only supported action, so its absence is an error.
    if not opts.status:
        cli.print_help()
        print("\nError: --status flag is required.", file=sys.stderr)
        sys.exit(1)

    report = generate_status_report(load_matrix(opts.matrix))
    rendered = json.dumps(report, indent=2) if opts.json else format_status_text(report)
    print(rendered)


if __name__ == "__main__":
    main()