#!/usr/bin/env python3
"""WCAG 2.2 Accessibility Scanner for Frontend Codebases.

Scans HTML, JSX, TSX, Vue, Svelte, and CSS files for accessibility
violations across 10 categories: images, forms, headings, landmarks,
keyboard, ARIA, color/contrast, links, tables, and media.

Usage:
    python a11y_scanner.py /path/to/project
    python a11y_scanner.py /path/to/project --json
    python a11y_scanner.py /path/to/project --severity critical,serious
    python a11y_scanner.py /path/to/project --format json
"""
import argparse
import json
import os
import re
import sys
from dataclasses import dataclass, asdict
from typing import List, Optional
@dataclass
class Finding:
    """One accessibility violation reported at a specific source location."""

    rule_id: str         # rule identifier, e.g. "img-alt-missing"
    category: str        # rule group, e.g. "images" or "forms"
    severity: str        # e.g. "critical", "serious", "moderate"
    message: str         # human-readable description of the problem
    file: str            # file the finding was raised for
    line: int            # line number the finding was raised for
    snippet: str         # source excerpt displayed with the finding
    wcag_criterion: str  # e.g. "1.1.1 Non-text Content"
    fix: str             # suggested remediation
# ---------------------------------------------------------------------------
# Rule definitions: each check_* function inspects one parsed tag and returns
# a Finding when its rule is violated (None otherwise)
# ---------------------------------------------------------------------------
# Known aria-* attribute names. The last two rows hold the attributes
# introduced by WAI-ARIA 1.3; aria-colindextext and aria-rowindextext belong
# to that same group and are listed so they are not mistaken for typos.
VALID_ARIA_ATTRS = {
    "aria-activedescendant", "aria-atomic", "aria-autocomplete", "aria-busy",
    "aria-checked", "aria-colcount", "aria-colindex", "aria-colspan",
    "aria-controls", "aria-current", "aria-describedby", "aria-details",
    "aria-disabled", "aria-dropeffect", "aria-errormessage", "aria-expanded",
    "aria-flowto", "aria-grabbed", "aria-haspopup", "aria-hidden",
    "aria-invalid", "aria-keyshortcuts", "aria-label", "aria-labelledby",
    "aria-level", "aria-live", "aria-modal", "aria-multiline",
    "aria-multiselectable", "aria-orientation", "aria-owns", "aria-placeholder",
    "aria-posinset", "aria-pressed", "aria-readonly", "aria-relevant",
    "aria-required", "aria-roledescription", "aria-rowcount", "aria-rowindex",
    "aria-rowspan", "aria-selected", "aria-setsize", "aria-sort",
    "aria-valuemax", "aria-valuemin", "aria-valuenow", "aria-valuetext",
    "aria-braillelabel", "aria-brailleroledescription", "aria-description",
    "aria-colindextext", "aria-rowindextext",
}
# Generic link text ("click here", "more", ...) that is the ENTIRE text between
# a ">" and the following "<", ignoring case and surrounding whitespace.
BAD_LINK_TEXT = re.compile(
    r">\s*(click here|here|read more|more|link|this)\s*<",
    flags=re.IGNORECASE,
)

# A tag opener: group 1 = tag name, group 2 = raw attribute text, group 3 =
# optional "/". Group 2 is greedy, so the slash of a self-closing tag lands in
# group 2 and group 3 is always empty. re.DOTALL is inert (the pattern has no
# "."); "[^>]" already spans newlines.
TAG_RE = re.compile(r"<(\w[\w-]*)\b([^>]*)(/?)>", flags=re.DOTALL)

# name=value pairs: group 1 = name, then exactly one of group 2 (double-quoted
# value), group 3 (single-quoted value) or group 4 (unquoted, up to whitespace).
ATTR_RE = re.compile(
    r"""([\w:.-]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))"""
)

# A bare attribute name followed by whitespace, ">", "/>" or end of string.
ATTR_BOOL_RE = re.compile(r"\b([\w:.-]+)(?=\s|/?>|$)")

# A quoted inline style whose value contains "color:". The \b also sits after
# a hyphen, so "background-color:" matches too.
INLINE_COLOR_RE = re.compile(
    r'style\s*=\s*["\'][^"\']*\bcolor\s*:',
    flags=re.IGNORECASE,
)

# Any token that starts with "aria-".
ARIA_ATTR_RE = re.compile(r"\baria-[\w-]+")
def _attrs(attr_str: str) -> dict:
    """Turn the raw attribute text of a tag into a name -> value mapping.

    Attributes written as ``name=value`` map to the value as a string (quotes
    removed; an empty value becomes ""). Bare attributes such as ``disabled``
    map to ``True``. When a name appears both with and without a value, the
    valued form is kept.
    """
    parsed = {}
    for match in ATTR_RE.finditer(attr_str):
        name, double_quoted, single_quoted, unquoted = match.groups()
        parsed[name] = double_quoted or single_quoted or unquoted or ""

    # Once the name=value pairs are removed, any remaining names are
    # valueless (boolean) attributes.
    remainder = ATTR_RE.sub("", attr_str)
    for match in ATTR_BOOL_RE.finditer(remainder):
        flag = match.group(1)
        if flag in parsed or flag.startswith("/"):
            continue
        parsed[flag] = True
    return parsed
def _snippet(line_text: str) -> str:
    """Return *line_text* without trailing newlines, capped for display.

    Trailing "\\n" and "\\r" characters are removed. Text longer than 120
    characters is cut to its first 120 characters and suffixed with "...".
    """
    limit = 120
    trimmed = line_text.rstrip("\n\r")
    if len(trimmed) <= limit:
        return trimmed
    return trimmed[:limit] + "..."
def _find(rule_id, cat, sev, msg, fp, ln, snip, wcag, fix):
    """Build a Finding from the short argument names used by the checks.

    Maps cat -> category, sev -> severity, msg -> message, fp -> file,
    ln -> line, snip -> snippet and wcag -> wcag_criterion; rule_id and fix
    keep their names.
    """
    return Finding(
        rule_id=rule_id,
        category=cat,
        severity=sev,
        message=msg,
        file=fp,
        line=ln,
        snippet=snip,
        wcag_criterion=wcag,
        fix=fix,
    )
# ---------- Images ----------------------------------------------------------
def check_img_missing_alt(tag, attrs, fp, ln, snip):
    """Flag an <img> element that has no alt attribute at all.

    Args:
        tag: Name of the tag being checked.
        attrs: Mapping of attribute name to value for that tag.
        fp: Forwarded to the finding as its file.
        ln: Forwarded to the finding as its line.
        snip: Forwarded to the finding as its snippet.

    Returns:
        A Finding with rule id "img-alt-missing", or None when the tag is not
        "img" or an alt attribute is present (even an empty one).
    """
    if tag != "img" or "alt" in attrs:
        return None
    return _find(
        "img-alt-missing", "images", "critical",
        "<img> missing alt attribute",
        fp, ln, snip, "1.1.1 Non-text Content",
        "Add alt=\"description\" or alt=\"\" for decorative images.",
    )
def check_img_empty_alt_informative(tag, attrs, fp, ln, snip):
    """Flag an <img alt=""> whose src does not look like a decorative asset.

    The image is treated as decorative, and not reported, when its src
    contains one of the substrings "spacer", "border", "decorat" or "bg"
    (case-insensitive). Images without a usable src are not reported.

    Args:
        tag: Name of the tag being checked.
        attrs: Mapping of attribute name to value for that tag.
        fp: Forwarded to the finding as its file.
        ln: Forwarded to the finding as its line.
        snip: Forwarded to the finding as its snippet.

    Returns:
        A Finding with rule id "img-alt-empty-informative", or None when the
        rule does not apply.
    """
    if tag != "img" or attrs.get("alt") != "":
        return None
    src = attrs.get("src", "")
    # attrs may hold non-string values (_attrs stores True for a valueless
    # attribute); such a src has no path to inspect and must not reach .lower().
    if not isinstance(src, str) or not src:
        return None
    lowered = src.lower()
    if any(kw in lowered for kw in ("spacer", "border", "decorat", "bg")):
        return None
    return _find(
        "img-alt-empty-informative", "images", "serious",
        "<img> has empty alt but may be informative",
        fp, ln, snip, "1.1.1 Non-text Content",
        "If image conveys information, add descriptive alt text.",
    )
def check_img_decorative_has_alt(tag, attrs, fp, ln, snip):
    """Flag an <img role="presentation"> that carries a non-empty alt.

    A missing alt is treated like an empty one here and is not reported by
    this check.

    Returns:
        A Finding with rule id "img-decorative-alt", or None when the rule
        does not apply.
    """
    if tag != "img":
        return None
    if attrs.get("role") != "presentation":
        return None
    if attrs.get("alt", "") == "":
        return None
    return _find(
        "img-decorative-alt", "images", "moderate",
        "Decorative image (role=presentation) should have alt=\"\"",
        fp, ln, snip, "1.1.1 Non-text Content",
        "Set alt=\"\" on decorative images with role=presentation.",
    )
# ---------- Forms -----------------------------------------------------------
def check_input_missing_label(tag, attrs, fp, ln, snip):
input_types = {"text", "email", "password", "search", "tel", "url", "number", "date"}
if tag == "input" and attrs.get("type", "text") in input_types:
if "aria-label" not in attrs and "aria-labelledby" not in attrs and "id" not in attrs:
return _find("form-input-no-label", "forms", "critical",
" has no id, aria-label, or aria-labelledby",
fp, ln, snip, "1.3.1 Info and Relationships",
"Add id +