#!/usr/bin/env python3
"""WCAG 2.2 Accessibility Scanner for Frontend Codebases.

Scans HTML, JSX, TSX, Vue, Svelte, and CSS files for accessibility violations
across 10 categories: images, forms, headings, landmarks, keyboard, ARIA,
color/contrast, links, tables, and media.

Usage:
    python a11y_scanner.py /path/to/project
    python a11y_scanner.py /path/to/project --json
    python a11y_scanner.py /path/to/project --severity critical,serious
    python a11y_scanner.py /path/to/project --format json
"""

import argparse
import json
import os
import re
import sys
from dataclasses import dataclass, asdict
from typing import List, Optional


@dataclass
class Finding:
    """A single accessibility finding."""

    rule_id: str  # stable rule identifier, e.g. "img-alt-missing"
    category: str  # rule group, e.g. "images"
    severity: str  # e.g. "critical", "serious", "moderate"
    message: str  # human-readable description of the problem
    file: str  # path of the file the finding was raised for
    line: int  # line number of the offending markup
    snippet: str  # trimmed source line shown alongside the finding
    wcag_criterion: str  # e.g. "1.1.1 Non-text Content"
    fix: str  # suggested remediation


# ---------------------------------------------------------------------------
# Rule definitions: each per-tag check below returns one Finding, or None
# when the rule does not apply to the tag it was given.
# ---------------------------------------------------------------------------

VALID_ARIA_ATTRS = {
    "aria-activedescendant", "aria-atomic", "aria-autocomplete", "aria-busy",
    "aria-checked", "aria-colcount", "aria-colindex", "aria-colspan",
    "aria-controls", "aria-current", "aria-describedby", "aria-details",
    "aria-disabled", "aria-dropeffect", "aria-errormessage", "aria-expanded",
    "aria-flowto", "aria-grabbed", "aria-haspopup", "aria-hidden",
    "aria-invalid", "aria-keyshortcuts", "aria-label", "aria-labelledby",
    "aria-level", "aria-live", "aria-modal", "aria-multiline",
    "aria-multiselectable", "aria-orientation", "aria-owns",
    "aria-placeholder", "aria-posinset", "aria-pressed", "aria-readonly",
    "aria-relevant", "aria-required", "aria-roledescription", "aria-rowcount",
    "aria-rowindex", "aria-rowspan", "aria-selected", "aria-setsize",
    "aria-sort", "aria-valuemax", "aria-valuemin", "aria-valuenow",
    "aria-valuetext", "aria-braillelabel", "aria-brailleroledescription",
    "aria-description",
}

# Element text that says nothing about the link target ("click here", ...).
BAD_LINK_TEXT = re.compile(
    r">\s*(click here|here|read more|more|link|this)\s*<", re.IGNORECASE
)
# Opening (or self-closing) tag: group 1 = tag name, group 2 = raw attributes.
TAG_RE = re.compile(r"<(\w[\w-]*)\b([^>]*)(/?)>", re.DOTALL)
# name=value attribute with a double-quoted, single-quoted or bare value.
ATTR_RE = re.compile(r"""([\w:.-]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))""")
# Valueless attribute name; applied after name=value pairs have been removed.
ATTR_BOOL_RE = re.compile(r"\b([\w:.-]+)(?=\s|/?>|$)")
INLINE_COLOR_RE = re.compile(
    r'style\s*=\s*["\'][^"\']*\bcolor\s*:', re.IGNORECASE
)
ARIA_ATTR_RE = re.compile(r"\baria-[\w-]+")

# Substrings of an image path that suggest the image is purely decorative.
_DECORATIVE_SRC_HINTS = ("spacer", "border", "decorat", "bg")


def _attrs(attr_str: str) -> dict:
    """Parse an HTML/JSX attribute string into a dict.

    Attributes written as ``name=value`` map to their string value (quotes
    removed, ``""`` for an empty value). Attributes written without a value
    map to ``True``.
    """
    result = {}
    for m in ATTR_RE.finditer(attr_str):
        result[m.group(1)] = m.group(2) or m.group(3) or m.group(4) or ""
    # Strip the name=value pairs first so their values are not mistaken for
    # valueless attribute names.
    cleaned = ATTR_RE.sub("", attr_str)
    for m in ATTR_BOOL_RE.finditer(cleaned):
        name = m.group(1)
        if name not in result:
            result[name] = True
    return result


def _snippet(line_text: str) -> str:
    """Return *line_text* without its line ending, capped at 120 chars plus "..."."""
    s = line_text.rstrip("\n\r")
    return s[:120] + "..." if len(s) > 120 else s


def _find(rule_id: str, cat: str, sev: str, msg: str, fp: str, ln: int,
          snip: str, wcag: str, fix: str) -> Finding:
    """Build a Finding from positional arguments (shorthand used by the checks)."""
    return Finding(rule_id, cat, sev, msg, fp, ln, snip, wcag, fix)


# ---------- Images ----------------------------------------------------------

def check_img_missing_alt(tag, attrs, fp, ln, snip) -> Optional[Finding]:
    """Flag an ``<img>`` that has no ``alt`` attribute at all.

    Returns a critical Finding, or None when the tag is not an image or an
    ``alt`` attribute is present (even an empty one).
    """
    if tag == "img" and "alt" not in attrs:
        return _find("img-alt-missing", "images", "critical",
                     "<img> missing alt attribute", fp, ln, snip,
                     "1.1.1 Non-text Content",
                     'Add alt="description" or alt="" for decorative images.')
    return None


def check_img_empty_alt_informative(tag, attrs, fp, ln, snip) -> Optional[Finding]:
    """Flag an ``<img alt="">`` whose ``src`` does not look decorative.

    Returns a serious Finding when the image has an empty ``alt``, a
    non-empty ``src``, and the ``src`` contains none of the decorative
    hints; otherwise returns None.
    """
    if tag != "img" or attrs.get("alt") != "":
        return None
    src = attrs.get("src", "")
    # A valueless `src` is parsed as True by _attrs(); there is no path to
    # inspect, and calling .lower() on it would raise AttributeError.
    if not isinstance(src, str) or not src:
        return None
    lowered = src.lower()
    if any(kw in lowered for kw in _DECORATIVE_SRC_HINTS):
        return None
    return _find("img-alt-empty-informative", "images", "serious",
                 "<img> has empty alt but may be informative", fp, ln, snip,
                 "1.1.1 Non-text Content",
                 "If image conveys information, add descriptive alt text.")


def check_img_decorative_has_alt(tag, attrs, fp, ln, snip) -> Optional[Finding]:
    """Flag an ``<img role="presentation">`` that carries non-empty ``alt``.

    Returns a moderate Finding, or None when the tag is not such an image or
    its ``alt`` is empty or absent.
    """
    if (tag == "img" and attrs.get("role") == "presentation"
            and attrs.get("alt", "") != ""):
        return _find("img-decorative-alt", "images", "moderate",
                     'Decorative image (role=presentation) should have alt=""',
                     fp, ln, snip, "1.1.1 Non-text Content",
                     'Set alt="" on decorative images with role=presentation.')
    return None