Built from scratch (replaces reverted PR #375 contribution). Skill package: - SKILL.md: 1132 lines, 3-phase workflow (scan → fix → verify), per-framework fix patterns (React, Next.js, Vue, Angular, Svelte, HTML), CI/CD integration guide, 20+ issue type coverage - scripts/a11y_scanner.py: static scanner detecting 20+ violation types across HTML/JSX/TSX/Vue/Svelte/CSS — severity-ranked, CI-friendly exit codes - scripts/contrast_checker.py: WCAG contrast calculator with AA/AAA checks, --suggest mode, --batch CSS scanning, named color support - references/wcag-quick-ref.md: WCAG 2.2 Level A/AA criteria table - references/aria-patterns.md: ARIA roles, live regions, keyboard interaction - references/framework-a11y-patterns.md: React, Vue, Angular, Svelte fix patterns - assets/sample-component.tsx: sample file with intentional violations - expected_outputs/: scan report, contrast output, JSON output samples - /a11y-audit slash command, settings.json, plugin.json, README.md Validation: 97.6/100 (EXCELLENT), quality 73.9/100 (B-), scripts 2/2 PASS Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
685 lines
29 KiB
Python
685 lines
29 KiB
Python
#!/usr/bin/env python3
|
|
"""WCAG 2.2 Accessibility Scanner for Frontend Codebases.
|
|
|
|
Scans HTML, JSX, TSX, Vue, Svelte, and CSS files for accessibility
|
|
violations across 10 categories: images, forms, headings, landmarks,
|
|
keyboard, ARIA, color/contrast, links, tables, and media.
|
|
|
|
Usage:
|
|
python a11y_scanner.py /path/to/project
|
|
python a11y_scanner.py /path/to/project --json
|
|
python a11y_scanner.py /path/to/project --severity critical,serious
|
|
python a11y_scanner.py /path/to/project --format json
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from dataclasses import dataclass, asdict
|
|
from typing import List, Optional
|
|
|
|
|
|
@dataclass
class Finding:
    """A single accessibility finding.

    One instance describes one rule violation at one location in a scanned
    file. Instances are created positionally by ``_find`` and converted with
    ``dataclasses.asdict`` for JSON output, so the field order below must not
    be changed.
    """
    rule_id: str  # rule identifier, e.g. "img-alt-missing"
    category: str  # check category, e.g. "images", "forms", "keyboard"
    severity: str  # "critical", "serious", "moderate" or "minor"
    message: str  # human-readable description of the problem
    file: str  # path of the scanned file
    line: int  # 1-based line number the finding is reported at
    snippet: str  # source line trimmed for display; may be empty
    wcag_criterion: str  # WCAG success criterion, e.g. "1.1.1 Non-text Content"
    fix: str  # suggested remediation text
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Rule definitions: each returns a list of Finding from a single file
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# aria-* attribute names accepted by check_invalid_aria. That check lowercases
# the attribute name before the lookup, so every entry here must be lowercase.
VALID_ARIA_ATTRS = {
    "aria-activedescendant", "aria-atomic", "aria-autocomplete", "aria-busy",
    "aria-checked", "aria-colcount", "aria-colindex", "aria-colspan",
    "aria-controls", "aria-current", "aria-describedby", "aria-details",
    "aria-disabled", "aria-dropeffect", "aria-errormessage", "aria-expanded",
    "aria-flowto", "aria-grabbed", "aria-haspopup", "aria-hidden",
    "aria-invalid", "aria-keyshortcuts", "aria-label", "aria-labelledby",
    "aria-level", "aria-live", "aria-modal", "aria-multiline",
    "aria-multiselectable", "aria-orientation", "aria-owns", "aria-placeholder",
    "aria-posinset", "aria-pressed", "aria-readonly", "aria-relevant",
    "aria-required", "aria-roledescription", "aria-rowcount", "aria-rowindex",
    "aria-rowspan", "aria-selected", "aria-setsize", "aria-sort",
    "aria-valuemax", "aria-valuemin", "aria-valuenow", "aria-valuetext",
    "aria-braillelabel", "aria-brailleroledescription", "aria-description",
}
|
|
|
|
# Vague text standing alone between ">" and "<". The pattern is not anchored
# to an <a> element, so it matches such text inside any element.
BAD_LINK_TEXT = re.compile(
    r">\s*(click here|here|read more|more|link|this)\s*<", re.IGNORECASE
)

# Opening tag: group 1 is the tag name, group 2 the raw attribute text up to
# the first ">". Group 2 is greedy, so it also swallows a trailing "/" and
# group 3 ends up empty.
TAG_RE = re.compile(r"<(\w[\w-]*)\b([^>]*)(/?)>", re.DOTALL)
# name=value attribute: group 2 is a double-quoted value, group 3 a
# single-quoted value, group 4 an unquoted run of non-whitespace.
ATTR_RE = re.compile(r"""([\w:.-]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))""")
# Bare attribute name followed by whitespace, ">" / "/>" or end of string.
ATTR_BOOL_RE = re.compile(r"\b([\w:.-]+)(?=\s|/?>|$)")
# NOTE(review): not referenced by any code visible in this file.
INLINE_COLOR_RE = re.compile(
    r'style\s*=\s*["\'][^"\']*\bcolor\s*:', re.IGNORECASE
)
# NOTE(review): not referenced by any code visible in this file.
ARIA_ATTR_RE = re.compile(r"\baria-[\w-]+")
|
|
|
|
|
|
def _attrs(attr_str: str) -> dict:
    """Turn the raw attribute text of a tag into a ``{name: value}`` mapping.

    ``name=value`` pairs map to their string value (quotes removed, empty
    string when the value is empty). Bare attribute names that carry no value
    map to ``True``. A valued attribute always wins over a bare one of the
    same name.
    """
    parsed = {}
    for match in ATTR_RE.finditer(attr_str):
        name, double_quoted, single_quoted, unquoted = match.groups()
        parsed[name] = double_quoted or single_quoted or unquoted or ""

    # Remove the valued attributes so only bare names are left to pick up.
    remainder = ATTR_RE.sub("", attr_str)
    for match in ATTR_BOOL_RE.finditer(remainder):
        flag = match.group(1)
        if flag.startswith("/") or flag in parsed:
            continue
        parsed[flag] = True
    return parsed
|
|
|
|
|
|
def _snippet(line_text: str) -> str:
|
|
"""Trim a line for display as a code snippet."""
|
|
s = line_text.rstrip("\n\r")
|
|
return s[:120] + "..." if len(s) > 120 else s
|
|
|
|
|
|
def _find(rule_id, cat, sev, msg, fp, ln, snip, wcag, fix):
    """Build a ``Finding`` from the short argument names used by the checks."""
    return Finding(
        rule_id=rule_id,
        category=cat,
        severity=sev,
        message=msg,
        file=fp,
        line=ln,
        snippet=snip,
        wcag_criterion=wcag,
        fix=fix,
    )
|
|
|
|
|
|
# ---------- Images ----------------------------------------------------------
|
|
|
|
def check_img_missing_alt(tag, attrs, fp, ln, snip):
    """Report an ``<img>`` that has no ``alt`` attribute at all.

    Returns a Finding, or None when the tag is not an image or ``alt`` is
    present (even if empty).
    """
    if tag != "img" or "alt" in attrs:
        return None
    return _find("img-alt-missing", "images", "critical",
                 "<img> missing alt attribute",
                 fp, ln, snip, "1.1.1 Non-text Content",
                 "Add alt=\"description\" or alt=\"\" for decorative images.")
|
|
|
|
def check_img_empty_alt_informative(tag, attrs, fp, ln, snip):
    """Report an ``<img alt="">`` whose source does not look decorative.

    An image is assumed decorative when its ``src`` contains one of the
    keywords "spacer", "border", "decorat" or "bg" (case-insensitive).

    Returns a Finding, or None when the tag is not an image, ``alt`` is not
    the empty string, there is no usable ``src``, or the source looks
    decorative.
    """
    if tag != "img" or attrs.get("alt") != "":
        return None
    src = attrs.get("src", "")
    # A bare `src` attribute is parsed as True; there is no path to inspect,
    # so treat it like a missing src instead of calling .lower() on a bool.
    if not isinstance(src, str) or not src:
        return None
    lowered = src.lower()
    if any(kw in lowered for kw in ("spacer", "border", "decorat", "bg")):
        return None
    return _find("img-alt-empty-informative", "images", "serious",
                 "<img> has empty alt but may be informative",
                 fp, ln, snip, "1.1.1 Non-text Content",
                 "If image conveys information, add descriptive alt text.")
|
|
|
|
def check_img_decorative_has_alt(tag, attrs, fp, ln, snip):
    """Report an ``<img role="presentation">`` that carries non-empty alt text.

    Returns a Finding, or None when the tag is not an image, the role is not
    "presentation", or ``alt`` is missing or empty.
    """
    if tag != "img":
        return None
    if attrs.get("role") != "presentation":
        return None
    if attrs.get("alt", "") == "":
        return None
    return _find("img-decorative-alt", "images", "moderate",
                 "Decorative image (role=presentation) should have alt=\"\"",
                 fp, ln, snip, "1.1.1 Non-text Content",
                 "Set alt=\"\" on decorative images with role=presentation.")
|
|
|
|
|
|
# ---------- Forms -----------------------------------------------------------
|
|
|
|
def check_input_missing_label(tag, attrs, fp, ln, snip):
    """Report a text-like ``<input>`` with no way to receive a label.

    Only the listed textual input types are considered; a missing ``type``
    counts as "text". The input passes when it has ``aria-label``,
    ``aria-labelledby`` or an ``id`` (which a ``<label for>`` could target).
    """
    if tag != "input":
        return None
    textual_types = {"text", "email", "password", "search", "tel", "url", "number", "date"}
    if attrs.get("type", "text") not in textual_types:
        return None
    if any(name in attrs for name in ("aria-label", "aria-labelledby", "id")):
        return None
    return _find("form-input-no-label", "forms", "critical",
                 "<input> has no id, aria-label, or aria-labelledby",
                 fp, ln, snip, "1.3.1 Info and Relationships",
                 "Add id + <label for>, or aria-label attribute.")
|
|
|
|
def check_input_no_aria_label(tag, attrs, fp, ln, snip):
    """Report a ``<select>`` or ``<textarea>`` with no way to receive a name.

    The element passes when it has ``aria-label``, ``aria-labelledby`` or an
    ``id`` (which a ``<label for>`` could target).
    """
    if tag not in ("select", "textarea"):
        return None
    if any(name in attrs for name in ("aria-label", "aria-labelledby", "id")):
        return None
    return _find("form-select-no-label", "forms", "critical",
                 f"<{tag}> has no accessible name",
                 fp, ln, snip, "4.1.2 Name, Role, Value",
                 f"Add aria-label or id + <label for> to <{tag}>.")
|
|
|
|
def check_orphan_label(lines, fp):
    """Report ``<label for="x">`` whose target id appears nowhere in the file.

    All quoted ``id="..."`` values in the file are collected first, so a label
    may precede the element it points to. Returns a list of findings in the
    order the labels occur.
    """
    id_pattern = re.compile(r'\bid\s*=\s*["\']([^"\']+)["\']')
    label_pattern = re.compile(r'<label[^>]*\bfor\s*=\s*["\']([^"\']+)["\']')

    known_ids = set()
    references = []
    for number, text in enumerate(lines, 1):
        known_ids.update(id_pattern.findall(text))
        references.extend(
            (number, target, text) for target in label_pattern.findall(text)
        )

    return [
        _find("form-orphan-label", "forms", "serious",
              f"<label for=\"{target}\"> references non-existent id",
              fp, number, _snippet(text), "1.3.1 Info and Relationships",
              f"Ensure an element with id=\"{target}\" exists.")
        for number, target, text in references
        if target not in known_ids
    ]
|
|
|
|
def check_fieldset_legend(lines, fp):
    """Report radio/checkbox inputs in a file that never mentions "fieldset".

    At most one finding is produced, anchored at the first line containing a
    quoted ``type="radio"`` or ``type="checkbox"``. Any occurrence of the word
    "fieldset" anywhere in the file (case-insensitive) suppresses it.
    """
    if any("fieldset" in text.lower() for text in lines):
        return []

    group_input = re.compile(r'type\s*=\s*["\'](?:radio|checkbox)["\']', re.I)
    first_hit = next(
        ((number, text) for number, text in enumerate(lines, 1)
         if group_input.search(text)),
        None,
    )
    if first_hit is None:
        return []

    number, text = first_hit
    return [
        _find("form-missing-fieldset", "forms", "serious",
              "Radio/checkbox group without <fieldset>/<legend>",
              fp, number, _snippet(text), "1.3.1 Info and Relationships",
              "Wrap related radio/checkbox inputs in <fieldset> with <legend>.")
    ]
|
|
|
|
|
|
# ---------- Headings --------------------------------------------------------
|
|
|
|
def check_headings(lines, fp):
    """Check the heading outline of a file.

    Produces, in this order:
    * "heading-missing-h1" when there is no ``<h1>`` but some heading of level
      3 or shallower exists (reported at the first heading);
    * "heading-multiple-h1" when more than one ``<h1>`` exists (reported at
      the second one);
    * "heading-skipped" for every heading that is more than one level deeper
      than the heading before it.

    Returns an empty list when the file has no headings.
    """
    heading_tag = re.compile(r"<[hH]([1-6])\b")
    headings = [
        (int(level), number, text)
        for number, text in enumerate(lines, 1)
        for level in heading_tag.findall(text)
    ]
    if not headings:
        return []

    criterion = "1.3.1 Info and Relationships"
    report = []

    # Missing h1
    distinct_levels = {level for level, _, _ in headings}
    if 1 not in distinct_levels and min(distinct_levels) <= 3:
        _, first_number, first_text = headings[0]
        report.append(_find("heading-missing-h1", "headings", "serious",
                            "Page has headings but no <h1>",
                            fp, first_number, _snippet(first_text),
                            criterion,
                            "Add a single <h1> as the main page heading."))

    # Multiple h1s
    top_level = [(number, text) for level, number, text in headings if level == 1]
    if len(top_level) > 1:
        second_number, second_text = top_level[1]
        report.append(_find("heading-multiple-h1", "headings", "moderate",
                            f"Page has {len(top_level)} <h1> elements",
                            fp, second_number, _snippet(second_text),
                            criterion,
                            "Use a single <h1> per page. Demote others to <h2>+."))

    # Skipped levels: compare each heading with its predecessor.
    for (before, _, _), (level, number, text) in zip(headings, headings[1:]):
        if level > before + 1:
            report.append(_find("heading-skipped", "headings", "moderate",
                                f"Heading level skips from h{before} to h{level}",
                                fp, number, _snippet(text),
                                criterion,
                                f"Use <h{before + 1}> instead of <h{level}>."))
    return report
|
|
|
|
|
|
# ---------- Landmarks -------------------------------------------------------
|
|
|
|
def check_landmarks(lines, fp):
    """Report missing page-level landmarks.

    The whole file is searched (case-insensitively) for a main landmark, a
    navigation landmark and something that looks like a skip link. Each one
    that is absent yields a finding at line 1 with an empty snippet, in that
    order.
    """
    content = "\n".join(lines)
    required = (
        (r'<main\b|role\s*=\s*["\']main["\']',
         "landmark-no-main", "serious",
         "Page missing <main> landmark",
         "1.3.1 Info and Relationships",
         "Add a <main> element to wrap primary content."),
        (r'<nav\b|role\s*=\s*["\']navigation["\']',
         "landmark-no-nav", "moderate",
         "Page missing <nav> landmark",
         "1.3.1 Info and Relationships",
         "Add <nav> for primary navigation blocks."),
        (r'skip.{0,10}(nav|main|content)',
         "landmark-no-skip-link", "serious",
         "Page missing skip navigation link",
         "2.4.1 Bypass Blocks",
         "Add <a href=\"#main\">Skip to main content</a> as first focusable element."),
    )
    return [
        _find(rule_id, "landmarks", severity, message, fp, 1, "", criterion, remedy)
        for pattern, rule_id, severity, message, criterion, remedy in required
        if not re.search(pattern, content, re.I)
    ]
|
|
|
|
|
|
# ---------- Keyboard --------------------------------------------------------
|
|
|
|
def check_tabindex_positive(tag, attrs, fp, ln, snip):
    """Report an element whose tabindex is a positive integer.

    Both the HTML spelling ``tabindex`` and the JSX spelling ``tabIndex`` are
    read, and a JSX expression value such as ``{5}`` is unwrapped before
    parsing, in line with the other checks that accept camelCase names.

    Returns a Finding, or None when there is no tabindex, it has no value, it
    is not an integer, or it is zero or negative.
    """
    raw = attrs.get("tabindex", attrs.get("tabIndex", ""))
    if not isinstance(raw, str):
        # Bare attribute without a value is parsed as True.
        return None
    value = raw.strip()
    if value.startswith("{") and value.endswith("}"):
        value = value[1:-1].strip()
    # Let int() decide what is numeric: a pre-check with str.isdigit() accepts
    # inputs such as "--5" or "²" that int() then rejects with ValueError.
    try:
        order = int(value)
    except ValueError:
        return None
    if order <= 0:
        return None
    return _find("keyboard-tabindex-positive", "keyboard", "serious",
                 f"tabindex={value} creates unexpected tab order",
                 fp, ln, snip, "2.4.3 Focus Order",
                 "Use tabindex=\"0\" or tabindex=\"-1\" instead of positive values.")
|
|
|
|
def check_click_no_keyboard(tag, attrs, fp, ln, snip):
    """Report a non-interactive element with a click handler but no key handler.

    Applies to div, span, td, li, p and section. Click handlers are recognised
    in React (onClick), HTML (onclick), Vue (@click) and Svelte (on:click)
    spellings. Any attribute whose name contains keydown, keyup or keypress
    counts as a keyboard handler. Elements with role button, link, tab or
    menuitem are exempt.
    """
    key_events = ("keydown", "keyup", "keypress")
    has_key = any(
        event in name.lower() for name in attrs for event in key_events
    )
    has_click = any(
        name in attrs for name in ("onClick", "onclick", "@click", "on:click")
    )

    if tag not in ("div", "span", "td", "li", "p", "section"):
        return None
    if not has_click or has_key:
        return None
    if attrs.get("role") in ("button", "link", "tab", "menuitem"):
        return None
    return _find("keyboard-click-no-key", "keyboard", "critical",
                 f"<{tag}> has click handler but no keyboard handler",
                 fp, ln, snip, "2.1.1 Keyboard",
                 f"Add onKeyDown handler or use <button> instead of <{tag}>.")
|
|
|
|
def check_autofocus_misuse(tag, attrs, fp, ln, snip):
    """Report ``autofocus`` / ``autoFocus`` on anything but a form control.

    input, textarea and select are allowed to autofocus; every other tag with
    the attribute yields a finding.
    """
    requests_focus = "autofocus" in attrs or "autoFocus" in attrs
    if not requests_focus or tag in ("input", "textarea", "select"):
        return None
    return _find("keyboard-autofocus", "keyboard", "moderate",
                 f"autofocus on <{tag}> can disorient screen reader users",
                 fp, ln, snip, "3.2.1 On Focus",
                 "Avoid autofocus on non-input elements. Use focus management instead.")
|
|
|
|
|
|
# ---------- ARIA ------------------------------------------------------------
|
|
|
|
def check_invalid_aria(tag, attrs, fp, ln, snip):
    """Report every ``aria-*`` attribute on a tag that is not a known name.

    Attribute names are lowercased before being compared with
    VALID_ARIA_ATTRS. Returns a list with one finding per unknown attribute
    (empty when all are valid or the tag has none).
    """
    return [
        _find("aria-invalid-attr", "aria", "serious",
              f"Invalid ARIA attribute: {name}",
              fp, ln, snip, "4.1.2 Name, Role, Value",
              f"Remove or replace \"{name}\" with a valid ARIA attribute.")
        for name in attrs
        if name.startswith("aria-") and name.lower() not in VALID_ARIA_ATTRS
    ]
|
|
|
|
def check_aria_hidden_focusable(tag, attrs, fp, ln, snip):
    """Report an element that is hidden from assistive tech yet focusable.

    An element counts as hidden when ``aria-hidden`` is "true", the JSX form
    "{true}", or a bare attribute. It counts as focusable when it is a native
    control (a, button, input, select, textarea) or carries a tabindex value.

    An explicit ``tabindex="-1"`` (or JSX ``tabIndex={-1}``) takes the element
    out of the tab order and is the remedy this rule itself recommends, so
    such elements are not reported.

    Returns a Finding or None.
    """
    if attrs.get("aria-hidden") not in ("true", True, "{true}"):
        return None

    tabindex = attrs.get("tabindex", attrs.get("tabIndex"))
    # A bare tabindex attribute parses to True and carries no usable value.
    has_tabindex_value = isinstance(tabindex, str)
    if has_tabindex_value and tabindex.strip().strip("{}").strip() == "-1":
        return None

    natively_focusable = tag in {"a", "button", "input", "select", "textarea"}
    if not natively_focusable and not has_tabindex_value:
        return None
    return _find("aria-hidden-focusable", "aria", "critical",
                 f"aria-hidden=\"true\" on focusable <{tag}>",
                 fp, ln, snip, "4.1.2 Name, Role, Value",
                 "Remove aria-hidden or make element non-focusable (tabindex=\"-1\").")
|
|
|
|
def check_aria_live_missing(lines, fp):
    """Report ``role="alert"`` / ``role="status"`` lines lacking ``aria-live``.

    The test is per line: the role must be quoted, and the literal text
    "aria-live" must appear on the same line to suppress the finding.
    """
    live_role = re.compile(r'role\s*=\s*["\'](?:alert|status)["\']', re.I)
    return [
        _find("aria-live-missing", "aria", "serious",
              "role=alert/status without explicit aria-live",
              fp, number, _snippet(text),
              "4.1.3 Status Messages",
              "Add aria-live=\"assertive\" (alert) or aria-live=\"polite\" (status).")
        for number, text in enumerate(lines, 1)
        if live_role.search(text) and "aria-live" not in text
    ]
|
|
|
|
|
|
# ---------- Color/Contrast --------------------------------------------------
|
|
|
|
def check_inline_color(tag, attrs, fp, ln, snip):
    """Report an inline ``style`` that sets text colour but no background.

    Only the ``color`` property itself counts. Longer property names that end
    in "color" (border-color, outline-color, background-color, ...) do not
    set the text colour and are ignored. Any mention of "background" in the
    style suppresses the finding.

    Returns a Finding, or None when there is no string-valued style, no text
    colour, or a background is present.
    """
    style = attrs.get("style", "")
    if not isinstance(style, str):
        return None
    # The look-behind rejects "color" when it is the tail of a longer
    # property name; a plain \b would match after the hyphen.
    if not re.search(r"(?<![\w-])color\s*:", style, re.I):
        return None
    if re.search(r"background", style, re.I):
        return None
    return _find("color-inline-no-bg", "color", "moderate",
                 "Inline color set without background — contrast may be insufficient",
                 fp, ln, snip, "1.4.3 Contrast (Minimum)",
                 "Ensure foreground and background colors meet 4.5:1 contrast ratio.")
|
|
|
|
def check_text_over_image(lines, fp):
    """Report ``background-image:`` declarations with no sign of an overlay.

    The test is per line: a line containing ``background-image:`` is reported
    unless the same line also mentions "overlay", "rgba" or "linear-gradient"
    (case-insensitive).
    """
    declares_image = re.compile(r"background-image\s*:", re.I)
    has_overlay = re.compile(r"(overlay|rgba|linear-gradient)", re.I)
    return [
        _find("color-text-over-image", "color", "serious",
              "Background image without contrast overlay for text",
              fp, number, _snippet(text),
              "1.4.3 Contrast (Minimum)",
              "Add a semi-transparent overlay or ensure text contrast.")
        for number, text in enumerate(lines, 1)
        if declares_image.search(text) and not has_overlay.search(text)
    ]
|
|
|
|
|
|
# ---------- Links -----------------------------------------------------------
|
|
|
|
def check_empty_link(tag, attrs, fp, ln, snip):
    """Tag-level placeholder for the empty-link rule; always returns None.

    Empty links are detected by the line-level check instead, because the
    tag-level view does not include the link's text content.
    """
    if tag != "a":
        return None
    if attrs.get("aria-label") or attrs.get("aria-labelledby"):
        return None
    return None  # handled by line-level check below
|
|
|
|
def check_empty_links_line(lines, fp):
    """Report empty links and vague link text, one line at a time.

    * "link-empty": an ``<a ...></a>`` pair with only whitespace between the
      tags, on a line that mentions neither aria-label nor aria-labelledby.
    * "link-bad-text": the line matches BAD_LINK_TEXT.

    A line can produce both findings; the empty-link one comes first.
    """
    empty_anchor = re.compile(r"<a\b[^>]*>\s*</a>", re.I)
    report = []
    for number, text in enumerate(lines, 1):
        # <a ...></a> or <a ...> </a>
        is_named = "aria-label" in text or "aria-labelledby" in text
        if not is_named and empty_anchor.search(text):
            report.append(_find("link-empty", "links", "critical",
                                "Empty link — no text or accessible name",
                                fp, number, _snippet(text), "2.4.4 Link Purpose",
                                "Add link text or aria-label."))
        # Bad link text
        if BAD_LINK_TEXT.search(text):
            report.append(_find("link-bad-text", "links", "serious",
                                "Link uses vague text like 'click here'",
                                fp, number, _snippet(text), "2.4.4 Link Purpose",
                                "Use descriptive link text that makes sense out of context."))
    return report
|
|
|
|
def check_same_page_link(tag, attrs, fp, ln, snip):
    """Report an ``<a>`` whose href is exactly ``#``.

    Returns a Finding, or None for other tags and any other href value
    (including a bare ``href`` attribute).
    """
    if tag != "a" or attrs.get("href", "") != "#":
        return None
    return _find("link-empty-fragment", "links", "moderate",
                 "Link with href=\"#\" — use a button or valid fragment",
                 fp, ln, snip, "2.4.4 Link Purpose",
                 "Use <button> for actions or href=\"#section-id\" for anchors.")
|
|
|
|
|
|
# ---------- Tables ----------------------------------------------------------
|
|
|
|
def check_table_headers(lines, fp):
    """Report tables without header cells or without a caption/label.

    The file is scanned line by line. State is reset at every ``<table`` and
    evaluated on the line containing ``</table>``:

    * "table-no-headers" when no ``<th>`` cell was seen;
    * "table-no-caption" when neither a ``<caption>`` was seen nor the
      ``<table`` line itself mentions aria-label.

    Both findings are anchored at the line where the table opened. A table
    that is never closed in the file is not reported.
    """
    findings = []
    in_table = False
    table_start = 0
    has_th = False
    has_caption = False
    has_aria_label = False
    for ln, line in enumerate(lines, 1):
        if re.search(r"<table\b", line, re.I):
            in_table = True
            table_start = ln
            has_th = False
            has_caption = False
            has_aria_label = "aria-label" in line
        if not in_table:
            continue
        # Require a word boundary after "th": a plain substring test would
        # also accept <thead>, which is a row group, not a header cell.
        if re.search(r"<th\b", line, re.I):
            has_th = True
        if "<caption" in line.lower():
            has_caption = True
        if re.search(r"</table>", line, re.I):
            if not has_th:
                findings.append(_find("table-no-headers", "tables", "serious",
                                      "<table> has no <th> header cells",
                                      fp, table_start, _snippet(lines[table_start - 1]),
                                      "1.3.1 Info and Relationships",
                                      "Add <th> elements to identify column/row headers."))
            if not has_caption and not has_aria_label:
                findings.append(_find("table-no-caption", "tables", "moderate",
                                      "<table> missing <caption> or aria-label",
                                      fp, table_start, _snippet(lines[table_start - 1]),
                                      "1.3.1 Info and Relationships",
                                      "Add <caption> or aria-label to describe the table."))
            in_table = False
    return findings
|
|
|
|
|
|
# ---------- Media -----------------------------------------------------------
|
|
|
|
def check_media_captions(tag, attrs, fp, ln, snip):
    """Tag-level placeholder for the video caption rule; always returns None.

    Caption tracks are child elements of ``<video>``, so the rule is
    implemented by the block-level check that reads the lines between the
    opening and closing tags.
    """
    return None if tag == "video" else None  # handled at block level
|
|
|
|
def check_media_captions_block(lines, fp):
    """Check ``<video>`` blocks and ``<audio>`` tags for captions and controls.

    Video: state is reset at every ``<video`` and evaluated on the line with
    ``</video>`` (or on the opening line itself when it also contains "/>").
    Every line of the block is searched for a captions ``<track>``, and for
    the words "controls" and "autoplay" (case-insensitive), so an opening tag
    whose attributes are spread over several lines is read completely.

    * "media-no-captions" when no ``<track kind="captions">`` was seen;
    * "media-autoplay-no-controls" when autoplay was seen without controls.

    Audio: a line containing ``<audio`` with "autoplay" but without
    "controls" yields "media-audio-autoplay".

    Video findings are anchored at the line where the video opened.
    """
    findings = []
    in_video = False
    video_start = 0
    has_track = False
    has_controls = False
    has_autoplay = False
    for ln, line in enumerate(lines, 1):
        lowered = line.lower()
        opens_video = re.search(r"<video\b", line, re.I) is not None
        if opens_video:
            in_video = True
            video_start = ln
            has_track = False
            has_controls = False
            has_autoplay = False
        if not in_video:
            continue
        if re.search(r'<track\b[^>]*kind\s*=\s*["\']captions["\']', line, re.I):
            has_track = True
        if "controls" in lowered:
            has_controls = True
        # Accumulate autoplay the same way as controls; reading it only from
        # the opening line misses attributes placed on following lines.
        if "autoplay" in lowered:
            has_autoplay = True
        if re.search(r"</video>", line, re.I) or (opens_video and "/>" in line):
            if not has_track:
                findings.append(_find("media-no-captions", "media", "critical",
                                      "<video> missing captions track",
                                      fp, video_start, _snippet(lines[video_start - 1]),
                                      "1.2.2 Captions (Prerecorded)",
                                      "Add <track kind=\"captions\" src=\"...\" srclang=\"en\">."))
            if has_autoplay and not has_controls:
                findings.append(_find("media-autoplay-no-controls", "media", "serious",
                                      "<video> has autoplay without controls",
                                      fp, video_start, _snippet(lines[video_start - 1]),
                                      "1.4.2 Audio Control",
                                      "Add the controls attribute so users can pause/stop."))
            in_video = False

    # Audio tags are judged on the single line that contains "<audio".
    for ln, line in enumerate(lines, 1):
        if re.search(r"<audio\b", line, re.I):
            lowered = line.lower()
            if "autoplay" in lowered and "controls" not in lowered:
                findings.append(_find("media-audio-autoplay", "media", "serious",
                                      "<audio> has autoplay without controls",
                                      fp, ln, _snippet(line), "1.4.2 Audio Control",
                                      "Add the controls attribute to <audio>."))
    return findings
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Scanner engine
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# File extensions (lowercase, with leading dot) that collect_files picks up.
SUPPORTED_EXTENSIONS = {".html", ".htm", ".jsx", ".tsx", ".vue", ".svelte", ".css"}

# Checks called once per matched tag as check(tag, attrs, filepath, ln, snip);
# each returns a single Finding or None. check_empty_link and
# check_media_captions are not registered here.
TAG_LEVEL_CHECKS = [
    check_img_missing_alt,
    check_img_empty_alt_informative,
    check_img_decorative_has_alt,
    check_input_missing_label,
    check_input_no_aria_label,
    check_tabindex_positive,
    check_click_no_keyboard,
    check_autofocus_misuse,
    check_aria_hidden_focusable,
    check_inline_color,
    check_same_page_link,
]

# Same call signature as above, but each check returns a list of Findings.
TAG_LEVEL_MULTI_CHECKS = [
    check_invalid_aria,
]
|
|
|
|
|
|
def scan_file(filepath: str) -> List[Finding]:
    """Scan a single file and return all findings.

    The file is read as UTF-8 with undecodable bytes replaced. A file that
    cannot be opened or read produces a warning on stderr and an empty result
    rather than an exception, so one bad file does not abort a whole scan.

    Stylesheets (``.css``) only get the background-image check. The markup
    checks would otherwise report every stylesheet as lacking landmarks and
    would misread attribute selectors such as ``[type="radio"]`` as markup.

    All other files get the tag-level checks for every tag matched on each
    line, followed by the file-level checks.
    """
    findings: List[Finding] = []
    try:
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            lines = f.readlines()
    except OSError as exc:
        print(f"Warning: could not read {filepath}: {exc}", file=sys.stderr)
        return findings

    if os.path.splitext(filepath)[1].lower() == ".css":
        findings.extend(check_text_over_image(lines, filepath))
        return findings

    # Tag-level checks
    for ln, line in enumerate(lines, 1):
        snip = None  # built lazily: most lines contain no tag at all
        for m in TAG_RE.finditer(line):
            if snip is None:
                snip = _snippet(line)
            tag = m.group(1).lower()
            attrs = _attrs(m.group(2))
            for check in TAG_LEVEL_CHECKS:
                result = check(tag, attrs, filepath, ln, snip)
                if result:
                    findings.append(result)
            for check in TAG_LEVEL_MULTI_CHECKS:
                results = check(tag, attrs, filepath, ln, snip)
                if results:
                    findings.extend(results)

    # File-level / multi-line checks
    findings.extend(check_orphan_label(lines, filepath))
    findings.extend(check_fieldset_legend(lines, filepath))
    findings.extend(check_headings(lines, filepath))
    findings.extend(check_landmarks(lines, filepath))
    findings.extend(check_aria_live_missing(lines, filepath))
    findings.extend(check_text_over_image(lines, filepath))
    findings.extend(check_empty_links_line(lines, filepath))
    findings.extend(check_table_headers(lines, filepath))
    findings.extend(check_media_captions_block(lines, filepath))

    return findings
|
|
|
|
|
|
def collect_files(path: str) -> List[str]:
    """Return the scannable files at or below *path*, sorted by path.

    When *path* is a file, the result is that single file if its extension is
    supported, otherwise an empty list. When it is a directory, the tree is
    walked and dependency/build/VCS directories are not descended into.
    """
    def _is_scannable(name):
        return os.path.splitext(name)[1].lower() in SUPPORTED_EXTENSIONS

    if os.path.isfile(path):
        return [path] if _is_scannable(path) else []

    skipped_dirs = (
        "node_modules", ".git", "dist", "build", "__pycache__",
        ".next", ".nuxt", "vendor", "coverage"
    )
    found = []
    for root, dirs, filenames in os.walk(path):
        # Pruning in place stops os.walk from entering these directories.
        dirs[:] = [name for name in dirs if name not in skipped_dirs]
        found.extend(
            os.path.join(root, name) for name in filenames if _is_scannable(name)
        )
    found.sort()
    return found
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Output formatting
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Sort rank per severity for the text report: lower numbers are listed first.
SEVERITY_ORDER = {"critical": 0, "serious": 1, "moderate": 2, "minor": 3}
|
|
|
|
|
|
def format_human(findings: List[Finding], files_scanned: int) -> str:
    """Render findings as a plain-text report.

    The report has a header with totals, a count per severity (most severe
    first), a count per category (largest first), and then one entry per
    finding ordered by severity, file and line. With no findings a short
    two-line "all passed" message is returned instead.
    """
    if not findings:
        return (f"Scanned {files_scanned} file(s) -- no accessibility issues found.\n"
                "All checks passed.")

    by_severity = {}
    by_category = {}
    for item in findings:
        by_severity[item.severity] = by_severity.get(item.severity, 0) + 1
        by_category[item.category] = by_category.get(item.category, 0) + 1

    out = [
        "WCAG 2.2 Accessibility Scan Results",
        "=" * 50,
        f"Files scanned: {files_scanned}",
        f"Issues found: {len(findings)}",
    ]

    # Summary by severity
    out.extend(
        f" {sev.upper():10s}: {by_severity[sev]}"
        for sev in ("critical", "serious", "moderate", "minor")
        if sev in by_severity
    )
    out.append("")

    # Summary by category
    out.append("By category:")
    out.extend(
        f" {cat:20s}: {by_category[cat]}"
        for cat in sorted(by_category, key=lambda c: -by_category[c])
    )
    out.append("")

    # Detailed findings sorted by severity then file
    ordered = sorted(
        findings,
        key=lambda item: (SEVERITY_ORDER.get(item.severity, 9), item.file, item.line),
    )
    for item in ordered:
        out.append(f"[{item.severity.upper()}] {item.rule_id}")
        out.append(f" File: {item.file}:{item.line}")
        out.append(f" WCAG: {item.wcag_criterion}")
        out.append(f" Issue: {item.message}")
        if item.snippet:
            out.append(f" Code: {item.snippet}")
        out.append(f" Fix: {item.fix}")
        out.append("")

    return "\n".join(out)
|
|
|
|
|
|
def format_json(findings: List[Finding], files_scanned: int) -> str:
    """Render findings as a JSON document (2-space indent).

    The document has a "summary" object (files scanned, total issues, count
    per severity) and a "findings" array with one object per finding, in the
    order given.
    """
    tally = {}
    for item in findings:
        tally.setdefault(item.severity, 0)
        tally[item.severity] += 1

    summary = {
        "files_scanned": files_scanned,
        "total_issues": len(findings),
        "by_severity": tally,
    }
    document = {
        "summary": summary,
        "findings": [asdict(item) for item in findings],
    }
    return json.dumps(document, indent=2)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the scanner.

    Arguments: a required ``path``, ``--json`` (flag), ``--format`` with the
    choices text/json, and ``--severity`` taking a comma-separated list.
    """
    help_footer = (
        "Supported file types: .html, .htm, .jsx, .tsx, .vue, .svelte, .css\n"
        "Exit codes: 0 = pass, 1 = critical/serious found, 2 = moderate/minor only"
    )
    cli = argparse.ArgumentParser(
        prog="a11y_scanner",
        description="Scan frontend codebases for WCAG 2.2 accessibility violations.",
        epilog=help_footer,
        # Keep the line break in the epilog as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument("path", help="File or directory to scan")
    cli.add_argument(
        "--json",
        action="store_true",
        dest="json_flag",
        help="Output results as JSON (shorthand for --format json)",
    )
    cli.add_argument(
        "--format",
        choices=["text", "json"],
        default="text",
        dest="output_format",
        help="Output format: text (default) or json",
    )
    cli.add_argument(
        "--severity",
        default=None,
        dest="severity",
        help="Comma-separated severity filter (e.g. critical,serious)",
    )
    return cli
|
|
|
|
|
|
def main():
    """Command-line entry point: scan, print a report, exit with a status.

    Exit status: 1 when the path does not exist or any critical/serious
    finding remains after filtering, 2 when only moderate/minor findings
    remain, 0 otherwise (including when no scannable files were found).

    Unknown names passed to ``--severity`` are reported on stderr. They still
    match nothing, but a typo no longer silently turns the run into a pass.
    """
    parser = build_parser()
    args = parser.parse_args()

    path = os.path.abspath(args.path)
    if not os.path.exists(path):
        print(f"Error: path does not exist: {path}", file=sys.stderr)
        sys.exit(1)

    use_json = args.json_flag or args.output_format == "json"

    # Collect and scan files
    files = collect_files(path)
    if not files:
        print(f"No scannable files found in: {path}", file=sys.stderr)
        sys.exit(0)

    all_findings: List[Finding] = []
    for fpath in files:
        all_findings.extend(scan_file(fpath))

    # Filter by severity if requested
    if args.severity:
        allowed = {s.strip().lower() for s in args.severity.split(",")}
        unknown = sorted(name for name in allowed if name and name not in SEVERITY_ORDER)
        if unknown:
            print(
                f"Warning: ignoring unknown severity level(s): {', '.join(unknown)} "
                f"(valid: {', '.join(SEVERITY_ORDER)})",
                file=sys.stderr,
            )
        all_findings = [f for f in all_findings if f.severity in allowed]

    # Output
    if use_json:
        print(format_json(all_findings, len(files)))
    else:
        print(format_human(all_findings, len(files)))

    # Exit code
    severities = {f.severity for f in all_findings}
    if severities & {"critical", "serious"}:
        sys.exit(1)
    elif severities & {"moderate", "minor"}:
        sys.exit(2)
    else:
        sys.exit(0)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|