Files
daymade 2192458ef7 release: add scrapling-skill and fix script compatibility
- add scrapling-skill with validated CLI workflow, diagnostics, packaging, and docs integration
- fix skill-creator package_skill.py so direct script invocation works from repo root
- fix continue-claude-work extract_resume_context.py typing compatibility for local python3
- bump marketplace to 1.39.0 and update skill versions
2026-03-18 23:08:55 +08:00

192 lines
5.3 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Diagnose a local Scrapling CLI installation and optionally run a smoke test.
"""
import argparse
import os
import shlex
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Iterable, List, Tuple
def run_command(cmd: List[str]) -> Tuple[int, str, str]:
    """Run *cmd* without a shell and capture its outcome.

    Args:
        cmd: Executable name or path followed by its arguments.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple with both streams decoded
        as text. When the process cannot be started at all, no exception is
        raised; instead the tuple is ``(127, "", message)`` if the
        executable was not found, or ``(126, "", message)`` for any other
        ``OSError`` (for example a permission problem), so callers can
        report the failure like any other non-zero exit.
    """
    try:
        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Text-mode output; this is the older spelling of ``text=True``.
            universal_newlines=True,
            # A non-zero exit is data for the caller, not an error here.
            check=False,
        )
    except FileNotFoundError as err:
        return 127, "", str(err)
    except OSError as err:
        return 126, "", str(err)
    return result.returncode, result.stdout, result.stderr
def print_section(title: str) -> None:
    """Print *title* as a section heading.

    The output is an empty line, the title itself, and then a row of
    dashes exactly as long as the title.
    """
    underline = "-" * len(title)
    print("\n".join(["", title, underline]))
def existing_dirs(paths: Iterable[Path]) -> List[str]:
    """Return the string form of every entry in *paths* that is a directory.

    Paths that do not exist, and paths that exist but are not directories
    (for example a stray file whose name happens to match), are dropped.
    The input order is preserved.

    Args:
        paths: Candidate filesystem paths.

    Returns:
        ``str(path)`` for each candidate that is an existing directory.
    """
    return [str(path) for path in paths if path.is_dir()]
def detect_browser_cache() -> Tuple[List[str], List[str]]:
    """Locate downloaded Chromium builds in the Playwright browser cache.

    When the ``PLAYWRIGHT_BROWSERS_PATH`` environment variable names a
    directory, only that directory is searched. Otherwise (including the
    special value ``0``, whose install location cannot be derived here)
    the per-user ``ms-playwright`` cache folders for macOS, Linux and
    Windows are searched.

    Returns:
        A ``(chromium, headless_shell)`` pair of lists holding the
        directories matching ``chromium-*`` and
        ``chromium_headless_shell-*`` respectively, sorted within each
        searched root.
    """
    override = os.environ.get("PLAYWRIGHT_BROWSERS_PATH", "").strip()
    if override and override != "0":
        roots = [Path(override)]
    else:
        home = Path.home()
        roots = [
            home / "Library" / "Caches" / "ms-playwright",  # macOS
            home / ".cache" / "ms-playwright",  # Linux
            home / "AppData" / "Local" / "ms-playwright",  # Windows
        ]
    chromium = []
    headless_shell = []
    for root in roots:
        if not root.is_dir():
            continue
        # "chromium-*" cannot match "chromium_headless_shell-*" (underscore
        # instead of hyphen), so the two lists never overlap.
        chromium.extend(existing_dirs(sorted(root.glob("chromium-*"))))
        headless_shell.extend(
            existing_dirs(sorted(root.glob("chromium_headless_shell-*")))
        )
    return chromium, headless_shell
def diagnose_cli() -> bool:
    """Report whether the ``scrapling`` command is installed and runnable.

    Prints a "CLI" section. If the executable is not on PATH, an install
    hint is printed. Otherwise ``scrapling --help`` is executed; on a
    non-zero exit the combined output is scanned for the known
    "installed without extras" messages and at most 1200 characters of it
    are echoed.

    Returns:
        True only when ``scrapling --help`` exits with status 0.
    """
    print_section("CLI")

    located = shutil.which("scrapling")
    if not located:
        print("status: missing")
        print("fix: install with `uv tool install 'scrapling[shell]'`")
        return False
    print("path: {0}".format(located))

    exit_code, out_text, err_text = run_command(["scrapling", "--help"])
    if exit_code == 0:
        print("status: working")
        return True

    print("status: broken")
    combined = (out_text + "\n" + err_text).strip()
    lowered = combined.lower()
    missing_extras_markers = (
        "install scrapling with any of the extras",
        "no module named 'click'",
    )
    if any(marker in lowered for marker in missing_extras_markers):
        print("cause: installed without CLI extras")
        print("fix: `uv tool uninstall scrapling` then `uv tool install 'scrapling[shell]'`")
    else:
        print("cause: unknown")

    if combined:
        print("details:")
        # Cap the echo so a long traceback does not flood the report.
        print(combined[:1200])
    return False
def diagnose_browsers() -> None:
    """Print a "Browser Runtime" section listing cached browser builds.

    For both Chromium and the headless shell, prints "present" or
    "missing" followed by each directory found. If either one is missing,
    a hint to run ``scrapling install`` is printed.
    """
    print_section("Browser Runtime")
    chromium_dirs, shell_dirs = detect_browser_cache()

    report = (
        ("chromium: {0}", chromium_dirs),
        ("chrome-headless-shell: {0}", shell_dirs),
    )
    for template, found in report:
        print(template.format("present" if found else "missing"))
        for entry in found:
            print(" - {0}".format(entry))

    if not (chromium_dirs and shell_dirs):
        print("hint: run `scrapling install` before browser-backed fetches")
def preview_file(path: Path, preview_lines: int) -> None:
    """Print a "Smoke Test Output" section describing the file at *path*.

    Reports a missing or empty file, otherwise the path and its size in
    bytes. Files with a ``.md`` or ``.txt`` suffix additionally get their
    first *preview_lines* lines printed with trailing whitespace removed.

    Args:
        path: Output file to inspect.
        preview_lines: Maximum number of lines to echo.
    """
    print_section("Smoke Test Output")
    if not path.exists():
        print("status: missing output file")
        return

    byte_count = path.stat().st_size
    print("path: {0}".format(path))
    print("bytes: {0}".format(byte_count))
    if byte_count == 0:
        print("status: empty")
        return

    # Only text-like outputs are previewed; anything else stops here.
    if path.suffix not in (".md", ".txt"):
        return

    print("preview:")
    remaining = preview_lines
    with path.open("r", encoding="utf-8", errors="replace") as stream:
        for raw_line in stream:
            if remaining <= 0:
                break
            print(raw_line.rstrip())
            remaining -= 1
def run_smoke_test(args: argparse.Namespace) -> int:
    """Run one ``scrapling extract`` call and summarise what it produced.

    Args:
        args: Parsed options. Reads ``url``, ``selector``, ``dynamic``,
            ``timeout``, ``no_verify`` and ``preview_lines``.

    Returns:
        The exit status reported for the ``scrapling`` command.
    """
    print_section("Smoke Test")

    suffix = ".md" if args.selector else ".html"
    # NOTE: the output name is fixed, so concurrent runs overwrite each other.
    output_path = Path(tempfile.gettempdir()) / ("scrapling-smoke" + suffix)
    # Drop any previous result so a stale file is never mistaken for new
    # output; a file that is already absent is the desired state.
    try:
        output_path.unlink()
    except FileNotFoundError:
        pass

    mode = "fetch" if args.dynamic else "get"
    cmd = ["scrapling", "extract", mode, args.url, str(output_path)]
    if args.selector:
        cmd.extend(["-s", args.selector])
    if args.dynamic:
        cmd.extend(["--timeout", str(args.timeout)])
    elif args.no_verify:
        # Only the static path receives --no-verify.
        cmd.append("--no-verify")

    # Shell-quote each argument so the echoed line can be pasted into a
    # terminal even when the URL or selector contains spaces, "&" or "?".
    print("command: {0}".format(" ".join(shlex.quote(part) for part in cmd)))

    code, stdout, stderr = run_command(cmd)
    if stdout.strip():
        print(stdout.strip())
    if stderr.strip():
        print(stderr.strip())
    preview_file(output_path, args.preview_lines)
    return code
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for this diagnostic script.

    Returns:
        A parser accepting ``--url``, ``--selector``, ``--dynamic``,
        ``--no-verify``, ``--timeout`` (int, default 20000) and
        ``--preview-lines`` (int, default 20).
    """
    option_specs = [
        ("--url", {"help": "Optional URL for a smoke test"}),
        ("--selector", {"help": "Optional CSS selector for the smoke test"}),
        (
            "--dynamic",
            {
                "action": "store_true",
                "help": "Use `scrapling extract fetch` instead of `scrapling extract get`",
            },
        ),
        (
            "--no-verify",
            {
                "action": "store_true",
                "help": "Pass `--no-verify` to static smoke tests",
            },
        ),
        (
            "--timeout",
            {
                "type": int,
                "default": 20000,
                "help": "Timeout in milliseconds for dynamic smoke tests",
            },
        ),
        (
            "--preview-lines",
            {
                "type": int,
                "default": 20,
                "help": "Number of preview lines for markdown/text output",
            },
        ),
    ]
    cli = argparse.ArgumentParser(
        description="Diagnose Scrapling and run an optional smoke test."
    )
    # Register in list order so the generated --help keeps the same layout.
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli
def main() -> int:
    """Run the diagnostics and, when requested, the smoke test.

    Both the CLI check and the browser check always run. The smoke test
    runs only if the CLI check succeeded and ``--url`` was supplied.

    Returns:
        1 if the CLI check failed, 0 if it passed and no URL was given,
        otherwise the smoke test's exit status.
    """
    options = build_parser().parse_args()
    cli_working = diagnose_cli()
    # The browser report is printed even when the CLI itself is broken.
    diagnose_browsers()
    if cli_working and options.url:
        return run_smoke_test(options)
    return 0 if cli_working else 1
if __name__ == "__main__":
    # Hand main()'s return value to the shell as the process exit status.
    exit_status = main()
    sys.exit(exit_status)