release: add scrapling-skill and fix script compatibility
- add scrapling-skill with validated CLI workflow, diagnostics, packaging, and docs integration - fix skill-creator package_skill.py so direct script invocation works from repo root - fix continue-claude-work extract_resume_context.py typing compatibility for local python3 - bump marketplace to 1.39.0 and updated skill versions
This commit is contained in:
191
scrapling-skill/scripts/diagnose_scrapling.py
Executable file
191
scrapling-skill/scripts/diagnose_scrapling.py
Executable file
@@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Diagnose a local Scrapling CLI installation and optionally run a smoke test.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Tuple
|
||||
|
||||
|
||||
def run_command(cmd: List[str]) -> Tuple[int, str, str]:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
universal_newlines=True,
|
||||
check=False,
|
||||
)
|
||||
return result.returncode, result.stdout, result.stderr
|
||||
|
||||
|
||||
def print_section(title: str) -> None:
|
||||
print("")
|
||||
print(title)
|
||||
print("-" * len(title))
|
||||
|
||||
|
||||
def existing_dirs(paths: Iterable[Path]) -> List[str]:
|
||||
return [str(path) for path in paths if path.exists()]
|
||||
|
||||
|
||||
def detect_browser_cache() -> Tuple[List[str], List[str]]:
|
||||
roots = [
|
||||
Path.home() / "Library" / "Caches" / "ms-playwright",
|
||||
Path.home() / ".cache" / "ms-playwright",
|
||||
]
|
||||
chromium = []
|
||||
headless_shell = []
|
||||
for root in roots:
|
||||
if not root.exists():
|
||||
continue
|
||||
chromium.extend(existing_dirs(sorted(root.glob("chromium-*"))))
|
||||
headless_shell.extend(existing_dirs(sorted(root.glob("chromium_headless_shell-*"))))
|
||||
return chromium, headless_shell
|
||||
|
||||
|
||||
def diagnose_cli() -> bool:
|
||||
print_section("CLI")
|
||||
scrapling_path = shutil.which("scrapling")
|
||||
if not scrapling_path:
|
||||
print("status: missing")
|
||||
print("fix: install with `uv tool install 'scrapling[shell]'`")
|
||||
return False
|
||||
|
||||
print("path: {0}".format(scrapling_path))
|
||||
code, stdout, stderr = run_command(["scrapling", "--help"])
|
||||
output = (stdout + "\n" + stderr).strip()
|
||||
|
||||
if code == 0:
|
||||
print("status: working")
|
||||
return True
|
||||
|
||||
print("status: broken")
|
||||
if "install scrapling with any of the extras" in output.lower() or "no module named 'click'" in output.lower():
|
||||
print("cause: installed without CLI extras")
|
||||
print("fix: `uv tool uninstall scrapling` then `uv tool install 'scrapling[shell]'`")
|
||||
else:
|
||||
print("cause: unknown")
|
||||
|
||||
if output:
|
||||
print("details:")
|
||||
print(output[:1200])
|
||||
return False
|
||||
|
||||
|
||||
def diagnose_browsers() -> None:
|
||||
print_section("Browser Runtime")
|
||||
chromium, headless_shell = detect_browser_cache()
|
||||
print("chromium: {0}".format("present" if chromium else "missing"))
|
||||
for path in chromium:
|
||||
print(" - {0}".format(path))
|
||||
print("chrome-headless-shell: {0}".format("present" if headless_shell else "missing"))
|
||||
for path in headless_shell:
|
||||
print(" - {0}".format(path))
|
||||
if not chromium or not headless_shell:
|
||||
print("hint: run `scrapling install` before browser-backed fetches")
|
||||
|
||||
|
||||
def preview_file(path: Path, preview_lines: int) -> None:
|
||||
print_section("Smoke Test Output")
|
||||
if not path.exists():
|
||||
print("status: missing output file")
|
||||
return
|
||||
|
||||
size = path.stat().st_size
|
||||
print("path: {0}".format(path))
|
||||
print("bytes: {0}".format(size))
|
||||
if size == 0:
|
||||
print("status: empty")
|
||||
return
|
||||
|
||||
if path.suffix in (".md", ".txt"):
|
||||
print("preview:")
|
||||
with path.open("r", encoding="utf-8", errors="replace") as handle:
|
||||
for index, line in enumerate(handle):
|
||||
if index >= preview_lines:
|
||||
break
|
||||
print(line.rstrip())
|
||||
|
||||
|
||||
def run_smoke_test(args: argparse.Namespace) -> int:
|
||||
print_section("Smoke Test")
|
||||
|
||||
suffix = ".html"
|
||||
if args.selector:
|
||||
suffix = ".md"
|
||||
|
||||
output_path = Path(tempfile.gettempdir()) / ("scrapling-smoke" + suffix)
|
||||
if output_path.exists():
|
||||
output_path.unlink()
|
||||
|
||||
cmd = ["scrapling", "extract", "fetch" if args.dynamic else "get", args.url, str(output_path)]
|
||||
if args.selector:
|
||||
cmd.extend(["-s", args.selector])
|
||||
if args.dynamic:
|
||||
cmd.extend(["--timeout", str(args.timeout)])
|
||||
elif args.no_verify:
|
||||
cmd.append("--no-verify")
|
||||
|
||||
print("command: {0}".format(" ".join(cmd)))
|
||||
code, stdout, stderr = run_command(cmd)
|
||||
if stdout.strip():
|
||||
print(stdout.strip())
|
||||
if stderr.strip():
|
||||
print(stderr.strip())
|
||||
|
||||
preview_file(output_path, args.preview_lines)
|
||||
return code
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(description="Diagnose Scrapling and run an optional smoke test.")
|
||||
parser.add_argument("--url", help="Optional URL for a smoke test")
|
||||
parser.add_argument("--selector", help="Optional CSS selector for the smoke test")
|
||||
parser.add_argument(
|
||||
"--dynamic",
|
||||
action="store_true",
|
||||
help="Use `scrapling extract fetch` instead of `scrapling extract get`",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-verify",
|
||||
action="store_true",
|
||||
help="Pass `--no-verify` to static smoke tests",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--timeout",
|
||||
type=int,
|
||||
default=20000,
|
||||
help="Timeout in milliseconds for dynamic smoke tests",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--preview-lines",
|
||||
type=int,
|
||||
default=20,
|
||||
help="Number of preview lines for markdown/text output",
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = build_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
cli_ok = diagnose_cli()
|
||||
diagnose_browsers()
|
||||
|
||||
if not cli_ok:
|
||||
return 1
|
||||
|
||||
if not args.url:
|
||||
return 0
|
||||
|
||||
return run_smoke_test(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user