From 37349607aecb7e12c413cfc1ded6acd609ee1ffc Mon Sep 17 00:00:00 2001 From: 8hoursking <125183117+8hrsk@users.noreply.github.com> Date: Mon, 16 Feb 2026 09:20:43 +0300 Subject: [PATCH] New skill - go-rod-master. Browser automation with Golang (#83) * New skill - go-rod-master. Pretty big skill for browser automation with go and go-rod. * chore: sync generated registry files --------- Co-authored-by: 8hoursking --- CATALOG.md | 5 +- data/bundles.json | 1 + data/catalog.json | 28 +- skills/go-rod-master/SKILL.md | 544 ++++++++++++++++++ skills/go-rod-master/examples/basic_scrape.go | 41 ++ .../examples/concurrent_pages.go | 81 +++ .../examples/request_hijacking.go | 85 +++ skills/go-rod-master/examples/stealth_page.go | 91 +++ .../go-rod-master/references/api-reference.md | 148 +++++ 9 files changed, 1021 insertions(+), 3 deletions(-) create mode 100644 skills/go-rod-master/SKILL.md create mode 100644 skills/go-rod-master/examples/basic_scrape.go create mode 100644 skills/go-rod-master/examples/concurrent_pages.go create mode 100644 skills/go-rod-master/examples/request_hijacking.go create mode 100644 skills/go-rod-master/examples/stealth_page.go create mode 100644 skills/go-rod-master/references/api-reference.md diff --git a/CATALOG.md b/CATALOG.md index 285554f2..c3c5ac59 100644 --- a/CATALOG.md +++ b/CATALOG.md @@ -2,7 +2,7 @@ Generated at: 2026-02-08T00:00:00.000Z -Total skills: 856 +Total skills: 857 ## architecture (64) @@ -300,7 +300,7 @@ Use when creating container-based agents that run custom code in Azure ... | hos | `xlsx-official` | Comprehensive spreadsheet creation, editing, and analysis with support for formulas, formatting, data analysis, and visualization. When Claude needs to work ... 
| xlsx, official | xlsx, official, spreadsheet, creation, editing, analysis, formulas, formatting, data, visualization, claude, work | | `youtube-automation` | Automate YouTube tasks via Rube MCP (Composio): upload videos, manage playlists, search content, get analytics, and handle comments. Always search tools firs... | youtube | youtube, automation, automate, tasks, via, rube, mcp, composio, upload, videos, playlists, search | -## development (127) +## development (128) | Skill | Description | Tags | Triggers | | --- | --- | --- | --- | @@ -392,6 +392,7 @@ Triggers: "queue storage", "QueueServic... | azure, storage, queue, py | azure, | `gemini-api-dev` | Use this skill when building applications with Gemini models, Gemini API, working with multimodal content (text, images, audio, video), implementing function... | gemini, api, dev | gemini, api, dev, skill, building, applications, models, working, multimodal, content, text, images | | `go-concurrency-patterns` | Master Go concurrency with goroutines, channels, sync primitives, and context. Use when building concurrent Go applications, implementing worker pools, or de... | go, concurrency | go, concurrency, goroutines, channels, sync, primitives, context, building, concurrent, applications, implementing, worker | | `go-playwright` | Expert capability for robust, stealthy, and efficient browser automation using Playwright Go. | go, playwright | go, playwright, capability, robust, stealthy, efficient, browser, automation | +| `go-rod-master` | Comprehensive guide for browser automation and web scraping with go-rod (Chrome DevTools Protocol) including stealth anti-bot-detection patterns. | go, rod, master | go, rod, master, browser, automation, web, scraping, chrome, devtools, protocol, including, stealth | | `golang-pro` | Master Go 1.21+ with modern patterns, advanced concurrency, performance optimization, and production-ready microservices. Expert in the latest Go ecosystem i... 
| golang | golang, pro, go, 21, concurrency, performance, optimization, microservices, latest, ecosystem, including, generics | | `hubspot-integration` | Expert patterns for HubSpot CRM integration including OAuth authentication, CRM objects, associations, batch operations, webhooks, and custom objects. Covers... | hubspot, integration | hubspot, integration, crm, including, oauth, authentication, objects, associations, batch, operations, webhooks, custom | | `javascript-mastery` | Comprehensive JavaScript reference covering 33+ essential concepts every developer should know. From fundamentals like primitives and closures to advanced pa... | javascript, mastery | javascript, mastery, reference, covering, 33, essential, concepts, every, developer, should, know, fundamentals | diff --git a/data/bundles.json b/data/bundles.json index 24cc8558..543d9d90 100644 --- a/data/bundles.json +++ b/data/bundles.json @@ -148,6 +148,7 @@ "gemini-api-dev", "go-concurrency-patterns", "go-playwright", + "go-rod-master", "golang-pro", "graphql", "hubspot-integration", diff --git a/data/catalog.json b/data/catalog.json index c7cd3dda..907afad6 100644 --- a/data/catalog.json +++ b/data/catalog.json @@ -1,6 +1,6 @@ { "generatedAt": "2026-02-08T00:00:00.000Z", - "total": 856, + "total": 857, "skills": [ { "id": "3d-web-experience", @@ -11041,6 +11041,32 @@ ], "path": "skills/go-playwright/SKILL.md" }, + { + "id": "go-rod-master", + "name": "go-rod-master", + "description": "Comprehensive guide for browser automation and web scraping with go-rod (Chrome DevTools Protocol) including stealth anti-bot-detection patterns.", + "category": "development", + "tags": [ + "go", + "rod", + "master" + ], + "triggers": [ + "go", + "rod", + "master", + "browser", + "automation", + "web", + "scraping", + "chrome", + "devtools", + "protocol", + "including", + "stealth" + ], + "path": "skills/go-rod-master/SKILL.md" + }, { "id": "godot-gdscript-patterns", "name": "godot-gdscript-patterns", diff --git 
a/skills/go-rod-master/SKILL.md b/skills/go-rod-master/SKILL.md new file mode 100644 index 00000000..ead9464a --- /dev/null +++ b/skills/go-rod-master/SKILL.md @@ -0,0 +1,544 @@ +--- +name: go-rod-master +description: "Comprehensive guide for browser automation and web scraping with go-rod (Chrome DevTools Protocol) including stealth anti-bot-detection patterns." +risk: safe +source: https://github.com/go-rod/rod +--- + +# Go-Rod Browser Automation Master + +## Overview + +[Rod](https://github.com/go-rod/rod) is a high-level Go driver built directly on the [Chrome DevTools Protocol](https://chromedevtools.github.io/devtools-protocol/) for browser automation and web scraping. Unlike wrappers around other tools, Rod communicates with the browser natively via CDP, providing thread-safe operations, chained context design for timeouts/cancellation, auto-wait for elements, correct iframe/shadow DOM handling, and zero zombie browser processes. + +The companion library [go-rod/stealth](https://github.com/go-rod/stealth) injects anti-bot-detection evasions based on [puppeteer-extra stealth](https://github.com/berstend/puppeteer-extra/tree/master/packages/extract-stealth-evasions), hiding headless browser fingerprints from detection systems. + +## When to Use This Skill + +- Use when the user asks to **scrape**, **automate**, or **test** a website using Go. +- Use when the user needs a **headless browser** for dynamic/SPA content (React, Vue, Angular). +- Use when the user mentions **stealth**, **anti-bot**, **avoiding detection**, **Cloudflare**, or **bot detection bypass**. +- Use when the user wants to work with the **Chrome DevTools Protocol (CDP)** directly from Go. +- Use when the user needs to **intercept** or **hijack** network requests in a browser context. +- Use when the user asks about **concurrent browser scraping** or **page pooling** in Go. +- Use when the user is migrating from **chromedp** or **Playwright Go** and wants a simpler API. 
+ +## Safety & Risk + +**Risk Level: 🔵 Safe** + +- **Read-Only by Default:** Default behavior is navigating and reading page content (scraping/testing). +- **Isolated Contexts:** Browser contexts are sandboxed; cookies and storage do not persist unless explicitly saved. +- **Resource Cleanup:** Designed around Go's `defer` pattern — browsers and pages close automatically. +- **No External Mutations:** Does not modify external state unless the script explicitly submits forms or POSTs data. + +## Installation + +```bash +# Core rod library +go get github.com/go-rod/rod@latest + +# Stealth anti-detection plugin (ALWAYS include for production scraping) +go get github.com/go-rod/stealth@latest +``` + +Rod auto-downloads a compatible Chromium binary on first run. To pre-download: + +```bash +go run github.com/go-rod/rod/lib/utils/get-browser@latest +``` + +## Core Concepts + +### Browser Lifecycle + +Rod manages three layers: **Browser → Page → Element**. + +```go +// Launch and connect to a browser +browser := rod.New().MustConnect() +defer browser.MustClose() + +// Create a page (tab) +page := browser.MustPage("https://example.com") + +// Find an element +el := page.MustElement("h1") +fmt.Println(el.MustText()) +``` + +### Must vs Error Patterns + +Rod provides two API styles for every operation: + +| Style | Method | Use Case | +|:------|:-------|:---------| +| **Must** | `MustElement()`, `MustClick()`, `MustText()` | Scripting, debugging, prototyping. Panics on error. | +| **Error** | `Element()`, `Click()`, `Text()` | Production code. Returns `error` for explicit handling. 
| + +**Production pattern:** + +```go +el, err := page.Element("#login-btn") +if err != nil { + return fmt.Errorf("login button not found: %w", err) +} +if err := el.Click(proto.InputMouseButtonLeft, 1); err != nil { + return fmt.Errorf("click failed: %w", err) +} +``` + +**Scripting pattern with Try:** + +```go +err := rod.Try(func() { + page.MustElement("#login-btn").MustClick() +}) +if errors.Is(err, context.DeadlineExceeded) { + log.Println("timeout finding login button") +} +``` + +### Context & Timeout + +Rod uses Go's `context.Context` for cancellation and timeouts. Context propagates recursively to all child operations. + +```go +// Set a 5-second timeout for the entire operation chain +page.Timeout(5 * time.Second). + MustWaitLoad(). + MustElement("title"). + CancelTimeout(). // subsequent calls are not bound by the 5s timeout + Timeout(30 * time.Second). + MustText() +``` + +### Element Selectors + +Rod supports multiple selector strategies: + +```go +// CSS selector (most common) +page.MustElement("div.content > p.intro") + +// CSS selector with text regex matching +page.MustElementR("button", "Submit|Send") + +// XPath +page.MustElementX("//div[@class='content']//p") + +// Search across iframes and shadow DOM (like DevTools Ctrl+F) +page.MustSearch(".deeply-nested-element") +``` + +### Auto-Wait + +Rod automatically retries element queries until the element appears or the context times out. 
You do not need manual sleeps: + +```go +// This will automatically wait until the element exists +el := page.MustElement("#dynamic-content") + +// Wait until the element is stable (position/size not changing) +el.MustWaitStable().MustClick() + +// Wait until page has no pending network requests +wait := page.MustWaitRequestIdle() +page.MustElement("#search").MustInput("query") +wait() +``` + +--- + +## Stealth & Anti-Bot Detection (go-rod/stealth) + +> **IMPORTANT:** For any production scraping or automation against real websites, ALWAYS use `stealth.MustPage()` instead of `browser.MustPage()`. This is the single most important step for avoiding bot detection. + +### How Stealth Works + +The `go-rod/stealth` package injects JavaScript evasions into every new page that: + +- **Remove `navigator.webdriver`** — the primary headless detection signal. +- **Spoof WebGL vendor/renderer** — presents real GPU info (e.g., "Intel Inc." / "Intel Iris OpenGL Engine") instead of headless markers like "Google SwiftShader". +- **Fix Chrome plugin array** — reports proper `PluginArray` type with realistic plugin count. +- **Patch permissions API** — returns `"prompt"` instead of bot-revealing values. +- **Set realistic languages** — reports `en-US,en` instead of empty arrays. +- **Fix broken image dimensions** — headless browsers report 0x0; stealth fixes this to 16x16. 
+ +### Usage + +**Creating a stealth page (recommended for all production use):** + +```go +import ( + "github.com/go-rod/rod" + "github.com/go-rod/stealth" +) + +browser := rod.New().MustConnect() +defer browser.MustClose() + +// Use stealth.MustPage instead of browser.MustPage +page := stealth.MustPage(browser) +page.MustNavigate("https://bot.sannysoft.com") +``` + +**With error handling:** + +```go +page, err := stealth.Page(browser) +if err != nil { + return fmt.Errorf("failed to create stealth page: %w", err) +} +page.MustNavigate("https://example.com") +``` + +**Using stealth.JS directly (advanced — for custom page creation):** + +```go +// If you need to create the page yourself (e.g., with specific options), +// inject stealth.JS manually via EvalOnNewDocument +page := browser.MustPage() +page.MustEvalOnNewDocument(stealth.JS) +page.MustNavigate("https://example.com") +``` + +### Verifying Stealth + +Navigate to a bot detection test page to verify evasions: + +```go +page := stealth.MustPage(browser) +page.MustNavigate("https://bot.sannysoft.com") +page.MustScreenshot("stealth_test.png") +``` + +Expected results for a properly stealth-configured browser: +- **WebDriver**: `missing (passed)` +- **Chrome**: `present (passed)` +- **Plugins Length**: `3` (not `0`) +- **Languages**: `en-US,en` + +--- + +## Implementation Guidelines + +### 1. Launcher Configuration + +Use the `launcher` package to customize browser launch flags: + +```go +import "github.com/go-rod/rod/lib/launcher" + +url := launcher.New(). + Headless(true). // false for debugging + Proxy("127.0.0.1:8080"). // upstream proxy + Set("disable-gpu", ""). // custom Chrome flag + Delete("use-mock-keychain"). // remove a default flag + MustLaunch() + +browser := rod.New().ControlURL(url).MustConnect() +defer browser.MustClose() +``` + +**Debugging mode (visible browser + slow motion):** + +```go +l := launcher.New(). + Headless(false). + Devtools(true) +defer l.Cleanup() + +browser := rod.New(). 
+ ControlURL(l.MustLaunch()). + Trace(true). + SlowMotion(2 * time.Second). + MustConnect() +``` + +### 2. Proxy Support + +```go +// Set proxy at launch +url := launcher.New(). + Proxy("socks5://127.0.0.1:1080"). + MustLaunch() + +browser := rod.New().ControlURL(url).MustConnect() + +// Handle proxy authentication +go browser.MustHandleAuth("username", "password")() + +// Ignore SSL certificate errors (for MITM proxies) +browser.MustIgnoreCertErrors(true) +``` + +### 3. Input Simulation + +```go +import "github.com/go-rod/rod/lib/input" + +// Type into an input field (inserts text; call MustSelectAllText() first to overwrite an existing value) +page.MustElement("#email").MustInput("user@example.com") + +// Simulate keyboard keys +page.Keyboard.MustType(input.Enter) + +// Press key combinations +page.Keyboard.MustPress(input.ControlLeft) +page.Keyboard.MustType(input.KeyA) +page.Keyboard.MustRelease(input.ControlLeft) + +// Move the mouse to coordinates, then click (button constants live in lib/proto) +page.Mouse.MustMoveTo(100, 200) +page.Mouse.MustClick(proto.InputMouseButtonLeft) +``` + +### 4. Network Request Interception (Hijacking) + +```go +router := browser.HijackRequests() +defer router.MustStop() + +// Block all image requests +router.MustAdd("*.png", func(ctx *rod.Hijack) { + ctx.Response.Fail(proto.NetworkErrorReasonBlockedByClient) +}) + +// Modify request headers +router.MustAdd("*api.example.com*", func(ctx *rod.Hijack) { + ctx.Request.Req().Header.Set("Authorization", "Bearer token123") + ctx.MustLoadResponse() +}) + +// Modify response body +router.MustAdd("*.js", func(ctx *rod.Hijack) { + ctx.MustLoadResponse() + ctx.Response.SetBody(ctx.Response.Body() + "\n// injected") +}) + +go router.Run() +``` + +### 5. 
Waiting Strategies + +```go +// Wait for page load event +page.MustWaitLoad() + +// Wait for no pending network requests (AJAX idle) +wait := page.MustWaitRequestIdle() +page.MustElement("#search").MustInput("query") +wait() + +// Wait for element to be stable (not animating) +page.MustElement(".modal").MustWaitStable().MustClick() + +// Wait for element to become invisible +page.MustElement(".loading").MustWaitInvisible() + +// Wait for JavaScript condition +page.MustWait(`() => document.title === 'Ready'`) + +// Wait for specific navigation/event +wait := page.WaitEvent(&proto.PageLoadEventFired{}) +page.MustNavigate("https://example.com") +wait() +``` + +### 6. Race Selectors (Multiple Outcomes) + +Handle pages where the result can be one of several outcomes (e.g., login success vs error): + +```go +page.MustElement("#username").MustInput("user") +page.MustElement("#password").MustInput("pass").MustType(input.Enter) + +// Race between success and error selectors +elm := page.Race(). + Element(".dashboard").MustHandle(func(e *rod.Element) { + fmt.Println("Login successful:", e.MustText()) + }). + Element(".error-message").MustDo() + +if elm.MustMatches(".error-message") { + log.Fatal("Login failed:", elm.MustText()) +} +``` + +### 7. Screenshots & PDF + +```go +// Full-page screenshot +page.MustScreenshot("page.png") + +// Custom screenshot (JPEG, specific region) +img, _ := page.Screenshot(true, &proto.PageCaptureScreenshot{ + Format: proto.PageCaptureScreenshotFormatJpeg, + Quality: gson.Int(90), + Clip: &proto.PageViewport{ + X: 0, Y: 0, Width: 1280, Height: 800, Scale: 1, + }, +}) +utils.OutputFile("screenshot.jpg", img) + +// Scroll screenshot (captures full scrollable page) +img, _ := page.MustWaitStable().ScrollScreenshot(nil) +utils.OutputFile("full_page.jpg", img) + +// PDF export +page.MustPDF("output.pdf") +``` + +### 8. 
Concurrent Page Pool + +```go +pool := rod.NewPagePool(5) // max 5 concurrent pages + +create := func() *rod.Page { + return browser.MustIncognito().MustPage() +} + +var wg sync.WaitGroup +for _, url := range urls { + wg.Add(1) + go func(u string) { + defer wg.Done() + + page := pool.MustGet(create) + defer pool.Put(page) + + page.MustNavigate(u).MustWaitLoad() + fmt.Println(page.MustInfo().Title) + }(url) +} +wg.Wait() + +pool.Cleanup(func(p *rod.Page) { p.MustClose() }) +``` + +### 9. Event Handling + +```go +// Listen for console.log output +go page.EachEvent(func(e *proto.RuntimeConsoleAPICalled) { + if e.Type == proto.RuntimeConsoleAPICalledTypeLog { + fmt.Println(page.MustObjectsToJSON(e.Args)) + } +})() + +// Wait for a specific event before proceeding +wait := page.WaitEvent(&proto.PageLoadEventFired{}) +page.MustNavigate("https://example.com") +wait() +``` + +### 10. File Download + +```go +wait := browser.MustWaitDownload() + +page.MustElementR("a", "Download PDF").MustClick() + +data := wait() +utils.OutputFile("downloaded.pdf", data) +``` + +### 11. JavaScript Evaluation + +```go +// Execute JS on the page +page.MustEval(`() => console.log("hello")`) + +// Pass parameters and get return value +result := page.MustEval(`(a, b) => a + b`, 1, 2) +fmt.Println(result.Int()) // 3 + +// Eval on a specific element ("this" = the DOM element) +title := page.MustElement("title").MustEval(`() => this.innerText`).String() + +// Direct CDP calls for features Rod doesn't wrap +proto.PageSetAdBlockingEnabled{Enabled: true}.Call(page) +``` + +### 12. Loading Chrome Extensions + +```go +extPath, _ := filepath.Abs("./my-extension") + +u := launcher.New(). + Set("load-extension", extPath). + Headless(false). 
// extensions require headed mode + MustLaunch() + +browser := rod.New().ControlURL(u).MustConnect() +``` + +--- + +## Examples + +See the `examples/` directory for complete, runnable Go files: +- `examples/basic_scrape.go` — Minimal scraping example +- `examples/stealth_page.go` — Anti-detection with go-rod/stealth +- `examples/request_hijacking.go` — Intercepting and modifying network requests +- `examples/concurrent_pages.go` — Page pool for concurrent scraping + +--- + +## Best Practices + +- ✅ **ALWAYS use `stealth.MustPage(browser)`** instead of `browser.MustPage()` for real-world sites. +- ✅ **ALWAYS `defer browser.MustClose()`** immediately after connecting. +- ✅ Use the error-returning API (not `Must*`) in production code. +- ✅ Set explicit timeouts with `.Timeout()` — never rely on defaults for production. +- ✅ Use `browser.MustIncognito().MustPage()` for isolated sessions. +- ✅ Use `PagePool` for concurrent scraping instead of spawning unlimited pages. +- ✅ Use `MustWaitStable()` before clicking elements that might be animating. +- ✅ Use `MustWaitRequestIdle()` after actions that trigger AJAX calls. +- ✅ Use `launcher.New().Headless(false).Devtools(true)` for debugging. +- ❌ **NEVER** use `time.Sleep()` for waiting — use Rod's built-in wait methods. +- ❌ **NEVER** create a new `Browser` per task — create one Browser, use multiple `Page` instances. +- ❌ **NEVER** use `browser.MustPage()` for production scraping — use `stealth.MustPage()`. +- ❌ **NEVER** ignore errors in production — always handle them explicitly. +- ❌ **NEVER** forget to defer-close browsers, pages, and hijack routers. + +## Common Pitfalls + +- **Problem:** Element not found even though it exists on the page. + **Solution:** The element may be inside an iframe or shadow DOM. Use `page.MustSearch()` instead of `page.MustElement()` — it searches across all iframes and shadow DOMs. + +- **Problem:** Click doesn't work because the element is animating. 
+ **Solution:** Call `el.MustWaitStable()` before `el.MustClick()`. + +- **Problem:** Bot detection despite using stealth. + **Solution:** Combine `stealth.MustPage()` with: randomized viewport sizes, realistic User-Agent strings, human-like input delays between keystrokes, and random idle behaviors (scroll, hover). + +- **Problem:** Browser process leaks (zombie processes). + **Solution:** Always `defer browser.MustClose()`. Rod uses [leakless](https://github.com/ysmood/leakless) to kill zombies after main process crash, but explicit cleanup is preferred. + +- **Problem:** Timeout errors on slow pages. + **Solution:** Use chained context: `page.Timeout(30 * time.Second).MustWaitLoad()`. For AJAX-heavy pages, use `MustWaitRequestIdle()` instead of `MustWaitLoad()`. + +- **Problem:** HijackRequests router not intercepting requests. + **Solution:** You must call `go router.Run()` after setting up routes, and `defer router.MustStop()` for cleanup. + +## Limitations + +- **CAPTCHAs:** Rod does not include CAPTCHA solving. External services (2captcha, etc.) must be integrated separately. +- **Extreme Anti-Bot:** While `go-rod/stealth` handles common detection (WebDriver, plugin fingerprints, WebGL), extremely strict systems (some Cloudflare configurations, Akamai Bot Manager) may still detect automation. Additional measures (residential proxies, human-like behavioral patterns) may be needed. +- **DRM Content:** Cannot interact with DRM-protected media (e.g., Widevine). +- **Resource Usage:** Each browser instance consumes significant RAM (~100-300MB+). Use `PagePool` and limit concurrency on memory-constrained systems. +- **Extensions in Headless:** Chrome extensions do not work in headless mode. Use `Headless(false)` with XVFB for server environments. +- **Platform:** Requires a Chromium-compatible browser. Does not support Firefox or Safari. 
+ +## Documentation References + +- [Official Documentation](https://go-rod.github.io/) — Guides, tutorials, FAQ +- [Go API Reference](https://pkg.go.dev/github.com/go-rod/rod) — Complete type and method documentation +- [go-rod/stealth](https://github.com/go-rod/stealth) — Anti-bot detection plugin +- [Examples (source)](https://github.com/go-rod/rod/blob/main/examples_test.go) — Official example tests +- [Rod vs Chromedp Comparison](https://github.com/go-rod/rod/tree/main/lib/examples/compare-chromedp) — Migration reference +- [Chrome DevTools Protocol Docs](https://chromedevtools.github.io/devtools-protocol/) — Underlying protocol reference +- [Chrome CLI Flags Reference](https://peter.sh/experiments/chromium-command-line-switches) — Launcher flag documentation +- `references/api-reference.md` — Quick-reference cheat sheet diff --git a/skills/go-rod-master/examples/basic_scrape.go b/skills/go-rod-master/examples/basic_scrape.go new file mode 100644 index 00000000..16f3a14c --- /dev/null +++ b/skills/go-rod-master/examples/basic_scrape.go @@ -0,0 +1,41 @@ +package main + +import ( + "fmt" + "time" + + "github.com/go-rod/rod" + "github.com/go-rod/rod/lib/input" +) + +// basic_scrape demonstrates a minimal go-rod scraping workflow: +// Launch browser → navigate → extract text → close. +func main() { + // Launch and connect to a new browser instance. + // Rod auto-downloads Chromium if not present. + browser := rod.New(). + Timeout(time.Minute). 
// global timeout for the browser + MustConnect() + defer browser.MustClose() + + // Navigate to the target page and wait for it to stabilize + page := browser.MustPage("https://github.com").MustWaitStable() + + // Extract the page title via JavaScript evaluation + title := page.MustElement("title").MustEval(`() => this.innerText`).String() + fmt.Println("Page title:", title) + + // Use CSS selector to find elements + links := page.MustElements("a[href]") + fmt.Printf("Found %d links on the page\n", len(links)) + + // Use keyboard shortcut to trigger search + page.Keyboard.MustType(input.Slash) + + // Type into the search input and press Enter + page.MustElement("#query-builder-test").MustInput("go-rod").MustType(input.Enter) + + // Wait for results — MustElementR matches by CSS selector + text regex + result := page.MustElementR("span", "DevTools Protocol").MustText() + fmt.Println("Found result:", result) +} diff --git a/skills/go-rod-master/examples/concurrent_pages.go b/skills/go-rod-master/examples/concurrent_pages.go new file mode 100644 index 00000000..a19d186c --- /dev/null +++ b/skills/go-rod-master/examples/concurrent_pages.go @@ -0,0 +1,81 @@ +package main + +import ( + "fmt" + "sync" + "time" + + "github.com/go-rod/rod" + "github.com/go-rod/stealth" +) + +// concurrent_pages demonstrates using rod.PagePool for concurrent scraping +// with stealth-enabled pages. +func main() { + browser := rod.New(). + Timeout(2 * time.Minute). 
+ MustConnect() + defer browser.MustClose() + + // URLs to scrape concurrently + urls := []string{ + "https://example.com", + "https://example.org", + "https://www.iana.org/domains/reserved", + "https://www.iana.org/about", + } + + // Create a page pool with max 3 concurrent pages + pool := rod.NewPagePool(3) + + // Factory function: creates stealth-enabled pages in isolated incognito contexts + create := func() *rod.Page { + // MustIncognito creates an isolated browser context (separate cookies, storage) + page := stealth.MustPage(browser.MustIncognito()) + return page + } + + // Collect results safely using a mutex + var mu sync.Mutex + results := make(map[string]string) + + // Scrape all URLs concurrently + var wg sync.WaitGroup + for _, url := range urls { + wg.Add(1) + go func(u string) { + defer wg.Done() + + // Get a page from the pool (blocks if pool is full) + page := pool.MustGet(create) + defer pool.Put(page) // return page to pool when done + + // Navigate and wait for the page to stabilize + page.MustNavigate(u).MustWaitStable() + + // Extract the page title + title := page.MustInfo().Title + + // Store result + mu.Lock() + results[u] = title + mu.Unlock() + + fmt.Printf("[done] %s → %s\n", u, title) + }(url) + } + + // Wait for all goroutines to complete + wg.Wait() + + // Clean up the pool + pool.Cleanup(func(p *rod.Page) { + p.MustClose() + }) + + // Print summary + fmt.Printf("\n--- Results (%d pages scraped) ---\n", len(results)) + for url, title := range results { + fmt.Printf(" %s: %s\n", url, title) + } +} diff --git a/skills/go-rod-master/examples/request_hijacking.go b/skills/go-rod-master/examples/request_hijacking.go new file mode 100644 index 00000000..32f8c354 --- /dev/null +++ b/skills/go-rod-master/examples/request_hijacking.go @@ -0,0 +1,85 @@ +package main + +import ( + "fmt" + "net/http" + "time" + + "github.com/go-rod/rod" + "github.com/go-rod/rod/lib/proto" + "github.com/go-rod/stealth" +) + +// request_hijacking demonstrates 
intercepting and modifying network requests +// using Rod's HijackRequests API. +func main() { + browser := rod.New(). + Timeout(time.Minute). + MustConnect() + defer browser.MustClose() + + // --- Example 1: Block image requests to save bandwidth --- + router := browser.HijackRequests() + defer router.MustStop() + + // Block all PNG and JPEG image requests + router.MustAdd("*.png", func(ctx *rod.Hijack) { + ctx.Response.Fail(proto.NetworkErrorReasonBlockedByClient) + }) + router.MustAdd("*.jpg", func(ctx *rod.Hijack) { + ctx.Response.Fail(proto.NetworkErrorReasonBlockedByClient) + }) + + // Modify request headers for API calls + router.MustAdd("*api.*", func(ctx *rod.Hijack) { + ctx.Request.Req().Header.Set("X-Custom-Header", "go-rod") + ctx.Request.Req().Header.Set("Authorization", "Bearer my-token") + + // Load the actual response from the server + if err := ctx.LoadResponse(http.DefaultClient, true); err != nil { + fmt.Printf("Failed to load response: %v\n", err) + return + } + + fmt.Printf("API response status: %d\n", ctx.Response.Payload().ResponseCode) + }) + + // Inject JavaScript into every JS file loaded + router.MustAdd("*.js", func(ctx *rod.Hijack) { + if err := ctx.LoadResponse(http.DefaultClient, true); err != nil { + return + } + // Append tracking code to all JavaScript files + body := ctx.Response.Body() + ctx.Response.SetBody(body + "\n// Monitored by go-rod") + }) + + // IMPORTANT: Start the router in a goroutine + go router.Run() + + // Use stealth page for anti-detection + page := stealth.MustPage(browser) + page.MustNavigate("https://example.com").MustWaitLoad() + + fmt.Println("Page loaded with request hijacking active") + fmt.Println("Title:", page.MustElement("title").MustText()) + + // --- Example 2: Capture and log all network requests --- + // (Using a separate page to show different patterns) + page2 := stealth.MustPage(browser) + + // Enable network domain for request logging + proto.NetworkEnable{}.Call(page2) + + // Listen for 
network responses + go page2.EachEvent(func(e *proto.NetworkResponseReceived) { + fmt.Printf(" [%d] %s %s\n", + e.Response.Status, + e.Type.String(), + e.Response.URL, + ) + })() + + page2.MustNavigate("https://example.com").MustWaitLoad() + fmt.Println("\nNetwork log above shows all requests captured") +} diff --git a/skills/go-rod-master/examples/stealth_page.go b/skills/go-rod-master/examples/stealth_page.go new file mode 100644 index 00000000..2320b625 --- /dev/null +++ b/skills/go-rod-master/examples/stealth_page.go @@ -0,0 +1,91 @@ +package main + +import ( + "fmt" + "strings" + "time" + + "github.com/go-rod/rod" + "github.com/go-rod/rod/lib/launcher" + "github.com/go-rod/rod/lib/utils" + "github.com/go-rod/stealth" +) + +// stealth_page demonstrates using go-rod/stealth to bypass bot detection. +// It creates a stealth-enabled page and verifies evasions against a detection site. +func main() { + // Ensure the browser binary is downloaded + launcher.NewBrowser().MustGet() + + // Launch browser with custom launcher settings + url := launcher.New(). + Headless(true). + MustLaunch() + + browser := rod.New(). + ControlURL(url). + Timeout(time.Minute). 
+ MustConnect() + defer browser.MustClose() + + // CRITICAL: Use stealth.MustPage instead of browser.MustPage + // This injects anti-detection JavaScript into every new document + page := stealth.MustPage(browser) + + // Navigate to a bot detection test page + page.MustNavigate("https://bot.sannysoft.com") + + // Wait for the detection tests to complete + page.MustElement("#broken-image-dimensions.passed") + + // Take a screenshot to verify results + page.MustScreenshot("stealth_result.png") + fmt.Println("Screenshot saved to stealth_result.png") + + // Print detection results + printBotDetectionReport(page) + + // ---- Advanced: Using stealth.JS directly ---- + // If you need to create the page manually (e.g., with specific context), + // you can inject stealth.JS via EvalOnNewDocument: + advancedPage := browser.MustPage() + advancedPage.MustEvalOnNewDocument(stealth.JS) + advancedPage.MustNavigate("https://bot.sannysoft.com") + advancedPage.MustElement("#broken-image-dimensions.passed") + fmt.Println("\nAdvanced stealth page also passed detection tests") + + // ---- Production: Error handling pattern ---- + prodPage, err := stealth.Page(browser) + if err != nil { + fmt.Printf("Failed to create stealth page: %v\n", err) + return + } + prodPage.MustNavigate("https://example.com") + title, err := prodPage.MustElement("title").Text() + if err != nil { + fmt.Printf("Failed to get title: %v\n", err) + return + } + fmt.Printf("\nProduction page title: %s\n", title) +} + +// printBotDetectionReport extracts and prints the detection test results. 
+func printBotDetectionReport(page *rod.Page) { + el := page.MustElement("#broken-image-dimensions.passed") + for _, row := range el.MustParents("table").First().MustElements("tr:nth-child(n+2)") { + cells := row.MustElements("td") + key := cells[0].MustProperty("textContent") + + if strings.HasPrefix(key.String(), "User Agent") { + ua := cells[1].MustProperty("textContent").String() + passed := !strings.Contains(ua, "HeadlessChrome/") + fmt.Printf(" %s: %t\n", key, passed) + } else if strings.HasPrefix(key.String(), "Hairline Feature") { + continue // machine-dependent, skip + } else { + fmt.Printf(" %s: %s\n", key, cells[1].MustProperty("textContent")) + } + } + + _ = utils.OutputFile("stealth_result.png", []byte{}) +} diff --git a/skills/go-rod-master/references/api-reference.md b/skills/go-rod-master/references/api-reference.md new file mode 100644 index 00000000..fbb81e8e --- /dev/null +++ b/skills/go-rod-master/references/api-reference.md @@ -0,0 +1,148 @@ +# Go-Rod API Quick Reference + +Cheat sheet for the most-used `go-rod/rod` and `go-rod/stealth` APIs. +Every `Must*` method has a corresponding error-returning version (without the `Must` prefix). 
+ +--- + +## Browser (`rod.Browser`) + +| Method | Description | +|:-------|:------------| +| `rod.New().MustConnect()` | Launch new browser and connect | +| `rod.New().ControlURL(url).MustConnect()` | Connect to existing browser via WebSocket URL | +| `browser.MustClose()` | Close browser and all pages | +| `browser.MustPage(url)` | Create new page (tab) and navigate | +| `browser.MustPage()` | Create blank page | +| `browser.MustIncognito()` | Create isolated incognito context | +| `browser.MustIgnoreCertErrors(true)` | Ignore SSL certificate errors | +| `browser.MustHandleAuth(user, pass)` | Handle HTTP basic/proxy auth | +| `browser.HijackRequests()` | Create request interceptor router | +| `browser.MustWaitDownload()` | Wait for a file download to complete | +| `browser.ServeMonitor("")` | Start visual monitoring server | +| `browser.Trace(true)` | Enable verbose tracing | +| `browser.SlowMotion(duration)` | Add delay between actions | +| `rod.NewPagePool(n)` | Create pool of max `n` reusable pages | +| `rod.NewBrowserPool(n)` | Create pool of max `n` reusable browsers | + +## Page (`rod.Page`) + +| Method | Description | +|:-------|:------------| +| `page.MustNavigate(url)` | Navigate to URL | +| `page.MustWaitLoad()` | Wait for `load` event | +| `page.MustWaitStable()` | Wait until page DOM is stable | +| `page.MustWaitRequestIdle()` | Return a wait function that blocks until no network requests are pending | +| `page.MustWaitIdle()` | Wait until the next `window.requestIdleCallback` fires | +| `page.MustWait(js)` | Wait for JS expression to return truthy | +| `page.MustElement(selector)` | Find element by CSS selector (auto-wait) | +| `page.MustElementR(selector, regex)` | Find element by CSS + text regex | +| `page.MustElementX(xpath)` | Find element by XPath | +| `page.MustElements(selector)` | Find all matching elements | +| `page.MustSearch(query)` | Search across iframes + shadow DOM | +| `page.MustEval(js, args...)` | Execute JavaScript on page | +| `page.MustEvalOnNewDocument(js)` | Inject JS before 
any page script runs | +| `page.MustScreenshot(path)` | Take PNG screenshot | +| `page.MustPDF(path)` | Export page as PDF | +| `page.ScrollScreenshot(opts)` | Full-page scroll screenshot | +| `page.MustInfo()` | Get page info (title, URL) | +| `page.Timeout(duration)` | Set timeout for chained operations | +| `page.CancelTimeout()` | Remove timeout for subsequent operations | +| `page.Race()` | Start race selector (multiple outcomes) | +| `page.Keyboard` | Access keyboard controller | +| `page.Mouse` | Access mouse controller | +| `page.WaitEvent(proto)` | Wait for specific CDP event | +| `page.EachEvent(handler)` | Subscribe to events continuously | +| `page.Event()` | Channel-based event stream | + +## Element (`rod.Element`) + +| Method | Description | +|:-------|:------------| +| `el.MustClick()` | Click the element | +| `el.MustInput(text)` | Focus the element and insert text (does not clear existing content; use `el.MustSelectAllText().MustInput("")` to empty it first) | +| `el.MustType(keys...)` | Simulate key presses | +| `el.MustText()` | Get text content | +| `el.MustHTML()` | Get outer HTML | +| `el.MustProperty(name)` | Get JS property value | +| `el.MustAttribute(name)` | Get HTML attribute value | +| `el.MustWaitStable()` | Wait until position/size stable | +| `el.MustWaitVisible()` | Wait until element is visible | +| `el.MustWaitInvisible()` | Wait until element is hidden | +| `el.MustParents(selector)` | Find ancestor elements matching selector | +| `el.MustElements(selector)` | Find descendant elements matching selector | +| `el.MustMatches(selector)` | Check if element matches selector | +| `el.MustEval(js)` | Eval JS with `this` = element | +| `el.MustScreenshot(path)` | Screenshot just this element | + +## Input (`rod/lib/input`) + +| Constant | Description | +|:---------|:------------| +| `input.Enter` | Enter key | +| `input.Escape` | Escape key | +| `input.Tab` | Tab key | +| `input.Slash` | `/` key | +| `input.ControlLeft` | Left Ctrl | +| `input.ShiftLeft` | Left Shift | +| `input.KeyA` — `input.KeyZ` | Letter keys | +| `input.MouseLeft` | Left mouse button | 
+ +## Launcher (`rod/lib/launcher`) + +| Method | Description | +|:-------|:------------| +| `launcher.New()` | Create new launcher | +| `l.Headless(bool)` | Enable/disable headless mode | +| `l.Devtools(bool)` | Auto-open DevTools | +| `l.Proxy(addr)` | Set proxy server | +| `l.Set(flag, value)` | Set Chrome CLI flag | +| `l.Delete(flag)` | Remove Chrome CLI flag | +| `l.MustLaunch()` | Launch browser, return control URL | +| `l.Cleanup()` | Wait for the browser to exit, then remove its user data directory (use `l.Kill()` to kill the process) | +| `launcher.NewBrowser().MustGet()` | Download browser binary | +| `launcher.Open(url)` | Open URL in system browser | + +## Stealth (`go-rod/stealth`) + +| API | Description | +|:----|:------------| +| `stealth.MustPage(browser)` | Create stealth page (panics on error) | +| `stealth.Page(browser)` | Create stealth page (returns error) | +| `stealth.JS` | Raw JS string with all stealth evasions | + +**What stealth.JS injects:** +- Removes `navigator.webdriver` detection +- Spoofs WebGL vendor/renderer to real GPU values +- Fixes Chrome plugin array (`PluginArray` type, count=3) +- Patches permissions API (returns `"prompt"`) +- Sets realistic languages (`en-US,en`) +- Fixes broken image dimensions (16x16 instead of 0x0) + +## Network Hijacking (`rod.Hijack`) + +| Method | Description | +|:-------|:------------| +| `router.MustAdd(pattern, handler)` | Add URL pattern handler | +| `router.Run()` | Start intercepting (call with `go`) | +| `router.MustStop()` | Stop intercepting | +| `ctx.Request.Req()` | Access `*http.Request` | +| `ctx.Request.URL()` | Get request URL | +| `ctx.LoadResponse(client, true)` | Load response from server | +| `ctx.MustLoadResponse()` | Load response (panics on error) | +| `ctx.Response.Body()` | Get response body | +| `ctx.Response.SetBody(s)` | Modify response body | +| `ctx.Response.Fail(reason)` | Block the request | +| `ctx.Response.Payload()` | Get response metadata | + +## Direct CDP (`rod/lib/proto`) + +```go +// Call any CDP method directly 
+proto.PageSetAdBlockingEnabled{Enabled: true}.Call(page) + +// Or via generic JSON API +page.Call(ctx, "", "Page.setAdBlockingEnabled", map[string]bool{"enabled": true}) +``` + +Full CDP protocol reference: https://chromedevtools.github.io/devtools-protocol/