From 4b70c5a8601157348e62a808cf45e5cc32b51d89 Mon Sep 17 00:00:00 2001 From: yusyus Date: Sat, 21 Feb 2026 23:36:58 +0300 Subject: [PATCH] feat: add workflow support to unified_scraper (fixes gap #1) unified_scraper.py was the only scraper missing --enhance-workflow, --enhance-stage, --var, and --workflow-dry-run support. All other scrapers (doc_scraper, github_scraper, pdf_scraper, codebase_scraper) already called run_workflows() after building the skill. Changes: - arguments/unified.py: add 4 workflow args to UNIFIED_ARGUMENTS so the unified CLI subparser picks them up automatically - unified_scraper.py main(): register the same 4 workflow args in the standalone parser - unified_scraper.py run(): accept optional `args` parameter and call run_workflows() after build_skill(), passing unified context (name + description) consistent with doc_scraper pattern Co-Authored-By: Claude Sonnet 4.6 --- src/skill_seekers/cli/arguments/unified.py | 32 ++++++++++++++ src/skill_seekers/cli/unified_scraper.py | 49 ++++++++++++++++++++-- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/src/skill_seekers/cli/arguments/unified.py b/src/skill_seekers/cli/arguments/unified.py index 111230c..f42d5ea 100644 --- a/src/skill_seekers/cli/arguments/unified.py +++ b/src/skill_seekers/cli/arguments/unified.py @@ -40,6 +40,38 @@ UNIFIED_ARGUMENTS: dict[str, dict[str, Any]] = { "help": "Dry run mode", }, }, + # Enhancement Workflow arguments (mirrors scrape/github/pdf/codebase scrapers) + "enhance_workflow": { + "flags": ("--enhance-workflow",), + "kwargs": { + "action": "append", + "help": "Apply enhancement workflow (file path or preset: security-focus, minimal, api-documentation, architecture-comprehensive). Can use multiple times to chain workflows.", + "metavar": "WORKFLOW", + }, + }, + "enhance_stage": { + "flags": ("--enhance-stage",), + "kwargs": { + "action": "append", + "help": "Add inline enhancement stage (format: 'name:prompt'). Can be used multiple times.", + "metavar": "STAGE", + }, + }, + "var": { + "flags": ("--var",), + "kwargs": { + "action": "append", + "help": "Override workflow variable (format: 'key=value'). Can be used multiple times.", + "metavar": "VAR", + }, + }, + "workflow_dry_run": { + "flags": ("--workflow-dry-run",), + "kwargs": { + "action": "store_true", + "help": "Preview workflow stages without executing (requires --enhance-workflow)", + }, + }, } diff --git a/src/skill_seekers/cli/unified_scraper.py b/src/skill_seekers/cli/unified_scraper.py index 1f65203..dc10eaf 100644 --- a/src/skill_seekers/cli/unified_scraper.py +++ b/src/skill_seekers/cli/unified_scraper.py @@ -943,9 +943,14 @@ class UnifiedScraper: logger.info(f"✅ Unified skill built: {self.output_dir}/") - def run(self): + def run(self, args=None): """ Execute complete unified scraping workflow. + + Args: + args: Optional parsed CLI arguments for workflow integration. + When provided, enhancement workflows (--enhance-workflow, + --enhance-stage) are executed after the skill is built. """ logger.info("\n" + "🚀 " * 20) logger.info(f"Unified Scraper: {self.config['name']}") @@ -966,6 +971,16 @@ class UnifiedScraper: # Phase 4: Build skill self.build_skill(merged_data) + # Phase 5: Enhancement Workflow Integration + if args is not None: + from skill_seekers.cli.workflow_runner import run_workflows + + unified_context = { + "name": self.config.get("name", ""), + "description": self.config.get("description", ""), + } + run_workflows(args, context=unified_context) + logger.info("\n" + "✅ " * 20) logger.info("Unified scraping complete!") logger.info("✅ " * 20 + "\n") @@ -1024,6 +1039,34 @@ Examples: action="store_true", help="Preview what will be scraped without actually scraping", ) + # Enhancement Workflow arguments (mirrors scrape/github/pdf/codebase scrapers) + parser.add_argument( + "--enhance-workflow", + action="append", + dest="enhance_workflow", + help="Apply enhancement workflow (file path or preset). Can use multiple times to chain workflows.", + metavar="WORKFLOW", + ) + parser.add_argument( + "--enhance-stage", + action="append", + dest="enhance_stage", + help="Add inline enhancement stage (format: 'name:prompt'). Can be used multiple times.", + metavar="STAGE", + ) + parser.add_argument( + "--var", + action="append", + dest="var", + help="Override workflow variable (format: 'key=value'). Can be used multiple times.", + metavar="VAR", + ) + parser.add_argument( + "--workflow-dry-run", + action="store_true", + dest="workflow_dry_run", + help="Preview workflow stages without executing (requires --enhance-workflow)", + ) args = parser.parse_args() @@ -1068,8 +1111,8 @@ Examples: logger.info(f"Merge mode: {scraper.merge_mode}") return - # Run scraper - scraper.run() + # Run scraper (pass args for workflow integration) + scraper.run(args=args) if __name__ == "__main__":