feat: add workflow support to unified_scraper (fixes gap #1)
unified_scraper.py was the only scraper missing --enhance-workflow, --enhance-stage, --var, and --workflow-dry-run support. All other scrapers (doc_scraper, github_scraper, pdf_scraper, codebase_scraper) already called run_workflows() after building the skill.

Changes:
- arguments/unified.py: add 4 workflow args to UNIFIED_ARGUMENTS so the unified CLI subparser picks them up automatically
- unified_scraper.py main(): register the same 4 workflow args in the standalone parser
- unified_scraper.py run(): accept optional `args` parameter and call run_workflows() after build_skill(), passing unified context (name + description) consistent with the doc_scraper pattern

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -40,6 +40,38 @@ UNIFIED_ARGUMENTS: dict[str, dict[str, Any]] = {
|
||||
"help": "Dry run mode",
|
||||
},
|
||||
},
|
||||
# Enhancement Workflow arguments (mirrors scrape/github/pdf/codebase scrapers)
|
||||
"enhance_workflow": {
|
||||
"flags": ("--enhance-workflow",),
|
||||
"kwargs": {
|
||||
"action": "append",
|
||||
"help": "Apply enhancement workflow (file path or preset: security-focus, minimal, api-documentation, architecture-comprehensive). Can use multiple times to chain workflows.",
|
||||
"metavar": "WORKFLOW",
|
||||
},
|
||||
},
|
||||
"enhance_stage": {
|
||||
"flags": ("--enhance-stage",),
|
||||
"kwargs": {
|
||||
"action": "append",
|
||||
"help": "Add inline enhancement stage (format: 'name:prompt'). Can be used multiple times.",
|
||||
"metavar": "STAGE",
|
||||
},
|
||||
},
|
||||
"var": {
|
||||
"flags": ("--var",),
|
||||
"kwargs": {
|
||||
"action": "append",
|
||||
"help": "Override workflow variable (format: 'key=value'). Can be used multiple times.",
|
||||
"metavar": "VAR",
|
||||
},
|
||||
},
|
||||
"workflow_dry_run": {
|
||||
"flags": ("--workflow-dry-run",),
|
||||
"kwargs": {
|
||||
"action": "store_true",
|
||||
"help": "Preview workflow stages without executing (requires --enhance-workflow)",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -943,9 +943,14 @@ class UnifiedScraper:
|
||||
|
||||
logger.info(f"✅ Unified skill built: {self.output_dir}/")
|
||||
|
||||
def run(self):
|
||||
def run(self, args=None):
|
||||
"""
|
||||
Execute complete unified scraping workflow.
|
||||
|
||||
Args:
|
||||
args: Optional parsed CLI arguments for workflow integration.
|
||||
When provided, enhancement workflows (--enhance-workflow,
|
||||
--enhance-stage) are executed after the skill is built.
|
||||
"""
|
||||
logger.info("\n" + "🚀 " * 20)
|
||||
logger.info(f"Unified Scraper: {self.config['name']}")
|
||||
@@ -966,6 +971,16 @@ class UnifiedScraper:
|
||||
# Phase 4: Build skill
|
||||
self.build_skill(merged_data)
|
||||
|
||||
# Phase 5: Enhancement Workflow Integration
|
||||
if args is not None:
|
||||
from skill_seekers.cli.workflow_runner import run_workflows
|
||||
|
||||
unified_context = {
|
||||
"name": self.config.get("name", ""),
|
||||
"description": self.config.get("description", ""),
|
||||
}
|
||||
run_workflows(args, context=unified_context)
|
||||
|
||||
logger.info("\n" + "✅ " * 20)
|
||||
logger.info("Unified scraping complete!")
|
||||
logger.info("✅ " * 20 + "\n")
|
||||
@@ -1024,6 +1039,34 @@ Examples:
|
||||
action="store_true",
|
||||
help="Preview what will be scraped without actually scraping",
|
||||
)
|
||||
# Enhancement Workflow arguments (mirrors scrape/github/pdf/codebase scrapers)
|
||||
parser.add_argument(
|
||||
"--enhance-workflow",
|
||||
action="append",
|
||||
dest="enhance_workflow",
|
||||
help="Apply enhancement workflow (file path or preset). Can use multiple times to chain workflows.",
|
||||
metavar="WORKFLOW",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enhance-stage",
|
||||
action="append",
|
||||
dest="enhance_stage",
|
||||
help="Add inline enhancement stage (format: 'name:prompt'). Can be used multiple times.",
|
||||
metavar="STAGE",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--var",
|
||||
action="append",
|
||||
dest="var",
|
||||
help="Override workflow variable (format: 'key=value'). Can be used multiple times.",
|
||||
metavar="VAR",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--workflow-dry-run",
|
||||
action="store_true",
|
||||
dest="workflow_dry_run",
|
||||
help="Preview workflow stages without executing (requires --enhance-workflow)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -1068,8 +1111,8 @@ Examples:
|
||||
logger.info(f"Merge mode: {scraper.merge_mode}")
|
||||
return
|
||||
|
||||
# Run scraper
|
||||
scraper.run()
|
||||
# Run scraper (pass args for workflow integration)
|
||||
scraper.run(args=args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user