feat: add workflow support to unified_scraper (fixes gap #1)
unified_scraper.py was the only scraper missing --enhance-workflow, --enhance-stage, --var, and --workflow-dry-run support. All other scrapers (doc_scraper, github_scraper, pdf_scraper, codebase_scraper) already called run_workflows() after building the skill.

Changes:
- arguments/unified.py: add 4 workflow args to UNIFIED_ARGUMENTS so the unified CLI subparser picks them up automatically
- unified_scraper.py main(): register the same 4 workflow args in the standalone parser
- unified_scraper.py run(): accept optional `args` parameter and call run_workflows() after build_skill(), passing unified context (name + description) consistent with the doc_scraper pattern

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -943,9 +943,14 @@ class UnifiedScraper:
|
||||
|
||||
logger.info(f"✅ Unified skill built: {self.output_dir}/")
|
||||
|
||||
def run(self):
|
||||
def run(self, args=None):
|
||||
"""
|
||||
Execute complete unified scraping workflow.
|
||||
|
||||
Args:
|
||||
args: Optional parsed CLI arguments for workflow integration.
|
||||
When provided, enhancement workflows (--enhance-workflow,
|
||||
--enhance-stage) are executed after the skill is built.
|
||||
"""
|
||||
logger.info("\n" + "🚀 " * 20)
|
||||
logger.info(f"Unified Scraper: {self.config['name']}")
|
||||
@@ -966,6 +971,16 @@ class UnifiedScraper:
|
||||
# Phase 4: Build skill
|
||||
self.build_skill(merged_data)
|
||||
|
||||
# Phase 5: Enhancement Workflow Integration
|
||||
if args is not None:
|
||||
from skill_seekers.cli.workflow_runner import run_workflows
|
||||
|
||||
unified_context = {
|
||||
"name": self.config.get("name", ""),
|
||||
"description": self.config.get("description", ""),
|
||||
}
|
||||
run_workflows(args, context=unified_context)
|
||||
|
||||
logger.info("\n" + "✅ " * 20)
|
||||
logger.info("Unified scraping complete!")
|
||||
logger.info("✅ " * 20 + "\n")
|
||||
@@ -1024,6 +1039,34 @@ Examples:
|
||||
action="store_true",
|
||||
help="Preview what will be scraped without actually scraping",
|
||||
)
|
||||
# Enhancement Workflow arguments (mirrors scrape/github/pdf/codebase scrapers)
|
||||
parser.add_argument(
|
||||
"--enhance-workflow",
|
||||
action="append",
|
||||
dest="enhance_workflow",
|
||||
help="Apply enhancement workflow (file path or preset). Can use multiple times to chain workflows.",
|
||||
metavar="WORKFLOW",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enhance-stage",
|
||||
action="append",
|
||||
dest="enhance_stage",
|
||||
help="Add inline enhancement stage (format: 'name:prompt'). Can be used multiple times.",
|
||||
metavar="STAGE",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--var",
|
||||
action="append",
|
||||
dest="var",
|
||||
help="Override workflow variable (format: 'key=value'). Can be used multiple times.",
|
||||
metavar="VAR",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--workflow-dry-run",
|
||||
action="store_true",
|
||||
dest="workflow_dry_run",
|
||||
help="Preview workflow stages without executing (requires --enhance-workflow)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -1068,8 +1111,8 @@ Examples:
|
||||
logger.info(f"Merge mode: {scraper.merge_mode}")
|
||||
return
|
||||
|
||||
# Run scraper
|
||||
scraper.run()
|
||||
# Run scraper (pass args for workflow integration)
|
||||
scraper.run(args=args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user