Auto-fixed (whitespace, imports, type annotations): - codebase_scraper.py: W293 blank lines with whitespace - doc_scraper.py: W293 blank lines with whitespace - parsers/extractors/__init__.py: W293 - parsers/extractors/base_parser.py: W293, UP007, UP045, F401 Manual fixes: - enhancement_workflow.py: B904 raise without `from exc`, remove unused `os` import - parsers/extractors/quality_scorer.py: E741 ambiguous var `l` → `line` - parsers/extractors/rst_parser.py: SIM102 nested if → combined conditions (x2) - pdf_scraper.py: F821 undefined `logger` → `print()` (consistent with file style) - mcp/tools/workflow_tools.py: ARG001 unused `args` → `_args` - tests/test_workflow_runner.py: ARG005 unused lambda args → `_a`/`_kw`, ARG001 `kwargs` → `_kwargs` - tests/test_workflows_command.py: SIM117 nested with → combined with (x2) All 1922 tests pass. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
495 lines
18 KiB
Python
495 lines
18 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Enhancement Workflow Engine
|
|
|
|
Allows users to define custom AI enhancement workflows with:
|
|
- Sequential stages that build on previous results
|
|
- Custom prompts per stage
|
|
- History passing between stages
|
|
- Post-processing configuration
|
|
- Per-project and global workflow support
|
|
|
|
Usage:
|
|
# Use global workflow
|
|
skill-seekers analyze . --enhance-workflow security-focus
|
|
|
|
# Use project workflow
|
|
skill-seekers analyze . --enhance-workflow .skill-seekers/enhancement.yaml
|
|
|
|
# Quick inline stages
|
|
skill-seekers analyze . \\
|
|
--enhance-stage "security:Analyze for security issues" \\
|
|
--enhance-stage "cleanup:Remove boilerplate"
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from importlib.resources import files as importlib_files
|
|
from pathlib import Path
|
|
from typing import Any, Literal
|
|
|
|
import yaml
|
|
|
|
# Module-level logger named after this module; used for all workflow progress output.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
|
|
class WorkflowStage:
|
|
"""Single enhancement stage in a workflow."""
|
|
|
|
name: str
|
|
type: Literal["builtin", "custom"]
|
|
target: str # "patterns", "examples", "config", "skill_md", "all"
|
|
prompt: str | None = None
|
|
uses_history: bool = False
|
|
enabled: bool = True
|
|
metadata: dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
@dataclass
class PostProcessConfig:
    """Post-processing configuration."""

    # Top-level result keys deleted after all stages have run.
    remove_sections: list[str] = field(default_factory=list)
    # Desired section order; stored under "section_order" in the results.
    reorder_sections: list[str] = field(default_factory=list)
    # Key/value pairs merged into the results' "metadata" entry.
    add_metadata: dict[str, Any] = field(default_factory=dict)
    # Transform descriptors; currently only logged (no transform system yet).
    custom_transforms: list[dict[str, Any]] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class EnhancementWorkflow:
    """Complete enhancement workflow definition."""

    name: str
    description: str
    version: str = "1.0"
    applies_to: list[str] = field(default_factory=lambda: ["codebase_analysis"])
    # Merged into the stage context when the workflow runs.
    variables: dict[str, Any] = field(default_factory=dict)
    # Stages are executed in list order.
    stages: list[WorkflowStage] = field(default_factory=list)
    post_process: PostProcessConfig = field(default_factory=PostProcessConfig)
    extends: str | None = None  # Inherit from another workflow
|
|
|
|
|
|
class WorkflowEngine:
    """
    Execute enhancement workflows with sequential stages.

    Each stage can:
    - Access previous stage results
    - Access all history
    - Access specific stages by name
    - Run custom AI prompts
    - Target specific parts of the analysis
    """

    def __init__(self, workflow: EnhancementWorkflow | str | Path):
        """
        Initialize workflow engine.

        Args:
            workflow: EnhancementWorkflow object, or a workflow name / path to
                a YAML file that is resolved by ``_load_workflow``.
        """
        if isinstance(workflow, (str, Path)):
            self.workflow = self._load_workflow(workflow)
        else:
            self.workflow = workflow

        # One entry per executed stage: stage name, results, timestamp, metadata.
        self.history: list[dict[str, Any]] = []
        self.enhancer = None  # AIEnhancer, created lazily by the first custom stage

    def _load_workflow(self, workflow_ref: str | Path) -> EnhancementWorkflow:
        """Load workflow from YAML file using 3-level search order.

        Search order:
        1. Raw file path (absolute or relative) — existing behaviour
        2. ~/.config/skill-seekers/workflows/{name}.yaml — user overrides/custom
        3. skill_seekers/workflows/{name}.yaml via importlib.resources — bundled defaults

        Args:
            workflow_ref: Workflow name or path; ".yaml" is appended when the
                reference has no .yaml/.yml extension.

        Returns:
            The parsed workflow, with any ``extends`` parent merged in.

        Raises:
            FileNotFoundError: If the workflow is not found at any level.
            ValueError: If the YAML document is empty or not a mapping.
        """
        workflow_ref = Path(workflow_ref)

        # Add .yaml extension for bare names
        name_str = str(workflow_ref)
        if not name_str.endswith((".yaml", ".yml")):
            yaml_ref = Path(name_str + ".yaml")
        else:
            yaml_ref = workflow_ref

        resolved_path: Path | None = None
        yaml_text: str | None = None

        # Level 1: absolute path or relative-to-CWD
        if yaml_ref.is_absolute():
            if yaml_ref.exists():
                resolved_path = yaml_ref
        else:
            cwd_path = Path.cwd() / yaml_ref
            if cwd_path.exists():
                resolved_path = cwd_path
            elif yaml_ref.exists():
                resolved_path = yaml_ref

        # Level 2: user config directory
        if resolved_path is None:
            user_dir = Path.home() / ".config" / "skill-seekers" / "workflows"
            user_path = user_dir / yaml_ref.name
            if user_path.exists():
                resolved_path = user_path

        # Level 3: bundled package workflows via importlib.resources
        if resolved_path is None:
            bare_name = yaml_ref.name  # e.g. "security-focus.yaml"
            try:
                pkg_ref = importlib_files("skill_seekers.workflows").joinpath(bare_name)
                yaml_text = pkg_ref.read_text(encoding="utf-8")
                logger.info(f"📋 Loading bundled workflow: {bare_name}")
            except (FileNotFoundError, TypeError, ModuleNotFoundError) as exc:
                raise FileNotFoundError(
                    f"Workflow '{yaml_ref.stem}' not found. "
                    "Use 'skill-seekers workflows list' to see available workflows."
                ) from exc

        if resolved_path is not None:
            logger.info(f"📋 Loading workflow: {resolved_path}")
            with open(resolved_path, encoding="utf-8") as f:
                data = yaml.safe_load(f)
            source = str(resolved_path)
        else:
            data = yaml.safe_load(yaml_text)
            source = f"bundled workflow '{yaml_ref.name}'"

        # safe_load returns None for an empty document and may return a list or
        # scalar; fail with a clear message instead of an opaque TypeError below.
        if not isinstance(data, dict):
            raise ValueError(
                f"Workflow definition in {source} must be a YAML mapping, "
                f"got {type(data).__name__}"
            )

        # Handle inheritance (extends)
        if data.get("extends"):
            parent = self._load_workflow(data["extends"])
            data = self._merge_workflows(parent, data)

        # Parse stages ("or" guards against keys present with an explicit null)
        stages = [
            WorkflowStage(
                name=stage_data["name"],
                type=stage_data.get("type", "custom"),
                target=stage_data.get("target", "all"),
                prompt=stage_data.get("prompt"),
                uses_history=stage_data.get("uses_history", False),
                enabled=stage_data.get("enabled", True),
                metadata=stage_data.get("metadata") or {},
            )
            for stage_data in data.get("stages") or []
        ]

        # Parse post-processing
        post_process_data = data.get("post_process") or {}
        post_process = PostProcessConfig(
            remove_sections=post_process_data.get("remove_sections", []),
            reorder_sections=post_process_data.get("reorder_sections", []),
            add_metadata=post_process_data.get("add_metadata", {}),
            custom_transforms=post_process_data.get("custom_transforms", []),
        )

        return EnhancementWorkflow(
            name=data.get("name", "Unnamed Workflow"),
            description=data.get("description", ""),
            version=data.get("version", "1.0"),
            applies_to=data.get("applies_to", ["codebase_analysis"]),
            variables=data.get("variables", {}),
            stages=stages,
            post_process=post_process,
            extends=data.get("extends"),
        )

    def _merge_workflows(self, parent: EnhancementWorkflow, child_data: dict) -> dict:
        """Merge child workflow with parent (inheritance).

        Args:
            parent: Already-loaded parent workflow.
            child_data: Raw mapping parsed from the child's YAML.

        Returns:
            A raw workflow mapping. Scalar fields come from the child when
            present; variables and ``add_metadata`` are dict-merged with the
            child winning; stages keep the parent's order with same-named child
            stages replacing them and new child stages appended;
            ``custom_transforms`` are concatenated (parent first).
        """
        merged = {
            "name": child_data.get("name", parent.name),
            "description": child_data.get("description", parent.description),
            "version": child_data.get("version", parent.version),
            "applies_to": child_data.get("applies_to", parent.applies_to),
            "variables": {**parent.variables, **(child_data.get("variables") or {})},
            "stages": [],
            "post_process": {},
        }

        # Merge stages (child can override by name)
        parent_stages = {s.name: s for s in parent.stages}
        child_stages = {s["name"]: s for s in child_data.get("stages") or []}

        # dict.fromkeys de-duplicates names while keeping first-seen order, so an
        # overridden stage stays at the parent's position.
        for name in dict.fromkeys([*parent_stages, *child_stages]):
            if name in child_stages:
                # Child overrides parent
                stage_dict = child_stages[name]
            else:
                # Use parent stage; metadata is carried over as well so that
                # inherited stages keep it in their history entries.
                stage = parent_stages[name]
                stage_dict = {
                    "name": stage.name,
                    "type": stage.type,
                    "target": stage.target,
                    "prompt": stage.prompt,
                    "uses_history": stage.uses_history,
                    "enabled": stage.enabled,
                    "metadata": dict(stage.metadata or {}),
                }
            merged["stages"].append(stage_dict)

        # Merge post-processing
        parent_post = parent.post_process
        child_post = child_data.get("post_process") or {}
        merged["post_process"] = {
            "remove_sections": child_post.get("remove_sections", parent_post.remove_sections),
            "reorder_sections": child_post.get("reorder_sections", parent_post.reorder_sections),
            "add_metadata": {
                **parent_post.add_metadata,
                **child_post.get("add_metadata", {}),
            },
            "custom_transforms": parent_post.custom_transforms
            + child_post.get("custom_transforms", []),
        }

        return merged

    def run(self, analysis_results: dict, context: dict | None = None) -> dict:
        """
        Run workflow stages sequentially.

        Neither ``analysis_results`` nor ``context`` is modified; the engine
        works on shallow copies. A stage that raises is logged and skipped so
        the remaining stages still run.

        Args:
            analysis_results: Results from analysis (patterns, examples, etc.)
            context: Additional context variables. Workflow variables are
                layered on top and win on key conflicts.

        Returns:
            Enhanced results after all stages and post-processing
        """
        total_stages = len(self.workflow.stages)
        logger.info(f"🚀 Starting workflow: {self.workflow.name}")
        logger.info(f" Description: {self.workflow.description}")
        logger.info(f" Stages: {total_stages}")

        # Shallow copy so merging and post-processing never touch the caller's dict.
        current_results = dict(analysis_results)

        # Merge workflow variables into a fresh context instead of updating the
        # caller's dict in place.
        context = {**(context or {}), **self.workflow.variables}

        # Run each stage
        for idx, stage in enumerate(self.workflow.stages, 1):
            if not stage.enabled:
                logger.info(f"⏭️ Skipping disabled stage: {stage.name}")
                continue

            logger.info(f"🔄 Running stage {idx}/{total_stages}: {stage.name}")

            # Build stage context
            stage_context = self._build_stage_context(stage, current_results, context)

            # Run stage
            try:
                stage_results = self._run_stage(stage, stage_context)

                # Save to history
                self.history.append(
                    {
                        "stage": stage.name,
                        "results": stage_results,
                        "timestamp": datetime.now().isoformat(),
                        "metadata": stage.metadata,
                    }
                )

                # Merge stage results into current results
                current_results = self._merge_stage_results(
                    current_results, stage_results, stage.target
                )

                logger.info(f" ✅ Stage complete: {stage.name}")

            except Exception as e:
                # Deliberate best-effort: one failing stage must not abort the
                # workflow (optional: make this configurable).
                logger.error(f" ❌ Stage failed: {stage.name} - {e}")

        # Post-processing
        logger.info("🔧 Running post-processing...")
        final_results = self._post_process(current_results)

        logger.info(f"✅ Workflow complete: {self.workflow.name}")
        return final_results

    def _build_stage_context(
        self, stage: WorkflowStage, current_results: dict, base_context: dict
    ) -> dict:
        """Build context for a stage (includes history if needed).

        The context always holds ``current_results`` plus every key of
        ``base_context`` (which wins on a key clash). When the stage uses
        history and at least one stage has run, ``previous_results``,
        ``all_history`` and ``stages`` (results keyed by stage name) are added.
        """
        context = {
            "current_results": current_results,
            **base_context,
        }

        if stage.uses_history and self.history:
            # Add previous stage
            context["previous_results"] = self.history[-1]["results"]

            # Add all history
            context["all_history"] = self.history

            # Add stages by name for easy access
            context["stages"] = {h["stage"]: h["results"] for h in self.history}

        return context

    def _run_stage(self, stage: WorkflowStage, context: dict) -> dict:
        """Run a single stage, dispatching on ``stage.type``."""
        if stage.type == "builtin":
            return self._run_builtin_stage(stage, context)
        return self._run_custom_stage(stage, context)

    def _run_builtin_stage(self, stage: WorkflowStage, context: dict) -> dict:
        """Run built-in enhancement stage.

        Returns a dict with the enhanced ``patterns`` or ``examples``, or an
        empty dict when the target is unsupported or absent from the current
        results.
        """
        # Use existing enhancement system
        from skill_seekers.cli.ai_enhancer import PatternEnhancer, TestExampleEnhancer

        current = context["current_results"]

        # Determine what to enhance based on target
        if stage.target == "patterns" and "patterns" in current:
            enhancer = PatternEnhancer()
            enhanced_patterns = enhancer.enhance_patterns(current["patterns"])
            return {"patterns": enhanced_patterns}

        if stage.target == "examples" and "examples" in current:
            enhancer = TestExampleEnhancer()
            enhanced_examples = enhancer.enhance_examples(current["examples"])
            return {"examples": enhanced_examples}

        logger.warning(f"Unknown builtin target: {stage.target}")
        return {}

    def _run_custom_stage(self, stage: WorkflowStage, context: dict) -> dict:
        """Run custom AI enhancement stage.

        The stage prompt is formatted with ``context``; if formatting fails the
        raw prompt is sent unchanged. A JSON object reply is returned as-is;
        any other reply is wrapped as ``{"content": ..., "stage": <name>}``.
        Returns an empty dict when the stage has no prompt or the AI gives no
        response.
        """
        if not stage.prompt:
            logger.warning(f"Custom stage '{stage.name}' has no prompt")
            return {}

        # Lazy load enhancer
        if not self.enhancer:
            from skill_seekers.cli.ai_enhancer import AIEnhancer

            self.enhancer = AIEnhancer()

        # Format prompt with context
        try:
            formatted_prompt = stage.prompt.format(**context)
        except KeyError as e:
            logger.warning(f"Missing context variable: {e}")
            formatted_prompt = stage.prompt
        except (AttributeError, IndexError, TypeError, ValueError) as e:
            # Prompts often contain literal braces (JSON samples, "{}"), which
            # str.format rejects; send the prompt verbatim rather than failing.
            logger.warning(f"Could not format prompt for stage '{stage.name}': {e}")
            formatted_prompt = stage.prompt

        # Call AI with custom prompt
        logger.info(" 🤖 Running custom AI prompt...")
        response = self.enhancer._call_claude(formatted_prompt, max_tokens=3000)

        if not response:
            logger.warning(" ⚠️ No response from AI")
            return {}

        # Try to parse as JSON first, fallback to plain text
        try:
            result = json.loads(response)
        except json.JSONDecodeError:
            # Plain text response
            return {"content": response, "stage": stage.name}

        # Valid JSON that is not an object (list, string, number) cannot be
        # merged as a dict, so wrap it the same way as plain text.
        if not isinstance(result, dict):
            return {"content": result, "stage": stage.name}

        return result

    def _merge_stage_results(self, current: dict, stage_results: dict, target: str) -> dict:
        """Merge stage results into current results.

        Always returns a new dict; ``current`` is never modified. An empty
        ``stage_results`` leaves the data unchanged so that a stage which
        produced nothing cannot wipe its target section.

        Args:
            current: Results accumulated so far.
            stage_results: Output of the stage that just ran.
            target: "all" to merge every key, otherwise the single key to set.
        """
        if not stage_results:
            return dict(current)

        if target == "all":
            # Merge everything
            return {**current, **stage_results}

        # Merge only specific target: prefer the matching key, else store the
        # whole stage output under the target.
        merged = dict(current)
        merged[target] = stage_results.get(target, stage_results)
        return merged

    def _post_process(self, results: dict) -> dict:
        """Apply post-processing configuration.

        Removes configured sections, merges configured metadata, records the
        section order when ``skill_md_sections`` is present, and logs each
        custom transform. ``results`` is updated in place and returned.
        """
        config = self.workflow.post_process

        # Remove sections
        for section in config.remove_sections:
            if section in results:
                logger.info(f" 🗑️ Removing section: {section}")
                del results[section]

        # Add metadata (build a new dict so an existing nested metadata dict
        # shared with the caller is not modified)
        if config.add_metadata:
            results["metadata"] = {**results.get("metadata", {}), **config.add_metadata}
            logger.info(f" 📝 Added metadata: {list(config.add_metadata.keys())}")

        # Reorder sections (for SKILL.md generation)
        if config.reorder_sections and "skill_md_sections" in results:
            logger.info(" 🔄 Reordering sections...")
            # This will be used during SKILL.md generation
            results["section_order"] = config.reorder_sections

        # Custom transforms (extensibility)
        for transform in config.custom_transforms:
            logger.info(f" ⚙️ Applying transform: {transform.get('name', 'unknown')}")
            # TODO: Implement custom transform system

        return results

    def save_history(self, output_path: str | Path) -> None:
        """Save workflow execution history as JSON.

        Parent directories are created if missing.

        Args:
            output_path: Destination file path.
        """
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        history_data = {
            "workflow": self.workflow.name,
            "version": self.workflow.version,
            "executed_at": datetime.now().isoformat(),
            "stages": self.history,
        }

        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(history_data, f, indent=2)

        logger.info(f"💾 Saved workflow history: {output_path}")
|
|
|
|
|
|
def list_bundled_workflows() -> list[str]:
    """Return names of all bundled default workflows (without .yaml extension).

    Looks for ``.yaml`` / ``.yml`` resources in the ``skill_seekers.workflows``
    package and returns their names, sorted, with only the final extension
    removed. Returns an empty list if the package cannot be inspected.
    """
    try:
        pkg = importlib_files("skill_seekers.workflows")
        # Path(...).stem strips exactly one suffix, matching list_user_workflows.
        return sorted(
            Path(item.name).stem
            for item in pkg.iterdir()
            if Path(item.name).suffix in (".yaml", ".yml")
        )
    except Exception:
        # Best-effort listing: a missing or unreadable package means "no bundled
        # workflows", but leave a trace for debugging instead of failing silently.
        logger.debug("Could not list bundled workflows", exc_info=True)
        return []
|
|
|
|
|
|
def list_user_workflows() -> list[str]:
    """Return names of all user-defined workflows (without .yaml extension).

    Scans ``~/.config/skill-seekers/workflows`` for ``.yaml`` / ``.yml`` files
    and returns their stems, sorted. Returns an empty list when that
    directory does not exist.
    """
    user_dir = Path.home() / ".config" / "skill-seekers" / "workflows"
    # is_dir() rather than exists(): a stray file at this path must not reach
    # iterdir(), which would raise NotADirectoryError.
    if not user_dir.is_dir():
        return []
    return sorted(
        entry.stem
        for entry in user_dir.iterdir()
        # Only regular files count; a directory named "*.yaml" is not a workflow.
        if entry.suffix in (".yaml", ".yml") and entry.is_file()
    )
|