Completes the implementation for Unity/Unreal/Godot game engine support and adds missing "local" source type validation. Changes: - Add "local" to VALID_SOURCE_TYPES in config_validator.py - Add _validate_local_source() method with full validation - Add Unity/Unreal/Godot to FRAMEWORK_MARKERS for priority detection - Add game engine directory exclusions to all 3 scrapers: * Unity: Library/, Temp/, Logs/, UserSettings/, etc. * Unreal: Intermediate/, Saved/, DerivedDataCache/ * Godot: .godot/, .import/ - Prevents scanning massive build cache directories (saves GBs + hours) This completes all features mentioned in PR #278: ✅ Unity/Unreal/Godot framework detection with priority ✅ Pattern enhancement performance fix (grouped approach) ✅ Game engine directory exclusions ✅ Phase 5 SKILL.md AI enhancement ✅ Local source references copying ✅ "local" source type validation ✅ Config field name compatibility ✅ C# test example extraction Tested: - All unified config tests pass (18/18) - All config validation tests pass (28/28) - Ready for Unity project testing Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
890 lines
30 KiB
Python
890 lines
30 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Configuration Pattern Extraction (C3.4)
|
|
|
|
Extracts configuration patterns from actual config files in the codebase.
|
|
Supports JSON, YAML, TOML, ENV, INI, Python config modules, and more.
|
|
|
|
This is different from C3.2 which extracts config examples from test code.
|
|
C3.4 focuses on documenting the actual project configuration.
|
|
"""
|
|
|
|
import ast
|
|
import json
|
|
import logging
|
|
import re
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any, Literal
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Optional dependencies
|
|
try:
|
|
import yaml
|
|
|
|
YAML_AVAILABLE = True
|
|
except ImportError:
|
|
YAML_AVAILABLE = False
|
|
logger.debug("PyYAML not available - YAML parsing will be limited")
|
|
|
|
try:
|
|
import tomli as toml_lib
|
|
|
|
TOML_AVAILABLE = True
|
|
except ImportError:
|
|
try:
|
|
import toml as toml_lib # noqa: F401
|
|
|
|
TOML_AVAILABLE = True
|
|
except ImportError:
|
|
toml_lib = None
|
|
TOML_AVAILABLE = False
|
|
logger.debug("toml/tomli not available - TOML parsing disabled")
|
|
|
|
|
|
@dataclass
class ConfigSetting:
    """Individual configuration setting extracted from a config file."""

    key: str  # Setting name; dotted ("section.key") for nested/INI settings
    value: Any  # Raw parsed value
    value_type: str  # 'string', 'integer', 'boolean', 'array', 'object', 'null'
    default_value: Any | None = None  # Known default; not populated by the parsers in this module
    required: bool = False  # Whether the setting must be present; not populated by parsers here
    env_var: str | None = None  # Backing environment variable (set by env/Dockerfile parsers)
    description: str = ""  # Human-readable description (e.g. from a comment above the setting)
    validation: dict[str, Any] = field(default_factory=dict)  # Validation rules; not populated by parsers here
    nested_path: list[str] = field(default_factory=list)  # For nested configs
|
|
|
|
|
|
@dataclass
class ConfigFile:
    """Represents a configuration file discovered in the codebase."""

    file_path: str  # Path to the file as found on disk
    relative_path: str  # Path relative to the scanned root directory
    # Format used to pick the parser in ConfigParser.parse_config_file
    config_type: Literal[
        "json",
        "yaml",
        "toml",
        "env",
        "ini",
        "python",
        "javascript",
        "dockerfile",
        "docker-compose",
    ]
    purpose: str  # Inferred purpose: database, api, logging, etc.
    settings: list[ConfigSetting] = field(default_factory=list)  # Extracted settings
    patterns: list[str] = field(default_factory=list)  # Detected pattern names (ConfigPatternDetector)
    raw_content: str | None = None  # Full file text; set when the file is read for parsing
    parse_errors: list[str] = field(default_factory=list)  # Errors recorded while parsing
|
|
|
|
|
|
@dataclass
class ConfigExtractionResult:
    """Aggregated result of a config extraction run."""

    config_files: list[ConfigFile] = field(default_factory=list)
    total_files: int = 0
    total_settings: int = 0
    detected_patterns: dict[str, list[str]] = field(default_factory=dict)  # pattern -> files
    errors: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Convert result to a plain dictionary suitable for JSON output."""

        def _setting_entry(s):
            # One JSON-serializable record per extracted setting.
            return {
                "key": s.key,
                "value": s.value,
                "type": s.value_type,
                "env_var": s.env_var,
                "description": s.description,
            }

        def _file_entry(cf):
            # One record per config file, settings inlined.
            return {
                "file_path": cf.file_path,
                "relative_path": cf.relative_path,
                "type": cf.config_type,
                "purpose": cf.purpose,
                "patterns": cf.patterns,
                "settings_count": len(cf.settings),
                "settings": [_setting_entry(s) for s in cf.settings],
                "parse_errors": cf.parse_errors,
            }

        return {
            "total_files": self.total_files,
            "total_settings": self.total_settings,
            "detected_patterns": self.detected_patterns,
            "config_files": [_file_entry(cf) for cf in self.config_files],
            "errors": self.errors,
        }

    def to_markdown(self) -> str:
        """Generate a markdown report of the extraction results."""
        out = []
        out.append("# Configuration Extraction Report\n\n")
        out.append(f"**Total Files:** {self.total_files}\n")
        out.append(f"**Total Settings:** {self.total_settings}\n")

        # detected_patterns is normally a dict (pattern -> files), but a plain
        # list of pattern names is tolerated as well.
        if not self.detected_patterns:
            patterns_str = "None"
        elif isinstance(self.detected_patterns, dict):
            patterns_str = ", ".join(self.detected_patterns.keys())
        else:
            patterns_str = ", ".join(self.detected_patterns)
        out.append(f"**Detected Patterns:** {patterns_str}\n\n")

        if self.config_files:
            out.append("## Configuration Files\n\n")
            for cf in self.config_files:
                out.append(f"### {cf.relative_path}\n\n")
                out.append(f"- **Type:** {cf.config_type}\n")
                out.append(f"- **Purpose:** {cf.purpose}\n")
                out.append(f"- **Settings:** {len(cf.settings)}\n")
                if cf.patterns:
                    out.append(f"- **Patterns:** {', '.join(cf.patterns)}\n")
                if cf.parse_errors:
                    out.append(f"- **Errors:** {len(cf.parse_errors)}\n")
                out.append("\n")

        if self.errors:
            out.append("## Errors\n\n")
            for error in self.errors:
                out.append(f"- {error}\n")

        return "".join(out)
|
|
|
|
|
|
class ConfigFileDetector:
    """Detect configuration files in a codebase."""

    # Config file detection rules by type.  "names" are exact filenames
    # (compared case-insensitively), "patterns" are glob patterns matched
    # against the path.
    CONFIG_PATTERNS = {
        "json": {
            "patterns": ["*.json", "package.json", "tsconfig.json", "jsconfig.json"],
            "names": [
                "config.json",
                "settings.json",
                "app.json",
                ".eslintrc.json",
                ".prettierrc.json",
            ],
        },
        "yaml": {
            "patterns": ["*.yaml", "*.yml"],
            "names": [
                "config.yml",
                "settings.yml",
                ".travis.yml",
                ".gitlab-ci.yml",
                "docker-compose.yml",
            ],
        },
        "toml": {
            "patterns": ["*.toml"],
            "names": ["pyproject.toml", "Cargo.toml", "config.toml"],
        },
        "env": {
            "patterns": [".env*", "*.env"],
            "names": [".env", ".env.example", ".env.local", ".env.production"],
        },
        "ini": {
            "patterns": ["*.ini", "*.cfg"],
            "names": ["config.ini", "setup.cfg", "tox.ini"],
        },
        "python": {
            "patterns": [],
            "names": ["settings.py", "config.py", "configuration.py", "constants.py"],
        },
        "javascript": {
            "patterns": ["*.config.js", "*.config.ts"],
            "names": [
                "config.js",
                "next.config.js",
                "vue.config.js",
                "webpack.config.js",
            ],
        },
        "dockerfile": {
            "patterns": ["Dockerfile*"],
            "names": ["Dockerfile", "Dockerfile.dev", "Dockerfile.prod"],
        },
        "docker-compose": {
            "patterns": ["docker-compose*.yml", "docker-compose*.yaml"],
            "names": ["docker-compose.yml", "docker-compose.yaml"],
        },
    }

    # Order in which types are tested.  Compound/specific types come first so
    # that e.g. docker-compose.yml is classified "docker-compose" instead of
    # being swallowed by the generic yaml rules.
    DETECTION_ORDER = (
        "docker-compose",
        "dockerfile",
        "json",
        "yaml",
        "toml",
        "env",
        "ini",
        "python",
        "javascript",
    )

    # Directory names to skip entirely while walking.
    SKIP_DIRS = {
        # Python/Node
        "node_modules",
        "venv",
        "env",
        ".venv",
        "__pycache__",
        ".git",
        "build",
        "dist",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
        "htmlcov",
        "coverage",
        ".eggs",
        "*.egg-info",  # glob; handled via suffix check in _walk_directory
        # Unity (critical - contains massive build cache)
        "Library",
        "Temp",
        "Logs",
        "UserSettings",
        "MemoryCaptures",
        "Recordings",
        # Unreal Engine
        "Intermediate",
        "Saved",
        "DerivedDataCache",
        # Godot
        ".godot",
        ".import",
        # Misc
        "tmp",
        ".tmp",
    }

    def find_config_files(self, directory: Path, max_files: int = 100) -> list[ConfigFile]:
        """
        Find all configuration files in directory.

        Args:
            directory: Root directory to search
            max_files: Maximum number of config files to find

        Returns:
            List of ConfigFile objects (settings not yet parsed)
        """
        config_files = []
        found_count = 0

        for file_path in self._walk_directory(directory):
            if found_count >= max_files:
                logger.info(f"Reached max_files limit ({max_files})")
                break

            config_type = self._detect_config_type(file_path)
            if config_type:
                relative_path = str(file_path.relative_to(directory))
                config_file = ConfigFile(
                    file_path=str(file_path),
                    relative_path=relative_path,
                    config_type=config_type,
                    purpose=self._infer_purpose(file_path, config_type),
                )
                config_files.append(config_file)
                found_count += 1
                logger.debug(f"Found {config_type} config: {relative_path}")

        logger.info(f"Found {len(config_files)} configuration files")
        return config_files

    def _walk_directory(self, directory: Path):
        """Yield files under *directory*, skipping excluded directories."""
        for item in directory.rglob("*"):
            # Only files are candidates
            if item.is_dir():
                continue

            # Skip anything inside an excluded directory.  "*.egg-info" is a
            # glob entry in SKIP_DIRS that exact part-matching would never hit,
            # so test that suffix explicitly.
            if any(
                part in self.SKIP_DIRS or part.endswith(".egg-info")
                for part in item.parts
            ):
                continue

            yield item

    def _detect_config_type(self, file_path: Path) -> str | None:
        """Detect configuration file type, or None if not a config file."""
        filename = file_path.name.lower()

        # Check types in priority order (specific before generic)
        for config_type in self.DETECTION_ORDER:
            rules = self.CONFIG_PATTERNS[config_type]

            # Exact name matches, case-insensitive so mixed-case entries such
            # as "Dockerfile" or "Cargo.toml" are honored.
            if any(filename == name.lower() for name in rules["names"]):
                return config_type

            # Glob pattern matches
            if any(file_path.match(pattern) for pattern in rules["patterns"]):
                return config_type

        return None

    def _infer_purpose(self, file_path: Path, _config_type: str) -> str:
        """Infer configuration purpose from file path and name."""
        path_lower = str(file_path).lower()
        filename = file_path.name.lower()

        # Database configs
        if any(word in path_lower for word in ["database", "db", "postgres", "mysql", "mongo"]):
            return "database_configuration"

        # API configs
        if any(word in path_lower for word in ["api", "rest", "graphql", "endpoint"]):
            return "api_configuration"

        # Logging configs
        if any(word in path_lower for word in ["log", "logger", "logging"]):
            return "logging_configuration"

        # Docker configs
        if "docker" in filename:
            return "docker_configuration"

        # CI/CD configs
        # NOTE(review): bare "ci"/"cd" are substring checks and can match
        # unrelated paths (e.g. "circle", "cdn") — kept for compatibility.
        if any(word in path_lower for word in [".travis", ".gitlab", ".github", "ci", "cd"]):
            return "ci_cd_configuration"

        # Package configs
        if filename in ["package.json", "pyproject.toml", "cargo.toml"]:
            return "package_configuration"

        # TypeScript/JavaScript configs
        if filename in ["tsconfig.json", "jsconfig.json"]:
            return "typescript_configuration"

        # Framework configs
        if "next.config" in filename or "vue.config" in filename or "webpack.config" in filename:
            return "framework_configuration"

        # Environment configs
        if ".env" in filename:
            return "environment_configuration"

        # Default
        return "general_configuration"
|
|
|
|
|
|
class ConfigParser:
    """Parse different configuration file formats into ConfigSetting lists."""

    # KEY=VALUE with an optional "export " prefix; keys may be upper or lower
    # case (the original uppercase-only pattern missed lowercase env keys).
    _ENV_LINE_RE = re.compile(r"(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.+)")

    def parse_config_file(self, config_file: ConfigFile) -> ConfigFile:
        """
        Parse configuration file and extract settings.

        Reads the file from disk, dispatches to the format-specific parser
        based on ``config_file.config_type``, and records failures in
        ``config_file.parse_errors`` instead of raising.

        Args:
            config_file: ConfigFile object to parse

        Returns:
            Updated ConfigFile with settings populated
        """
        try:
            # Read file content
            with open(config_file.file_path, encoding="utf-8") as f:
                config_file.raw_content = f.read()

            # Parse based on type
            if config_file.config_type == "json":
                self._parse_json(config_file)
            elif config_file.config_type == "yaml":
                self._parse_yaml(config_file)
            elif config_file.config_type == "toml":
                self._parse_toml(config_file)
            elif config_file.config_type == "env":
                self._parse_env(config_file)
            elif config_file.config_type == "ini":
                self._parse_ini(config_file)
            elif config_file.config_type == "python":
                self._parse_python_config(config_file)
            elif config_file.config_type == "javascript":
                self._parse_javascript_config(config_file)
            elif config_file.config_type == "dockerfile":
                self._parse_dockerfile(config_file)
            elif config_file.config_type == "docker-compose":
                self._parse_yaml(config_file)  # Docker compose is YAML

        except Exception as e:
            error_msg = f"Error parsing {config_file.relative_path}: {str(e)}"
            logger.warning(error_msg)
            config_file.parse_errors.append(error_msg)

        return config_file

    def _parse_json(self, config_file: ConfigFile):
        """Parse JSON configuration."""
        try:
            data = json.loads(config_file.raw_content)
            # Only mappings yield key/value settings; a top-level array would
            # otherwise crash into the generic error handler (mirrors _parse_yaml).
            if isinstance(data, dict):
                self._extract_settings_from_dict(data, config_file)
        except json.JSONDecodeError as e:
            config_file.parse_errors.append(f"JSON parse error: {str(e)}")

    def _parse_yaml(self, config_file: ConfigFile):
        """Parse YAML configuration (requires PyYAML)."""
        if not YAML_AVAILABLE:
            config_file.parse_errors.append("PyYAML not installed")
            return

        try:
            data = yaml.safe_load(config_file.raw_content)
            if isinstance(data, dict):
                self._extract_settings_from_dict(data, config_file)
        except yaml.YAMLError as e:
            config_file.parse_errors.append(f"YAML parse error: {str(e)}")

    def _parse_toml(self, config_file: ConfigFile):
        """Parse TOML configuration (requires toml/tomli)."""
        if not TOML_AVAILABLE:
            config_file.parse_errors.append("toml/tomli not installed")
            return

        try:
            data = toml_lib.loads(config_file.raw_content)
            self._extract_settings_from_dict(data, config_file)
        except Exception as e:
            config_file.parse_errors.append(f"TOML parse error: {str(e)}")

    def _parse_env(self, config_file: ConfigFile):
        """Parse a .env file (KEY=VALUE lines, optional "export " prefix)."""
        lines = config_file.raw_content.split("\n")

        for line_num, line in enumerate(lines, 1):
            line = line.strip()

            # Skip comments and empty lines
            if not line or line.startswith("#"):
                continue

            match = self._ENV_LINE_RE.match(line)
            if match:
                key, value = match.groups()
                # Drop surrounding quotes, if any
                value = value.strip().strip('"').strip("'")

                setting = ConfigSetting(
                    key=key,
                    value=value,
                    value_type=self._infer_type(value),
                    env_var=key,
                    # Comment directly above the line serves as description
                    description=self._extract_env_description(lines, line_num - 1),
                )
                config_file.settings.append(setting)

    def _parse_ini(self, config_file: ConfigFile):
        """Parse INI configuration via the stdlib configparser."""
        import configparser

        try:
            parser = configparser.ConfigParser()
            parser.read_string(config_file.raw_content)

            for section in parser.sections():
                for key, value in parser[section].items():
                    setting = ConfigSetting(
                        key=f"{section}.{key}",
                        value=value,
                        value_type=self._infer_type(value),
                        nested_path=[section, key],
                    )
                    config_file.settings.append(setting)
        except Exception as e:
            config_file.parse_errors.append(f"INI parse error: {str(e)}")

    def _parse_python_config(self, config_file: ConfigFile):
        """Parse a Python configuration module (top-level literal assignments)."""
        try:
            tree = ast.parse(config_file.raw_content)

            for node in ast.walk(tree):
                # Simple single-target assignments; skip private names
                if (
                    isinstance(node, ast.Assign)
                    and len(node.targets) == 1
                    and isinstance(node.targets[0], ast.Name)
                    and not node.targets[0].id.startswith("_")
                ):
                    key = node.targets[0].id

                    # Only literal values are extractable
                    try:
                        value = ast.literal_eval(node.value)
                        setting = ConfigSetting(
                            key=key,
                            value=value,
                            value_type=self._infer_type(value),
                            description=self._extract_python_docstring(node),
                        )
                        config_file.settings.append(setting)
                    except (ValueError, TypeError):
                        # Can't evaluate complex expressions (calls, names, ...)
                        pass

        except SyntaxError as e:
            config_file.parse_errors.append(f"Python parse error: {str(e)}")

    def _parse_javascript_config(self, config_file: ConfigFile):
        """Parse JavaScript/TypeScript config (basic regex extraction)."""
        # Simple regex-based extraction for common declaration patterns
        patterns = [
            r'(?:const|let|var)\s+(\w+)\s*[:=]\s*(["\'])(.*?)\2',  # String values
            r"(?:const|let|var)\s+(\w+)\s*[:=]\s*(\d+)",  # Number values
            r"(?:const|let|var)\s+(\w+)\s*[:=]\s*(true|false)",  # Boolean values
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, config_file.raw_content):
                if len(match.groups()) >= 2:
                    key = match.group(1)
                    # String pattern carries the value in group 3 (group 2 is the quote)
                    value = match.group(3) if len(match.groups()) > 2 else match.group(2)

                    setting = ConfigSetting(
                        key=key, value=value, value_type=self._infer_type(value)
                    )
                    config_file.settings.append(setting)

    def _parse_dockerfile(self, config_file: ConfigFile):
        """Parse Dockerfile ENV and ARG instructions."""
        for raw_line in config_file.raw_content.split("\n"):
            line = raw_line.strip()

            # ENV variables
            if line.startswith("ENV "):
                body = line[4:].strip()
                # Support both "ENV KEY=value" and the legacy space-separated
                # "ENV KEY value" form.  Multi-pair "ENV A=1 B=2" yields a
                # single setting (value "1 B=2"), matching prior behavior.
                if "=" in body:
                    key, _, value = body.partition("=")
                else:
                    key, _, value = body.partition(" ")
                if key and value:
                    config_file.settings.append(
                        ConfigSetting(
                            key=key.strip(),
                            value=value.strip(),
                            value_type="string",
                            env_var=key.strip(),
                        )
                    )

            # ARG variables (default value is optional)
            elif line.startswith("ARG "):
                parts = line[4:].split("=", 1)
                key = parts[0].strip()
                value = parts[1].strip() if len(parts) == 2 else None

                setting = ConfigSetting(key=key, value=value, value_type="string")
                config_file.settings.append(setting)

    def _extract_settings_from_dict(
        self, data: dict, config_file: ConfigFile, parent_path: list[str] | None = None
    ):
        """Recursively extract settings from a (possibly nested) dictionary."""
        if parent_path is None:
            parent_path = []

        for key, value in data.items():
            if isinstance(value, dict):
                # Recurse into nested dicts; leaf keys become dotted paths
                self._extract_settings_from_dict(value, config_file, parent_path + [key])
            else:
                setting = ConfigSetting(
                    key=".".join(parent_path + [key]) if parent_path else key,
                    value=value,
                    value_type=self._infer_type(value),
                    nested_path=parent_path + [key],
                )
                config_file.settings.append(setting)

    def _infer_type(self, value: Any) -> str:
        """Infer a coarse type label for a parsed value."""
        if value is None:
            return "null"
        elif isinstance(value, bool):
            # bool must be tested before int (bool is an int subclass)
            return "boolean"
        elif isinstance(value, int):
            return "integer"
        elif isinstance(value, float):
            return "number"
        elif isinstance(value, (list, tuple)):
            return "array"
        elif isinstance(value, dict):
            return "object"
        else:
            return "string"

    def _extract_env_description(self, lines: list[str], line_index: int) -> str:
        """Extract description from the comment line directly above an env variable."""
        if line_index > 0:
            prev_line = lines[line_index - 1].strip()
            if prev_line.startswith("#"):
                return prev_line[1:].strip()
        return ""

    def _extract_python_docstring(self, _node: ast.AST) -> str:
        """Extract docstring/comment for a Python node.

        Simplified placeholder — a real implementation would need more context
        (e.g. source line mapping to find adjacent comments).
        """
        return ""
|
|
|
|
|
|
class ConfigPatternDetector:
    """Detect common configuration patterns from extracted setting keys."""

    # Known configuration patterns: a pattern is reported when at least
    # "min_match" of its "keys" appear among a file's setting keys.
    KNOWN_PATTERNS = {
        "database_config": {
            "keys": [
                "host",
                "port",
                "database",
                "user",
                "username",
                "password",
                "db_name",
            ],
            "min_match": 3,
        },
        "api_config": {
            "keys": [
                "base_url",
                "api_key",
                "api_secret",
                "timeout",
                "retry",
                "endpoint",
            ],
            "min_match": 2,
        },
        "logging_config": {
            "keys": ["level", "format", "handler", "file", "console", "log_level"],
            "min_match": 2,
        },
        "cache_config": {
            "keys": ["backend", "ttl", "timeout", "max_size", "redis", "memcached"],
            "min_match": 2,
        },
        "email_config": {
            "keys": ["smtp_host", "smtp_port", "email", "from_email", "mail_server"],
            "min_match": 2,
        },
        "auth_config": {
            "keys": ["secret_key", "jwt_secret", "token", "oauth", "authentication"],
            "min_match": 1,
        },
        "server_config": {
            "keys": ["host", "port", "bind", "workers", "threads"],
            "min_match": 2,
        },
    }

    def detect_patterns(self, config_file: ConfigFile) -> list[str]:
        """
        Detect which patterns this config file matches.

        Args:
            config_file: ConfigFile with settings extracted

        Returns:
            List of detected pattern names
        """
        detected = []

        # Match on full keys and, for nested settings (dotted keys such as
        # "db.host" produced by the JSON/YAML/INI parsers), also on the final
        # path segment — otherwise nested configs never match pattern keys
        # like "host" or "port".
        setting_keys = set()
        for s in config_file.settings:
            key = s.key.lower()
            setting_keys.add(key)
            if "." in key:
                setting_keys.add(key.rsplit(".", 1)[-1])

        # Check against each known pattern
        for pattern_name, pattern_def in self.KNOWN_PATTERNS.items():
            pattern_keys = {k.lower() for k in pattern_def["keys"]}
            min_match = pattern_def["min_match"]

            # Count matches
            matches = len(setting_keys & pattern_keys)

            if matches >= min_match:
                detected.append(pattern_name)
                logging.getLogger(__name__).debug(
                    f"Detected {pattern_name} in {config_file.relative_path} ({matches} matches)"
                )

        return detected
|
|
|
|
|
|
class ConfigExtractor:
    """Main configuration extraction orchestrator."""

    def __init__(self):
        # Pipeline stages: discover files, parse them, classify patterns
        self.detector = ConfigFileDetector()
        self.parser = ConfigParser()
        self.pattern_detector = ConfigPatternDetector()

    def extract_from_directory(
        self, directory: Path, max_files: int = 100
    ) -> ConfigExtractionResult:
        """
        Extract configuration patterns from directory.

        Args:
            directory: Root directory to analyze
            max_files: Maximum config files to process

        Returns:
            ConfigExtractionResult with all findings
        """
        result = ConfigExtractionResult()

        logger.info(f"Extracting configuration patterns from: {directory}")

        # Step 1: Find config files
        config_files = self.detector.find_config_files(directory, max_files)
        result.total_files = len(config_files)

        if not config_files:
            logger.warning("No configuration files found")
            return result

        # Step 2: Parse each config file
        for config_file in config_files:
            try:
                parsed = self.parser.parse_config_file(config_file)

                # Step 3: Detect patterns
                patterns = self.pattern_detector.detect_patterns(parsed)
                parsed.patterns = patterns

                # Track which files exhibit each pattern
                for pattern in patterns:
                    result.detected_patterns.setdefault(pattern, []).append(
                        parsed.relative_path
                    )

                result.config_files.append(parsed)
                result.total_settings += len(parsed.settings)

            except Exception as e:
                # One bad file must not abort the whole run
                error_msg = f"Error processing {config_file.relative_path}: {str(e)}"
                logger.error(error_msg)
                result.errors.append(error_msg)

        logger.info(
            f"Extracted {result.total_settings} settings from {result.total_files} config files"
        )
        logger.info(f"Detected patterns: {list(result.detected_patterns.keys())}")

        return result

    def to_dict(self, result: ConfigExtractionResult) -> dict:
        """Convert result to dictionary for JSON output.

        Delegates to ConfigExtractionResult.to_dict() so the JSON shape is
        defined in exactly one place (previously duplicated here line-for-line).
        """
        return result.to_dict()
|
|
|
|
|
|
def main():
    """CLI entry point for config extraction."""
    import argparse

    arg_parser = argparse.ArgumentParser(
        description="Extract configuration patterns from codebase with optional AI enhancement"
    )
    arg_parser.add_argument("directory", type=Path, help="Directory to analyze")
    arg_parser.add_argument("--output", "-o", type=Path, help="Output JSON file")
    arg_parser.add_argument(
        "--max-files", type=int, default=100, help="Maximum config files to process"
    )
    arg_parser.add_argument(
        "--enhance",
        action="store_true",
        help="Enhance with AI analysis (API mode, requires ANTHROPIC_API_KEY)",
    )
    arg_parser.add_argument(
        "--enhance-local",
        action="store_true",
        help="Enhance with AI analysis (LOCAL mode, uses Claude Code CLI)",
    )
    arg_parser.add_argument(
        "--ai-mode",
        choices=["auto", "api", "local", "none"],
        default="none",
        help="AI enhancement mode: auto (detect), api (Claude API), local (Claude Code CLI), none (disable)",
    )

    cli_args = arg_parser.parse_args()

    # Setup logging
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    # Run the extraction pipeline and serialize the result
    extractor = ConfigExtractor()
    extraction = extractor.extract_from_directory(cli_args.directory, cli_args.max_files)
    output_dict = extractor.to_dict(extraction)

    # Explicit --enhance / --enhance-local flags take precedence over --ai-mode
    if cli_args.enhance:
        enhance_mode = "api"
    elif cli_args.enhance_local:
        enhance_mode = "local"
    else:
        enhance_mode = cli_args.ai_mode

    # Optional AI enhancement; failures never abort the run
    if enhance_mode != "none":
        try:
            from skill_seekers.cli.config_enhancer import ConfigEnhancer

            logger.info(f"🤖 Starting AI enhancement (mode: {enhance_mode})...")
            output_dict = ConfigEnhancer(mode=enhance_mode).enhance_config_result(output_dict)
            logger.info("✅ AI enhancement complete")
        except ImportError:
            logger.warning("⚠️ ConfigEnhancer not available, skipping enhancement")
        except Exception as e:
            logger.error(f"❌ AI enhancement failed: {e}")

    # Write to file when requested, otherwise dump to stdout
    if cli_args.output:
        with open(cli_args.output, "w") as f:
            json.dump(output_dict, f, indent=2)
        print(f"✅ Saved config extraction results to: {cli_args.output}")
    else:
        print(json.dumps(output_dict, indent=2))

    # Human-readable summary
    print("\n📊 Summary:")
    print(f" Config files found: {extraction.total_files}")
    print(f" Total settings: {extraction.total_settings}")
    print(f" Detected patterns: {', '.join(extraction.detected_patterns.keys()) or 'None'}")

    if "ai_enhancements" in output_dict:
        print(f" ✨ AI enhancements: Yes ({enhance_mode} mode)")
        insights = output_dict["ai_enhancements"].get("overall_insights", {})
        if insights.get("security_issues_found"):
            print(f" 🔐 Security issues found: {insights['security_issues_found']}")

    if extraction.errors:
        print(f"\n⚠️ Errors: {len(extraction.errors)}")


if __name__ == "__main__":
    main()
|