Commit: prepare for custom-domain deployment
- Update default base_url in ConfigAnalyzer to api.skillseekersweb.com
- Update website URL in the API root endpoint
- Update test_api.py to use the custom domain
349 lines
11 KiB
Python
349 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Config Analyzer - Extract metadata from Skill Seekers config files
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any, Optional
|
|
from datetime import datetime
|
|
|
|
|
|
class ConfigAnalyzer:
    """Analyzes Skill Seekers config files and extracts metadata."""

    # Category mapping based on config content: a config whose (lowercased)
    # name contains any keyword is assigned the corresponding category.
    CATEGORY_MAPPING = {
        "web-frameworks": [
            "react", "vue", "django", "fastapi", "laravel", "astro", "hono"
        ],
        "game-engines": [
            "godot", "unity", "unreal"
        ],
        "devops": [
            "kubernetes", "ansible", "docker", "terraform"
        ],
        "css-frameworks": [
            "tailwind", "bootstrap", "bulma"
        ],
        "development-tools": [
            "claude-code", "vscode", "git"
        ],
        "gaming": [
            "steam"
        ],
        "testing": [
            "pytest", "jest", "test"
        ]
    }

    # Tag extraction keywords: a tag is added when any keyword appears in the
    # config name or description (substring match, case-insensitive).
    TAG_KEYWORDS = {
        "javascript": ["react", "vue", "astro", "hono", "javascript", "js", "node"],
        "python": ["django", "fastapi", "ansible", "python", "flask"],
        "php": ["laravel", "php"],
        "frontend": ["react", "vue", "astro", "tailwind", "frontend", "ui"],
        "backend": ["django", "fastapi", "laravel", "backend", "server", "api"],
        "css": ["tailwind", "css", "styling"],
        "game-development": ["godot", "unity", "unreal", "game"],
        "devops": ["kubernetes", "ansible", "docker", "k8s", "devops"],
        "documentation": ["docs", "documentation"],
        "testing": ["test", "testing", "pytest", "jest"]
    }

    def __init__(self, config_dir: Path, base_url: str = "https://api.skillseekersweb.com"):
        """
        Initialize config analyzer

        Args:
            config_dir: Path to configs directory
            base_url: Base URL for download links

        Raises:
            ValueError: If config_dir does not exist
        """
        self.config_dir = Path(config_dir)
        self.base_url = base_url

        if not self.config_dir.exists():
            raise ValueError(f"Config directory not found: {self.config_dir}")

    def analyze_all_configs(self) -> List[Dict[str, Any]]:
        """
        Analyze all config files and extract metadata

        Invalid or unparseable configs are skipped with a warning rather than
        aborting the whole scan.

        Returns:
            List of config metadata dicts, in sorted filename order
        """
        configs = []

        # Find all JSON files in configs directory
        for config_file in sorted(self.config_dir.glob("*.json")):
            try:
                metadata = self.analyze_config(config_file)
                if metadata:  # Skip invalid configs
                    configs.append(metadata)
            except Exception as e:
                # Best-effort scan: one bad file must not stop the others.
                print(f"Warning: Failed to analyze {config_file.name}: {e}")
                continue

        return configs

    def analyze_config(self, config_path: Path) -> Optional[Dict[str, Any]]:
        """
        Analyze a single config file and extract metadata

        Args:
            config_path: Path to config JSON file

        Returns:
            Config metadata dict or None if invalid
        """
        try:
            # Read config file. Explicit UTF-8 avoids locale-dependent decode
            # failures (e.g. cp1252 on Windows) for non-ASCII descriptions.
            with open(config_path, 'r', encoding="utf-8") as f:
                config_data = json.load(f)

            # Skip if no name field
            if "name" not in config_data:
                return None

            name = config_data["name"]
            description = config_data.get("description", "")

            # Determine config type
            config_type = self._determine_type(config_data)

            # Get primary source (base_url or repo)
            primary_source = self._get_primary_source(config_data, config_type)

            # Auto-categorize
            category = self._categorize_config(name, description, config_data)

            # Extract tags
            tags = self._extract_tags(name, description, config_data)

            # Get file metadata
            file_size = config_path.stat().st_size
            last_updated = self._get_last_updated(config_path)

            # Generate download URL
            download_url = f"{self.base_url}/api/download/{config_path.name}"

            # Get max_pages (for estimation)
            max_pages = self._get_max_pages(config_data)

            return {
                "name": name,
                "description": description,
                "type": config_type,
                "category": category,
                "tags": tags,
                "primary_source": primary_source,
                "max_pages": max_pages,
                "file_size": file_size,
                "last_updated": last_updated,
                "download_url": download_url,
                "config_file": config_path.name
            }

        except json.JSONDecodeError as e:
            print(f"Invalid JSON in {config_path.name}: {e}")
            return None
        except Exception as e:
            print(f"Error analyzing {config_path.name}: {e}")
            return None

    def get_config_by_name(self, name: str) -> Optional[Dict[str, Any]]:
        """
        Get config metadata by name

        Args:
            name: Config name (e.g., "react", "django")

        Returns:
            Config metadata or None if not found
        """
        return next(
            (config for config in self.analyze_all_configs() if config["name"] == name),
            None,
        )

    def _determine_type(self, config_data: Dict[str, Any]) -> str:
        """
        Determine if config is single-source or unified

        Args:
            config_data: Config JSON data

        Returns:
            "single-source" or "unified"
        """
        # Unified configs have a "sources" array; "merge_mode" is another
        # indicator of unified configs.
        if "sources" in config_data or "merge_mode" in config_data:
            return "unified"

        return "single-source"

    def _get_primary_source(self, config_data: Dict[str, Any], config_type: str) -> str:
        """
        Get primary source URL/repo

        Args:
            config_data: Config JSON data
            config_type: "single-source" or "unified"

        Returns:
            Primary source URL or repo name
        """
        if config_type == "unified":
            # Unified: describe the first declared source.
            sources = config_data.get("sources", [])
            if sources:
                first_source = sources[0]
                source_type = first_source.get("type")
                if source_type == "documentation":
                    return first_source.get("base_url", "")
                elif source_type == "github":
                    return f"github.com/{first_source.get('repo', '')}"
                elif source_type == "pdf":
                    return first_source.get("pdf_url", "PDF file")
            return "Multiple sources"

        # Single-source configs
        if "base_url" in config_data:
            return config_data["base_url"]
        elif "repo" in config_data:
            return f"github.com/{config_data['repo']}"
        elif "pdf_url" in config_data or "pdf" in config_data:
            return "PDF file"

        return "Unknown"

    def _categorize_config(self, name: str, description: str, config_data: Dict[str, Any]) -> str:
        """
        Auto-categorize config based on name and content

        Args:
            name: Config name
            description: Config description
            config_data: Full config data

        Returns:
            Category name ("uncategorized" if nothing matches)
        """
        name_lower = name.lower()

        # Check against category mapping (name keywords take precedence).
        for category, keywords in self.CATEGORY_MAPPING.items():
            if any(keyword in name_lower for keyword in keywords):
                return category

        # Check description for hints
        desc_lower = description.lower()
        if "framework" in desc_lower or "library" in desc_lower:
            if any(word in desc_lower for word in ["web", "frontend", "backend", "api"]):
                return "web-frameworks"

        if "game" in desc_lower or "engine" in desc_lower:
            return "game-engines"

        if "devops" in desc_lower or "deployment" in desc_lower or "infrastructure" in desc_lower:
            return "devops"

        # Default to uncategorized
        return "uncategorized"

    def _extract_tags(self, name: str, description: str, config_data: Dict[str, Any]) -> List[str]:
        """
        Extract relevant tags from config

        Args:
            name: Config name
            description: Config description
            config_data: Full config data

        Returns:
            Sorted list of tags
        """
        tags = set()
        name_lower = name.lower()
        desc_lower = description.lower()

        # Check against tag keywords
        for tag, keywords in self.TAG_KEYWORDS.items():
            if any(keyword in name_lower or keyword in desc_lower for keyword in keywords):
                tags.add(tag)

        # Add config type as tag
        config_type = self._determine_type(config_data)
        if config_type == "unified":
            tags.add("multi-source")

        # Add source-type tags (top-level keys for single-source configs,
        # per-entry "type" for unified configs).
        sources = config_data.get("sources", []) if config_type == "unified" else []

        if "base_url" in config_data or any(s.get("type") == "documentation" for s in sources):
            tags.add("documentation")

        if "repo" in config_data or any(s.get("type") == "github" for s in sources):
            tags.add("github")

        if "pdf" in config_data or "pdf_url" in config_data or any(s.get("type") == "pdf" for s in sources):
            tags.add("pdf")

        return sorted(tags)

    def _get_max_pages(self, config_data: Dict[str, Any]) -> Optional[int]:
        """
        Get max_pages value from config

        Args:
            config_data: Config JSON data

        Returns:
            max_pages value or None
        """
        # Single-source configs
        if "max_pages" in config_data:
            return config_data["max_pages"]

        # Unified configs - get from first documentation source
        for source in config_data.get("sources", []):
            if source.get("type") == "documentation" and "max_pages" in source:
                return source["max_pages"]

        return None

    def _get_last_updated(self, config_path: Path) -> str:
        """
        Get last updated date from git history

        Falls back to the file's modification time when git is unavailable,
        times out, or the file has no commit history.

        Args:
            config_path: Path to config file

        Returns:
            ISO format date string
        """
        try:
            # Try to get last commit date for this file
            result = subprocess.run(
                ["git", "log", "-1", "--format=%cI", str(config_path)],
                cwd=config_path.parent.parent,
                capture_output=True,
                text=True,
                timeout=5
            )

            if result.returncode == 0 and result.stdout.strip():
                return result.stdout.strip()

        except (OSError, subprocess.SubprocessError):
            # Missing git binary (OSError) or timeout (SubprocessError);
            # fall through to the mtime fallback.
            pass

        # Fallback to file modification time
        mtime = config_path.stat().st_mtime
        return datetime.fromtimestamp(mtime).isoformat()