fix: update configs submodule to latest (14 → 178 configs) and fix categorization

The api/configs_repo git submodule was pinned to commit d4c0710, which had
only 14 configs. It now points to the latest main (4275d6f), which has 178
configs across 21 categories (web-frameworks, ai-ml, game-engines, databases,
devops, etc.).

Also fixed ConfigAnalyzer._categorize_config() to use the directory structure
(official/{category}/{name}.json) as the authoritative category instead of
keyword matching, which was classifying most new configs as "uncategorized".

Result: API /api/configs now returns 178 configs (was 14).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-02-22 22:51:55 +03:00
parent ef14fd4b5d
commit 39c4362d85
2 changed files with 25 additions and 9 deletions

View File

@@ -105,8 +105,9 @@ class ConfigAnalyzer:
# Get primary source (base_url or repo)
primary_source = self._get_primary_source(config_data, config_type)
# Auto-categorize
category = self._categorize_config(name, description, config_data)
# Use directory name as category (official/{category}/{name}.json)
# Fall back to keyword-based categorization if not in a named subdirectory
category = self._categorize_config(name, description, config_data, config_path)
# Extract tags
tags = self._extract_tags(name, description, config_data)
@@ -212,26 +213,42 @@ class ConfigAnalyzer:
return "Unknown"
def _categorize_config(self, name: str, description: str, config_data: dict[str, Any]) -> str:
def _categorize_config(
self,
name: str,
description: str,
config_data: dict[str, Any],
config_path: Path | None = None,
) -> str:
"""
Auto-categorize config based on name and content
Categorize config using directory structure first, then keyword fallback.
The configs_repo organizes files as official/{category}/{name}.json so the
parent directory name is the authoritative category.
Args:
name: Config name
description: Config description
config_data: Full config data
config_path: Path to config file (used to read directory-based category)
Returns:
Category name
"""
name_lower = name.lower()
# Primary: use directory structure (official/{category}/{name}.json)
if config_path is not None:
parent = config_path.parent.name
# Exclude generic/root directories from being used as categories
if parent not in ("official", "community", "configs", "configs_repo", "."):
return parent
# Check against category mapping
# Fallback: keyword matching against config name
name_lower = name.lower()
for category, keywords in self.CATEGORY_MAPPING.items():
if any(keyword in name_lower for keyword in keywords):
return category
# Check description for hints
# Fallback: description hints
desc_lower = description.lower()
if "framework" in desc_lower or "library" in desc_lower:
if any(word in desc_lower for word in ["web", "frontend", "backend", "api"]):
@@ -243,7 +260,6 @@ class ConfigAnalyzer:
if "devops" in desc_lower or "deployment" in desc_lower or "infrastructure" in desc_lower:
return "devops"
# Default to uncategorized
return "uncategorized"
def _extract_tags(self, name: str, description: str, config_data: dict[str, Any]) -> list[str]: