From 39c4362d850fbb890dadbb0e307227ecae2aa5b7 Mon Sep 17 00:00:00 2001
From: yusyus
Date: Sun, 22 Feb 2026 22:51:55 +0300
Subject: [PATCH] =?UTF-8?q?fix:=20update=20configs=20submodule=20to=20late?=
 =?UTF-8?q?st=20(14=20=E2=86=92=20178=20configs)=20and=20fix=20categorizat?=
 =?UTF-8?q?ion?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The api/configs_repo git submodule was pinned to commit d4c0710 which only
had 14 configs. Updated to latest main (4275d6f) which has 178 configs
across 21 categories (web-frameworks, ai-ml, game-engines, databases,
devops, etc.)

Also fixed ConfigAnalyzer._categorize_config() to use directory structure
(official/{category}/{name}.json) as authoritative category instead of
keyword matching, which was classifying most new configs as "uncategorized".

Result: API /api/configs now returns 178 configs (was 14).

Co-Authored-By: Claude Sonnet 4.6
---
 api/config_analyzer.py | 32 ++++++++++++++++++++++++--------
 api/configs_repo       |  2 +-
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/api/config_analyzer.py b/api/config_analyzer.py
index 916af61..20c9eb9 100644
--- a/api/config_analyzer.py
+++ b/api/config_analyzer.py
@@ -105,8 +105,9 @@ class ConfigAnalyzer:
         # Get primary source (base_url or repo)
         primary_source = self._get_primary_source(config_data, config_type)
 
-        # Auto-categorize
-        category = self._categorize_config(name, description, config_data)
+        # Use directory name as category (official/{category}/{name}.json)
+        # Fall back to keyword-based categorization if not in a named subdirectory
+        category = self._categorize_config(name, description, config_data, config_path)
 
         # Extract tags
         tags = self._extract_tags(name, description, config_data)
@@ -212,26 +213,42 @@ class ConfigAnalyzer:
 
         return "Unknown"
 
-    def _categorize_config(self, name: str, description: str, config_data: dict[str, Any]) -> str:
+    def _categorize_config(
+        self,
+        name: str,
+        description: str,
+        config_data: dict[str, Any],
+        config_path: Path | None = None,
+    ) -> str:
         """
-        Auto-categorize config based on name and content
+        Categorize config using directory structure first, then keyword fallback.
+
+        The configs_repo organizes files as official/{category}/{name}.json so the
+        parent directory name is the authoritative category.
 
         Args:
             name: Config name
             description: Config description
             config_data: Full config data
+            config_path: Path to config file (used to read directory-based category)
 
         Returns:
             Category name
         """
-        name_lower = name.lower()
+        # Primary: use directory structure (official/{category}/{name}.json)
+        if config_path is not None:
+            parent = config_path.parent.name
+            # Path("x.json").parent.name is "" (not "."), so skip empty names and generic dirs
+            if parent and parent not in ("official", "community", "configs", "configs_repo", "."):
+                return parent
 
-        # Check against category mapping
+        # Fallback: keyword matching against config name
+        name_lower = name.lower()
         for category, keywords in self.CATEGORY_MAPPING.items():
             if any(keyword in name_lower for keyword in keywords):
                 return category
 
-        # Check description for hints
+        # Fallback: description hints
         desc_lower = description.lower()
         if "framework" in desc_lower or "library" in desc_lower:
             if any(word in desc_lower for word in ["web", "frontend", "backend", "api"]):
@@ -243,7 +260,6 @@ class ConfigAnalyzer:
         if "devops" in desc_lower or "deployment" in desc_lower or "infrastructure" in desc_lower:
             return "devops"
 
-        # Default to uncategorized
         return "uncategorized"
 
     def _extract_tags(self, name: str, description: str, config_data: dict[str, Any]) -> list[str]:
diff --git a/api/configs_repo b/api/configs_repo
index d4c0710..4275d6f 160000
--- a/api/configs_repo
+++ b/api/configs_repo
@@ -1 +1 @@
-Subproject commit d4c07108337d599300d0905b8787011db425dded
+Subproject commit 4275d6fe41746365d3b08c6c2d271f9039a73527