From e3b49574d30805ee1389049c7350a2ca7e7904c5 Mon Sep 17 00:00:00 2001
From: yusyus
Date: Fri, 7 Nov 2025 00:37:04 +0300
Subject: [PATCH] fix: Add C# language detection to code extraction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem:
The system couldn't extract C# code examples from documentation because the
language detector only recognized C# from CSS classes but failed to detect
C# from code content.

Solution:
Added C# heuristic detection patterns:
- 'using System' - System namespace imports
- 'namespace ' - Namespace declarations
- '{ get; set; }' - Auto-property syntax
- 'public class ' - Public class declarations
- 'private class ' - Private class declarations
- 'internal class ' - Internal class declarations
- 'public static void ' - Static method declarations

Changes:
- cli/doc_scraper.py: Added C# patterns to detect_language() method
- tests/test_scraper_features.py: Added 7 comprehensive C# detection tests

Test Results:
409 passed (+7 new tests), 3 skipped, 0 failed

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 cli/doc_scraper.py             |  7 +++++
 tests/test_scraper_features.py | 56 ++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)

diff --git a/cli/doc_scraper.py b/cli/doc_scraper.py
index fa4c059..a7125be 100755
--- a/cli/doc_scraper.py
+++ b/cli/doc_scraper.py
@@ -335,6 +335,13 @@ class DocToSkillConverter:
             return 'python'
         if '#include' in code or 'int main' in code:
             return 'cpp'
+        # C# detection
+        if 'using System' in code or 'namespace ' in code:
+            return 'csharp'
+        if '{ get; set; }' in code:
+            return 'csharp'
+        if any(keyword in code for keyword in ['public class ', 'private class ', 'internal class ', 'public static void ']):
+            return 'csharp'
 
         return 'unknown'
 
diff --git a/tests/test_scraper_features.py b/tests/test_scraper_features.py
index eb6d295..2ca0bd9 100644
--- a/tests/test_scraper_features.py
+++ b/tests/test_scraper_features.py
@@ -195,6 +195,62 @@ class TestLanguageDetection(unittest.TestCase):
         lang = self.converter.detect_language(elem, 'x')
         self.assertEqual(lang, 'python', 'Should detect python from bare class name')
 
+    def test_detect_csharp_from_using_system(self):
+        """Test C# detection from 'using System' keyword"""
+        html = '<code>using System;\nnamespace MyApp { }</code>'
+        elem = BeautifulSoup(html, 'html.parser').find('code')
+        code = elem.get_text()
+        lang = self.converter.detect_language(elem, code)
+        self.assertEqual(lang, 'csharp', 'Should detect C# from using System')
+
+    def test_detect_csharp_from_namespace(self):
+        """Test C# detection from 'namespace' keyword"""
+        html = '<code>namespace MyNamespace\n{\n public class Test { }\n}</code>'
+        elem = BeautifulSoup(html, 'html.parser').find('code')
+        code = elem.get_text()
+        lang = self.converter.detect_language(elem, code)
+        self.assertEqual(lang, 'csharp', 'Should detect C# from namespace')
+
+    def test_detect_csharp_from_property_syntax(self):
+        """Test C# detection from property syntax"""
+        html = '<code>public string Name { get; set; }</code>'
+        elem = BeautifulSoup(html, 'html.parser').find('code')
+        code = elem.get_text()
+        lang = self.converter.detect_language(elem, code)
+        self.assertEqual(lang, 'csharp', 'Should detect C# from { get; set; } syntax')
+
+    def test_detect_csharp_from_public_class(self):
+        """Test C# detection from 'public class' keyword"""
+        html = '<code>public class MyClass\n{\n private int value;\n}</code>'
+        elem = BeautifulSoup(html, 'html.parser').find('code')
+        code = elem.get_text()
+        lang = self.converter.detect_language(elem, code)
+        self.assertEqual(lang, 'csharp', 'Should detect C# from public class')
+
+    def test_detect_csharp_from_private_class(self):
+        """Test C# detection from 'private class' keyword"""
+        html = '<code>private class Helper { }</code>'
+        elem = BeautifulSoup(html, 'html.parser').find('code')
+        code = elem.get_text()
+        lang = self.converter.detect_language(elem, code)
+        self.assertEqual(lang, 'csharp', 'Should detect C# from private class')
+
+    def test_detect_csharp_from_public_static_void(self):
+        """Test C# detection from 'public static void' keyword"""
+        html = '<code>public static void Main(string[] args)\n{\n Console.WriteLine("Test");\n}</code>'
+        elem = BeautifulSoup(html, 'html.parser').find('code')
+        code = elem.get_text()
+        lang = self.converter.detect_language(elem, code)
+        self.assertEqual(lang, 'csharp', 'Should detect C# from public static void')
+
+    def test_detect_csharp_from_class_attribute(self):
+        """Test C# detection from CSS class attribute"""
+        html = '<code class="language-csharp">var x = 5;</code>'
+        elem = BeautifulSoup(html, 'html.parser').find('code')
+        code = elem.get_text()
+        lang = self.converter.detect_language(elem, code)
+        self.assertEqual(lang, 'csharp', 'Should detect C# from language-csharp class')
+
 
 class TestPatternExtraction(unittest.TestCase):
     """Test pattern extraction from documentation"""