run ruff

2026-01-17 17:29:21 +00:00
parent c89f059712
commit 5ed767ff9a
144 changed files with 14142 additions and 16488 deletions
--- a/tests/test_multi_source.py
+++ b/tests/test_multi_source.py
@@ -9,10 +9,10 @@ Tests the following functionality:
 5. Multiple GitHub repositories handling
 """

-import unittest
-import tempfile
 import os
 import shutil
+import tempfile
+import unittest


 class TestUnifiedScraperDataStructure(unittest.TestCase):
@@ -23,11 +23,9 @@ class TestUnifiedScraperDataStructure(unittest.TestCase):
        from skill_seekers.cli.unified_scraper import UnifiedScraper

        config = {
-            'name': 'test_multi',
-            'description': 'Test skill',
-            'sources': [
-                {'type': 'documentation', 'base_url': 'https://example.com'}
-            ]
+            "name": "test_multi",
+            "description": "Test skill",
+            "sources": [{"type": "documentation", "base_url": "https://example.com"}],
        }

        with tempfile.TemporaryDirectory() as temp_dir:
@@ -36,9 +34,9 @@ class TestUnifiedScraperDataStructure(unittest.TestCase):
                os.chdir(temp_dir)
                scraper = UnifiedScraper(config)

-                self.assertIsInstance(scraper.scraped_data['documentation'], list)
-                self.assertIsInstance(scraper.scraped_data['github'], list)
-                self.assertIsInstance(scraper.scraped_data['pdf'], list)
+                self.assertIsInstance(scraper.scraped_data["documentation"], list)
+                self.assertIsInstance(scraper.scraped_data["github"], list)
+                self.assertIsInstance(scraper.scraped_data["pdf"], list)
            finally:
                os.chdir(original_dir)

@@ -47,11 +45,9 @@ class TestUnifiedScraperDataStructure(unittest.TestCase):
        from skill_seekers.cli.unified_scraper import UnifiedScraper

        config = {
-            'name': 'test_counters',
-            'description': 'Test skill',
-            'sources': [
-                {'type': 'documentation', 'base_url': 'https://example.com'}
-            ]
+            "name": "test_counters",
+            "description": "Test skill",
+            "sources": [{"type": "documentation", "base_url": "https://example.com"}],
        }

        with tempfile.TemporaryDirectory() as temp_dir:
@@ -60,9 +56,9 @@ class TestUnifiedScraperDataStructure(unittest.TestCase):
                os.chdir(temp_dir)
                scraper = UnifiedScraper(config)

-                self.assertEqual(scraper._source_counters['documentation'], 0)
-                self.assertEqual(scraper._source_counters['github'], 0)
-                self.assertEqual(scraper._source_counters['pdf'], 0)
+                self.assertEqual(scraper._source_counters["documentation"], 0)
+                self.assertEqual(scraper._source_counters["github"], 0)
+                self.assertEqual(scraper._source_counters["pdf"], 0)
            finally:
                os.chdir(original_dir)

@@ -71,11 +67,9 @@ class TestUnifiedScraperDataStructure(unittest.TestCase):
        from skill_seekers.cli.unified_scraper import UnifiedScraper

        config = {
-            'name': 'test_empty',
-            'description': 'Test skill',
-            'sources': [
-                {'type': 'documentation', 'base_url': 'https://example.com'}
-            ]
+            "name": "test_empty",
+            "description": "Test skill",
+            "sources": [{"type": "documentation", "base_url": "https://example.com"}],
        }

        with tempfile.TemporaryDirectory() as temp_dir:
@@ -84,9 +78,9 @@ class TestUnifiedScraperDataStructure(unittest.TestCase):
                os.chdir(temp_dir)
                scraper = UnifiedScraper(config)

-                self.assertEqual(len(scraper.scraped_data['documentation']), 0)
-                self.assertEqual(len(scraper.scraped_data['github']), 0)
-                self.assertEqual(len(scraper.scraped_data['pdf']), 0)
+                self.assertEqual(len(scraper.scraped_data["documentation"]), 0)
+                self.assertEqual(len(scraper.scraped_data["github"]), 0)
+                self.assertEqual(len(scraper.scraped_data["pdf"]), 0)
            finally:
                os.chdir(original_dir)

@@ -111,134 +105,118 @@ class TestUnifiedSkillBuilderDocsReferences(unittest.TestCase):
        from skill_seekers.cli.unified_skill_builder import UnifiedSkillBuilder

        # Create mock refs directories
-        refs_dir1 = os.path.join(self.temp_dir, 'refs1')
-        refs_dir2 = os.path.join(self.temp_dir, 'refs2')
+        refs_dir1 = os.path.join(self.temp_dir, "refs1")
+        refs_dir2 = os.path.join(self.temp_dir, "refs2")
        os.makedirs(refs_dir1)
        os.makedirs(refs_dir2)

-        config = {
-            'name': 'test_docs_refs',
-            'description': 'Test',
-            'sources': []
-        }
+        config = {"name": "test_docs_refs", "description": "Test", "sources": []}

        scraped_data = {
-            'documentation': [
-                {'source_id': 'source_a', 'base_url': 'https://a.com', 'total_pages': 5, 'refs_dir': refs_dir1},
-                {'source_id': 'source_b', 'base_url': 'https://b.com', 'total_pages': 3, 'refs_dir': refs_dir2}
+            "documentation": [
+                {"source_id": "source_a", "base_url": "https://a.com", "total_pages": 5, "refs_dir": refs_dir1},
+                {"source_id": "source_b", "base_url": "https://b.com", "total_pages": 3, "refs_dir": refs_dir2},
            ],
-            'github': [],
-            'pdf': []
+            "github": [],
+            "pdf": [],
        }

        builder = UnifiedSkillBuilder(config, scraped_data)
-        builder._generate_docs_references(scraped_data['documentation'])
+        builder._generate_docs_references(scraped_data["documentation"])

-        docs_dir = os.path.join(builder.skill_dir, 'references', 'documentation')
-        self.assertTrue(os.path.exists(os.path.join(docs_dir, 'source_a')))
-        self.assertTrue(os.path.exists(os.path.join(docs_dir, 'source_b')))
+        docs_dir = os.path.join(builder.skill_dir, "references", "documentation")
+        self.assertTrue(os.path.exists(os.path.join(docs_dir, "source_a")))
+        self.assertTrue(os.path.exists(os.path.join(docs_dir, "source_b")))

    def test_creates_index_per_source(self):
        """Test that each source subdirectory has its own index.md."""
        from skill_seekers.cli.unified_skill_builder import UnifiedSkillBuilder

-        refs_dir = os.path.join(self.temp_dir, 'refs')
+        refs_dir = os.path.join(self.temp_dir, "refs")
        os.makedirs(refs_dir)

-        config = {
-            'name': 'test_source_index',
-            'description': 'Test',
-            'sources': []
-        }
+        config = {"name": "test_source_index", "description": "Test", "sources": []}

        scraped_data = {
-            'documentation': [
-                {'source_id': 'my_source', 'base_url': 'https://example.com', 'total_pages': 10, 'refs_dir': refs_dir}
+            "documentation": [
+                {"source_id": "my_source", "base_url": "https://example.com", "total_pages": 10, "refs_dir": refs_dir}
            ],
-            'github': [],
-            'pdf': []
+            "github": [],
+            "pdf": [],
        }

        builder = UnifiedSkillBuilder(config, scraped_data)
-        builder._generate_docs_references(scraped_data['documentation'])
+        builder._generate_docs_references(scraped_data["documentation"])

-        source_index = os.path.join(builder.skill_dir, 'references', 'documentation', 'my_source', 'index.md')
+        source_index = os.path.join(builder.skill_dir, "references", "documentation", "my_source", "index.md")
        self.assertTrue(os.path.exists(source_index))

-        with open(source_index, 'r') as f:
+        with open(source_index) as f:
            content = f.read()
-            self.assertIn('my_source', content)
-            self.assertIn('https://example.com', content)
+            self.assertIn("my_source", content)
+            self.assertIn("https://example.com", content)

    def test_creates_main_index_listing_all_sources(self):
        """Test that main index.md lists all documentation sources."""
        from skill_seekers.cli.unified_skill_builder import UnifiedSkillBuilder

-        refs_dir1 = os.path.join(self.temp_dir, 'refs1')
-        refs_dir2 = os.path.join(self.temp_dir, 'refs2')
+        refs_dir1 = os.path.join(self.temp_dir, "refs1")
+        refs_dir2 = os.path.join(self.temp_dir, "refs2")
        os.makedirs(refs_dir1)
        os.makedirs(refs_dir2)

-        config = {
-            'name': 'test_main_index',
-            'description': 'Test',
-            'sources': []
-        }
+        config = {"name": "test_main_index", "description": "Test", "sources": []}

        scraped_data = {
-            'documentation': [
-                {'source_id': 'docs_one', 'base_url': 'https://one.com', 'total_pages': 10, 'refs_dir': refs_dir1},
-                {'source_id': 'docs_two', 'base_url': 'https://two.com', 'total_pages': 20, 'refs_dir': refs_dir2}
+            "documentation": [
+                {"source_id": "docs_one", "base_url": "https://one.com", "total_pages": 10, "refs_dir": refs_dir1},
+                {"source_id": "docs_two", "base_url": "https://two.com", "total_pages": 20, "refs_dir": refs_dir2},
            ],
-            'github': [],
-            'pdf': []
+            "github": [],
+            "pdf": [],
        }

        builder = UnifiedSkillBuilder(config, scraped_data)
-        builder._generate_docs_references(scraped_data['documentation'])
+        builder._generate_docs_references(scraped_data["documentation"])

-        main_index = os.path.join(builder.skill_dir, 'references', 'documentation', 'index.md')
+        main_index = os.path.join(builder.skill_dir, "references", "documentation", "index.md")
        self.assertTrue(os.path.exists(main_index))

-        with open(main_index, 'r') as f:
+        with open(main_index) as f:
            content = f.read()
-            self.assertIn('docs_one', content)
-            self.assertIn('docs_two', content)
-            self.assertIn('2 documentation sources', content)
+            self.assertIn("docs_one", content)
+            self.assertIn("docs_two", content)
+            self.assertIn("2 documentation sources", content)

    def test_copies_reference_files_to_source_dir(self):
        """Test that reference files are copied to source subdirectory."""
        from skill_seekers.cli.unified_skill_builder import UnifiedSkillBuilder

-        refs_dir = os.path.join(self.temp_dir, 'refs')
+        refs_dir = os.path.join(self.temp_dir, "refs")
        os.makedirs(refs_dir)

        # Create mock reference files
-        with open(os.path.join(refs_dir, 'api.md'), 'w') as f:
-            f.write('# API Reference')
-        with open(os.path.join(refs_dir, 'guide.md'), 'w') as f:
-            f.write('# User Guide')
+        with open(os.path.join(refs_dir, "api.md"), "w") as f:
+            f.write("# API Reference")
+        with open(os.path.join(refs_dir, "guide.md"), "w") as f:
+            f.write("# User Guide")

-        config = {
-            'name': 'test_copy_refs',
-            'description': 'Test',
-            'sources': []
-        }
+        config = {"name": "test_copy_refs", "description": "Test", "sources": []}

        scraped_data = {
-            'documentation': [
-                {'source_id': 'test_source', 'base_url': 'https://test.com', 'total_pages': 5, 'refs_dir': refs_dir}
+            "documentation": [
+                {"source_id": "test_source", "base_url": "https://test.com", "total_pages": 5, "refs_dir": refs_dir}
            ],
-            'github': [],
-            'pdf': []
+            "github": [],
+            "pdf": [],
        }

        builder = UnifiedSkillBuilder(config, scraped_data)
-        builder._generate_docs_references(scraped_data['documentation'])
+        builder._generate_docs_references(scraped_data["documentation"])

-        source_dir = os.path.join(builder.skill_dir, 'references', 'documentation', 'test_source')
-        self.assertTrue(os.path.exists(os.path.join(source_dir, 'api.md')))
-        self.assertTrue(os.path.exists(os.path.join(source_dir, 'guide.md')))
+        source_dir = os.path.join(builder.skill_dir, "references", "documentation", "test_source")
+        self.assertTrue(os.path.exists(os.path.join(source_dir, "api.md")))
+        self.assertTrue(os.path.exists(os.path.join(source_dir, "guide.md")))


 class TestUnifiedSkillBuilderGitHubReferences(unittest.TestCase):
@@ -260,127 +238,148 @@ class TestUnifiedSkillBuilderGitHubReferences(unittest.TestCase):
        """Test that each GitHub repo gets its own subdirectory."""
        from skill_seekers.cli.unified_skill_builder import UnifiedSkillBuilder

-        config = {
-            'name': 'test_github_refs',
-            'description': 'Test',
-            'sources': []
-        }
+        config = {"name": "test_github_refs", "description": "Test", "sources": []}

        scraped_data = {
-            'documentation': [],
-            'github': [
-                {'repo': 'org/repo1', 'repo_id': 'org_repo1', 'data': {'readme': '# Repo 1', 'issues': [], 'releases': [], 'repo_info': {}}},
-                {'repo': 'org/repo2', 'repo_id': 'org_repo2', 'data': {'readme': '# Repo 2', 'issues': [], 'releases': [], 'repo_info': {}}}
+            "documentation": [],
+            "github": [
+                {
+                    "repo": "org/repo1",
+                    "repo_id": "org_repo1",
+                    "data": {"readme": "# Repo 1", "issues": [], "releases": [], "repo_info": {}},
+                },
+                {
+                    "repo": "org/repo2",
+                    "repo_id": "org_repo2",
+                    "data": {"readme": "# Repo 2", "issues": [], "releases": [], "repo_info": {}},
+                },
            ],
-            'pdf': []
+            "pdf": [],
        }

        builder = UnifiedSkillBuilder(config, scraped_data)
-        builder._generate_github_references(scraped_data['github'])
+        builder._generate_github_references(scraped_data["github"])

-        github_dir = os.path.join(builder.skill_dir, 'references', 'github')
-        self.assertTrue(os.path.exists(os.path.join(github_dir, 'org_repo1')))
-        self.assertTrue(os.path.exists(os.path.join(github_dir, 'org_repo2')))
+        github_dir = os.path.join(builder.skill_dir, "references", "github")
+        self.assertTrue(os.path.exists(os.path.join(github_dir, "org_repo1")))
+        self.assertTrue(os.path.exists(os.path.join(github_dir, "org_repo2")))

    def test_creates_readme_per_repo(self):
        """Test that README.md is created for each repo."""
        from skill_seekers.cli.unified_skill_builder import UnifiedSkillBuilder

-        config = {
-            'name': 'test_readme',
-            'description': 'Test',
-            'sources': []
-        }
+        config = {"name": "test_readme", "description": "Test", "sources": []}

        scraped_data = {
-            'documentation': [],
-            'github': [
-                {'repo': 'test/myrepo', 'repo_id': 'test_myrepo', 'data': {'readme': '# My Repository\n\nDescription here.', 'issues': [], 'releases': [], 'repo_info': {}}}
+            "documentation": [],
+            "github": [
+                {
+                    "repo": "test/myrepo",
+                    "repo_id": "test_myrepo",
+                    "data": {
+                        "readme": "# My Repository\n\nDescription here.",
+                        "issues": [],
+                        "releases": [],
+                        "repo_info": {},
+                    },
+                }
            ],
-            'pdf': []
+            "pdf": [],
        }

        builder = UnifiedSkillBuilder(config, scraped_data)
-        builder._generate_github_references(scraped_data['github'])
+        builder._generate_github_references(scraped_data["github"])

-        readme_path = os.path.join(builder.skill_dir, 'references', 'github', 'test_myrepo', 'README.md')
+        readme_path = os.path.join(builder.skill_dir, "references", "github", "test_myrepo", "README.md")
        self.assertTrue(os.path.exists(readme_path))

-        with open(readme_path, 'r') as f:
+        with open(readme_path) as f:
            content = f.read()
-            self.assertIn('test/myrepo', content)
+            self.assertIn("test/myrepo", content)

    def test_creates_issues_file_when_issues_exist(self):
        """Test that issues.md is created when repo has issues."""
        from skill_seekers.cli.unified_skill_builder import UnifiedSkillBuilder

-        config = {
-            'name': 'test_issues',
-            'description': 'Test',
-            'sources': []
-        }
+        config = {"name": "test_issues", "description": "Test", "sources": []}

        scraped_data = {
-            'documentation': [],
-            'github': [
+            "documentation": [],
+            "github": [
                {
-                    'repo': 'test/repo',
-                    'repo_id': 'test_repo',
-                    'data': {
-                        'readme': '# Repo',
-                        'issues': [
-                            {'number': 1, 'title': 'Bug report', 'state': 'open', 'labels': ['bug'], 'url': 'https://github.com/test/repo/issues/1'},
-                            {'number': 2, 'title': 'Feature request', 'state': 'closed', 'labels': ['enhancement'], 'url': 'https://github.com/test/repo/issues/2'}
+                    "repo": "test/repo",
+                    "repo_id": "test_repo",
+                    "data": {
+                        "readme": "# Repo",
+                        "issues": [
+                            {
+                                "number": 1,
+                                "title": "Bug report",
+                                "state": "open",
+                                "labels": ["bug"],
+                                "url": "https://github.com/test/repo/issues/1",
+                            },
+                            {
+                                "number": 2,
+                                "title": "Feature request",
+                                "state": "closed",
+                                "labels": ["enhancement"],
+                                "url": "https://github.com/test/repo/issues/2",
+                            },
                        ],
-                        'releases': [],
-                        'repo_info': {}
-                    }
+                        "releases": [],
+                        "repo_info": {},
+                    },
                }
            ],
-            'pdf': []
+            "pdf": [],
        }

        builder = UnifiedSkillBuilder(config, scraped_data)
-        builder._generate_github_references(scraped_data['github'])
+        builder._generate_github_references(scraped_data["github"])

-        issues_path = os.path.join(builder.skill_dir, 'references', 'github', 'test_repo', 'issues.md')
+        issues_path = os.path.join(builder.skill_dir, "references", "github", "test_repo", "issues.md")
        self.assertTrue(os.path.exists(issues_path))

-        with open(issues_path, 'r') as f:
+        with open(issues_path) as f:
            content = f.read()
-            self.assertIn('Bug report', content)
-            self.assertIn('Feature request', content)
+            self.assertIn("Bug report", content)
+            self.assertIn("Feature request", content)

    def test_creates_main_index_listing_all_repos(self):
        """Test that main index.md lists all GitHub repositories."""
        from skill_seekers.cli.unified_skill_builder import UnifiedSkillBuilder

-        config = {
-            'name': 'test_github_index',
-            'description': 'Test',
-            'sources': []
-        }
+        config = {"name": "test_github_index", "description": "Test", "sources": []}

        scraped_data = {
-            'documentation': [],
-            'github': [
-                {'repo': 'org/first', 'repo_id': 'org_first', 'data': {'readme': '#', 'issues': [], 'releases': [], 'repo_info': {'stars': 100}}},
-                {'repo': 'org/second', 'repo_id': 'org_second', 'data': {'readme': '#', 'issues': [], 'releases': [], 'repo_info': {'stars': 50}}}
+            "documentation": [],
+            "github": [
+                {
+                    "repo": "org/first",
+                    "repo_id": "org_first",
+                    "data": {"readme": "#", "issues": [], "releases": [], "repo_info": {"stars": 100}},
+                },
+                {
+                    "repo": "org/second",
+                    "repo_id": "org_second",
+                    "data": {"readme": "#", "issues": [], "releases": [], "repo_info": {"stars": 50}},
+                },
            ],
-            'pdf': []
+            "pdf": [],
        }

        builder = UnifiedSkillBuilder(config, scraped_data)
-        builder._generate_github_references(scraped_data['github'])
+        builder._generate_github_references(scraped_data["github"])

-        main_index = os.path.join(builder.skill_dir, 'references', 'github', 'index.md')
+        main_index = os.path.join(builder.skill_dir, "references", "github", "index.md")
        self.assertTrue(os.path.exists(main_index))

-        with open(main_index, 'r') as f:
+        with open(main_index) as f:
            content = f.read()
-            self.assertIn('org/first', content)
-            self.assertIn('org/second', content)
-            self.assertIn('2 GitHub repositories', content)
+            self.assertIn("org/first", content)
+            self.assertIn("org/second", content)
+            self.assertIn("2 GitHub repositories", content)


 class TestUnifiedSkillBuilderPdfReferences(unittest.TestCase):
@@ -402,32 +401,24 @@ class TestUnifiedSkillBuilderPdfReferences(unittest.TestCase):
        """Test that PDF index shows correct document count."""
        from skill_seekers.cli.unified_skill_builder import UnifiedSkillBuilder

-        config = {
-            'name': 'test_pdf',
-            'description': 'Test',
-            'sources': []
-        }
+        config = {"name": "test_pdf", "description": "Test", "sources": []}

        scraped_data = {
-            'documentation': [],
-            'github': [],
-            'pdf': [
-                {'path': '/path/to/doc1.pdf'},
-                {'path': '/path/to/doc2.pdf'},
-                {'path': '/path/to/doc3.pdf'}
-            ]
+            "documentation": [],
+            "github": [],
+            "pdf": [{"path": "/path/to/doc1.pdf"}, {"path": "/path/to/doc2.pdf"}, {"path": "/path/to/doc3.pdf"}],
        }

        builder = UnifiedSkillBuilder(config, scraped_data)
-        builder._generate_pdf_references(scraped_data['pdf'])
+        builder._generate_pdf_references(scraped_data["pdf"])

-        pdf_index = os.path.join(builder.skill_dir, 'references', 'pdf', 'index.md')
+        pdf_index = os.path.join(builder.skill_dir, "references", "pdf", "index.md")
        self.assertTrue(os.path.exists(pdf_index))

-        with open(pdf_index, 'r') as f:
+        with open(pdf_index) as f:
            content = f.read()
-            self.assertIn('3 PDF document', content)
+            self.assertIn("3 PDF document", content)


-if __name__ == '__main__':
+if __name__ == "__main__":
    unittest.main()