style: fix ruff lint and formatting errors
- E741: rename ambiguous variable `l` → `line_text` in enhance_skill_local.py
- ARG001: suppress unused `doc` param in word_scraper _build_section()
- SIM108: use ternary for code_text assignment in word_scraper
- F841: remove unused `metadata` variable in test_chunking_integration
- F401: remove unused imports in test_pinecone_adaptor
- ARG001: rename unused `docs` → `_docs` in test_pinecone_adaptor
- Format 20 files to match ruff formatting rules

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -367,7 +367,7 @@ class LocalSkillEnhancer:
|
||||
if line.startswith("#"):
|
||||
# Found heading - keep it and next 3 lines
|
||||
chunk = lines[i : min(i + 4, len(lines))]
|
||||
chunk_chars = sum(len(l) for l in chunk)
|
||||
chunk_chars = sum(len(line_text) for line_text in chunk)
|
||||
if current_chars + chunk_chars > max_chars:
|
||||
break
|
||||
result.extend(chunk)
|
||||
|
||||
@@ -79,7 +79,9 @@ class WordToSkillConverter:
|
||||
self.config = config
|
||||
self.name = config["name"]
|
||||
self.docx_path = config.get("docx_path", "")
|
||||
self.description = config.get("description") or f"Use when referencing {self.name} documentation"
|
||||
self.description = (
|
||||
config.get("description") or f"Use when referencing {self.name} documentation"
|
||||
)
|
||||
|
||||
# Paths
|
||||
self.skill_dir = f"output/{self.name}"
|
||||
@@ -110,9 +112,7 @@ class WordToSkillConverter:
|
||||
raise FileNotFoundError(f"Word document not found: {self.docx_path}")
|
||||
|
||||
if not self.docx_path.lower().endswith(".docx"):
|
||||
raise ValueError(
|
||||
f"Not a Word document (expected .docx): {self.docx_path}"
|
||||
)
|
||||
raise ValueError(f"Not a Word document (expected .docx): {self.docx_path}")
|
||||
|
||||
# --- Extract metadata via python-docx ---
|
||||
doc = python_docx.Document(self.docx_path)
|
||||
@@ -733,12 +733,13 @@ class WordToSkillConverter:
|
||||
# HTML-to-sections helper (module-level for clarity)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _build_section(
|
||||
section_number: int,
|
||||
heading: str | None,
|
||||
heading_level: str | None,
|
||||
elements: list,
|
||||
doc,
|
||||
doc, # noqa: ARG001
|
||||
) -> dict:
|
||||
"""Build a section dict from a list of BeautifulSoup elements.
|
||||
|
||||
@@ -774,10 +775,7 @@ def _build_section(
|
||||
# Code blocks
|
||||
if tag == "pre" or (tag == "code" and elem.find_parent("pre") is None):
|
||||
code_elem = elem.find("code") if tag == "pre" else elem
|
||||
if code_elem:
|
||||
code_text = code_elem.get_text()
|
||||
else:
|
||||
code_text = elem.get_text()
|
||||
code_text = code_elem.get_text() if code_elem else elem.get_text()
|
||||
|
||||
code_text = code_text.strip()
|
||||
if code_text:
|
||||
@@ -961,7 +959,8 @@ def main():
|
||||
name = Path(args.from_json).stem.replace("_extracted", "")
|
||||
config = {
|
||||
"name": getattr(args, "name", None) or name,
|
||||
"description": getattr(args, "description", None) or f"Use when referencing {name} documentation",
|
||||
"description": getattr(args, "description", None)
|
||||
or f"Use when referencing {name} documentation",
|
||||
}
|
||||
try:
|
||||
converter = WordToSkillConverter(config)
|
||||
@@ -1049,6 +1048,7 @@ def main():
|
||||
except Exception as e:
|
||||
print(f"\n❌ Unexpected error during Word processing: {e}", file=sys.stderr)
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
@@ -358,7 +358,6 @@ class TestChunkingCLIIntegration:
|
||||
f"Small chunks ({len(data_small)}) should be more than large chunks ({len(data_large)})"
|
||||
)
|
||||
|
||||
|
||||
def test_chunk_overlap_tokens_parameter(self, tmp_path):
|
||||
"""Test --chunk-overlap-tokens controls RAGChunker overlap."""
|
||||
from skill_seekers.cli.package_skill import package_skill
|
||||
@@ -406,17 +405,21 @@ class TestChunkingCLIIntegration:
|
||||
|
||||
def test_chunk_overlap_scales_with_chunk_size(self, tmp_path):
|
||||
"""Test that overlap auto-scales when chunk_tokens is non-default but overlap is default."""
|
||||
from skill_seekers.cli.adaptors.base import DEFAULT_CHUNK_TOKENS, DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
from skill_seekers.cli.adaptors.base import (
|
||||
DEFAULT_CHUNK_TOKENS,
|
||||
DEFAULT_CHUNK_OVERLAP_TOKENS,
|
||||
)
|
||||
|
||||
adaptor = get_adaptor("langchain")
|
||||
|
||||
skill_dir = create_test_skill(tmp_path, large_doc=True)
|
||||
metadata = adaptor._build_skill_metadata(skill_dir)
|
||||
adaptor._build_skill_metadata(skill_dir)
|
||||
content = (skill_dir / "SKILL.md").read_text()
|
||||
|
||||
# With default chunk size (512) and default overlap (50), overlap should be 50
|
||||
chunks_default = adaptor._maybe_chunk_content(
|
||||
content, {"source": "test"},
|
||||
content,
|
||||
{"source": "test"},
|
||||
enable_chunking=True,
|
||||
chunk_max_tokens=DEFAULT_CHUNK_TOKENS,
|
||||
chunk_overlap_tokens=DEFAULT_CHUNK_OVERLAP_TOKENS,
|
||||
@@ -425,7 +428,8 @@ class TestChunkingCLIIntegration:
|
||||
# With large chunk size (1024) and default overlap (50),
|
||||
# overlap should auto-scale to max(50, 1024//10) = 102
|
||||
chunks_large = adaptor._maybe_chunk_content(
|
||||
content, {"source": "test"},
|
||||
content,
|
||||
{"source": "test"},
|
||||
enable_chunking=True,
|
||||
chunk_max_tokens=1024,
|
||||
chunk_overlap_tokens=DEFAULT_CHUNK_OVERLAP_TOKENS,
|
||||
|
||||
@@ -131,7 +131,9 @@ class TestArgumentHelpers:
|
||||
"""Should return set of universal argument names."""
|
||||
names = get_universal_argument_names()
|
||||
assert isinstance(names, set)
|
||||
assert len(names) == 19 # Phase 2: added 4 workflow arguments + local_repo_path + doc_version
|
||||
assert (
|
||||
len(names) == 19
|
||||
) # Phase 2: added 4 workflow arguments + local_repo_path + doc_version
|
||||
assert "name" in names
|
||||
assert "enhance_level" in names # Phase 1: consolidated flag
|
||||
assert "enhance_workflow" in names # Phase 2: workflow support
|
||||
|
||||
@@ -4,11 +4,10 @@ Tests for Pinecone adaptor and doc_version metadata flow.
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from skill_seekers.cli.adaptors.base import SkillAdaptor, SkillMetadata
|
||||
from skill_seekers.cli.adaptors.base import SkillMetadata
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -40,9 +39,7 @@ Get started quickly.
|
||||
|
||||
refs_dir = skill_dir / "references"
|
||||
refs_dir.mkdir()
|
||||
(refs_dir / "api_reference.md").write_text(
|
||||
"# API Reference\n\nSome API docs.\n"
|
||||
)
|
||||
(refs_dir / "api_reference.md").write_text("# API Reference\n\nSome API docs.\n")
|
||||
(refs_dir / "getting_started.md").write_text(
|
||||
"# Getting Started\n\nSome getting started docs.\n"
|
||||
)
|
||||
@@ -330,13 +327,17 @@ class TestPineconeAdaptor:
|
||||
if vectors is None:
|
||||
vectors = [{"id": "a", "metadata": {"text": "hello world"}}]
|
||||
pkg = tmp_path / "test-pinecone.json"
|
||||
pkg.write_text(json.dumps({
|
||||
"vectors": vectors,
|
||||
"index_name": "test",
|
||||
"namespace": "test",
|
||||
"metric": "cosine",
|
||||
"dimension": 1536,
|
||||
}))
|
||||
pkg.write_text(
|
||||
json.dumps(
|
||||
{
|
||||
"vectors": vectors,
|
||||
"index_name": "test",
|
||||
"namespace": "test",
|
||||
"metric": "cosine",
|
||||
"dimension": 1536,
|
||||
}
|
||||
)
|
||||
)
|
||||
return pkg
|
||||
|
||||
def test_upload_success_has_url_key(self, tmp_path, monkeypatch):
|
||||
@@ -346,7 +347,8 @@ class TestPineconeAdaptor:
|
||||
adaptor = PineconeAdaptor()
|
||||
mock_pc, _mock_index = self._make_mock_pinecone(monkeypatch)
|
||||
monkeypatch.setattr(
|
||||
adaptor, "_generate_openai_embeddings",
|
||||
adaptor,
|
||||
"_generate_openai_embeddings",
|
||||
lambda docs: [[0.0] * 1536] * len(docs),
|
||||
)
|
||||
pkg = self._make_package(tmp_path)
|
||||
@@ -364,13 +366,16 @@ class TestPineconeAdaptor:
|
||||
adaptor = PineconeAdaptor()
|
||||
mock_pc, _mock_index = self._make_mock_pinecone(monkeypatch)
|
||||
monkeypatch.setattr(
|
||||
adaptor, "_generate_st_embeddings",
|
||||
adaptor,
|
||||
"_generate_st_embeddings",
|
||||
lambda docs: [[0.0] * 384] * len(docs),
|
||||
)
|
||||
pkg = self._make_package(tmp_path)
|
||||
|
||||
result = adaptor.upload(
|
||||
pkg, api_key="fake-key", embedding_function="sentence-transformers",
|
||||
pkg,
|
||||
api_key="fake-key",
|
||||
embedding_function="sentence-transformers",
|
||||
)
|
||||
assert result["success"] is True
|
||||
# Verify create_index was called with dimension=384
|
||||
@@ -385,13 +390,16 @@ class TestPineconeAdaptor:
|
||||
adaptor = PineconeAdaptor()
|
||||
mock_pc, _mock_index = self._make_mock_pinecone(monkeypatch)
|
||||
monkeypatch.setattr(
|
||||
adaptor, "_generate_openai_embeddings",
|
||||
adaptor,
|
||||
"_generate_openai_embeddings",
|
||||
lambda docs: [[0.0] * 1536] * len(docs),
|
||||
)
|
||||
pkg = self._make_package(tmp_path)
|
||||
|
||||
result = adaptor.upload(
|
||||
pkg, api_key="fake-key", embedding_function="openai",
|
||||
pkg,
|
||||
api_key="fake-key",
|
||||
embedding_function="openai",
|
||||
)
|
||||
assert result["success"] is True
|
||||
mock_pc.create_index.assert_called_once()
|
||||
@@ -405,7 +413,7 @@ class TestPineconeAdaptor:
|
||||
adaptor = PineconeAdaptor()
|
||||
mock_pc, _mock_index = self._make_mock_pinecone(monkeypatch)
|
||||
|
||||
def fail_embeddings(docs):
|
||||
def fail_embeddings(_docs):
|
||||
raise RuntimeError("OPENAI_API_KEY not set")
|
||||
|
||||
monkeypatch.setattr(adaptor, "_generate_openai_embeddings", fail_embeddings)
|
||||
@@ -423,13 +431,17 @@ class TestPineconeAdaptor:
|
||||
adaptor = PineconeAdaptor()
|
||||
mock_pc, _mock_index = self._make_mock_pinecone(monkeypatch)
|
||||
monkeypatch.setattr(
|
||||
adaptor, "_generate_openai_embeddings",
|
||||
adaptor,
|
||||
"_generate_openai_embeddings",
|
||||
lambda docs: [[0.0] * 768] * len(docs),
|
||||
)
|
||||
pkg = self._make_package(tmp_path)
|
||||
|
||||
result = adaptor.upload(
|
||||
pkg, api_key="fake-key", embedding_function="openai", dimension=768,
|
||||
pkg,
|
||||
api_key="fake-key",
|
||||
embedding_function="openai",
|
||||
dimension=768,
|
||||
)
|
||||
assert result["success"] is True
|
||||
mock_pc.create_index.assert_called_once()
|
||||
|
||||
@@ -160,7 +160,10 @@ class TestEmbeddingMethodInheritance:
|
||||
assert hasattr(adaptor, "_generate_openai_embeddings")
|
||||
# Verify it's the base class method, not a local override
|
||||
from skill_seekers.cli.adaptors.base import SkillAdaptor
|
||||
assert adaptor._generate_openai_embeddings.__func__ is SkillAdaptor._generate_openai_embeddings
|
||||
|
||||
assert (
|
||||
adaptor._generate_openai_embeddings.__func__ is SkillAdaptor._generate_openai_embeddings
|
||||
)
|
||||
|
||||
def test_weaviate_inherits_both_embedding_methods(self):
|
||||
"""Test weaviate adaptor gets both embedding methods from base."""
|
||||
@@ -168,7 +171,10 @@ class TestEmbeddingMethodInheritance:
|
||||
assert hasattr(adaptor, "_generate_openai_embeddings")
|
||||
assert hasattr(adaptor, "_generate_st_embeddings")
|
||||
from skill_seekers.cli.adaptors.base import SkillAdaptor
|
||||
assert adaptor._generate_openai_embeddings.__func__ is SkillAdaptor._generate_openai_embeddings
|
||||
|
||||
assert (
|
||||
adaptor._generate_openai_embeddings.__func__ is SkillAdaptor._generate_openai_embeddings
|
||||
)
|
||||
assert adaptor._generate_st_embeddings.__func__ is SkillAdaptor._generate_st_embeddings
|
||||
|
||||
def test_pinecone_inherits_both_embedding_methods(self):
|
||||
@@ -177,7 +183,10 @@ class TestEmbeddingMethodInheritance:
|
||||
assert hasattr(adaptor, "_generate_openai_embeddings")
|
||||
assert hasattr(adaptor, "_generate_st_embeddings")
|
||||
from skill_seekers.cli.adaptors.base import SkillAdaptor
|
||||
assert adaptor._generate_openai_embeddings.__func__ is SkillAdaptor._generate_openai_embeddings
|
||||
|
||||
assert (
|
||||
adaptor._generate_openai_embeddings.__func__ is SkillAdaptor._generate_openai_embeddings
|
||||
)
|
||||
assert adaptor._generate_st_embeddings.__func__ is SkillAdaptor._generate_st_embeddings
|
||||
|
||||
|
||||
|
||||
@@ -31,8 +31,9 @@ except ImportError:
|
||||
WORD_AVAILABLE = False
|
||||
|
||||
|
||||
def _make_sample_extracted_data(num_sections=2, include_code=False, include_tables=False,
|
||||
include_images=False):
|
||||
def _make_sample_extracted_data(
|
||||
num_sections=2, include_code=False, include_tables=False, include_images=False
|
||||
):
|
||||
"""Helper to build a minimal extracted_data dict for testing."""
|
||||
mock_image_bytes = (
|
||||
b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01"
|
||||
@@ -54,23 +55,29 @@ def _make_sample_extracted_data(num_sections=2, include_code=False, include_tabl
|
||||
}
|
||||
if include_code:
|
||||
section["code_samples"] = [
|
||||
{"code": f"def hello_{i}():\n return 'world'", "language": "python",
|
||||
"quality_score": 7.5}
|
||||
{
|
||||
"code": f"def hello_{i}():\n return 'world'",
|
||||
"language": "python",
|
||||
"quality_score": 7.5,
|
||||
}
|
||||
]
|
||||
if include_tables:
|
||||
section["tables"] = [
|
||||
{"headers": ["Col A", "Col B"], "rows": [["val1", "val2"], ["val3", "val4"]]}
|
||||
]
|
||||
if include_images:
|
||||
section["images"] = [
|
||||
{"index": 0, "data": mock_image_bytes, "width": 100, "height": 80}
|
||||
]
|
||||
section["images"] = [{"index": 0, "data": mock_image_bytes, "width": 100, "height": 80}]
|
||||
pages.append(section)
|
||||
|
||||
return {
|
||||
"source_file": "test.docx",
|
||||
"metadata": {"title": "Test Doc", "author": "Test Author", "created": "", "modified": "",
|
||||
"subject": ""},
|
||||
"metadata": {
|
||||
"title": "Test Doc",
|
||||
"author": "Test Author",
|
||||
"created": "",
|
||||
"modified": "",
|
||||
"subject": "",
|
||||
},
|
||||
"total_sections": num_sections,
|
||||
"total_code_blocks": num_sections if include_code else 0,
|
||||
"total_images": num_sections if include_images else 0,
|
||||
@@ -86,6 +93,7 @@ class TestWordToSkillConverterInit(unittest.TestCase):
|
||||
if not WORD_AVAILABLE:
|
||||
self.skipTest("mammoth and python-docx not installed")
|
||||
from skill_seekers.cli.word_scraper import WordToSkillConverter
|
||||
|
||||
self.WordToSkillConverter = WordToSkillConverter
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
@@ -131,6 +139,7 @@ class TestWordToSkillConverterInit(unittest.TestCase):
|
||||
def test_name_auto_detected_from_filename(self):
|
||||
"""Test name can be extracted from filename via infer_description_from_word."""
|
||||
from skill_seekers.cli.word_scraper import infer_description_from_word
|
||||
|
||||
desc = infer_description_from_word({}, name="my_doc")
|
||||
self.assertIn("my_doc", desc)
|
||||
|
||||
@@ -142,6 +151,7 @@ class TestWordCategorization(unittest.TestCase):
|
||||
if not WORD_AVAILABLE:
|
||||
self.skipTest("mammoth and python-docx not installed")
|
||||
from skill_seekers.cli.word_scraper import WordToSkillConverter
|
||||
|
||||
self.WordToSkillConverter = WordToSkillConverter
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
@@ -175,10 +185,22 @@ class TestWordCategorization(unittest.TestCase):
|
||||
converter.docx_path = ""
|
||||
converter.extracted_data = {
|
||||
"pages": [
|
||||
{"section_number": 1, "heading": "API Reference", "text": "api reference docs",
|
||||
"code_samples": [], "tables": [], "images": []},
|
||||
{"section_number": 2, "heading": "Getting Started", "text": "getting started guide",
|
||||
"code_samples": [], "tables": [], "images": []},
|
||||
{
|
||||
"section_number": 1,
|
||||
"heading": "API Reference",
|
||||
"text": "api reference docs",
|
||||
"code_samples": [],
|
||||
"tables": [],
|
||||
"images": [],
|
||||
},
|
||||
{
|
||||
"section_number": 2,
|
||||
"heading": "Getting Started",
|
||||
"text": "getting started guide",
|
||||
"code_samples": [],
|
||||
"tables": [],
|
||||
"images": [],
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
@@ -205,6 +227,7 @@ class TestWordSkillBuilding(unittest.TestCase):
|
||||
if not WORD_AVAILABLE:
|
||||
self.skipTest("mammoth and python-docx not installed")
|
||||
from skill_seekers.cli.word_scraper import WordToSkillConverter
|
||||
|
||||
self.WordToSkillConverter = WordToSkillConverter
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
@@ -297,6 +320,7 @@ class TestWordCodeBlocks(unittest.TestCase):
|
||||
if not WORD_AVAILABLE:
|
||||
self.skipTest("mammoth and python-docx not installed")
|
||||
from skill_seekers.cli.word_scraper import WordToSkillConverter
|
||||
|
||||
self.WordToSkillConverter = WordToSkillConverter
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
@@ -351,6 +375,7 @@ class TestWordTables(unittest.TestCase):
|
||||
if not WORD_AVAILABLE:
|
||||
self.skipTest("mammoth and python-docx not installed")
|
||||
from skill_seekers.cli.word_scraper import WordToSkillConverter
|
||||
|
||||
self.WordToSkillConverter = WordToSkillConverter
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
@@ -393,6 +418,7 @@ class TestWordImages(unittest.TestCase):
|
||||
if not WORD_AVAILABLE:
|
||||
self.skipTest("mammoth and python-docx not installed")
|
||||
from skill_seekers.cli.word_scraper import WordToSkillConverter
|
||||
|
||||
self.WordToSkillConverter = WordToSkillConverter
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
@@ -434,6 +460,7 @@ class TestWordErrorHandling(unittest.TestCase):
|
||||
if not WORD_AVAILABLE:
|
||||
self.skipTest("mammoth and python-docx not installed")
|
||||
from skill_seekers.cli.word_scraper import WordToSkillConverter
|
||||
|
||||
self.WordToSkillConverter = WordToSkillConverter
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
@@ -496,6 +523,7 @@ class TestWordJSONWorkflow(unittest.TestCase):
|
||||
if not WORD_AVAILABLE:
|
||||
self.skipTest("mammoth and python-docx not installed")
|
||||
from skill_seekers.cli.word_scraper import WordToSkillConverter
|
||||
|
||||
self.WordToSkillConverter = WordToSkillConverter
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user