def summarize_reference(self, content: str, target_ratio: float = 0.3) -> str:
    """Summarize Markdown reference content to about ``target_ratio`` of its lines.

    Lines are selected in priority order and then emitted in their original
    document order, so the summary still reads top to bottom:

    1. Introduction: the first 20% of the lines, capped by the overall
       budget and pulled back so it never ends inside a fenced code block.
    2. Up to 5 complete fenced code blocks located after the introduction.
       A block is only kept whole, and only if it still fits the budget.
    3. Up to 10 headings located after the introduction, each with up to
       3 following lines (stopping early at a code fence). Lines starting
       with ``#`` inside a fenced block are code comments, not headings.

    Args:
        content: Full reference content.
        target_ratio: Upper bound on the kept share of the original line
            count (0.3 = 30%). Values outside 0..1 are clamped. The last
            heading chunk may overshoot the bound by a few lines.

    Returns:
        The summarized content, always ending with a marker line stating
        that the content was summarized.
    """
    intro_ratio = 0.2
    max_code_blocks = 5
    max_headings = 10
    heading_context = 3  # lines kept after each heading
    marker = "\n\n[Content intelligently summarized - full details in reference files]"

    lines = content.split('\n')
    total = len(lines)
    ratio = min(max(target_ratio, 0.0), 1.0)
    # Line budget. Keep it at least 1 for any positive ratio so that very
    # short documents are not reduced to the marker alone.
    budget = max(1, int(total * ratio)) if ratio > 0 else 0

    # Locate fenced code blocks over the WHOLE document first, so that the
    # open/close parity of the fences is never thrown off by the intro cut.
    fence_spans = []
    open_at = None
    for i, line in enumerate(lines):
        if line.strip().startswith('```'):
            if open_at is None:
                open_at = i
            else:
                fence_spans.append((open_at, i))
                open_at = None
    if open_at is not None:
        # Unterminated fence: treat it as running to the end of the document.
        fence_spans.append((open_at, total - 1))

    in_fence = [False] * total
    for start, end in fence_spans:
        in_fence[start:end + 1] = [True] * (end - start + 1)

    # Priority 1: introduction. If the cut would split a fenced block, end
    # the intro just before that block; the block is then considered as a
    # whole under priority 2.
    intro_end = min(int(total * intro_ratio), budget)
    for start, end in fence_spans:
        if start < intro_end <= end:
            intro_end = start
            break

    keep = [False] * total
    keep[:intro_end] = [True] * intro_end
    used = intro_end

    # Priority 2: complete code blocks that fit the remaining budget. A
    # block that is too large is skipped so a later, smaller one can fit.
    blocks_added = 0
    for start, end in fence_spans:
        if blocks_added == max_code_blocks:
            break
        size = end - start + 1
        if start < intro_end or used + size > budget:
            continue
        keep[start:end + 1] = [True] * size
        used += size
        blocks_added += 1

    # Priority 3: headings with a little context, while budget remains.
    headings_added = 0
    i = intro_end
    while i < total and headings_added < max_headings and used < budget:
        if lines[i].startswith('#') and not in_fence[i]:
            stop = i + 1
            limit = min(stop + heading_context, total)
            # Never pull part of a fenced block into a heading chunk.
            while stop < limit and not in_fence[stop]:
                stop += 1
            keep[i:stop] = [True] * (stop - i)
            used += stop - i
            headings_added += 1
            i = stop
        else:
            i += 1

    # Emit the kept lines in document order, separating non-adjacent
    # chunks with a single blank line.
    result = []
    previous = -1
    for i, line in enumerate(lines):
        if not keep[i]:
            continue
        if result and i != previous + 1 and result[-1] != "":
            result.append("")
        result.append(line)
        previous = i

    result.append(marker)
    return '\n'.join(result)
{filename}\n{content[:15000]}\n" + # Further limit per-file to 12K to be safe + max_per_file = 12000 + if len(content) > max_per_file: + content = content[:max_per_file] + "\n\n[Content truncated for size...]" + prompt += f"\n## {filename}\n{content}\n" prompt += f""" {'-'*60} @@ -167,11 +269,23 @@ First, backup the original to: {self.skill_md_path.with_suffix('.md.backup').abs return prompt def run(self, headless=True, timeout=600): - """Main enhancement workflow + """Main enhancement workflow with automatic smart summarization for large skills. + + Automatically detects large skills (>30K chars) and applies smart summarization + to ensure compatibility with Claude CLI's ~30-40K character limit. + + Smart summarization strategy: + - Keeps first 20% (introduction/overview) + - Extracts up to 5 best code blocks + - Keeps up to 10 section headings with first paragraph + - Reduces to ~30% of original size Args: headless: If True, run claude directly without opening terminal (default: True) timeout: Maximum time to wait for enhancement in seconds (default: 600 = 10 minutes) + + Returns: + bool: True if enhancement process started successfully, False otherwise """ print(f"\n{'='*60}") print(f"LOCAL ENHANCEMENT: {self.skill_dir.name}") @@ -198,9 +312,24 @@ First, backup the original to: {self.skill_md_path.with_suffix('.md.backup').abs total_size = sum(len(c) for c in references.values()) print(f" ✓ Total size: {total_size:,} characters\n") + # Check if we need smart summarization + use_summarization = total_size > 30000 + + if use_summarization: + print("⚠️ LARGE SKILL DETECTED") + print(f" 📊 Reference content: {total_size:,} characters") + print(f" 💡 Claude CLI limit: ~30,000-40,000 characters") + print() + print(" 🔧 Applying smart summarization to ensure success...") + print(" • Keeping introductions and overviews") + print(" • Extracting best code examples") + print(" • Preserving key concepts and headings") + print(" • Target: ~30% of original size") + print() + # 
"""
Tests for smart summarization feature in enhance_skill_local.py

Covers the automatic content reduction applied to large skills so that the
generated prompt stays within Claude CLI's character limits.
"""

import pytest
from pathlib import Path
from skill_seekers.cli.enhance_skill_local import LocalSkillEnhancer


class TestSmartSummarization:
    """Smart summarization behaviour for oversized skills."""

    def test_summarize_reference_basic(self, tmp_path):
        """Summarizing keeps the intro, code fences and the summary marker."""
        # Twenty verbose sections so that summarization has something to cut.
        section_texts = [f"""
## Section {idx}

This is section {idx} with detailed explanation that would benefit from summarization.
We add multiple paragraphs to make the content more realistic and substantial.
This content explains various aspects of the framework in detail.

Another paragraph with more information about this specific topic.
Technical details and explanations continue here with examples and use cases.

```python
# Example code for section {idx}
def function_{idx}():
    print("Section {idx}")
    return {idx}
```

Final paragraph wrapping up this section with concluding remarks.
""" for idx in range(20)]
        header = "# Introduction\n\nThis is the framework introduction.\n"
        content = header + "\n".join(section_texts)

        summarized = LocalSkillEnhancer(tmp_path).summarize_reference(content, target_ratio=0.3)

        # Key structural elements survive
        assert "# Introduction" in summarized
        assert "```python" in summarized
        assert "[Content intelligently summarized" in summarized
        # and the result is smaller than the input
        assert len(summarized) < len(content)

    def test_summarize_preserves_code_blocks(self, tmp_path):
        """Code examples are given priority in the summary."""
        content = """# Framework

Some text here.

```python
# Example 1
def hello():
    print("Hello")
```

More text between examples.

```python
# Example 2
def world():
    print("World")
```

Even more text.

```python
# Example 3
def important():
    return "key"
```

Final text section.
"""

        enhancer = LocalSkillEnhancer(tmp_path)
        summarized = enhancer.summarize_reference(content, target_ratio=0.5)

        # More than one fenced block must make it through
        assert summarized.count("```python") >= 2
        assert any(label in summarized for label in ("Example 1", "Example 2", "Example 3"))

    def test_summarize_large_content(self, tmp_path):
        """A big document shrinks to roughly the requested ratio."""
        # Fifty sections stand in for a large reference file.
        content = "\n".join(f"""
## Section {idx}

This is section {idx} with lots of content that needs to be summarized.
We add multiple paragraphs to make it realistic.

```python
# Code example {idx}
def function_{idx}():
    return {idx}
```

More explanatory text follows here.
Another paragraph of content.
""" for idx in range(50))

        enhancer = LocalSkillEnhancer(tmp_path)
        summarized = enhancer.summarize_reference(content, target_ratio=0.3)

        original_size = len(content)
        summarized_size = len(summarized)

        # Must shrink ...
        assert summarized_size < original_size
        # ... to about 30%; structural constraints allow a 20-50% window
        ratio = summarized_size / original_size
        assert 0.2 <= ratio <= 0.5, f"Ratio {ratio:.2f} not in expected range"

    def test_create_prompt_without_summarization(self, tmp_path):
        """Small skills produce a prompt with no summarization marker."""
        skill_dir = tmp_path / "small_skill"
        refs_dir = skill_dir / "references"
        skill_dir.mkdir()
        refs_dir.mkdir()

        # Two small reference files
        small_refs = {
            "index.md": "# Index\n\nSmall content here.",
            "api.md": "# API\n\n```python\ndef test(): pass\n```",
        }
        for file_name, text in small_refs.items():
            (refs_dir / file_name).write_text(text)

        prompt = LocalSkillEnhancer(skill_dir).create_enhancement_prompt(use_summarization=False)

        assert prompt is not None
        assert "YOUR TASK:" in prompt
        assert "REFERENCE DOCUMENTATION:" in prompt
        assert "[Content intelligently summarized" not in prompt

    def test_create_prompt_with_summarization(self, tmp_path):
        """Explicitly requested summarization leaves its marker in the prompt."""
        skill_dir = tmp_path / "large_skill"
        skill_dir.mkdir()
        (skill_dir / "SKILL.md").write_text("# Test Skill\n\nTest skill content.")

        refs_dir = skill_dir / "references"
        refs_dir.mkdir()

        # One big reference file (>12K chars, enough for per-file truncation).
        # index.md is skipped by read_reference_files(), hence api.md.
        chunks = (
            f"# Section {idx}\n\nContent here with more text to make it substantial.\n\n```python\ndef func_{idx}(): pass\n```\n"
            for idx in range(200)
        )
        (refs_dir / "api.md").write_text("\n".join(chunks))

        enhancer = LocalSkillEnhancer(skill_dir)
        prompt = enhancer.create_enhancement_prompt(use_summarization=True, summarization_ratio=0.3)

        assert prompt is not None
        assert "YOUR TASK:" in prompt
        assert "REFERENCE DOCUMENTATION:" in prompt
        # Either the summary marker or the truncation marker must be present
        markers = ("[Content intelligently summarized", "[Content truncated for size...]")
        assert markers[0] in prompt or markers[1] in prompt

    def test_run_detects_large_skill(self, tmp_path, monkeypatch, capsys):
        """run() notices an oversized skill on its own and reports it."""
        skill_dir = tmp_path / "large_skill"
        skill_dir.mkdir()

        refs_dir = skill_dir / "references"
        refs_dir.mkdir()

        # SKILL.md is needed for the directory to count as a skill
        (skill_dir / "SKILL.md").write_text("# Test Skill\n\nTest skill content.")

        # Build content well past the 30K threshold.
        # index.md is skipped by read_reference_files(), so other names are used.
        filler = "Content with detailed explanations " * 50
        large_content = "\n".join(
            f"# Section {idx}\n\n" + filler + f"\n\n```python\ndef func_{idx}(): pass\n```\n"
            for idx in range(150)
        )
        half = len(large_content) // 2
        (refs_dir / "api.md").write_text(large_content)
        (refs_dir / "guide.md").write_text(large_content)
        (refs_dir / "tutorial.md").write_text(large_content[:half])

        enhancer = LocalSkillEnhancer(skill_dir)

        # Stub out the headless runner so Claude is never actually invoked
        monkeypatch.setattr(enhancer, '_run_headless', lambda prompt_file, timeout: True)

        result = enhancer.run(headless=True)
        output = capsys.readouterr().out

        # The large-skill warning must have been printed
        assert "LARGE SKILL DETECTED" in output
        assert "smart summarization" in output.lower()
        assert result is True


if __name__ == "__main__":
    pytest.main([__file__, "-v"])