#!/usr/bin/env python3 """Check for HTML content in skills and identify which need conversion.""" import json import re from pathlib import Path def check_html_content(skill_path: Path) -> dict: """Check if a skill file contains HTML content.""" try: content = skill_path.read_text(encoding='utf-8') except Exception as e: return {'error': str(e), 'has_html': False} # HTML patterns (excluding code blocks) html_patterns = [ r'', r'', r' 5, # Threshold 'html_count': len(html_matches), 'matches': html_matches[:10] # First 10 matches } def main(): # Load similar skills from analysis analysis_file = Path('voltagent_analysis.json') if not analysis_file.exists(): print("āŒ voltagent_analysis.json not found") return with open(analysis_file, 'r') as f: analysis = json.load(f) similar_skills = analysis.get('similar_skills', []) skills_dir = Path('skills') print(f"šŸ” Checking {len(similar_skills)} similar skills for HTML content...\n") skills_with_html = [] skills_checked = 0 for item in similar_skills: skill_name = item['voltagent']['normalized_name'] skill_path = skills_dir / skill_name / 'SKILL.md' if not skill_path.exists(): continue skills_checked += 1 result = check_html_content(skill_path) if result.get('has_html'): skills_with_html.append({ 'name': skill_name, 'url': item['voltagent']['url'], 'description': item['voltagent']['description'], 'html_count': result['html_count'], 'matches': result.get('matches', []) }) print(f"šŸ“Š Checked {skills_checked} skills") print(f"āš ļø Found {len(skills_with_html)} skills with HTML content\n") if skills_with_html: print("Skills needing HTML-to-Markdown conversion:") for skill in skills_with_html: print(f"\n • {skill['name']}") print(f" HTML patterns: {skill['html_count']}") print(f" URL: {skill['url']}") if skill['matches']: print(f" Sample match (line {skill['matches'][0]['line']}): {skill['matches'][0]['preview'][:80]}...") # Also check recently implemented skills print("\n\nšŸ” Checking recently implemented skills...\n") validation_file = Path('voltagent_validation.json') if validation_file.exists(): with open(validation_file, 'r') as f: validation = json.load(f) validated_skills = validation.get('validated', []) recent_with_html = [] for item in validated_skills: skill_name = item['skill']['normalized_name'] skill_path = skills_dir / skill_name / 'SKILL.md' if not skill_path.exists(): continue result = check_html_content(skill_path) if result.get('has_html'): recent_with_html.append({ 'name': skill_name, 'html_count': result['html_count'] }) if recent_with_html: print(f"āš ļø Found {len(recent_with_html)} recently implemented skills with HTML:") for skill in recent_with_html: print(f" • {skill['name']} ({skill['html_count']} HTML patterns)") else: print("āœ… No HTML content found in recently implemented skills") # Save results output = { 'similar_skills_with_html': skills_with_html, 'total_checked': skills_checked, 'total_with_html': len(skills_with_html) } output_file = Path('html_content_analysis.json') with open(output_file, 'w') as f: json.dump(output, f, indent=2) print(f"\nšŸ’¾ Results saved to: {output_file}") if __name__ == "__main__": main()