Release v1.21.0: Add macos-cleaner skill
- Add macos-cleaner v1.0.0
- Intelligent macOS disk space recovery
- Safety-first philosophy with risk categorization (Safe/Caution/Keep)
- Smart analysis: caches, app remnants, large files, dev environments
- Interactive cleanup with explicit user confirmation
- Bundled scripts: analyze_caches, analyze_dev_env, analyze_large_files, find_app_remnants, safe_delete, cleanup_report
- Comprehensive references: cleanup_targets, mole_integration, safety_rules
- Update marketplace to v1.21.0
- Update all documentation (README.md, README.zh-CN.md, CHANGELOG.md, CLAUDE.md)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
241
macos-cleaner/scripts/analyze_large_files.py
Executable file
241
macos-cleaner/scripts/analyze_large_files.py
Executable file
@@ -0,0 +1,241 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Find large files on macOS and categorize them.

Usage:
    python3 analyze_large_files.py [--threshold SIZE] [--path PATH] [--limit N]

Options:
    --threshold  Minimum file size in MB (default: 100)
    --path       Path to search (default: ~)
    --limit      Maximum number of results (default: 50)
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def format_size(bytes_size):
    """Convert a byte count to a human-readable string.

    The magnitude is divided by 1024 until it drops below 1024 or the
    units run out, then rendered with one decimal place.

    Args:
        bytes_size: Size in bytes (int or float). Negative values are
            scaled by magnitude and keep their sign, so -2048 becomes
            "-2.0 KB" rather than "-2048.0 B".

    Returns:
        A string such as "0.0 B", "1.5 KB" or "3.2 GB". Values of
        1024 TB and above are reported in PB.
    """
    # Scale the magnitude only: a negative number is always "< 1024", so
    # comparing the signed value would never leave the bytes unit.
    sign = '-' if bytes_size < 0 else ''
    magnitude = abs(bytes_size)

    for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
        if magnitude < 1024.0:
            return f"{sign}{magnitude:.1f} {unit}"
        magnitude /= 1024.0

    return f"{sign}{magnitude:.1f} PB"
|
||||
|
||||
|
||||
def categorize_file(path):
    """Categorize a file by its name and suggest how to handle it.

    Extension-based rules are applied first, in a fixed order. The
    log-file heuristic runs afterwards so that a name which merely
    mentions "log" cannot override a more specific (and more cautious)
    classification such as a VM image.

    Args:
        path: File location as a ``pathlib.Path`` or a plain string. Only
            the file name is inspected; the file itself is never opened.

    Returns:
        A ``(category, icon, safety_note)`` tuple of strings.
    """
    path = Path(path)
    suffix = path.suffix.lower()

    # (extensions, result) pairs, checked in order. The extension sets are
    # disjoint, so the order only documents intent.
    extension_rules = (
        ({'.mp4', '.mov', '.avi', '.mkv', '.m4v', '.flv', '.wmv'},
         ('Video', '🎬', 'Review and archive to external storage')),
        ({'.zip', '.tar', '.gz', '.bz2', '.7z', '.rar', '.dmg'},
         ('Archive', '📦', 'Extract if needed, then delete archive')),
        ({'.iso', '.img', '.toast'},
         ('Disk Image', '💿', 'Delete after installation/use')),
        ({'.db', '.sqlite', '.sqlite3', '.sql'},
         ('Database', '🗄️', '⚠️ Verify not in use before deleting')),
        ({'.csv', '.json', '.xml', '.parquet', '.arrow'},
         ('Data File', '📊', 'Archive or compress if historical data')),
        ({'.o', '.a', '.so', '.dylib', '.framework'},
         ('Build Artifact', '🔨', 'Safe to delete, rebuild will regenerate')),
        ({'.vmdk', '.vdi', '.qcow2', '.vhd'},
         ('VM Image', '💻', '⚠️ Contains VM data, verify before deleting')),
    )
    for extensions, result in extension_rules:
        if suffix in extensions:
            return result

    # Log files: require "log"/"logs" as a whole token of the file name
    # ("app.log", "app.log.1", "error_log.txt"). A plain substring test
    # would also flag "catalog.bin" or "dialog.dat" as safe to delete.
    name_tokens = ''.join(
        ch if ch.isalnum() else ' ' for ch in path.name.lower()
    ).split()
    if 'log' in name_tokens or 'logs' in name_tokens:
        return ('Log File', '📝', 'Safe to delete old logs')

    # Nothing matched: leave the decision to the user.
    return ('Other', '📄', 'Review before deleting')
|
||||
|
||||
|
||||
def find_large_files(search_path, threshold_bytes, limit):
    """Find files larger than a threshold using the ``find`` command.

    Args:
        search_path: Directory (or file) handed to ``find`` as its start
            point.
        threshold_bytes: Minimum size in bytes. It is converted to
            512-byte blocks for ``find -size``, so the cut-off has block
            granularity.
        limit: Maximum number of results to return. Values below zero are
            treated as zero.

    Returns:
        List of ``(path, size_bytes)`` tuples, largest first, where
        ``path`` is a ``pathlib.Path`` and ``size_bytes`` is the
        ``st_size`` reported by ``stat``. If the search exceeds the
        120-second timeout, the files reported before the timeout are
        returned.
    """
    # Convert bytes to 512-byte blocks (find -size uses 512-byte blocks)
    threshold_blocks = threshold_bytes // 512

    # Directories whose contents are never reported
    exclude_dirs = [
        '.Trash',
        'Library/Caches',
        'Library/Application Support/MobileSync',  # iOS backups
        '.git',
        'node_modules',
        '__pycache__',
    ]

    cmd = ['find', search_path, '-type', 'f', '-size', f'+{threshold_blocks}']
    for exclude in exclude_dirs:
        cmd.extend(['-not', '-path', f'*/{exclude}/*'])
    # NUL-terminate each result so file names containing newlines survive.
    cmd.append('-print0')

    try:
        # Capture raw bytes: file names are not guaranteed to be valid in
        # the locale encoding, and decoding is done per path below.
        result = subprocess.run(cmd, capture_output=True, timeout=120)
    except subprocess.TimeoutExpired as exc:
        print("⚠️ Search timed out, showing partial results", file=sys.stderr)
        # Output collected before the timeout; None if nothing was read.
        raw_output = exc.stdout or b''
    else:
        if result.returncode != 0:
            print("⚠️ Warning: find command had errors", file=sys.stderr)
        raw_output = result.stdout

    records = raw_output.split(b'\0')
    # The element after the final NUL is either empty (complete output) or
    # a path cut off by the timeout; drop it in both cases.
    records.pop()

    files = []
    for raw_path in records:
        if not raw_path:
            continue
        path = Path(os.fsdecode(raw_path))
        try:
            size = path.stat().st_size
        except OSError:
            # File vanished or is unreadable since find reported it.
            continue
        files.append((path, size))

    # Sort by size descending
    files.sort(key=lambda entry: entry[1], reverse=True)
    return files[:max(limit, 0)]
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: search, list and summarise large files.

    Parses ``--threshold`` (MB), ``--path`` and ``--limit``, runs the
    search, prints one table row per file, then a per-category breakdown
    and suggested next steps.

    Returns:
        0 on success (including when nothing is found), 1 when the search
        path does not exist. Invalid numeric options terminate through
        ``argparse`` with its usual usage error.
    """
    parser = argparse.ArgumentParser(
        description='Find large files on macOS'
    )
    parser.add_argument(
        '--threshold',
        type=int,
        default=100,
        help='Minimum file size in MB (default: 100)'
    )
    parser.add_argument(
        '--path',
        default=os.path.expanduser('~'),
        help='Path to search (default: ~)'
    )
    parser.add_argument(
        '--limit',
        type=int,
        default=50,
        help='Maximum number of results (default: 50)'
    )
    args = parser.parse_args()

    # Reject values that would yield a malformed find expression or an
    # always-empty result list.
    if args.threshold < 0:
        parser.error('--threshold must be zero or greater')
    if args.limit < 1:
        parser.error('--limit must be at least 1')

    threshold_bytes = args.threshold * 1024 * 1024
    search_path = os.path.expanduser(args.path)

    # Without this check a mistyped path would be reported as
    # "no large files found", which reads like a clean result.
    if not os.path.exists(search_path):
        print(f"❌ Search path does not exist: {search_path}", file=sys.stderr)
        return 1

    print(f"🔍 Searching for files larger than {args.threshold} MB")
    print(f"📂 Search path: {search_path}")
    print("=" * 80)
    print("This may take a few minutes...\n")

    large_files = find_large_files(search_path, threshold_bytes, args.limit)

    if not large_files:
        print("✅ No large files found above the threshold.")
        return 0

    print(f"\n📦 Found {len(large_files)} large files")
    print("=" * 80)
    print(f"{'#':<4} {'Size':<12} {'Type':<12} {'Location'}")
    print("-" * 80)

    # Per-category totals, filled while the rows are printed
    by_category = {}
    total_size = 0

    for i, (path, size) in enumerate(large_files, 1):
        category, icon, note = categorize_file(path)

        # Shorten path for display: show it relative to the home
        # directory when it lives there.
        try:
            rel_path = path.relative_to(Path.home())
            display_path = f"~/{rel_path}"
        except ValueError:
            display_path = str(path)

        # Truncate long paths
        if len(display_path) > 45:
            display_path = display_path[:42] + "..."

        print(f"{i:<4} {format_size(size):<12} {icon} {category:<10} {display_path}")

        # Track by category; the note of the first file seen is kept.
        bucket = by_category.setdefault(
            category, {'count': 0, 'size': 0, 'note': note}
        )
        bucket['count'] += 1
        bucket['size'] += size
        total_size += size

    print("-" * 80)
    print(f"{'Total':<4} {format_size(total_size):<12}")

    # Category summary, largest category first
    print("\n\n📊 Breakdown by Category")
    print("=" * 80)
    for category, data in sorted(
        by_category.items(),
        key=lambda x: x[1]['size'],
        reverse=True
    ):
        print(f"\n{category}")
        print(f" Files: {data['count']}")
        print(f" Total: {format_size(data['size'])}")
        print(f" 💡 {data['note']}")

    print("\n\n💡 Next Steps:")
    print(" 1. Review the list and identify files you no longer need")
    print(" 2. For videos/archives: consider moving to external storage")
    print(" 3. For databases/VMs: verify they're not in use")
    print(" 4. Use safe_delete.py for interactive cleanup")

    return 0
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user