- Add macos-cleaner v1.0.0 - Intelligent macOS disk space recovery - Safety-first philosophy with risk categorization (Safe/Caution/Keep) - Smart analysis: caches, app remnants, large files, dev environments - Interactive cleanup with explicit user confirmation - Bundled scripts: analyze_caches, analyze_dev_env, analyze_large_files, find_app_remnants, safe_delete, cleanup_report - Comprehensive references: cleanup_targets, mole_integration, safety_rules - Update marketplace to v1.21.0 - Update all documentation (README.md, README.zh-CN.md, CHANGELOG.md, CLAUDE.md) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
242 lines
6.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Find large files on macOS and categorize them.
|
|
|
|
Usage:
|
|
python3 analyze_large_files.py [--threshold SIZE] [--path PATH] [--limit N]
|
|
|
|
Options:
|
|
--threshold Minimum file size in MB (default: 100)
|
|
--path Path to search (default: ~)
|
|
--limit Maximum number of results (default: 50)
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
|
|
def format_size(bytes_size):
    """Return *bytes_size* as a human-readable string, e.g. ``"1.5 GB"``.

    Walks the binary (1024-based) unit ladder B -> TB; anything larger
    is reported in PB.
    """
    value = float(bytes_size)
    for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
        if value < 1024.0:
            return f"{value:.1f} {unit}"
        value /= 1024.0
    # Fell off the ladder: express in petabytes.
    return f"{value:.1f} PB"
|
|
|
|
|
|
def categorize_file(path):
    """Classify *path* by extension and suggest how safely it can be removed.

    Args:
        path: pathlib.Path of the file to classify.

    Returns:
        (category, icon, safety_note) tuple.
    """
    ext = path.suffix.lower()

    # Ordered dispatch table: first matching extension set wins.
    suffix_table = (
        ({'.mp4', '.mov', '.avi', '.mkv', '.m4v', '.flv', '.wmv'},
         ('Video', '🎬', 'Review and archive to external storage')),
        ({'.zip', '.tar', '.gz', '.bz2', '.7z', '.rar', '.dmg'},
         ('Archive', '📦', 'Extract if needed, then delete archive')),
        ({'.iso', '.img', '.toast'},
         ('Disk Image', '💿', 'Delete after installation/use')),
        ({'.db', '.sqlite', '.sqlite3', '.sql'},
         ('Database', '🗄️', '⚠️ Verify not in use before deleting')),
        ({'.csv', '.json', '.xml', '.parquet', '.arrow'},
         ('Data File', '📊', 'Archive or compress if historical data')),
    )
    for extensions, result in suffix_table:
        if ext in extensions:
            return result

    # Logs are matched by extension OR by "log" appearing anywhere in the
    # filename; this check runs after the data-file table so e.g. a
    # "catalog.csv" is still a Data File.
    if ext == '.log' or 'log' in path.name.lower():
        return ('Log File', '📝', 'Safe to delete old logs')

    if ext in {'.o', '.a', '.so', '.dylib', '.framework'}:
        return ('Build Artifact', '🔨', 'Safe to delete, rebuild will regenerate')

    if ext in {'.vmdk', '.vdi', '.qcow2', '.vhd'}:
        return ('VM Image', '💻', '⚠️ Contains VM data, verify before deleting')

    # Anything unrecognized needs human judgement.
    return ('Other', '📄', 'Review before deleting')
|
|
|
|
|
|
def find_large_files(search_path, threshold_bytes, limit):
    """
    Find files larger than a size threshold using the system `find` command.

    Args:
        search_path: Directory to search.
        threshold_bytes: Minimum file size in bytes.
        limit: Maximum number of results to return.

    Returns:
        List of (pathlib.Path, size_bytes) tuples, sorted by size descending.
        On timeout, returns whatever partial results were collected before
        the deadline (possibly an empty list).
    """
    # `find -size +N` counts in 512-byte blocks, so convert the byte
    # threshold.  Integer floor-division can only lower the effective
    # threshold slightly, so no qualifying file is ever missed.
    threshold_blocks = threshold_bytes // 512

    # Skip noisy/transient locations that other cleanup tooling handles.
    exclude_dirs = [
        '.Trash',
        'Library/Caches',
        'Library/Application Support/MobileSync',  # iOS backups
        '.git',
        'node_modules',
        '__pycache__'
    ]

    # Build find command with one -not -path clause per exclusion.
    cmd = ['find', search_path, '-type', 'f', '-size', f'+{threshold_blocks}']
    for exclude in exclude_dirs:
        cmd.extend(['-not', '-path', f'*/{exclude}/*'])

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=120
        )
        if result.returncode != 0:
            # `find` commonly exits non-zero on permission errors while
            # still producing useful output, so warn and keep going.
            print(f"⚠️ Warning: find command had errors", file=sys.stderr)
        stdout = result.stdout
    except subprocess.TimeoutExpired as exc:
        # BUGFIX: the original returned [] here despite printing "showing
        # partial results".  TimeoutExpired carries the output collected
        # before the deadline, so parse that instead of discarding it.
        print("⚠️ Search timed out, showing partial results", file=sys.stderr)
        stdout = exc.stdout or ''
        if isinstance(stdout, bytes):
            # Some interpreter versions attach bytes even in text mode.
            stdout = stdout.decode('utf-8', errors='replace')

    files = []
    for line in stdout.splitlines():
        if not line:
            continue
        try:
            path = Path(line)
            # Re-check existence: files may vanish between listing and stat.
            if path.exists():
                files.append((path, path.stat().st_size))
        except (OSError, PermissionError):
            continue

    # Largest first, capped at the caller's limit.
    files.sort(key=lambda x: x[1], reverse=True)
    return files[:limit]
|
|
|
|
|
|
def main():
    """Parse CLI options, run the large-file search, and print the report.

    Returns:
        Process exit status (always 0).
    """
    parser = argparse.ArgumentParser(
        description='Find large files on macOS'
    )
    parser.add_argument(
        '--threshold',
        type=int,
        default=100,
        help='Minimum file size in MB (default: 100)'
    )
    parser.add_argument(
        '--path',
        default=os.path.expanduser('~'),
        help='Path to search (default: ~)'
    )
    parser.add_argument(
        '--limit',
        type=int,
        default=50,
        help='Maximum number of results (default: 50)'
    )
    args = parser.parse_args()

    threshold_bytes = args.threshold * 1024 * 1024
    search_path = os.path.expanduser(args.path)

    # Banner before the (potentially slow) filesystem walk.
    print(f"🔍 Searching for files larger than {args.threshold} MB")
    print(f"📂 Search path: {search_path}")
    print("=" * 80)
    print("This may take a few minutes...\n")

    large_files = find_large_files(search_path, threshold_bytes, args.limit)
    if not large_files:
        print("✅ No large files found above the threshold.")
        return 0

    # Results table header.
    print(f"\n📦 Found {len(large_files)} large files")
    print("=" * 80)
    print(f"{'#':<4} {'Size':<12} {'Type':<12} {'Location'}")
    print("-" * 80)

    category_stats = {}
    grand_total = 0
    home = Path.home()

    for rank, (path, size) in enumerate(large_files, 1):
        category, icon, note = categorize_file(path)

        # Display paths under $HOME in ~/ shorthand, truncated to fit.
        try:
            display_path = f"~/{path.relative_to(home)}"
        except ValueError:
            display_path = str(path)
        if len(display_path) > 45:
            display_path = display_path[:42] + "..."

        print(f"{rank:<4} {format_size(size):<12} {icon} {category:<10} {display_path}")

        # Accumulate per-category counts/sizes for the summary below.
        stats = category_stats.setdefault(
            category, {'count': 0, 'size': 0, 'note': note}
        )
        stats['count'] += 1
        stats['size'] += size
        grand_total += size

    print("-" * 80)
    print(f"{'Total':<4} {format_size(grand_total):<12}")

    # Per-category summary, largest total first.
    print("\n\n📊 Breakdown by Category")
    print("=" * 80)
    ranked = sorted(
        category_stats.items(),
        key=lambda item: item[1]['size'],
        reverse=True
    )
    for category, stats in ranked:
        print(f"\n{category}")
        print(f" Files: {stats['count']}")
        print(f" Total: {format_size(stats['size'])}")
        print(f" 💡 {stats['note']}")

    print("\n\n💡 Next Steps:")
    print(" 1. Review the list and identify files you no longer need")
    print(" 2. For videos/archives: consider moving to external storage")
    print(" 3. For databases/VMs: verify they're not in use")
    print(" 4. Use safe_delete.py for interactive cleanup")

    return 0
|
|
|
|
|
|
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == '__main__':
    sys.exit(main())
|