Files
daymade 4d6ed53c1e Release v1.21.0: Add macos-cleaner skill
- Add macos-cleaner v1.0.0 - Intelligent macOS disk space recovery
- Safety-first philosophy with risk categorization (Safe/Caution/Keep)
- Smart analysis: caches, app remnants, large files, dev environments
- Interactive cleanup with explicit user confirmation
- Bundled scripts: analyze_caches, analyze_dev_env, analyze_large_files,
  find_app_remnants, safe_delete, cleanup_report
- Comprehensive references: cleanup_targets, mole_integration, safety_rules
- Update marketplace to v1.21.0
- Update all documentation (README.md, README.zh-CN.md, CHANGELOG.md, CLAUDE.md)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 15:59:13 +08:00

242 lines
6.9 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Find large files on macOS and categorize them.
Usage:
python3 analyze_large_files.py [--threshold SIZE] [--path PATH] [--limit N]
Options:
--threshold Minimum file size in MB (default: 100)
--path Path to search (default: ~)
--limit Maximum number of results (default: 50)
"""
import os
import sys
import argparse
import subprocess
from pathlib import Path
def format_size(bytes_size):
    """Render a byte count as a short human-readable string.

    The value is repeatedly divided by 1024 until it drops below 1024 or
    the largest supported unit (PB) is reached, then formatted with one
    decimal place followed by the unit label.

    Args:
        bytes_size: Size in bytes (int or float).

    Returns:
        A string such as "512.0 B", "1.5 KB" or "3.2 GB".
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
    largest = len(units) - 1

    value = bytes_size
    position = 0
    # Step up one unit at a time; PB is the ceiling, so values that are
    # still >= 1024 at that point are reported in PB as-is.
    while position < largest and not value < 1024.0:
        value /= 1024.0
        position += 1

    return f"{value:.1f} {units[position]}"
def categorize_file(path):
    """
    Categorize a file by its name and suggest how safe it is to remove.

    Categories are tested in a fixed order and the first match wins, so a
    file such as ``server_log.zip`` is reported as an archive, not a log.

    Args:
        path: File path (``pathlib.Path`` or plain string). Only the name
            is inspected; the file is never opened.

    Returns:
        (category, icon, safety_note)
    """
    path = Path(path)
    suffix = path.suffix.lower()

    # Video files
    video_exts = {'.mp4', '.mov', '.avi', '.mkv', '.m4v', '.flv', '.wmv'}
    if suffix in video_exts:
        return ('Video', '🎬', 'Review and archive to external storage')

    # Archive files
    archive_exts = {'.zip', '.tar', '.gz', '.bz2', '.7z', '.rar', '.dmg'}
    if suffix in archive_exts:
        return ('Archive', '📦', 'Extract if needed, then delete archive')

    # Disk images
    disk_exts = {'.iso', '.img', '.toast'}
    if suffix in disk_exts:
        return ('Disk Image', '💿', 'Delete after installation/use')

    # Database files
    db_exts = {'.db', '.sqlite', '.sqlite3', '.sql'}
    if suffix in db_exts:
        return ('Database', '🗄️', '⚠️ Verify not in use before deleting')

    # Data files
    data_exts = {'.csv', '.json', '.xml', '.parquet', '.arrow'}
    if suffix in data_exts:
        return ('Data File', '📊', 'Archive or compress if historical data')

    # Log files.
    # Match "log"/"logs" only as a whole word of the file name (split on
    # any non-alphanumeric character). A plain substring test would label
    # names like "login.keychain-db", "catalog.bin" or "dialog.dat" as
    # logs and wrongly advertise them as safe to delete.
    name_tokens = ''.join(
        ch if ch.isalnum() else ' ' for ch in path.name.lower()
    ).split()
    if suffix == '.log' or 'log' in name_tokens or 'logs' in name_tokens:
        return ('Log File', '📝', 'Safe to delete old logs')

    # Build artifacts
    build_exts = {'.o', '.a', '.so', '.dylib', '.framework'}
    if suffix in build_exts:
        return ('Build Artifact', '🔨', 'Safe to delete, rebuild will regenerate')

    # Virtual machine images
    vm_exts = {'.vmdk', '.vdi', '.qcow2', '.vhd'}
    if suffix in vm_exts:
        return ('VM Image', '💻', '⚠️ Contains VM data, verify before deleting')

    # Other
    return ('Other', '📄', 'Review before deleting')
def find_large_files(search_path, threshold_bytes, limit):
    """
    Find files larger than threshold using the find command.

    Runs ``find`` with a 120-second timeout, stats every reported file
    and returns the biggest ones. If the search times out, the paths that
    ``find`` had already printed are still processed, so the caller gets
    partial results rather than nothing.

    Args:
        search_path: Path to search
        threshold_bytes: Minimum size in bytes
        limit: Maximum results

    Returns:
        List of (path, size_bytes) tuples, sorted by size descending.
        ``path`` is a ``pathlib.Path``.
    """
    # Convert bytes to 512-byte blocks (find -size uses 512-byte blocks)
    threshold_blocks = threshold_bytes // 512

    # Exclude common directories to avoid
    exclude_dirs = [
        '.Trash',
        'Library/Caches',
        'Library/Application Support/MobileSync',  # iOS backups
        '.git',
        'node_modules',
        '__pycache__'
    ]

    # Build find command. Excluded directories are pruned so find never
    # descends into them; filtering with "-not -path" would still walk
    # every file below node_modules/.git and burn the timeout budget.
    cmd = ['find', search_path, '(']
    for position, exclude in enumerate(exclude_dirs):
        if position:
            cmd.append('-o')
        cmd.extend(['-path', f'*/{exclude}'])
    cmd.extend([')', '-prune', '-o'])
    # -print0 terminates each path with NUL, which (unlike a newline)
    # can never appear inside a file name.
    cmd.extend(['-type', 'f', '-size', f'+{threshold_blocks}', '-print0'])

    try:
        # Capture raw bytes: the timeout path only ever yields bytes, and
        # os.fsdecode() copes with names that are not valid UTF-8.
        result = subprocess.run(cmd, capture_output=True, timeout=120)
        raw_output = result.stdout
        if result.returncode != 0:
            print("⚠️ Warning: find command had errors", file=sys.stderr)
    except subprocess.TimeoutExpired as exc:
        print("⚠️ Search timed out, showing partial results", file=sys.stderr)
        # Whatever find printed before being killed (may be None).
        raw_output = exc.stdout or b''

    if isinstance(raw_output, str):
        raw_output = os.fsencode(raw_output)

    entries = raw_output.split(b'\0')
    # The final chunk is either the empty string after the last NUL or,
    # after a timeout, a path that was cut off mid-write. Drop it.
    entries.pop()

    files = []
    for entry in entries:
        if not entry:
            continue
        path = Path(os.fsdecode(entry))
        try:
            size = path.stat().st_size
        except OSError:
            # Vanished or unreadable since find reported it; skip.
            continue
        files.append((path, size))

    # Sort by size descending
    files.sort(key=lambda item: item[1], reverse=True)
    return files[:limit]
def main():
    """
    Command-line entry point: search for large files and print a report.

    Parses --threshold (MB), --path and --limit, runs find_large_files(),
    then prints a numbered table of the results followed by a per-category
    summary and suggested next steps.

    Returns:
        0 when the search ran (even if nothing was found), 1 when the
        search path does not exist. Invalid option values terminate the
        program through argparse's usual error exit.
    """
    parser = argparse.ArgumentParser(
        description='Find large files on macOS'
    )
    parser.add_argument(
        '--threshold',
        type=int,
        default=100,
        help='Minimum file size in MB (default: 100)'
    )
    parser.add_argument(
        '--path',
        default=os.path.expanduser('~'),
        help='Path to search (default: ~)'
    )
    parser.add_argument(
        '--limit',
        type=int,
        default=50,
        help='Maximum number of results (default: 50)'
    )
    args = parser.parse_args()

    # A negative threshold produces an invalid find expression, and a
    # non-positive limit silently truncates the result list from the
    # wrong end; reject both up front.
    if args.threshold < 0:
        parser.error('--threshold must be zero or greater')
    if args.limit < 1:
        parser.error('--limit must be at least 1')

    threshold_bytes = args.threshold * 1024 * 1024
    search_path = os.path.expanduser(args.path)

    # Without this check a mistyped path ends in the misleading
    # "No large files found" success message.
    if not os.path.exists(search_path):
        print(f"❌ Search path does not exist: {search_path}", file=sys.stderr)
        return 1

    print(f"🔍 Searching for files larger than {args.threshold} MB")
    print(f"📂 Search path: {search_path}")
    print("=" * 80)
    print("This may take a few minutes...\n")

    large_files = find_large_files(search_path, threshold_bytes, args.limit)
    if not large_files:
        print("✅ No large files found above the threshold.")
        return 0

    print(f"\n📦 Found {len(large_files)} large files")
    print("=" * 80)
    print(f"{'#':<4} {'Size':<12} {'Type':<12} {'Location'}")
    print("-" * 80)

    # Group by category
    by_category = {}
    total_size = 0
    home = Path.home()
    for i, (path, size) in enumerate(large_files, 1):
        category, icon, note = categorize_file(path)

        # Shorten path for display
        try:
            rel_path = path.relative_to(home)
            display_path = f"~/{rel_path}"
        except ValueError:
            # Not under the home directory; show the path unchanged.
            display_path = str(path)

        # Truncate long paths
        if len(display_path) > 45:
            display_path = display_path[:42] + "..."

        print(f"{i:<4} {format_size(size):<12} {icon} {category:<10} {display_path}")

        # Track by category; the note of the first file seen is kept.
        summary = by_category.setdefault(
            category, {'count': 0, 'size': 0, 'note': note}
        )
        summary['count'] += 1
        summary['size'] += size
        total_size += size

    print("-" * 80)
    print(f"{'Total':<4} {format_size(total_size):<12}")

    # Category summary, largest category first
    print("\n\n📊 Breakdown by Category")
    print("=" * 80)
    for category, data in sorted(
        by_category.items(),
        key=lambda x: x[1]['size'],
        reverse=True
    ):
        print(f"\n{category}")
        print(f" Files: {data['count']}")
        print(f" Total: {format_size(data['size'])}")
        print(f" 💡 {data['note']}")

    print("\n\n💡 Next Steps:")
    print(" 1. Review the list and identify files you no longer need")
    print(" 2. For videos/archives: consider moving to external storage")
    print(" 3. For databases/VMs: verify they're not in use")
    print(" 4. Use safe_delete.py for interactive cleanup")
    return 0
# Run the CLI only when executed as a script; main()'s return value is
# passed to sys.exit() and becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())