Import the official Hugging Face ecosystem skills and sync the\nexisting local coverage with upstream metadata and assets.\n\nRegenerate the canonical catalog, plugin mirrors, docs, and release\nnotes after the maintainer merge batch so main stays in sync.\n\nFixes #417
513 lines
16 KiB
Python
513 lines
16 KiB
Python
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "unsloth",
#     "datasets",
#     "trl==0.22.2",
#     "huggingface_hub[hf_transfer]",
#     "trackio",
#     "tensorboard",
#     "transformers==4.57.3",
# ]
# ///
|
|
"""
Fine-tune LLMs using Unsloth optimizations for ~60% less VRAM and 2x faster training.

Supports epoch-based or step-based training with optional eval split.
Default model: LFM2.5-1.2B-Instruct (Liquid Foundation Model).

Epoch-based training (recommended for full datasets):
    uv run unsloth_sft_example.py \
        --dataset mlabonne/FineTome-100k \
        --num-epochs 1 \
        --eval-split 0.2 \
        --output-repo your-username/model-finetuned

Run on HF Jobs (1 epoch with eval):
    hf jobs uv run unsloth_sft_example.py \
        --flavor a10g-small --secrets HF_TOKEN --timeout 4h \
        -- --dataset mlabonne/FineTome-100k \
        --num-epochs 1 \
        --eval-split 0.2 \
        --output-repo your-username/model-finetuned

Step-based training (for quick tests):
    uv run unsloth_sft_example.py \
        --dataset mlabonne/FineTome-100k \
        --max-steps 500 \
        --output-repo your-username/model-finetuned
"""
|
|
|
|
import argparse
|
|
import logging
|
|
import os
|
|
import sys
|
|
import time
|
|
|
|
# Line-buffer stdout/stderr so each printed line reaches the HF Jobs logs
# as soon as it is written.
for _stream in (sys.stdout, sys.stderr):
    _stream.reconfigure(line_buffering=True)
del _stream

# Root logging setup: INFO level with a timestamped "time - LEVEL - message" layout.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def check_cuda():
    """Confirm a CUDA device is present; otherwise log guidance and exit with status 1.

    ``torch`` is imported lazily inside the function so the module itself can be
    imported without it. On success the name of device 0 is logged.
    """
    import torch

    if torch.cuda.is_available():
        device_name = torch.cuda.get_device_name(0)
        logger.info(f"CUDA available: {device_name}")
        return

    # No GPU: explain how to run the script somewhere that has one, then abort.
    for message in (
        "CUDA is not available. This script requires a GPU.",
        "Run on a machine with a CUDA-capable GPU or use HF Jobs:",
        " hf jobs uv run unsloth_sft_example.py --flavor a10g-small ...",
    ):
        logger.error(message)
    sys.exit(1)
|
|
|
|
|
|
def parse_args():
    """Build the command-line parser and return the parsed, validated options.

    Arguments are read from ``sys.argv``. ``--dataset`` and ``--output-repo``
    are required; every other option has a default.

    Out-of-range numeric options are rejected here through ``parser.error``
    (usage message on stderr, exit status 2) so that a bad invocation fails
    before any model or dataset is downloaded:

    * ``--eval-split`` must satisfy ``0.0 <= value < 1.0``
    * ``--batch-size`` and ``--gradient-accumulation`` must be >= 1

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser(
        description="Fine-tune LLMs with Unsloth optimizations",
        # Keep the hand-formatted examples in the epilog as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Quick test run
  uv run unsloth_sft_example.py \\
      --dataset mlabonne/FineTome-100k \\
      --max-steps 50 \\
      --output-repo username/model-test

  # Full training with eval
  uv run unsloth_sft_example.py \\
      --dataset mlabonne/FineTome-100k \\
      --num-epochs 1 \\
      --eval-split 0.2 \\
      --output-repo username/model-finetuned

  # With Trackio monitoring
  uv run unsloth_sft_example.py \\
      --dataset mlabonne/FineTome-100k \\
      --num-epochs 1 \\
      --output-repo username/model-finetuned \\
      --trackio-space username/trackio
""",
    )

    # Model and data
    parser.add_argument(
        "--base-model",
        default="LiquidAI/LFM2.5-1.2B-Instruct",
        help="Base model (default: LiquidAI/LFM2.5-1.2B-Instruct)",
    )
    parser.add_argument(
        "--dataset",
        required=True,
        help="Dataset in ShareGPT/conversation format (e.g., mlabonne/FineTome-100k)",
    )
    parser.add_argument(
        "--output-repo",
        required=True,
        help="HF Hub repo to push model to (e.g., 'username/model-finetuned')",
    )

    # Training config
    parser.add_argument(
        "--num-epochs",
        type=float,
        default=None,
        help="Number of epochs (default: None). Use instead of --max-steps.",
    )
    parser.add_argument(
        "--max-steps",
        type=int,
        default=None,
        help="Training steps (default: None). Use for quick tests or streaming.",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=2,
        help="Per-device batch size (default: 2)",
    )
    parser.add_argument(
        "--gradient-accumulation",
        type=int,
        default=4,
        help="Gradient accumulation steps (default: 4). Effective batch = batch-size * this",
    )
    parser.add_argument(
        "--learning-rate",
        type=float,
        default=2e-4,
        help="Learning rate (default: 2e-4)",
    )
    parser.add_argument(
        "--max-seq-length",
        type=int,
        default=2048,
        help="Maximum sequence length (default: 2048)",
    )

    # LoRA config
    parser.add_argument(
        "--lora-r",
        type=int,
        default=16,
        help="LoRA rank (default: 16). Higher = more capacity but more VRAM",
    )
    parser.add_argument(
        "--lora-alpha",
        type=int,
        default=16,
        help="LoRA alpha (default: 16). Same as r per Unsloth recommendation",
    )

    # Logging
    parser.add_argument(
        "--trackio-space",
        default=None,
        help="HF Space for Trackio dashboard (e.g., 'username/trackio')",
    )
    parser.add_argument(
        "--run-name",
        default=None,
        help="Custom run name for Trackio (default: auto-generated)",
    )
    parser.add_argument(
        "--save-local",
        default="unsloth-output",
        help="Local directory to save model (default: unsloth-output)",
    )

    # Evaluation and data control
    parser.add_argument(
        "--eval-split",
        type=float,
        default=0.0,
        help="Fraction of data for evaluation (0.0-0.5). Default: 0.0 (no eval)",
    )
    parser.add_argument(
        "--num-samples",
        type=int,
        default=None,
        help="Limit samples (default: None = use all)",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=3407,
        help="Random seed for reproducibility (default: 3407)",
    )
    parser.add_argument(
        "--merge-model",
        action="store_true",
        help="Merge LoRA weights into base model before uploading (larger file, easier to use)",
    )

    args = parser.parse_args()

    # Fail fast on values that would otherwise only blow up (or be silently
    # ignored) after the expensive model/dataset loading has already happened.
    if not 0.0 <= args.eval_split < 1.0:
        parser.error(
            f"--eval-split must be a fraction in [0.0, 1.0), got {args.eval_split}"
        )
    if args.batch_size < 1:
        parser.error(f"--batch-size must be >= 1, got {args.batch_size}")
    if args.gradient_accumulation < 1:
        parser.error(
            f"--gradient-accumulation must be >= 1, got {args.gradient_accumulation}"
        )

    return args
|
|
|
|
|
|
def main():
    """Run the whole fine-tuning pipeline from CLI options to Hub upload.

    Stages:
        1. Parse options, resolve the epochs-vs-steps choice, print the config.
        2. Check for CUDA, then import the heavy ML dependencies and log in
           to the Hub when a token is present in the environment.
        3. Load the base model through Unsloth and attach LoRA adapters.
        4. Load the dataset, normalize its conversation column, render each
           conversation with the tokenizer's chat template, and optionally
           hold out an evaluation split.
        5. Train with ``SFTTrainer`` (loss on assistant responses only).
        6. Optionally run a final evaluation, then save and push either the
           LoRA adapter or the merged 16-bit model.
    """
    args = parse_args()

    # Resolve the training-duration options. The HF trainer lets a positive
    # max_steps override num_train_epochs, so when both are supplied follow
    # the same rule here; otherwise the banner, logging cadence and eval
    # schedule below would describe epochs while the run is bounded by steps.
    if args.num_epochs and (args.max_steps or 0) > 0:
        logger.warning(
            "Both --num-epochs and --max-steps were given; "
            "--max-steps takes precedence and --num-epochs is ignored"
        )
        args.num_epochs = None
    elif not args.num_epochs and not args.max_steps:
        args.num_epochs = 1
        logger.info("Using default --num-epochs=1")

    # Determine training duration display
    if args.num_epochs:
        duration_str = f"{args.num_epochs} epoch(s)"
    else:
        duration_str = f"{args.max_steps} steps"

    print("=" * 70)
    print("LLM Fine-tuning with Unsloth")
    print("=" * 70)
    print("\nConfiguration:")
    print(f" Base model: {args.base_model}")
    print(f" Dataset: {args.dataset}")
    print(f" Num samples: {args.num_samples or 'all'}")
    print(f" Eval split: {args.eval_split if args.eval_split > 0 else '(disabled)'}")
    print(f" Seed: {args.seed}")
    print(f" Training: {duration_str}")
    print(f" Batch size: {args.batch_size} x {args.gradient_accumulation} = {args.batch_size * args.gradient_accumulation}")
    print(f" Learning rate: {args.learning_rate}")
    print(f" LoRA rank: {args.lora_r}")
    print(f" Max seq length: {args.max_seq_length}")
    print(f" Output repo: {args.output_repo}")
    print(f" Trackio space: {args.trackio_space or '(not configured)'}")
    print()

    # Check CUDA before heavy imports
    check_cuda()

    # Enable fast transfers
    os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

    # Set Trackio space if provided
    if args.trackio_space:
        os.environ["TRACKIO_SPACE_ID"] = args.trackio_space
        logger.info("Trackio dashboard: https://huggingface.co/spaces/%s", args.trackio_space)

    # Import heavy dependencies (deferred so --help and the CUDA check stay fast)
    from unsloth import FastLanguageModel
    from unsloth.chat_templates import standardize_data_formats, train_on_responses_only
    from datasets import load_dataset
    from trl import SFTTrainer, SFTConfig
    from huggingface_hub import login

    # Login to Hub
    token = os.environ.get("HF_TOKEN") or os.environ.get("hfjob")
    if token:
        login(token=token)
        logger.info("Logged in to Hugging Face Hub")
    else:
        logger.warning("HF_TOKEN not set - model upload may fail")

    # 1. Load model
    print("\n[1/5] Loading model...")
    start = time.time()

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=args.base_model,
        max_seq_length=args.max_seq_length,
        load_in_4bit=False,
        load_in_8bit=False,
        load_in_16bit=True,
        full_finetuning=False,
    )

    # Add LoRA adapters.
    # NOTE(review): the target module names are hard-coded - presumably chosen
    # for the default base model's architecture; confirm before using a
    # different --base-model.
    model = FastLanguageModel.get_peft_model(
        model,
        r=args.lora_r,
        target_modules=["q_proj", "k_proj", "v_proj", "out_proj", "in_proj", "w1", "w2", "w3"],
        lora_alpha=args.lora_alpha,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=args.seed,
        use_rslora=False,
        loftq_config=None,
    )
    print(f"Model loaded in {time.time() - start:.1f}s")

    # 2. Load and prepare dataset
    print("\n[2/5] Loading dataset...")
    start = time.time()

    dataset = load_dataset(args.dataset, split="train")
    print(f" Dataset has {len(dataset)} total samples")

    if args.num_samples:
        dataset = dataset.select(range(min(args.num_samples, len(dataset))))
        print(f" Limited to {len(dataset)} samples")

    # Auto-detect and normalize conversation column: the first list-valued
    # candidate column is renamed to "conversations".
    for col in ["messages", "conversations", "conversation"]:
        if col in dataset.column_names and isinstance(dataset[0][col], list):
            if col != "conversations":
                dataset = dataset.rename_column(col, "conversations")
            break
    dataset = standardize_data_formats(dataset)

    # Some tokenizers define no BOS token (bos_token is None); fall back to an
    # empty prefix so removeprefix() becomes a no-op instead of raising TypeError.
    bos_token = tokenizer.bos_token or ""

    # Apply chat template
    def formatting_prompts_func(examples):
        texts = tokenizer.apply_chat_template(
            examples["conversations"],
            tokenize=False,
            add_generation_prompt=False,
        )
        # Remove BOS token to avoid duplicates
        return {"text": [x.removeprefix(bos_token) for x in texts]}

    dataset = dataset.map(formatting_prompts_func, batched=True)

    # Split for evaluation if requested
    if args.eval_split > 0:
        split = dataset.train_test_split(test_size=args.eval_split, seed=args.seed)
        train_data = split["train"]
        eval_data = split["test"]
        print(f" Train: {len(train_data)} samples, Eval: {len(eval_data)} samples")
    else:
        train_data = dataset
        eval_data = None

    print(f" Dataset ready in {time.time() - start:.1f}s")

    # 3. Configure trainer
    print("\n[3/5] Configuring trainer...")

    # Estimate steps per epoch for logging/save intervals. Clamp to 1 so a
    # dataset smaller than one effective batch does not report zero steps.
    effective_batch = args.batch_size * args.gradient_accumulation
    num_samples = len(train_data)
    steps_per_epoch = max(1, num_samples // effective_batch)

    # Determine run name and logging steps
    if args.run_name:
        run_name = args.run_name
    elif args.num_epochs:
        run_name = f"unsloth-sft-{args.num_epochs}ep"
    else:
        run_name = f"unsloth-sft-{args.max_steps}steps"

    if args.num_epochs:
        logging_steps = max(1, steps_per_epoch // 10)
        save_steps = max(1, steps_per_epoch // 4)
    else:
        logging_steps = max(1, args.max_steps // 20)
        save_steps = max(1, args.max_steps // 4)

    # Determine reporting backend
    if args.trackio_space:
        report_to = ["tensorboard", "trackio"]
    else:
        report_to = ["tensorboard"]

    # Evaluation schedule. It is passed to the SFTConfig constructor (rather
    # than assigned afterwards) so the config's own initialization sees it.
    eval_kwargs = {}
    if eval_data is not None:
        if args.num_epochs:
            eval_kwargs["eval_strategy"] = "epoch"
            print(" Evaluation enabled: every epoch")
        else:
            eval_steps = max(1, args.max_steps // 5)
            eval_kwargs["eval_strategy"] = "steps"
            eval_kwargs["eval_steps"] = eval_steps
            print(f" Evaluation enabled: every {eval_steps} steps")

    training_config = SFTConfig(
        output_dir=args.save_local,
        dataset_text_field="text",
        per_device_train_batch_size=args.batch_size,
        gradient_accumulation_steps=args.gradient_accumulation,
        warmup_steps=5,
        num_train_epochs=args.num_epochs if args.num_epochs else 1,
        max_steps=args.max_steps if args.max_steps else -1,
        learning_rate=args.learning_rate,
        logging_steps=logging_steps,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=args.seed,
        max_length=args.max_seq_length,
        report_to=report_to,
        run_name=run_name,
        push_to_hub=True,
        hub_model_id=args.output_repo,
        save_steps=save_steps,
        save_total_limit=3,
        **eval_kwargs,
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=train_data,
        eval_dataset=eval_data,
        args=training_config,
    )

    # Train on responses only (mask user inputs).
    # NOTE(review): the markers below are fixed strings; this assumes the base
    # model's chat template emits exactly these turn headers - confirm when
    # using a non-default --base-model.
    trainer = train_on_responses_only(
        trainer,
        instruction_part="<|im_start|>user\n",
        response_part="<|im_start|>assistant\n",
    )

    # 4. Train
    print(f"\n[4/5] Training for {duration_str}...")
    if args.num_epochs:
        print(f" (~{steps_per_epoch} steps/epoch, {int(steps_per_epoch * args.num_epochs)} total steps)")
    start = time.time()

    train_result = trainer.train()

    train_time = time.time() - start
    # Prefer the trainer's real optimizer-step count; fall back to the
    # pre-computed estimate only if the result object does not expose it.
    total_steps = getattr(train_result, "global_step", None)
    if total_steps is None:
        total_steps = args.max_steps or steps_per_epoch * args.num_epochs
    print(f"\nTraining completed in {train_time / 60:.1f} minutes")
    if train_time > 0:
        print(f" Speed: {total_steps / train_time:.2f} steps/s")

    # Print training metrics ("is not None" so a loss of exactly 0.0 is shown)
    train_loss = train_result.metrics.get("train_loss")
    if train_loss is not None:
        print(f" Final train loss: {train_loss:.4f}")

    # Print eval results if eval was enabled
    if eval_data is not None:
        print("\nRunning final evaluation...")
        try:
            eval_results = trainer.evaluate()
            eval_loss = eval_results.get("eval_loss")
            if eval_loss is not None:
                print(f" Final eval loss: {eval_loss:.4f}")
                # The ratio is only meaningful (and defined) for a positive train loss.
                if train_loss is not None and train_loss > 0:
                    ratio = eval_loss / train_loss
                    if ratio > 1.5:
                        print(f" Warning: Eval loss is {ratio:.1f}x train loss - possible overfitting")
                    else:
                        print(f" Eval/train ratio: {ratio:.2f} - model generalizes well")
        except Exception as e:
            # Deliberately best-effort: a failed final eval must not prevent
            # the trained weights from being saved and uploaded below.
            print(f" Warning: Final evaluation failed: {e}")
            print(" Continuing to save model...")

    # 5. Save and push
    print("\n[5/5] Saving model...")

    if args.merge_model:
        print("Merging LoRA weights into base model...")
        print(f"\nPushing merged model to {args.output_repo}...")
        model.push_to_hub_merged(
            args.output_repo,
            tokenizer=tokenizer,
            save_method="merged_16bit",
        )
        print(f"Merged model available at: https://huggingface.co/{args.output_repo}")
    else:
        model.save_pretrained(args.save_local)
        tokenizer.save_pretrained(args.save_local)
        print(f"Saved locally to {args.save_local}/")

        print(f"\nPushing adapter to {args.output_repo}...")
        model.push_to_hub(args.output_repo, tokenizer=tokenizer)
        print(f"Adapter available at: https://huggingface.co/{args.output_repo}")

    print("\n" + "=" * 70)
    print("Done!")
    print("=" * 70)
|
|
|
|
|
|
if __name__ == "__main__":
    # Invoked with no arguments: print a short usage overview and exit
    # successfully instead of handing an empty command line to the parser.
    if len(sys.argv) == 1:
        banner = "=" * 70
        usage_lines = (
            banner,
            "LLM Fine-tuning with Unsloth",
            banner,
            "\nFine-tune language models with optional train/eval split.",
            "\nFeatures:",
            " - ~60% less VRAM with Unsloth optimizations",
            " - 2x faster training vs standard methods",
            " - Epoch-based or step-based training",
            " - Optional evaluation to detect overfitting",
            " - Trains only on assistant responses (masked user inputs)",
            "\nEpoch-based training:",
            "\n uv run unsloth_sft_example.py \\",
            " --dataset mlabonne/FineTome-100k \\",
            " --num-epochs 1 \\",
            " --eval-split 0.2 \\",
            " --output-repo your-username/model-finetuned",
            "\nHF Jobs example:",
            "\n hf jobs uv run unsloth_sft_example.py \\",
            " --flavor a10g-small --secrets HF_TOKEN --timeout 4h \\",
            " -- --dataset mlabonne/FineTome-100k \\",
            " --num-epochs 1 \\",
            " --eval-split 0.2 \\",
            " --output-repo your-username/model-finetuned",
            "\nFor full help: uv run unsloth_sft_example.py --help",
            banner,
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(0)

    main()
|