From bdcfbb9625f5ddad4560b92192092a8a2c2185d7 Mon Sep 17 00:00:00 2001 From: sickn33 Date: Sun, 29 Mar 2026 18:31:46 +0200 Subject: [PATCH] feat(hugging-face): Add official ecosystem skills Import the official Hugging Face ecosystem skills and sync the\nexisting local coverage with upstream metadata and assets.\n\nRegenerate the canonical catalog, plugin mirrors, docs, and release\nnotes after the maintainer merge batch so main stays in sync.\n\nFixes #417 --- .claude-plugin/plugin.json | 2 +- CATALOG.md | 28 +- CHANGELOG.md | 47 + README.md | 31 +- apps/web-app/public/sitemap.xml | 24 +- apps/web-app/public/skills.json.backup | 202 ++++- data/aliases.json | 1 + data/bundles.json | 8 + data/catalog.json | 277 +++++- data/plugin-compatibility.json | 158 +++- docs/integrations/jetski-cortex.md | 6 +- .../jetski-gemini-loader/README.md | 2 +- docs/maintainers/repo-growth-seo.md | 6 +- docs/maintainers/skills-update-guide.md | 2 +- docs/sources/sources.md | 4 +- docs/users/bundles.md | 2 +- docs/users/claude-code-skills.md | 2 +- docs/users/gemini-cli-skills.md | 2 +- docs/users/kiro-integration.md | 2 +- docs/users/usage.md | 8 +- docs/users/visual-guide.md | 8 +- docs_zh-CN/sources/sources.md | 6 +- package.json | 2 +- .../skills/hugging-face-cli/SKILL.md | 331 ++++--- .../hugging-face-community-evals/SKILL.md | 213 +++++ .../examples/.env.example | 3 + .../examples/USAGE_EXAMPLES.md | 101 +++ .../scripts/inspect_eval_uv.py | 104 +++ .../scripts/inspect_vllm_uv.py | 306 +++++++ .../scripts/lighteval_vllm_uv.py | 297 +++++++ .../hugging-face-dataset-viewer/SKILL.md | 240 +++--- .../skills/hugging-face-gradio/SKILL.md | 304 +++++++ .../skills/hugging-face-gradio/examples.md | 613 +++++++++++++ .../skills/hugging-face-jobs/SKILL.md | 43 +- .../skills/hugging-face-jobs/index.html | 216 +++++ .../references/hardware_guide.md | 336 ++++++++ .../references/hub_saving.md | 352 ++++++++ .../references/token_usage.md | 570 ++++++++++++ .../references/troubleshooting.md | 475 
++++++++++ .../scripts/cot-self-instruct.py | 718 +++++++++++++++ .../scripts/finepdfs-stats.py | 546 ++++++++++++ .../scripts/generate-responses.py | 587 +++++++++++++ .../hugging-face-model-trainer/SKILL.md | 23 +- .../references/gguf_conversion.md | 296 +++++++ .../references/hardware_guide.md | 283 ++++++ .../references/hub_saving.md | 364 ++++++++ .../references/local_training_macos.md | 231 +++++ .../references/reliability_principles.md | 371 ++++++++ .../references/trackio_guide.md | 189 ++++ .../references/training_methods.md | 150 ++++ .../references/training_patterns.md | 203 +++++ .../references/troubleshooting.md | 282 ++++++ .../references/unsloth.md | 313 +++++++ .../scripts/convert_to_gguf.py | 424 +++++++++ .../scripts/dataset_inspector.py | 417 +++++++++ .../scripts/estimate_cost.py | 150 ++++ .../scripts/train_dpo_example.py | 106 +++ .../scripts/train_grpo_example.py | 89 ++ .../scripts/train_sft_example.py | 122 +++ .../scripts/unsloth_sft_example.py | 512 +++++++++++ .../hugging-face-paper-publisher/SKILL.md | 15 +- .../examples/example_usage.md | 326 +++++++ .../references/quick_reference.md | 216 +++++ .../scripts/paper_manager.py | 606 +++++++++++++ .../templates/arxiv.md | 299 +++++++ .../templates/ml-report.md | 358 ++++++++ .../templates/modern.md | 319 +++++++ .../templates/standard.md | 201 +++++ .../skills/hugging-face-papers/SKILL.md | 241 ++++++ .../skills/hugging-face-trackio/SKILL.md | 117 +++ .../hugging-face-trackio/references/alerts.md | 196 +++++ .../references/logging_metrics.md | 206 +++++ .../references/retrieving_metrics.md | 251 ++++++ .../hugging-face-vision-trainer/SKILL.md | 595 +++++++++++++ .../references/finetune_sam2_trainer.md | 254 ++++++ .../references/hub_saving.md | 618 +++++++++++++ .../image_classification_training_notebook.md | 279 ++++++ .../object_detection_training_notebook.md | 700 +++++++++++++++ .../references/reliability_principles.md | 310 +++++++ .../references/timm_trainer.md | 91 ++ 
.../scripts/dataset_inspector.py | 814 ++++++++++++++++++ .../scripts/estimate_cost.py | 217 +++++ .../scripts/image_classification_training.py | 383 ++++++++ .../scripts/object_detection_training.py | 710 +++++++++++++++ .../scripts/sam_segmentation_training.py | 382 ++++++++ .../skills/jq/SKILL.md | 273 ++++++ .../skills/tmux/SKILL.md | 370 ++++++++ .../skills/transformers-js/SKILL.md | 639 ++++++++++++++ .../transformers-js/references/CACHE.md | 339 ++++++++ .../references/CONFIGURATION.md | 390 +++++++++ .../transformers-js/references/EXAMPLES.md | 605 +++++++++++++ .../references/MODEL_ARCHITECTURES.md | 167 ++++ .../references/PIPELINE_OPTIONS.md | 545 ++++++++++++ .../references/TEXT_GENERATION.md | 315 +++++++ .../.codex-plugin/plugin.json | 2 +- .../skills/hugging-face-cli/SKILL.md | 331 ++++--- .../hugging-face-community-evals/SKILL.md | 213 +++++ .../examples/.env.example | 3 + .../examples/USAGE_EXAMPLES.md | 101 +++ .../scripts/inspect_eval_uv.py | 104 +++ .../scripts/inspect_vllm_uv.py | 306 +++++++ .../scripts/lighteval_vllm_uv.py | 297 +++++++ .../hugging-face-dataset-viewer/SKILL.md | 240 +++--- .../skills/hugging-face-gradio/SKILL.md | 304 +++++++ .../skills/hugging-face-gradio/examples.md | 613 +++++++++++++ .../skills/hugging-face-jobs/SKILL.md | 43 +- .../skills/hugging-face-jobs/index.html | 216 +++++ .../references/hardware_guide.md | 336 ++++++++ .../references/hub_saving.md | 352 ++++++++ .../references/token_usage.md | 570 ++++++++++++ .../references/troubleshooting.md | 475 ++++++++++ .../scripts/cot-self-instruct.py | 718 +++++++++++++++ .../scripts/finepdfs-stats.py | 546 ++++++++++++ .../scripts/generate-responses.py | 587 +++++++++++++ .../hugging-face-model-trainer/SKILL.md | 23 +- .../references/gguf_conversion.md | 296 +++++++ .../references/hardware_guide.md | 283 ++++++ .../references/hub_saving.md | 364 ++++++++ .../references/local_training_macos.md | 231 +++++ .../references/reliability_principles.md | 371 ++++++++ 
.../references/trackio_guide.md | 189 ++++ .../references/training_methods.md | 150 ++++ .../references/training_patterns.md | 203 +++++ .../references/troubleshooting.md | 282 ++++++ .../references/unsloth.md | 313 +++++++ .../scripts/convert_to_gguf.py | 424 +++++++++ .../scripts/dataset_inspector.py | 417 +++++++++ .../scripts/estimate_cost.py | 150 ++++ .../scripts/train_dpo_example.py | 106 +++ .../scripts/train_grpo_example.py | 89 ++ .../scripts/train_sft_example.py | 122 +++ .../scripts/unsloth_sft_example.py | 512 +++++++++++ .../hugging-face-paper-publisher/SKILL.md | 15 +- .../examples/example_usage.md | 326 +++++++ .../references/quick_reference.md | 216 +++++ .../scripts/paper_manager.py | 606 +++++++++++++ .../templates/arxiv.md | 299 +++++++ .../templates/ml-report.md | 358 ++++++++ .../templates/modern.md | 319 +++++++ .../templates/standard.md | 201 +++++ .../skills/hugging-face-papers/SKILL.md | 241 ++++++ .../skills/hugging-face-trackio/SKILL.md | 117 +++ .../hugging-face-trackio/references/alerts.md | 196 +++++ .../references/logging_metrics.md | 206 +++++ .../references/retrieving_metrics.md | 251 ++++++ .../hugging-face-vision-trainer/SKILL.md | 595 +++++++++++++ .../references/finetune_sam2_trainer.md | 254 ++++++ .../references/hub_saving.md | 618 +++++++++++++ .../image_classification_training_notebook.md | 279 ++++++ .../object_detection_training_notebook.md | 700 +++++++++++++++ .../references/reliability_principles.md | 310 +++++++ .../references/timm_trainer.md | 91 ++ .../scripts/dataset_inspector.py | 814 ++++++++++++++++++ .../scripts/estimate_cost.py | 217 +++++ .../scripts/image_classification_training.py | 383 ++++++++ .../scripts/object_detection_training.py | 710 +++++++++++++++ .../scripts/sam_segmentation_training.py | 382 ++++++++ .../skills/jq/SKILL.md | 273 ++++++ .../skills/tmux/SKILL.md | 370 ++++++++ .../skills/transformers-js/SKILL.md | 639 ++++++++++++++ .../transformers-js/references/CACHE.md | 339 ++++++++ 
.../references/CONFIGURATION.md | 390 +++++++++ .../transformers-js/references/EXAMPLES.md | 605 +++++++++++++ .../references/MODEL_ARCHITECTURES.md | 167 ++++ .../references/PIPELINE_OPTIONS.md | 545 ++++++++++++ .../references/TEXT_GENERATION.md | 315 +++++++ skills/hugging-face-cli/SKILL.md | 331 ++++--- skills/hugging-face-community-evals/SKILL.md | 213 +++++ .../examples/.env.example | 3 + .../examples/USAGE_EXAMPLES.md | 101 +++ .../scripts/inspect_eval_uv.py | 104 +++ .../scripts/inspect_vllm_uv.py | 306 +++++++ .../scripts/lighteval_vllm_uv.py | 297 +++++++ skills/hugging-face-dataset-viewer/SKILL.md | 240 +++--- skills/hugging-face-gradio/SKILL.md | 304 +++++++ skills/hugging-face-gradio/examples.md | 613 +++++++++++++ skills/hugging-face-jobs/SKILL.md | 43 +- skills/hugging-face-jobs/index.html | 216 +++++ .../references/hardware_guide.md | 336 ++++++++ .../references/hub_saving.md | 352 ++++++++ .../references/token_usage.md | 570 ++++++++++++ .../references/troubleshooting.md | 475 ++++++++++ .../scripts/cot-self-instruct.py | 718 +++++++++++++++ .../scripts/finepdfs-stats.py | 546 ++++++++++++ .../scripts/generate-responses.py | 587 +++++++++++++ skills/hugging-face-model-trainer/SKILL.md | 23 +- .../references/gguf_conversion.md | 296 +++++++ .../references/hardware_guide.md | 283 ++++++ .../references/hub_saving.md | 364 ++++++++ .../references/local_training_macos.md | 231 +++++ .../references/reliability_principles.md | 371 ++++++++ .../references/trackio_guide.md | 189 ++++ .../references/training_methods.md | 150 ++++ .../references/training_patterns.md | 203 +++++ .../references/troubleshooting.md | 282 ++++++ .../references/unsloth.md | 313 +++++++ .../scripts/convert_to_gguf.py | 424 +++++++++ .../scripts/dataset_inspector.py | 417 +++++++++ .../scripts/estimate_cost.py | 150 ++++ .../scripts/train_dpo_example.py | 106 +++ .../scripts/train_grpo_example.py | 89 ++ .../scripts/train_sft_example.py | 122 +++ .../scripts/unsloth_sft_example.py | 
512 +++++++++++ skills/hugging-face-paper-publisher/SKILL.md | 15 +- .../examples/example_usage.md | 326 +++++++ .../references/quick_reference.md | 216 +++++ .../scripts/paper_manager.py | 606 +++++++++++++ .../templates/arxiv.md | 299 +++++++ .../templates/ml-report.md | 358 ++++++++ .../templates/modern.md | 319 +++++++ .../templates/standard.md | 201 +++++ skills/hugging-face-papers/SKILL.md | 241 ++++++ skills/hugging-face-trackio/SKILL.md | 117 +++ .../hugging-face-trackio/references/alerts.md | 196 +++++ .../references/logging_metrics.md | 206 +++++ .../references/retrieving_metrics.md | 251 ++++++ skills/hugging-face-vision-trainer/SKILL.md | 595 +++++++++++++ .../references/finetune_sam2_trainer.md | 254 ++++++ .../references/hub_saving.md | 618 +++++++++++++ .../image_classification_training_notebook.md | 279 ++++++ .../object_detection_training_notebook.md | 700 +++++++++++++++ .../references/reliability_principles.md | 310 +++++++ .../references/timm_trainer.md | 91 ++ .../scripts/dataset_inspector.py | 814 ++++++++++++++++++ .../scripts/estimate_cost.py | 217 +++++ .../scripts/image_classification_training.py | 383 ++++++++ .../scripts/object_detection_training.py | 710 +++++++++++++++ .../scripts/sam_segmentation_training.py | 382 ++++++++ skills/transformers-js/SKILL.md | 639 ++++++++++++++ skills/transformers-js/references/CACHE.md | 339 ++++++++ .../references/CONFIGURATION.md | 390 +++++++++ skills/transformers-js/references/EXAMPLES.md | 605 +++++++++++++ .../references/MODEL_ARCHITECTURES.md | 167 ++++ .../references/PIPELINE_OPTIONS.md | 545 ++++++++++++ .../references/TEXT_GENERATION.md | 315 +++++++ skills_index.json | 202 ++++- 236 files changed, 68977 insertions(+), 1095 deletions(-) create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/SKILL.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/examples/.env.example create mode 100644 
plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/inspect_eval_uv.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-gradio/SKILL.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-gradio/examples.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/index.html create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/hardware_guide.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/hub_saving.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/token_usage.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/troubleshooting.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/cot-self-instruct.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/finepdfs-stats.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/generate-responses.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/gguf_conversion.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/hardware_guide.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/hub_saving.md create mode 100644 
plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/local_training_macos.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/reliability_principles.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/trackio_guide.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/training_methods.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/training_patterns.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/troubleshooting.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/unsloth.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/scripts/dataset_inspector.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/scripts/estimate_cost.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/scripts/train_dpo_example.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/scripts/train_grpo_example.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/scripts/train_sft_example.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/scripts/unsloth_sft_example.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-paper-publisher/examples/example_usage.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-paper-publisher/references/quick_reference.md create mode 100644 
plugins/antigravity-awesome-skills-claude/skills/hugging-face-paper-publisher/scripts/paper_manager.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-paper-publisher/templates/arxiv.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-paper-publisher/templates/ml-report.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-paper-publisher/templates/modern.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-paper-publisher/templates/standard.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-papers/SKILL.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-trackio/SKILL.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-trackio/references/alerts.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-trackio/references/logging_metrics.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-trackio/references/retrieving_metrics.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/SKILL.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/references/finetune_sam2_trainer.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/references/hub_saving.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/references/image_classification_training_notebook.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/references/object_detection_training_notebook.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/references/reliability_principles.md create mode 100644 
plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/references/timm_trainer.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/scripts/dataset_inspector.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/scripts/estimate_cost.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/scripts/image_classification_training.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/scripts/object_detection_training.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/hugging-face-vision-trainer/scripts/sam_segmentation_training.py create mode 100644 plugins/antigravity-awesome-skills-claude/skills/jq/SKILL.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/tmux/SKILL.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/transformers-js/SKILL.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/CACHE.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/CONFIGURATION.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/EXAMPLES.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/MODEL_ARCHITECTURES.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/PIPELINE_OPTIONS.md create mode 100644 plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/TEXT_GENERATION.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/SKILL.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/examples/.env.example create mode 100644 
plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/inspect_eval_uv.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-gradio/SKILL.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-gradio/examples.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-jobs/index.html create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/hardware_guide.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/hub_saving.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/token_usage.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/troubleshooting.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/cot-self-instruct.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/finepdfs-stats.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/generate-responses.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/gguf_conversion.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/hardware_guide.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/hub_saving.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/local_training_macos.md create mode 100644 
plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/reliability_principles.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/trackio_guide.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/training_methods.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/training_patterns.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/troubleshooting.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/unsloth.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/scripts/dataset_inspector.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/scripts/estimate_cost.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/scripts/train_dpo_example.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/scripts/train_grpo_example.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/scripts/train_sft_example.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/scripts/unsloth_sft_example.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-paper-publisher/examples/example_usage.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-paper-publisher/references/quick_reference.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-paper-publisher/scripts/paper_manager.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-paper-publisher/templates/arxiv.md create mode 100644 
plugins/antigravity-awesome-skills/skills/hugging-face-paper-publisher/templates/ml-report.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-paper-publisher/templates/modern.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-paper-publisher/templates/standard.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-papers/SKILL.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-trackio/SKILL.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-trackio/references/alerts.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-trackio/references/logging_metrics.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-trackio/references/retrieving_metrics.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/SKILL.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/references/finetune_sam2_trainer.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/references/hub_saving.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/references/image_classification_training_notebook.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/references/object_detection_training_notebook.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/references/reliability_principles.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/references/timm_trainer.md create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/scripts/dataset_inspector.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/scripts/estimate_cost.py create mode 100644 
plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/scripts/image_classification_training.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/scripts/object_detection_training.py create mode 100644 plugins/antigravity-awesome-skills/skills/hugging-face-vision-trainer/scripts/sam_segmentation_training.py create mode 100644 plugins/antigravity-awesome-skills/skills/jq/SKILL.md create mode 100644 plugins/antigravity-awesome-skills/skills/tmux/SKILL.md create mode 100644 plugins/antigravity-awesome-skills/skills/transformers-js/SKILL.md create mode 100644 plugins/antigravity-awesome-skills/skills/transformers-js/references/CACHE.md create mode 100644 plugins/antigravity-awesome-skills/skills/transformers-js/references/CONFIGURATION.md create mode 100644 plugins/antigravity-awesome-skills/skills/transformers-js/references/EXAMPLES.md create mode 100644 plugins/antigravity-awesome-skills/skills/transformers-js/references/MODEL_ARCHITECTURES.md create mode 100644 plugins/antigravity-awesome-skills/skills/transformers-js/references/PIPELINE_OPTIONS.md create mode 100644 plugins/antigravity-awesome-skills/skills/transformers-js/references/TEXT_GENERATION.md create mode 100644 skills/hugging-face-community-evals/SKILL.md create mode 100644 skills/hugging-face-community-evals/examples/.env.example create mode 100644 skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md create mode 100644 skills/hugging-face-community-evals/scripts/inspect_eval_uv.py create mode 100644 skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py create mode 100644 skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py create mode 100644 skills/hugging-face-gradio/SKILL.md create mode 100644 skills/hugging-face-gradio/examples.md create mode 100644 skills/hugging-face-jobs/index.html create mode 100644 skills/hugging-face-jobs/references/hardware_guide.md create mode 100644 
skills/hugging-face-jobs/references/hub_saving.md create mode 100644 skills/hugging-face-jobs/references/token_usage.md create mode 100644 skills/hugging-face-jobs/references/troubleshooting.md create mode 100644 skills/hugging-face-jobs/scripts/cot-self-instruct.py create mode 100644 skills/hugging-face-jobs/scripts/finepdfs-stats.py create mode 100644 skills/hugging-face-jobs/scripts/generate-responses.py create mode 100644 skills/hugging-face-model-trainer/references/gguf_conversion.md create mode 100644 skills/hugging-face-model-trainer/references/hardware_guide.md create mode 100644 skills/hugging-face-model-trainer/references/hub_saving.md create mode 100644 skills/hugging-face-model-trainer/references/local_training_macos.md create mode 100644 skills/hugging-face-model-trainer/references/reliability_principles.md create mode 100644 skills/hugging-face-model-trainer/references/trackio_guide.md create mode 100644 skills/hugging-face-model-trainer/references/training_methods.md create mode 100644 skills/hugging-face-model-trainer/references/training_patterns.md create mode 100644 skills/hugging-face-model-trainer/references/troubleshooting.md create mode 100644 skills/hugging-face-model-trainer/references/unsloth.md create mode 100644 skills/hugging-face-model-trainer/scripts/convert_to_gguf.py create mode 100644 skills/hugging-face-model-trainer/scripts/dataset_inspector.py create mode 100644 skills/hugging-face-model-trainer/scripts/estimate_cost.py create mode 100644 skills/hugging-face-model-trainer/scripts/train_dpo_example.py create mode 100644 skills/hugging-face-model-trainer/scripts/train_grpo_example.py create mode 100644 skills/hugging-face-model-trainer/scripts/train_sft_example.py create mode 100644 skills/hugging-face-model-trainer/scripts/unsloth_sft_example.py create mode 100644 skills/hugging-face-paper-publisher/examples/example_usage.md create mode 100644 skills/hugging-face-paper-publisher/references/quick_reference.md create mode 100644 
skills/hugging-face-paper-publisher/scripts/paper_manager.py create mode 100644 skills/hugging-face-paper-publisher/templates/arxiv.md create mode 100644 skills/hugging-face-paper-publisher/templates/ml-report.md create mode 100644 skills/hugging-face-paper-publisher/templates/modern.md create mode 100644 skills/hugging-face-paper-publisher/templates/standard.md create mode 100644 skills/hugging-face-papers/SKILL.md create mode 100644 skills/hugging-face-trackio/SKILL.md create mode 100644 skills/hugging-face-trackio/references/alerts.md create mode 100644 skills/hugging-face-trackio/references/logging_metrics.md create mode 100644 skills/hugging-face-trackio/references/retrieving_metrics.md create mode 100644 skills/hugging-face-vision-trainer/SKILL.md create mode 100644 skills/hugging-face-vision-trainer/references/finetune_sam2_trainer.md create mode 100644 skills/hugging-face-vision-trainer/references/hub_saving.md create mode 100644 skills/hugging-face-vision-trainer/references/image_classification_training_notebook.md create mode 100644 skills/hugging-face-vision-trainer/references/object_detection_training_notebook.md create mode 100644 skills/hugging-face-vision-trainer/references/reliability_principles.md create mode 100644 skills/hugging-face-vision-trainer/references/timm_trainer.md create mode 100644 skills/hugging-face-vision-trainer/scripts/dataset_inspector.py create mode 100644 skills/hugging-face-vision-trainer/scripts/estimate_cost.py create mode 100644 skills/hugging-face-vision-trainer/scripts/image_classification_training.py create mode 100644 skills/hugging-face-vision-trainer/scripts/object_detection_training.py create mode 100644 skills/hugging-face-vision-trainer/scripts/sam_segmentation_training.py create mode 100644 skills/transformers-js/SKILL.md create mode 100644 skills/transformers-js/references/CACHE.md create mode 100644 skills/transformers-js/references/CONFIGURATION.md create mode 100644 
skills/transformers-js/references/EXAMPLES.md create mode 100644 skills/transformers-js/references/MODEL_ARCHITECTURES.md create mode 100644 skills/transformers-js/references/PIPELINE_OPTIONS.md create mode 100644 skills/transformers-js/references/TEXT_GENERATION.md diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 07c9950d..712070da 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "antigravity-awesome-skills", "version": "9.1.0", - "description": "Plugin-safe Claude Code distribution of Antigravity Awesome Skills with 1,318 supported skills.", + "description": "Plugin-safe Claude Code distribution of Antigravity Awesome Skills with 1,326 supported skills.", "author": { "name": "sickn33 and contributors", "url": "https://github.com/sickn33/antigravity-awesome-skills" diff --git a/CATALOG.md b/CATALOG.md index aff2eafd..b8681bd3 100644 --- a/CATALOG.md +++ b/CATALOG.md @@ -2,7 +2,7 @@ Generated at: 2026-02-08T00:00:00.000Z -Total skills: 1332 +Total skills: 1340 ## architecture (88) @@ -171,7 +171,7 @@ Total skills: 1332 | `warren-buffett` | Agente que simula Warren Buffett — o maior investidor do seculo XX e XXI, CEO da Berkshire Hathaway, discipulo de Benjamin Graham e socio intelectual de Char... | persona, investing, value-investing, business | persona, investing, value-investing, business, warren, buffett, agente, que, simula, maior, investidor, do | | `whatsapp-automation` | Automate WhatsApp Business tasks via Rube MCP (Composio): send messages, manage templates, upload media, and handle contacts. Always search tools first for c... 
| whatsapp | whatsapp, automation, automate, business, tasks, via, rube, mcp, composio, send, messages, upload | -## data-ai (251) +## data-ai (252) | Skill | Description | Tags | Triggers | | --- | --- | --- | --- | @@ -322,6 +322,7 @@ Total skills: 1332 | `google-analytics-automation` | Automate Google Analytics tasks via Rube MCP (Composio): run reports, list accounts/properties, funnels, pivots, key events. Always search tools first for cu... | google, analytics | google, analytics, automation, automate, tasks, via, rube, mcp, composio, run, reports, list | | `googlesheets-automation` | Automate Google Sheets operations (read, write, format, filter, manage spreadsheets) via Rube MCP (Composio). Read/write data, manage tabs, apply formatting,... | googlesheets | googlesheets, automation, automate, google, sheets, operations, read, write, format, filter, spreadsheets, via | | `hosted-agents-v2-py` | Build hosted agents using Azure AI Projects SDK with ImageBasedHostedAgentDefinition. Use when creating container-based agents in Azure AI Foundry. | hosted, agents, v2, py | hosted, agents, v2, py, azure, ai, sdk, imagebasedhostedagentdefinition, creating, container, foundry | +| `hugging-face-community-evals` | Run local evaluations for Hugging Face Hub models with inspect-ai or lighteval. | hugging, face, community, evals | hugging, face, community, evals, run, local, evaluations, hub, models, inspect, ai, lighteval | | `hugging-face-datasets` | Create and manage datasets on Hugging Face Hub. Supports initializing repos, defining configs/system prompts, streaming row updates, and SQL-based dataset qu... | hugging, face, datasets | hugging, face, datasets, hub, supports, initializing, repos, defining, configs, prompts, streaming, row | | `hybrid-search-implementation` | Combine vector and keyword search for improved retrieval. Use when implementing RAG systems, building search engines, or when neither approach alone provides... 
| hybrid, search | hybrid, search, combine, vector, keyword, improved, retrieval, implementing, rag, building, engines, neither | | `iconsax-library` | Extensive icon library and AI-driven icon generation skill for premium UI/UX design. | iconsax, library | iconsax, library, extensive, icon, ai, driven, generation, skill, premium, ui, ux | @@ -427,7 +428,7 @@ Total skills: 1332 | `youtube-automation` | Automate YouTube tasks via Rube MCP (Composio): upload videos, manage playlists, search content, get analytics, and handle comments. Always search tools firs... | youtube | youtube, automation, automate, tasks, via, rube, mcp, composio, upload, videos, playlists, search | | `zapier-make-patterns` | You are a no-code automation architect who has built thousands of Zaps and Scenarios for businesses of all sizes. You've seen automations that save companies... | zapier, make | zapier, make, no, code, automation, architect, who, built, thousands, zaps, scenarios, businesses | -## development (182) +## development (185) | Skill | Description | Tags | Triggers | | --- | --- | --- | --- | @@ -532,8 +533,10 @@ Total skills: 1332 | `go-rod-master` | Comprehensive guide for browser automation and web scraping with go-rod (Chrome DevTools Protocol) including stealth anti-bot-detection patterns. | go, rod, master | go, rod, master, browser, automation, web, scraping, chrome, devtools, protocol, including, stealth | | `golang-pro` | Master Go 1.21+ with modern patterns, advanced concurrency, performance optimization, and production-ready microservices. | golang | golang, pro, go, 21, concurrency, performance, optimization, microservices | | `hono` | Build ultra-fast web APIs and full-stack apps with Hono — runs on Cloudflare Workers, Deno, Bun, Node.js, and any WinterCG-compatible runtime. 
| hono, edge, cloudflare-workers, bun, deno, api, typescript, web-standards | hono, edge, cloudflare-workers, bun, deno, api, typescript, web-standards, ultra, fast, web, apis | -| `hugging-face-dataset-viewer` | Use this skill for Hugging Face Dataset Viewer API workflows that fetch subset/split metadata, paginate rows, search text, apply filters, download parquet UR... | hugging, face, dataset, viewer | hugging, face, dataset, viewer, skill, api, fetch, subset, split, metadata, paginate, rows | +| `hugging-face-dataset-viewer` | Query Hugging Face datasets through the Dataset Viewer API for splits, rows, search, filters, and parquet links. | hugging, face, dataset, viewer | hugging, face, dataset, viewer, query, datasets, through, api, splits, rows, search, filters | | `hugging-face-evaluation` | Add and manage evaluation results in Hugging Face model cards. Supports extracting eval tables from README content, importing scores from Artificial Analysis... | hugging, face, evaluation | hugging, face, evaluation, add, results, model, cards, supports, extracting, eval, tables, readme | +| `hugging-face-gradio` | Build or edit Gradio apps, layouts, components, and chat interfaces in Python. | hugging, face, gradio | hugging, face, gradio, edit, apps, layouts, components, chat, interfaces, python | +| `hugging-face-papers` | Read and analyze Hugging Face paper pages or arXiv papers with markdown and papers API metadata. | hugging, face, papers | hugging, face, papers, read, analyze, paper, pages, arxiv, markdown, api, metadata | | `hugging-face-tool-builder` | Your purpose is now is to create reusable command line scripts and utilities for using the Hugging Face API, allowing chaining, piping and intermediate proce... | hugging, face, builder | hugging, face, builder, purpose, now, reusable, command, line, scripts, utilities, api, allowing | | `ios-debugger-agent` | Debug the current iOS project on a booted simulator with XcodeBuildMCP. 
| ios, debugger, agent | ios, debugger, agent, debug, current, booted, simulator, xcodebuildmcp | | `javascript-mastery` | 33+ essential JavaScript concepts every developer should know, inspired by [33-js-concepts](https://github.com/leonardomso/33-js-concepts). | javascript, mastery | javascript, mastery, 33, essential, concepts, every, developer, should, know, inspired, js, https | @@ -603,6 +606,7 @@ Total skills: 1332 | `tavily-web` | Web search, content extraction, crawling, and research capabilities using Tavily API. Use when you need to search the web for current information, extracting... | tavily, web | tavily, web, search, content, extraction, crawling, research, capabilities, api, current, information, extracting | | `telegram` | Integracao completa com Telegram Bot API. Setup com BotFather, mensagens, webhooks, inline keyboards, grupos, canais. Boilerplates Node.js e Python. | messaging, telegram, bots, webhooks | messaging, telegram, bots, webhooks, integracao, completa, com, bot, api, setup, botfather, mensagens | | `temporal-python-testing` | Comprehensive testing approaches for Temporal workflows using pytest, progressive disclosure resources for specific testing scenarios. | temporal, python | temporal, python, testing, approaches, pytest, progressive, disclosure, resources, specific, scenarios | +| `transformers-js` | Run Hugging Face models in JavaScript or TypeScript with Transformers.js in Node.js or the browser. | transformers, js | transformers, js, run, hugging, face, models, javascript, typescript, node, browser | | `trigger-dev` | You are a Trigger.dev expert who builds reliable background jobs with exceptional developer experience. You understand that Trigger.dev bridges the gap betwe... 
| trigger, dev | trigger, dev, who, reliable, background, jobs, exceptional, developer, experience, understand, bridges, gap | | `trpc-fullstack` | Build end-to-end type-safe APIs with tRPC — routers, procedures, middleware, subscriptions, and Next.js/React integration patterns. | typescript, trpc, api, fullstack, nextjs, react, type-safety | typescript, trpc, api, fullstack, nextjs, react, type-safety, type, safe, apis, routers, procedures | | `typescript-advanced-types` | Comprehensive guidance for mastering TypeScript's advanced type system including generics, conditional types, mapped types, template literal types, and utili... | typescript, advanced, types | typescript, advanced, types, guidance, mastering, type, including, generics, conditional, mapped, literal, utility | @@ -614,7 +618,7 @@ Total skills: 1332 | `zod-validation-expert` | Expert in Zod — TypeScript-first schema validation. Covers parsing, custom errors, refinements, type inference, and integration with React Hook Form, Next.js... | zod, validation | zod, validation, typescript, first, schema, covers, parsing, custom, errors, refinements, type, inference | | `zustand-store-ts` | Create Zustand stores following established patterns with proper TypeScript types and middleware. | zustand, store, ts | zustand, store, ts, stores, following, established, proper, typescript, types, middleware | -## general (326) +## general (328) | Skill | Description | Tags | Triggers | | --- | --- | --- | --- | @@ -774,8 +778,10 @@ Total skills: 1332 | `hig-technologies` | Check for .claude/apple-design-context.md before asking questions. Use existing context and only ask for information not already covered. | hig, technologies | hig, technologies, check, claude, apple, context, md, before, asking, questions, existing, ask | | `hosted-agents` | Build background agents in sandboxed environments. Use for hosted coding agents, sandboxed VMs, Modal sandboxes, and remote coding environments. 
| hosted, agents | hosted, agents, background, sandboxed, environments, coding, vms, modal, sandboxes, remote | | `hubspot-integration` | Authentication for single-account integrations | hubspot, integration | hubspot, integration, authentication, single, account, integrations | -| `hugging-face-cli` | The hf CLI provides direct terminal access to the Hugging Face Hub for downloading, uploading, and managing repositories, cache, and compute resources. | hugging, face, cli | hugging, face, cli, hf, provides, direct, terminal, access, hub, downloading, uploading, managing | +| `hugging-face-cli` | Use the Hugging Face Hub CLI (`hf`) to download, upload, and manage models, datasets, and Spaces. | hugging, face, cli | hugging, face, cli, hub, hf, download, upload, models, datasets, spaces | +| `hugging-face-model-trainer` | Train or fine-tune TRL language models on Hugging Face Jobs, including SFT, DPO, GRPO, and GGUF export. | hugging, face, model, trainer | hugging, face, model, trainer, train, fine, tune, trl, language, models, jobs, including | | `hugging-face-paper-publisher` | Publish and manage research papers on Hugging Face Hub. Supports creating paper pages, linking papers to models/datasets, claiming authorship, and generating... | hugging, face, paper, publisher | hugging, face, paper, publisher, publish, research, papers, hub, supports, creating, pages, linking | +| `hugging-face-vision-trainer` | Train or fine-tune vision models on Hugging Face Jobs for detection, classification, and SAM or SAM2 segmentation. | hugging, face, vision, trainer | hugging, face, vision, trainer, train, fine, tune, models, jobs, detection, classification, sam | | `ilya-sutskever` | Agente que simula Ilya Sutskever — co-fundador da OpenAI, ex-Chief Scientist, fundador da SSI. Use quando quiser perspectivas sobre: AGI safety-first, consci... 
| persona, agi, safety, scaling-laws, openai | persona, agi, safety, scaling-laws, openai, ilya, sutskever, agente, que, simula, co, fundador | | `infinite-gratitude` | Multi-agent research skill for parallel research execution (10 agents, battle-tested with real case studies). | infinite, gratitude | infinite, gratitude, multi, agent, research, skill, parallel, execution, 10, agents, battle, tested | | `inngest` | You are an Inngest expert who builds reliable background processing without managing infrastructure. You understand that serverless doesn't mean you can't ha... | inngest | inngest, who, reliable, background, processing, without, managing, infrastructure, understand, serverless, doesn, mean | @@ -1017,13 +1023,13 @@ Total skills: 1332 | `gitops-workflow` | Complete guide to implementing GitOps workflows with ArgoCD and Flux for automated Kubernetes deployments. | gitops | gitops, complete, implementing, argocd, flux, automated, kubernetes, deployments | | `grafana-dashboards` | Create and manage production-ready Grafana dashboards for comprehensive system observability. | grafana, dashboards | grafana, dashboards, observability | | `helm-chart-scaffolding` | Comprehensive guidance for creating, organizing, and managing Helm charts for packaging and deploying Kubernetes applications. | helm, chart | helm, chart, scaffolding, guidance, creating, organizing, managing, charts, packaging, deploying, kubernetes, applications | -| `hugging-face-jobs` | Run any workload on fully managed Hugging Face infrastructure. No local setup required—jobs run on cloud CPUs, GPUs, or TPUs and can persist results to the H... | hugging, face, jobs | hugging, face, jobs, run, any, workload, fully, managed, infrastructure, no, local, setup | -| `hugging-face-model-trainer` | Train language models using TRL (Transformer Reinforcement Learning) on fully managed Hugging Face infrastructure. No local GPU setup required—models train o... 
| hugging, face, model, trainer | hugging, face, model, trainer, train, language, models, trl, transformer, reinforcement, learning, fully | +| `hugging-face-trackio` | Track ML experiments with Trackio using Python logging, alerts, and CLI metric retrieval. | hugging, face, trackio | hugging, face, trackio, track, ml, experiments, python, logging, alerts, cli, metric, retrieval | | `hybrid-cloud-architect` | Expert hybrid cloud architect specializing in complex multi-cloud solutions across AWS/Azure/GCP and private clouds (OpenStack/VMware). | hybrid, cloud | hybrid, cloud, architect, specializing, complex, multi, solutions, aws, azure, gcp, private, clouds | | `hybrid-cloud-networking` | Configure secure, high-performance connectivity between on-premises and cloud environments using VPN, Direct Connect, and ExpressRoute. | hybrid, cloud, networking | hybrid, cloud, networking, configure, secure, high, performance, connectivity, between, premises, environments, vpn | | `istio-traffic-management` | Comprehensive guide to Istio traffic management for production service mesh deployments. | istio, traffic | istio, traffic, mesh, deployments | | `iterate-pr` | Iterate on a PR until CI passes. Use when you need to fix CI failures, address review feedback, or continuously push fixes until all checks are green. Automa... | iterate, pr | iterate, pr, until, ci, passes, fix, failures, address, review, feedback, continuously, push | | `java-pro` | Master Java 21+ with modern features like virtual threads, pattern matching, and Spring Boot 3.x. Expert in the latest Java ecosystem including GraalVM, Proj... | java | java, pro, 21, features, like, virtual, threads, matching, spring, boot, latest, ecosystem | +| `jq` | Expert jq usage for JSON querying, filtering, transformation, and pipeline integration. Practical patterns for real shell workflows. 
| jq, json, shell, cli, data-transformation, bash | jq, json, shell, cli, data-transformation, bash, usage, querying, filtering, transformation, pipeline, integration | | `k6-load-testing` | Comprehensive k6 load testing skill for API, browser, and scalability testing. Write realistic load scenarios, analyze results, and integrate with CI/CD. | k6, load-testing, performance, api-testing, ci-cd | k6, load-testing, performance, api-testing, ci-cd, load, testing, skill, api, browser, scalability, write | | `kubernetes-architect` | Expert Kubernetes architect specializing in cloud-native infrastructure, advanced GitOps workflows (ArgoCD/Flux), and enterprise container orchestration. | kubernetes | kubernetes, architect, specializing, cloud, native, infrastructure, gitops, argocd, flux, enterprise, container, orchestration | | `kubernetes-deployment` | Kubernetes deployment workflow for container orchestration, Helm charts, service mesh, and production-ready K8s configurations. | kubernetes, deployment | kubernetes, deployment, container, orchestration, helm, charts, mesh, k8s, configurations | @@ -1069,7 +1075,7 @@ Total skills: 1332 | `whatsapp-cloud-api` | Integracao com WhatsApp Business Cloud API (Meta). Mensagens, templates, webhooks HMAC-SHA256, automacao de atendimento. Boilerplates Node.js e Python. | messaging, whatsapp, meta, webhooks | messaging, whatsapp, meta, webhooks, cloud, api, integracao, com, business, mensagens, hmac, sha256 | | `x-twitter-scraper` | X (Twitter) data platform skill — tweet search, user lookup, follower extraction, engagement metrics, giveaway draws, monitoring, webhooks, 19 extraction too... 
| [twitter, x-api, scraping, mcp, social-media, data-extraction, giveaway, monitoring, webhooks] | [twitter, x-api, scraping, mcp, social-media, data-extraction, giveaway, monitoring, webhooks], twitter, scraper, data | -## security (166) +## security (167) | Skill | Description | Tags | Triggers | | --- | --- | --- | --- | @@ -1139,6 +1145,7 @@ Total skills: 1332 | `graphql-architect` | Master modern GraphQL with federation, performance optimization, and enterprise security. Build scalable schemas, implement advanced caching, and design real... | graphql | graphql, architect, federation, performance, optimization, enterprise, security, scalable, schemas, caching, real, time | | `grpc-golang` | Build production-ready gRPC services in Go with mTLS, streaming, and observability. Use when designing Protobuf contracts with Buf or implementing secure ser... | grpc, golang | grpc, golang, go, mtls, streaming, observability, designing, protobuf, contracts, buf, implementing, secure | | `html-injection-testing` | Identify and exploit HTML injection vulnerabilities that allow attackers to inject malicious HTML content into web applications. This vulnerability enables a... | html, injection | html, injection, testing, identify, exploit, vulnerabilities, allow, attackers, inject, malicious, content, web | +| `hugging-face-jobs` | Run workloads on Hugging Face Jobs with managed CPUs, GPUs, TPUs, secrets, and Hub persistence. | hugging, face, jobs | hugging, face, jobs, run, workloads, managed, cpus, gpus, tpus, secrets, hub, persistence | | `incident-responder` | Expert SRE incident responder specializing in rapid problem resolution, modern observability, and comprehensive incident management. 
| incident, responder | incident, responder, sre, specializing, rapid, problem, resolution, observability | | `incident-response-incident-response` | Use when working with incident response incident response | incident, response | incident, response, working | | `incident-response-smart-fix` | [Extended thinking: This workflow implements a sophisticated debugging and resolution pipeline that leverages AI-assisted debugging tools and observability p... | incident, response, fix | incident, response, fix, smart, extended, thinking, implements, sophisticated, debugging, resolution, pipeline, leverages | @@ -1276,7 +1283,7 @@ Total skills: 1332 | `wiki-qa` | Answer repository questions grounded entirely in source code evidence. Use when user asks a question about the codebase, user wants to understand a specific ... | wiki, qa | wiki, qa, answer, repository, questions, grounded, entirely, source, code, evidence, user, asks | | `windows-privilege-escalation` | Provide systematic methodologies for discovering and exploiting privilege escalation vulnerabilities on Windows systems during penetration testing engagements. | windows, privilege, escalation | windows, privilege, escalation, provide, systematic, methodologies, discovering, exploiting, vulnerabilities, during, penetration, testing | -## workflow (100) +## workflow (101) | Skill | Description | Tags | Triggers | | --- | --- | --- | --- | @@ -1367,6 +1374,7 @@ Total skills: 1332 | `team-collaboration-issue` | You are a GitHub issue resolution expert specializing in systematic bug investigation, feature implementation, and collaborative development workflows. Your ... | team, collaboration, issue | team, collaboration, issue, github, resolution, specializing, systematic, bug, investigation, feature, collaborative, development | | `telegram-automation` | Automate Telegram tasks via Rube MCP (Composio): send messages, manage chats, share photos/documents, and handle bot commands. Always search tools first for ... 
| telegram | telegram, automation, automate, tasks, via, rube, mcp, composio, send, messages, chats, share | | `tiktok-automation` | Automate TikTok tasks via Rube MCP (Composio): upload/publish videos, post photos, manage content, and view user profiles/stats. Always search tools first fo... | tiktok | tiktok, automation, automate, tasks, via, rube, mcp, composio, upload, publish, videos, post | +| `tmux` | Expert tmux session, window, and pane management for terminal multiplexing, persistent remote workflows, and shell scripting automation. | tmux, terminal, multiplexer, sessions, shell, remote, automation | tmux, terminal, multiplexer, sessions, shell, remote, automation, session, window, pane, multiplexing, persistent | | `todoist-automation` | Automate Todoist task management, projects, sections, filtering, and bulk operations via Rube MCP (Composio). Always search tools first for current schemas. | todoist | todoist, automation, automate, task, sections, filtering, bulk, operations, via, rube, mcp, composio | | `track-management` | Use this skill when creating, managing, or working with Conductor tracks - the logical work units for features, bugs, and refactors. Applies to spec.md, plan... | track | track, skill, creating, managing, working, conductor, tracks, logical, work, units, features, bugs | | `trello-automation` | Automate Trello boards, cards, and workflows via Rube MCP (Composio). Create cards, manage lists, assign members, and search across boards programmatically. 
| trello | trello, automation, automate, boards, cards, via, rube, mcp, composio, lists, assign, members | diff --git a/CHANGELOG.md b/CHANGELOG.md index e207ce03..6f58e23f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,53 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [9.2.0] - 2026-03-29 - "Hugging Face Ecosystem and Shell Workflow Expansion" + +> Installable skill library update for Claude Code, Cursor, Codex CLI, Gemini CLI, Antigravity, and related AI coding assistants. + +Start here: + +- Install: `npx antigravity-awesome-skills` +- Choose your tool: [README -> Choose Your Tool](https://github.com/sickn33/antigravity-awesome-skills#choose-your-tool) +- Best skills by tool: [README -> Best Skills By Tool](https://github.com/sickn33/antigravity-awesome-skills#best-skills-by-tool) +- Bundles: [docs/users/bundles.md](https://github.com/sickn33/antigravity-awesome-skills/blob/main/docs/users/bundles.md) +- Workflows: [docs/users/workflows.md](https://github.com/sickn33/antigravity-awesome-skills/blob/main/docs/users/workflows.md) + +This release expands practical day-to-day coverage for Claude Code, Cursor, Codex CLI, Gemini CLI, and similar agent workflows. It adds a full batch of Hugging Face ecosystem skills, new shell and terminal expertise for `jq` and `tmux`, a new `viboscope` collaboration skill, and stronger Odoo guidance for safer credentials and more reliable EDI flows. + +### New Skills + +- **hugging-face-community-evals** - run local Hugging Face Hub model evaluations with `inspect-ai` and `lighteval`. +- **hugging-face-gradio** - build and edit Gradio demos, layouts, and chat interfaces in Python. +- **hugging-face-papers** - read and analyze Hugging Face paper pages and arXiv-linked metadata. +- **hugging-face-trackio** - track ML experiments with Trackio logging, alerts, and CLI metric retrieval.
+- **hugging-face-vision-trainer** - train and fine-tune detection, classification, and SAM or SAM2 vision models on Hugging Face Jobs. +- **transformers-js** - run Hugging Face models in JavaScript and TypeScript with Transformers.js. +- **jq** - expert JSON querying, transformation, and shell pipeline guidance for terminal-first workflows (PR #414). +- **tmux** - advanced session, pane, scripting, and remote terminal workflow guidance (PR #414). +- **viboscope** - psychological compatibility matching guidance for cofounder, collaborator, and relationship discovery workflows (PR #415). + +### Improvements + +- **Hugging Face official skill sync** - refreshed local Hugging Face coverage and attribution for `hugging-face-cli`, `hugging-face-dataset-viewer`, `hugging-face-jobs`, `hugging-face-model-trainer`, and `hugging-face-paper-publisher`, while packaging the missing official ecosystem skills into the repo. +- **Odoo security hardening** - merged safer credential handling for `odoo-woocommerce-bridge` by replacing hardcoded secrets with environment-variable lookups (PR #413). +- **Odoo EDI resilience** - improved `odoo-edi-connector` with idempotency checks, partner verification, dynamic X12 date handling, and safer environment-based configuration (PR #416). +- **Maintainer and release docs** - folded in the latest maintainer guidance around risk-label sync, repo-state hygiene, and release/CI workflow consistency. + +### Who should care + +- **Claude Code, Codex CLI, Cursor, and Gemini CLI users** get broader Hugging Face ecosystem coverage for datasets, Jobs, evaluations, papers, Trackio, and Transformers.js workflows. +- **Terminal-heavy developers and infra teams** get stronger `jq` and `tmux` guidance for JSON processing, session management, and scripted shell workflows. +- **Odoo integrators** get safer bridge examples and more production-ready EDI connector patterns.
+- **Builders looking for collaborator-matching workflows** get a new `viboscope` skill for compatibility-driven discovery. + +### Credits + +- **[@kostakost2](https://github.com/kostakost2)** for the new `jq` and `tmux` skills in PR #414 +- **[@ivankoriako](https://github.com/ivankoriako)** for the new `viboscope` skill in PR #415 +- **[@Champbreed](https://github.com/Champbreed)** for Odoo security and EDI improvements in PRs #413 and #416 +- **[Hugging Face](https://github.com/huggingface/skills)** for the upstream official skill collection synced into this release + ### Changed - **Risk maintenance workflow**: expanded the legacy `risk:` cleanup flow so maintainers can sync explicit high-confidence `none`, `safe`, `critical`, and `offensive` labels from audit suggestions, including auto-inserting the required `AUTHORIZED USE ONLY` notice when a legacy skill is promoted to `offensive`. diff --git a/README.md b/README.md index fe338cc4..091a2fcc 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ - -# 🌌 Antigravity Awesome Skills: 1,340+ Agentic Skills for Claude Code, Gemini CLI, Cursor, Copilot & More + +# 🌌 Antigravity Awesome Skills: 1,340+ Agentic Skills for Claude Code, Gemini CLI, Cursor, Copilot & More -> **Installable GitHub library of 1,332+ agentic skills for Claude Code, Cursor, Codex CLI, Gemini CLI, Antigravity, and other AI coding assistants.** +> **Installable GitHub library of 1,340+ agentic skills for Claude Code, Cursor, Codex CLI, Gemini CLI, Antigravity, and other AI coding assistants.** Antigravity Awesome Skills is a GitHub repository and installer CLI for reusable `SKILL.md` playbooks. Instead of collecting random prompts, you get a searchable, installable skill library for planning, coding, debugging, testing, security review, infrastructure work, product workflows, and growth tasks across the major AI coding assistants.
@@ -26,7 +26,7 @@ Antigravity Awesome Skills is a GitHub repository and installer CLI for reusable - **Installable, not just inspirational**: use `npx antigravity-awesome-skills` to put skills where your tool expects them. - **Built for major agent workflows**: Claude Code, Cursor, Codex CLI, Gemini CLI, Antigravity, Kiro, OpenCode, Copilot, and more. -- **Broad coverage with real utility**: 1,332+ skills across development, testing, security, infrastructure, product, and marketing. +- **Broad coverage with real utility**: 1,340+ skills across development, testing, security, infrastructure, product, and marketing. - **Faster onboarding**: bundles and workflows reduce the time from "I found this repo" to "I used my first skill". - **Useful whether you want breadth or curation**: browse the full catalog, start with top bundles, or compare alternatives before installing. @@ -47,7 +47,7 @@ Antigravity Awesome Skills is a GitHub repository and installer CLI for reusable - [🧭 Antigravity Workflows](#antigravity-workflows) - [⚖️ Alternatives & Comparisons](#alternatives--comparisons) - [📦 Features & Categories](#features--categories) -- [📚 Browse 1,332+ Skills](#browse-1332-skills) +- [📚 Browse 1,340+ Skills](#browse-1340-skills) - [🤝 Contributing](#contributing) - [💬 Community](#community) - [☕ Support the Project](#support-the-project) @@ -261,7 +261,7 @@ If you want the full explanation of root plugins, bundle plugins, full-library i ## Best Skills By Tool -If you want a faster answer than "browse all 1,332+ skills", start with a tool-specific guide: +If you want a faster answer than "browse all 1,340+ skills", start with a tool-specific guide: - **[Claude Code skills](docs/users/claude-code-skills.md)**: install paths, starter skills, prompt examples, and plugin marketplace flow. - **[Cursor skills](docs/users/cursor-skills.md)**: best starter skills for `.cursor/skills/`, UI-heavy work, and pair-programming flows. 
@@ -428,7 +428,7 @@ The repository is organized into specialized domains to transform your AI into a Counts change as new skills are added. For the current full registry, see [CATALOG.md](CATALOG.md). -## Browse 1,332+ Skills +## Browse 1,340+ Skills - Open the interactive browser in [`apps/web-app`](apps/web-app). - Read the full catalog in [`CATALOG.md`](CATALOG.md). @@ -651,12 +651,12 @@ We officially thank the following contributors for their help in making this rep - [@Mohammad-Faiz-Cloud-Engineer](https://github.com/Mohammad-Faiz-Cloud-Engineer) - [@zinzied](https://github.com/zinzied) - [@ssumanbiswas](https://github.com/ssumanbiswas) -- [@Champbreed](https://github.com/Champbreed) - [@Dokhacgiakhoa](https://github.com/Dokhacgiakhoa) - [@sx4im](https://github.com/sx4im) -- [@maxdml](https://github.com/maxdml) - [@IanJ332](https://github.com/IanJ332) +- [@maxdml](https://github.com/maxdml) - [@skyruh](https://github.com/skyruh) +- [@Champbreed](https://github.com/Champbreed) - [@ar27111994](https://github.com/ar27111994) - [@chauey](https://github.com/chauey) - [@itsmeares](https://github.com/itsmeares) @@ -681,13 +681,13 @@ We officially thank the following contributors for their help in making this rep - [@fernandezbaptiste](https://github.com/fernandezbaptiste) - [@Gizzant](https://github.com/Gizzant) - [@JayeHarrill](https://github.com/JayeHarrill) +- [@AssassinMaeve](https://github.com/AssassinMaeve) - [@Tiger-Foxx](https://github.com/Tiger-Foxx) - [@RamonRiosJr](https://github.com/RamonRiosJr) - [@Musayrlsms](https://github.com/Musayrlsms) -- [@AssassinMaeve](https://github.com/AssassinMaeve) +- [@vuth-dogo](https://github.com/vuth-dogo) - [@Wittlesus](https://github.com/Wittlesus) - [@wahidzzz](https://github.com/wahidzzz) -- [@yubing744](https://github.com/yubing744) - [@Vonfry](https://github.com/Vonfry) - [@vprudnikoff](https://github.com/vprudnikoff) - [@viktor-ferenczi](https://github.com/viktor-ferenczi) @@ -699,7 +699,6 @@ We officially thank 
the following contributors for their help in making this rep - [@TomGranot](https://github.com/TomGranot) - [@terryspitz](https://github.com/terryspitz) - [@Onsraa](https://github.com/Onsraa) -- [@PabloASMD](https://github.com/PabloASMD) - [@SebConejo](https://github.com/SebConejo) - [@SuperJMN](https://github.com/SuperJMN) - [@Enreign](https://github.com/Enreign) @@ -710,7 +709,8 @@ We officially thank the following contributors for their help in making this rep - [@ronanguilloux](https://github.com/ronanguilloux) - [@sraphaz](https://github.com/sraphaz) - [@ProgramadorBrasil](https://github.com/ProgramadorBrasil) -- [@vuth-dogo](https://github.com/vuth-dogo) +- [@PabloASMD](https://github.com/PabloASMD) +- [@yubing744](https://github.com/yubing744) - [@yang1002378395-cmyk](https://github.com/yang1002378395-cmyk) - [@viliawang-pm](https://github.com/viliawang-pm) - [@uucz](https://github.com/uucz) @@ -737,8 +737,7 @@ We officially thank the following contributors for their help in making this rep - [@ziuus](https://github.com/ziuus) - [@Cerdore](https://github.com/Cerdore) - [@Wolfe-Jam](https://github.com/Wolfe-Jam) -- [@olgasafonova](https://github.com/olgasafonova) -- [@ivankoriako](https://github.com/ivankoriako) +- [@qcwssss](https://github.com/qcwssss) - [@rcigor](https://github.com/rcigor) - [@hvasconcelos](https://github.com/hvasconcelos) - [@Guilherme-ruy](https://github.com/Guilherme-ruy) @@ -764,6 +763,7 @@ We officially thank the following contributors for their help in making this rep - [@ALEKGG1](https://github.com/ALEKGG1) - [@8144225309](https://github.com/8144225309) - [@1bcMax](https://github.com/1bcMax) +- [@olgasafonova](https://github.com/olgasafonova) - [@sharmanilay](https://github.com/sharmanilay) - [@KhaiTrang1995](https://github.com/KhaiTrang1995) - [@LocNguyenSGU](https://github.com/LocNguyenSGU) @@ -789,7 +789,6 @@ We officially thank the following contributors for their help in making this rep - [@Jonohobs](https://github.com/Jonohobs) 
- [@JaskiratAnand](https://github.com/JaskiratAnand) - [@jamescha-earley](https://github.com/jamescha-earley) -- [@qcwssss](https://github.com/qcwssss) ## License diff --git a/apps/web-app/public/sitemap.xml b/apps/web-app/public/sitemap.xml index ed996e49..011a3262 100644 --- a/apps/web-app/public/sitemap.xml +++ b/apps/web-app/public/sitemap.xml @@ -12,6 +12,12 @@ weekly 0.7 + + http://localhost/skill/jq + 2026-03-29 + weekly + 0.7 + http://localhost/skill/phase-gated-debugging 2026-03-29 @@ -24,6 +30,12 @@ weekly 0.7 + + http://localhost/skill/tmux + 2026-03-29 + weekly + 0.7 + http://localhost/skill/akf-trust-metadata 2026-03-29 @@ -234,16 +246,4 @@ weekly 0.7 - - http://localhost/skill/obsidian-markdown - 2026-03-29 - weekly - 0.7 - - - http://localhost/skill/product-marketing-context - 2026-03-29 - weekly - 0.7 - diff --git a/apps/web-app/public/skills.json.backup b/apps/web-app/public/skills.json.backup index 64fd7c5e..d1d99fff 100644 --- a/apps/web-app/public/skills.json.backup +++ b/apps/web-app/public/skills.json.backup @@ -14598,10 +14598,32 @@ "path": "skills/hugging-face-cli", "category": "ai-ml", "name": "hugging-face-cli", - "description": "The hf CLI provides direct terminal access to the Hugging Face Hub for downloading, uploading, and managing repositories, cache, and compute resources.", - "risk": "safe", - "source": "https://github.com/huggingface/skills/tree/main/skills/hugging-face-cli", - "date_added": "2026-02-27", + "description": "Use the Hugging Face Hub CLI (`hf`) to download, upload, and manage models, datasets, and Spaces.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/hf-cli", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, + { + "id": "hugging-face-community-evals", + "path": "skills/hugging-face-community-evals", + "category": "ai-ml", + 
"name": "hugging-face-community-evals", + "description": "Run local evaluations for Hugging Face Hub models with inspect-ai or lighteval.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-community-evals", + "date_added": null, "plugin": { "targets": { "codex": "supported", @@ -14620,9 +14642,9 @@ "path": "skills/hugging-face-dataset-viewer", "category": "ai-ml", "name": "hugging-face-dataset-viewer", - "description": "Use this skill for Hugging Face Dataset Viewer API workflows that fetch subset/split metadata, paginate rows, search text, apply filters, download parquet URLs, and read size or statistics.", + "description": "Query Hugging Face datasets through the Dataset Viewer API for splits, rows, search, filters, and parquet links.", "risk": "unknown", - "source": "community", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-datasets", "date_added": null, "plugin": { "targets": { @@ -14681,15 +14703,37 @@ "reasons": [] } }, + { + "id": "hugging-face-gradio", + "path": "skills/hugging-face-gradio", + "category": "ai-ml", + "name": "hugging-face-gradio", + "description": "Build or edit Gradio apps, layouts, components, and chat interfaces in Python.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-gradio", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, { "id": "hugging-face-jobs", "path": "skills/hugging-face-jobs", "category": "ai-ml", "name": "hugging-face-jobs", - "description": "Run any workload on fully managed Hugging Face infrastructure. 
No local setup required\u2014jobs run on cloud CPUs, GPUs, or TPUs and can persist results to the Hugging Face Hub.", - "risk": "safe", - "source": "https://github.com/huggingface/skills/tree/main/skills/hugging-face-jobs", - "date_added": "2026-02-27", + "description": "Run workloads on Hugging Face Jobs with managed CPUs, GPUs, TPUs, secrets, and Hub persistence.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-jobs", + "date_added": null, "plugin": { "targets": { "codex": "supported", @@ -14708,9 +14752,9 @@ "path": "skills/hugging-face-model-trainer", "category": "ai-ml", "name": "hugging-face-model-trainer", - "description": "Train language models using TRL (Transformer Reinforcement Learning) on fully managed Hugging Face infrastructure. No local GPU setup required\u2014models train on cloud GPUs and results are automatically saved to the Hugging Face Hub.", + "description": "Train or fine-tune TRL language models on Hugging Face Jobs, including SFT, DPO, GRPO, and GGUF export.", "risk": "unknown", - "source": "community", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-llm-trainer", "date_added": null, "plugin": { "targets": { @@ -14732,7 +14776,29 @@ "name": "hugging-face-paper-publisher", "description": "Publish and manage research papers on Hugging Face Hub. 
Supports creating paper pages, linking papers to models/datasets, claiming authorship, and generating professional markdown-based research articles.", "risk": "unknown", - "source": "community", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-paper-publisher", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, + { + "id": "hugging-face-papers", + "path": "skills/hugging-face-papers", + "category": "ai-ml", + "name": "hugging-face-papers", + "description": "Read and analyze Hugging Face paper pages or arXiv papers with markdown and papers API metadata.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-papers", "date_added": null, "plugin": { "targets": { @@ -14769,6 +14835,50 @@ "reasons": [] } }, + { + "id": "hugging-face-trackio", + "path": "skills/hugging-face-trackio", + "category": "ai-ml", + "name": "hugging-face-trackio", + "description": "Track ML experiments with Trackio using Python logging, alerts, and CLI metric retrieval.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-trackio", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, + { + "id": "hugging-face-vision-trainer", + "path": "skills/hugging-face-vision-trainer", + "category": "ai-ml", + "name": "hugging-face-vision-trainer", + "description": "Train or fine-tune vision models on Hugging Face Jobs for detection, classification, and SAM or SAM2 segmentation.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-vision-trainer", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + 
}, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, { "id": "hybrid-cloud-architect", "path": "skills/hybrid-cloud-architect", @@ -15583,6 +15693,28 @@ "reasons": [] } }, + { + "id": "jq", + "path": "skills/jq", + "category": "development", + "name": "jq", + "description": "Expert jq usage for JSON querying, filtering, transformation, and pipeline integration. Practical patterns for real shell workflows.", + "risk": "safe", + "source": "community", + "date_added": "2026-03-28", + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, { "id": "json-canvas", "path": "skills/json-canvas", @@ -26897,6 +27029,28 @@ "reasons": [] } }, + { + "id": "tmux", + "path": "skills/tmux", + "category": "development", + "name": "tmux", + "description": "Expert tmux session, window, and pane management for terminal multiplexing, persistent remote workflows, and shell scripting automation.", + "risk": "safe", + "source": "community", + "date_added": "2026-03-28", + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, { "id": "todoist-automation", "path": "skills/todoist-automation", @@ -27007,6 +27161,28 @@ "reasons": [] } }, + { + "id": "transformers-js", + "path": "skills/transformers-js", + "category": "web-development", + "name": "transformers-js", + "description": "Run Hugging Face models in JavaScript or TypeScript with Transformers.js in Node.js or the browser.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/transformers-js", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, { "id": "travel-health-analyzer", 
"path": "skills/travel-health-analyzer", diff --git a/data/aliases.json b/data/aliases.json index 8c525363..e93ccb65 100644 --- a/data/aliases.json +++ b/data/aliases.json @@ -80,6 +80,7 @@ "vr-ar": "game-development/vr-ar", "web-games": "game-development/web-games", "git-pr-workflow": "git-pr-workflows-git-workflow", + "hugging-face-evals": "hugging-face-community-evals", "hugging-face-publisher": "hugging-face-paper-publisher", "incident-response": "incident-response-incident-response", "javascript-typescript-scaffold": "javascript-typescript-typescript-scaffold", diff --git a/data/bundles.json b/data/bundles.json index 3503d596..f7a70486 100644 --- a/data/bundles.json +++ b/data/bundles.json @@ -183,7 +183,10 @@ "hono", "hugging-face-dataset-viewer", "hugging-face-evaluation", + "hugging-face-gradio", + "hugging-face-papers", "hugging-face-tool-builder", + "hugging-face-trackio", "instagram", "ios-debugger-agent", "ios-developer", @@ -288,6 +291,7 @@ "temporal-golang-pro", "temporal-python-pro", "temporal-python-testing", + "transformers-js", "trigger-dev", "trpc-fullstack", "typescript-advanced-types", @@ -371,6 +375,7 @@ "gha-security-review", "graphql-architect", "html-injection-testing", + "hugging-face-jobs", "k8s-manifest-generator", "k8s-security-policies", "laravel-expert", @@ -691,11 +696,13 @@ "gitops-workflow", "grafana-dashboards", "grpc-golang", + "hugging-face-trackio", "incident-responder", "incident-response-incident-response", "incident-response-smart-fix", "incident-runbook-templates", "internal-comms", + "jq", "kubernetes-architect", "kubernetes-deployment", "langfuse", @@ -906,6 +913,7 @@ "testing-patterns", "testing-qa", "tiktok-automation", + "tmux", "todoist-automation", "trello-automation", "twitter-automation", diff --git a/data/catalog.json b/data/catalog.json index 8c89580f..47008052 100644 --- a/data/catalog.json +++ b/data/catalog.json @@ -1,6 +1,6 @@ { "generatedAt": "2026-02-08T00:00:00.000Z", - "total": 1332, + "total": 1340, 
"skills": [ { "id": "00-andruia-consultant", @@ -16279,7 +16279,7 @@ { "id": "hugging-face-cli", "name": "hugging-face-cli", - "description": "The hf CLI provides direct terminal access to the Hugging Face Hub for downloading, uploading, and managing repositories, cache, and compute resources.", + "description": "Use the Hugging Face Hub CLI (`hf`) to download, upload, and manage models, datasets, and Spaces.", "category": "general", "tags": [ "hugging", @@ -16290,22 +16290,47 @@ "hugging", "face", "cli", - "hf", - "provides", - "direct", - "terminal", - "access", "hub", - "downloading", - "uploading", - "managing" + "hf", + "download", + "upload", + "models", + "datasets", + "spaces" ], "path": "skills/hugging-face-cli/SKILL.md" }, + { + "id": "hugging-face-community-evals", + "name": "hugging-face-community-evals", + "description": "Run local evaluations for Hugging Face Hub models with inspect-ai or lighteval.", + "category": "data-ai", + "tags": [ + "hugging", + "face", + "community", + "evals" + ], + "triggers": [ + "hugging", + "face", + "community", + "evals", + "run", + "local", + "evaluations", + "hub", + "models", + "inspect", + "ai", + "lighteval" + ], + "path": "skills/hugging-face-community-evals/SKILL.md" + }, { "id": "hugging-face-dataset-viewer", "name": "hugging-face-dataset-viewer", - "description": "Use this skill for Hugging Face Dataset Viewer API workflows that fetch subset/split metadata, paginate rows, search text, apply filters, download parquet URLs, and read size or statistics.", + "description": "Query Hugging Face datasets through the Dataset Viewer API for splits, rows, search, filters, and parquet links.", "category": "development", "tags": [ "hugging", @@ -16318,14 +16343,14 @@ "face", "dataset", "viewer", - "skill", + "query", + "datasets", + "through", "api", - "fetch", - "subset", - "split", - "metadata", - "paginate", - "rows" + "splits", + "rows", + "search", + "filters" ], "path": "skills/hugging-face-dataset-viewer/SKILL.md" 
}, @@ -16381,11 +16406,35 @@ ], "path": "skills/hugging-face-evaluation/SKILL.md" }, + { + "id": "hugging-face-gradio", + "name": "hugging-face-gradio", + "description": "Build or edit Gradio apps, layouts, components, and chat interfaces in Python.", + "category": "development", + "tags": [ + "hugging", + "face", + "gradio" + ], + "triggers": [ + "hugging", + "face", + "gradio", + "edit", + "apps", + "layouts", + "components", + "chat", + "interfaces", + "python" + ], + "path": "skills/hugging-face-gradio/SKILL.md" + }, { "id": "hugging-face-jobs", "name": "hugging-face-jobs", - "description": "Run any workload on fully managed Hugging Face infrastructure. No local setup required—jobs run on cloud CPUs, GPUs, or TPUs and can persist results to the Hugging Face Hub.", - "category": "infrastructure", + "description": "Run workloads on Hugging Face Jobs with managed CPUs, GPUs, TPUs, secrets, and Hub persistence.", + "category": "security", "tags": [ "hugging", "face", @@ -16396,22 +16445,22 @@ "face", "jobs", "run", - "any", - "workload", - "fully", + "workloads", "managed", - "infrastructure", - "no", - "local", - "setup" + "cpus", + "gpus", + "tpus", + "secrets", + "hub", + "persistence" ], "path": "skills/hugging-face-jobs/SKILL.md" }, { "id": "hugging-face-model-trainer", "name": "hugging-face-model-trainer", - "description": "Train language models using TRL (Transformer Reinforcement Learning) on fully managed Hugging Face infrastructure. 
No local GPU setup required—models train on cloud GPUs and results are automatically saved to the Hugging Face Hub.", - "category": "infrastructure", + "description": "Train or fine-tune TRL language models on Hugging Face Jobs, including SFT, DPO, GRPO, and GGUF export.", + "category": "general", "tags": [ "hugging", "face", @@ -16424,13 +16473,13 @@ "model", "trainer", "train", + "fine", + "tune", + "trl", "language", "models", - "trl", - "transformer", - "reinforcement", - "learning", - "fully" + "jobs", + "including" ], "path": "skills/hugging-face-model-trainer/SKILL.md" }, @@ -16461,6 +16510,31 @@ ], "path": "skills/hugging-face-paper-publisher/SKILL.md" }, + { + "id": "hugging-face-papers", + "name": "hugging-face-papers", + "description": "Read and analyze Hugging Face paper pages or arXiv papers with markdown and papers API metadata.", + "category": "development", + "tags": [ + "hugging", + "face", + "papers" + ], + "triggers": [ + "hugging", + "face", + "papers", + "read", + "analyze", + "paper", + "pages", + "arxiv", + "markdown", + "api", + "metadata" + ], + "path": "skills/hugging-face-papers/SKILL.md" + }, { "id": "hugging-face-tool-builder", "name": "hugging-face-tool-builder", @@ -16487,6 +16561,59 @@ ], "path": "skills/hugging-face-tool-builder/SKILL.md" }, + { + "id": "hugging-face-trackio", + "name": "hugging-face-trackio", + "description": "Track ML experiments with Trackio using Python logging, alerts, and CLI metric retrieval.", + "category": "infrastructure", + "tags": [ + "hugging", + "face", + "trackio" + ], + "triggers": [ + "hugging", + "face", + "trackio", + "track", + "ml", + "experiments", + "python", + "logging", + "alerts", + "cli", + "metric", + "retrieval" + ], + "path": "skills/hugging-face-trackio/SKILL.md" + }, + { + "id": "hugging-face-vision-trainer", + "name": "hugging-face-vision-trainer", + "description": "Train or fine-tune vision models on Hugging Face Jobs for detection, classification, and SAM or SAM2 segmentation.", + 
"category": "general", + "tags": [ + "hugging", + "face", + "vision", + "trainer" + ], + "triggers": [ + "hugging", + "face", + "vision", + "trainer", + "train", + "fine", + "tune", + "models", + "jobs", + "detection", + "classification", + "sam" + ], + "path": "skills/hugging-face-vision-trainer/SKILL.md" + }, { "id": "hybrid-cloud-architect", "name": "hybrid-cloud-architect", @@ -17339,6 +17466,35 @@ ], "path": "skills/jobgpt/SKILL.md" }, + { + "id": "jq", + "name": "jq", + "description": "Expert jq usage for JSON querying, filtering, transformation, and pipeline integration. Practical patterns for real shell workflows.", + "category": "infrastructure", + "tags": [ + "jq", + "json", + "shell", + "cli", + "data-transformation", + "bash" + ], + "triggers": [ + "jq", + "json", + "shell", + "cli", + "data-transformation", + "bash", + "usage", + "querying", + "filtering", + "transformation", + "pipeline", + "integration" + ], + "path": "skills/jq/SKILL.md" + }, { "id": "json-canvas", "name": "json-canvas", @@ -29983,6 +30139,36 @@ ], "path": "skills/tiktok-automation/SKILL.md" }, + { + "id": "tmux", + "name": "tmux", + "description": "Expert tmux session, window, and pane management for terminal multiplexing, persistent remote workflows, and shell scripting automation.", + "category": "workflow", + "tags": [ + "tmux", + "terminal", + "multiplexer", + "sessions", + "shell", + "remote", + "automation" + ], + "triggers": [ + "tmux", + "terminal", + "multiplexer", + "sessions", + "shell", + "remote", + "automation", + "session", + "window", + "pane", + "multiplexing", + "persistent" + ], + "path": "skills/tmux/SKILL.md" + }, { "id": "todoist-automation", "name": "todoist-automation", @@ -30108,6 +30294,29 @@ ], "path": "skills/track-management/SKILL.md" }, + { + "id": "transformers-js", + "name": "transformers-js", + "description": "Run Hugging Face models in JavaScript or TypeScript with Transformers.js in Node.js or the browser.", + "category": "development", + "tags": 
[ + "transformers", + "js" + ], + "triggers": [ + "transformers", + "js", + "run", + "hugging", + "face", + "models", + "javascript", + "typescript", + "node", + "browser" + ], + "path": "skills/transformers-js/SKILL.md" + }, { "id": "travel-health-analyzer", "name": "travel-health-analyzer", diff --git a/data/plugin-compatibility.json b/data/plugin-compatibility.json index e07e72db..e89c58a5 100644 --- a/data/plugin-compatibility.json +++ b/data/plugin-compatibility.json @@ -12656,6 +12656,25 @@ }, "runtime_files": [] }, + { + "id": "hugging-face-community-evals", + "path": "skills/hugging-face-community-evals", + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [], + "blocked_reasons": { + "codex": [], + "claude": [] + }, + "runtime_files": [] + }, { "id": "hugging-face-dataset-viewer", "path": "skills/hugging-face-dataset-viewer", @@ -12713,6 +12732,25 @@ }, "runtime_files": [] }, + { + "id": "hugging-face-gradio", + "path": "skills/hugging-face-gradio", + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [], + "blocked_reasons": { + "codex": [], + "claude": [] + }, + "runtime_files": [] + }, { "id": "hugging-face-jobs", "path": "skills/hugging-face-jobs", @@ -12770,6 +12808,25 @@ }, "runtime_files": [] }, + { + "id": "hugging-face-papers", + "path": "skills/hugging-face-papers", + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [], + "blocked_reasons": { + "codex": [], + "claude": [] + }, + "runtime_files": [] + }, { "id": "hugging-face-tool-builder", "path": "skills/hugging-face-tool-builder", @@ -12789,6 +12846,44 @@ }, "runtime_files": [] }, + { + "id": "hugging-face-trackio", + "path": "skills/hugging-face-trackio", + "targets": { + "codex": "supported", + "claude": 
"supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [], + "blocked_reasons": { + "codex": [], + "claude": [] + }, + "runtime_files": [] + }, + { + "id": "hugging-face-vision-trainer", + "path": "skills/hugging-face-vision-trainer", + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [], + "blocked_reasons": { + "codex": [], + "claude": [] + }, + "runtime_files": [] + }, { "id": "hybrid-cloud-architect", "path": "skills/hybrid-cloud-architect", @@ -13473,6 +13568,25 @@ }, "runtime_files": [] }, + { + "id": "jq", + "path": "skills/jq", + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [], + "blocked_reasons": { + "codex": [], + "claude": [] + }, + "runtime_files": [] + }, { "id": "json-canvas", "path": "skills/json-canvas", @@ -23387,6 +23501,25 @@ }, "runtime_files": [] }, + { + "id": "tmux", + "path": "skills/tmux", + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [], + "blocked_reasons": { + "codex": [], + "claude": [] + }, + "runtime_files": [] + }, { "id": "todoist-automation", "path": "skills/todoist-automation", @@ -23482,6 +23615,25 @@ }, "runtime_files": [] }, + { + "id": "transformers-js", + "path": "skills/transformers-js", + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [], + "blocked_reasons": { + "codex": [], + "claude": [] + }, + "runtime_files": [] + }, { "id": "travel-health-analyzer", "path": "skills/travel-health-analyzer", @@ -25470,10 +25622,10 @@ } ], "summary": { - "total_skills": 1332, + "total_skills": 1340, "supported": { - "codex": 1303, - "claude": 1318 + "codex": 1311, + "claude": 1326 }, "blocked": { 
"codex": 29, diff --git a/docs/integrations/jetski-cortex.md b/docs/integrations/jetski-cortex.md index d9dd6555..0c0de26c 100644 --- a/docs/integrations/jetski-cortex.md +++ b/docs/integrations/jetski-cortex.md @@ -1,9 +1,9 @@ --- title: Jetski/Cortex + Gemini Integration Guide -description: "Come usare antigravity-awesome-skills con Jetski/Cortex evitando l’overflow di contesto con 1.332+ skill." +description: "Come usare antigravity-awesome-skills con Jetski/Cortex evitando l’overflow di contesto con 1.340+ skill." --- -# Jetski/Cortex + Gemini: integrazione sicura con 1.332+ skill +# Jetski/Cortex + Gemini: integrazione sicura con 1.340+ skill Questa guida mostra come integrare il repository `antigravity-awesome-skills` con un agente basato su **Jetski/Cortex + Gemini** (o framework simili) **senza superare il context window** del modello. @@ -23,7 +23,7 @@ Non bisogna mai: - concatenare il contenuto di tutte le `SKILL.md` in un singolo system prompt; - reiniettare l’intera libreria per **ogni** richiesta. -Con oltre 1.332 skill, questo approccio riempie il context window prima ancora di aggiungere i messaggi dell’utente, causando l’errore di truncation. +Con oltre 1.340 skill, questo approccio riempie il context window prima ancora di aggiungere i messaggi dell’utente, causando l’errore di truncation. --- diff --git a/docs/integrations/jetski-gemini-loader/README.md b/docs/integrations/jetski-gemini-loader/README.md index 7e94680f..3dafc4cc 100644 --- a/docs/integrations/jetski-gemini-loader/README.md +++ b/docs/integrations/jetski-gemini-loader/README.md @@ -20,7 +20,7 @@ This example shows one way to integrate **antigravity-awesome-skills** with a Je - How to enforce a **maximum number of skills per turn** via `maxSkillsPerTurn`. - How to choose whether to **truncate or error** when too many skills are requested via `overflowBehavior`. -This pattern avoids context overflow when you have 1,332+ skills installed. 
+This pattern avoids context overflow when you have 1,340+ skills installed. --- diff --git a/docs/maintainers/repo-growth-seo.md b/docs/maintainers/repo-growth-seo.md index bad87094..19b27685 100644 --- a/docs/maintainers/repo-growth-seo.md +++ b/docs/maintainers/repo-growth-seo.md @@ -6,7 +6,7 @@ This document keeps the repository's GitHub-facing discovery copy aligned with t Preferred positioning: -> Installable GitHub library of 1,332+ agentic skills for Claude Code, Cursor, Codex CLI, Gemini CLI, Antigravity, and other AI coding assistants. +> Installable GitHub library of 1,340+ agentic skills for Claude Code, Cursor, Codex CLI, Gemini CLI, Antigravity, and other AI coding assistants. Key framing: @@ -20,7 +20,7 @@ Key framing: Preferred description: -> Installable GitHub library of 1,332+ agentic skills for Claude Code, Cursor, Codex CLI, Gemini CLI, Antigravity, and more. Includes installer CLI, bundles, workflows, and official/community skill collections. +> Installable GitHub library of 1,340+ agentic skills for Claude Code, Cursor, Codex CLI, Gemini CLI, Antigravity, and more. Includes installer CLI, bundles, workflows, and official/community skill collections. Preferred homepage: @@ -28,7 +28,7 @@ Preferred homepage: Preferred social preview: -- use a clean preview image that says `1,332+ Agentic Skills`; +- use a clean preview image that says `1,340+ Agentic Skills`; - mention Claude Code, Cursor, Codex CLI, and Gemini CLI; - avoid dense text and tiny logos that disappear in social cards. 
diff --git a/docs/maintainers/skills-update-guide.md b/docs/maintainers/skills-update-guide.md index d2fa46fc..9f463b29 100644 --- a/docs/maintainers/skills-update-guide.md +++ b/docs/maintainers/skills-update-guide.md @@ -69,7 +69,7 @@ For manual updates, you need: The update process refreshes: - Skills index (`skills_index.json`) - Web app skills data (`apps\web-app\public\skills.json`) -- All 1,332+ skills from the skills directory +- All 1,340+ skills from the skills directory ## When to Update diff --git a/docs/sources/sources.md b/docs/sources/sources.md index 451ee808..c8c01bf2 100644 --- a/docs/sources/sources.md +++ b/docs/sources/sources.md @@ -24,7 +24,7 @@ The following skills were added from the curated collection at [VoltAgent/awesom | :------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------ | :--------- | :--------------------------------- | | `vercel-deploy-claimable` | [Vercel Labs](https://github.com/vercel-labs/agent-skills) | MIT | Official Vercel skill | | `design-md` | [Google Labs (Stitch)](https://github.com/google-labs-code/stitch-skills) | Compatible | Google Labs Stitch skills | -| `hugging-face-cli`, `hugging-face-jobs` | [Hugging Face](https://github.com/huggingface/skills) | Compatible | Official Hugging Face skills | +| `hugging-face-cli`, `hugging-face-community-evals`, `hugging-face-dataset-viewer`, `hugging-face-gradio`, `hugging-face-jobs`, `hugging-face-model-trainer`, `hugging-face-paper-publisher`, `hugging-face-papers`, `hugging-face-trackio`, `hugging-face-vision-trainer`, `transformers-js` | [Hugging Face](https://github.com/huggingface/skills) | Compatible | Official Hugging Face skills | | `culture-index`, `fix-review`, `sharp-edges` | [Trail of Bits](https://github.com/trailofbits/skills) | Compatible | Security skills from Trail of Bits | | `expo-deployment`, `upgrading-expo` | 
[Expo](https://github.com/expo/skills) | Compatible | Official Expo skills | | `commit`, `create-pr`, `find-bugs`, `iterate-pr` | [Sentry](https://github.com/getsentry/skills) | Compatible | Sentry dev team skills | @@ -118,7 +118,7 @@ The following skills were added during the March 2026 skills update: ### Machine Learning & Data | Skill | Source | License | Notes | |-------|--------|---------|-------| -| `hugging-face-dataset-viewer`, `hugging-face-datasets`, `hugging-face-evaluation`, `hugging-face-model-trainer`, `hugging-face-paper-publisher`, `hugging-face-tool-builder` | [huggingface/skills](https://github.com/huggingface/skills) | Compatible | HuggingFace ML tools | +| `hugging-face-datasets`, `hugging-face-evaluation`, `hugging-face-tool-builder` | [huggingface/skills](https://github.com/huggingface/skills) | Compatible | Hugging Face ecosystem extensions | | `numpy`, `pandas`, `scipy`, `matplotlib`, `scikit-learn`, `jupyter-workflow` | [K-Dense-AI/claude-scientific-skills](https://github.com/K-Dense-AI/claude-scientific-skills) | Compatible | Data science essentials | | `biopython`, `scanpy`, `uniprot-database`, `pubmed-database` | [K-Dense-AI/claude-scientific-skills](https://github.com/K-Dense-AI/claude-scientific-skills) | Compatible | Bioinformatics tools | diff --git a/docs/users/bundles.md b/docs/users/bundles.md index 727cc8c5..db4e064f 100644 --- a/docs/users/bundles.md +++ b/docs/users/bundles.md @@ -673,4 +673,4 @@ Found a skill that should be in a bundle? Or want to create a new bundle? 
[Open --- -_Last updated: March 2026 | Total Skills: 1,332+ | Total Bundles: 37_ +_Last updated: March 2026 | Total Skills: 1,340+ | Total Bundles: 37_ diff --git a/docs/users/claude-code-skills.md b/docs/users/claude-code-skills.md index dc84b194..0ec62550 100644 --- a/docs/users/claude-code-skills.md +++ b/docs/users/claude-code-skills.md @@ -12,7 +12,7 @@ Install the library into Claude Code, then invoke focused skills directly in the ## Why use this repo for Claude Code -- It includes 1,332+ skills instead of a narrow single-domain starter pack. +- It includes 1,340+ skills instead of a narrow single-domain starter pack. - It supports the standard `.claude/skills/` path and the Claude Code plugin marketplace flow. - It also ships generated bundle plugins so teams can install focused packs like `Essentials` or `Security Developer` from the marketplace metadata. - It includes onboarding docs, bundles, and workflows so new users do not need to guess where to begin. diff --git a/docs/users/gemini-cli-skills.md b/docs/users/gemini-cli-skills.md index 664b43db..52f0b5b8 100644 --- a/docs/users/gemini-cli-skills.md +++ b/docs/users/gemini-cli-skills.md @@ -12,7 +12,7 @@ Install into the Gemini skills path, then ask Gemini to apply one skill at a tim - It installs directly into the expected Gemini skills path. - It includes both core software engineering skills and deeper agent/LLM-oriented skills. -- It helps new users get started with bundles and workflows rather than forcing a cold start from 1,332+ files. +- It helps new users get started with bundles and workflows rather than forcing a cold start from 1,340+ files. - It is useful whether you want a broad internal skill library or a single repo to test many workflows quickly. 
## Install Gemini CLI Skills diff --git a/docs/users/kiro-integration.md b/docs/users/kiro-integration.md index 559d7578..23603c54 100644 --- a/docs/users/kiro-integration.md +++ b/docs/users/kiro-integration.md @@ -18,7 +18,7 @@ Kiro is AWS's agentic AI IDE that combines: Kiro's agentic capabilities are enhanced by skills that provide: -- **Domain expertise** across 1,332+ specialized areas +- **Domain expertise** across 1,340+ specialized areas - **Best practices** from Anthropic, OpenAI, Google, Microsoft, and AWS - **Workflow automation** for common development tasks - **AWS-specific patterns** for serverless, infrastructure, and cloud architecture diff --git a/docs/users/usage.md b/docs/users/usage.md index d69bfa58..a10bae7f 100644 --- a/docs/users/usage.md +++ b/docs/users/usage.md @@ -14,7 +14,7 @@ If you came in through a **Claude Code** or **Codex** plugin instead of a full l When you ran `npx antigravity-awesome-skills` or cloned the repository, you: -✅ **Downloaded 1,332+ skill files** to your computer (default: `~/.gemini/antigravity/skills/`; or a custom path like `~/.agent/skills/` if you used `--path`) +✅ **Downloaded 1,340+ skill files** to your computer (default: `~/.gemini/antigravity/skills/`; or a custom path like `~/.agent/skills/` if you used `--path`) ✅ **Made them available** to your AI assistant ❌ **Did NOT enable them all automatically** (they're just sitting there, waiting) @@ -34,7 +34,7 @@ Bundles are **curated groups** of skills organized by role. They help you decide **Analogy:** -- You installed a toolbox with 1,332+ tools (✅ done) +- You installed a toolbox with 1,340+ tools (✅ done) - Bundles are like **labeled organizer trays** saying: "If you're a carpenter, start with these 10 tools" - You can either **pick skills from the tray** or install that tray as a focused marketplace bundle plugin @@ -212,7 +212,7 @@ Let's actually use a skill right now. 
Follow these steps: ## Step 5: Picking Your First Skills (Practical Advice) -Don't try to use all 1,332+ skills at once. Here's a sensible approach: +Don't try to use all 1,340+ skills at once. Here's a sensible approach: If you want a tool-specific starting point before choosing skills, use: @@ -343,7 +343,7 @@ Usually no, but if your AI doesn't recognize a skill: ### "Can I load all skills into the model at once?" -No. Even though you have 1,332+ skills installed locally, you should **not** concatenate every `SKILL.md` into a single system prompt or context block. +No. Even though you have 1,340+ skills installed locally, you should **not** concatenate every `SKILL.md` into a single system prompt or context block. The intended pattern is: diff --git a/docs/users/visual-guide.md b/docs/users/visual-guide.md index 7c9ac634..31651817 100644 --- a/docs/users/visual-guide.md +++ b/docs/users/visual-guide.md @@ -34,7 +34,7 @@ antigravity-awesome-skills/ ├── 📄 CONTRIBUTING.md ← Contributor workflow ├── 📄 CATALOG.md ← Full generated catalog │ -├── 📁 skills/ ← 1,332+ skills live here +├── 📁 skills/ ← 1,340+ skills live here │ │ │ ├── 📁 brainstorming/ │ │ └── 📄 SKILL.md ← Skill definition @@ -47,7 +47,7 @@ antigravity-awesome-skills/ │ │ └── 📁 2d-games/ │ │ └── 📄 SKILL.md ← Nested skills also supported │ │ -│ └── ... (1,332+ total) +│ └── ... (1,340+ total) │ ├── 📁 apps/ │ └── 📁 web-app/ ← Interactive browser @@ -100,7 +100,7 @@ antigravity-awesome-skills/ ``` ┌─────────────────────────┐ - │ 1,332+ SKILLS │ + │ 1,340+ SKILLS │ └────────────┬────────────┘ │ ┌────────────────────────┼────────────────────────┐ @@ -201,7 +201,7 @@ If you want a workspace-style manual install instead, cloning into `.agent/skill │ ├── 📁 brainstorming/ │ │ ├── 📁 stripe-integration/ │ │ ├── 📁 react-best-practices/ │ -│ └── ... (1,332+ total) │ +│ └── ... 
(1,340+ total) │ └─────────────────────────────────────────┘ ``` diff --git a/docs_zh-CN/sources/sources.md b/docs_zh-CN/sources/sources.md index e9815a94..58bfec73 100644 --- a/docs_zh-CN/sources/sources.md +++ b/docs_zh-CN/sources/sources.md @@ -30,7 +30,7 @@ | :------------------------------------------------------------------------------------------ | :------------------------------------------------------------------------ | :--------- | :--------------------------------- | | `vercel-deploy-claimable` | [Vercel Labs](https://github.com/vercel-labs/agent-skills) | MIT | 官方Vercel技能 | | `design-md` | [Google Labs (Stitch)](https://github.com/google-labs-code/stitch-skills) | Compatible | Google Labs Stitch技能 | -| `hugging-face-cli`, `hugging-face-jobs` | [Hugging Face](https://github.com/huggingface/skills) | Compatible | 官方Hugging Face技能 | +| `hugging-face-cli`, `hugging-face-community-evals`, `hugging-face-dataset-viewer`, `hugging-face-gradio`, `hugging-face-jobs`, `hugging-face-model-trainer`, `hugging-face-paper-publisher`, `hugging-face-papers`, `hugging-face-trackio`, `hugging-face-vision-trainer`, `transformers-js` | [Hugging Face](https://github.com/huggingface/skills) | Compatible | 官方Hugging Face技能 | | `culture-index`, `fix-review`, `sharp-edges` | [Trail of Bits](https://github.com/trailofbits/skills) | Compatible | Trail of Bits安全技能 | | `expo-deployment`, `upgrading-expo` | [Expo](https://github.com/expo/skills) | Compatible | 官方Expo技能 | | `commit`, `create-pr`, `find-bugs`, `iterate-pr` | [Sentry](https://github.com/getsentry/skills) | Compatible | Sentry开发团队技能 | @@ -116,7 +116,7 @@ ### 机器学习与数据 | 技能 | 来源 | 许可证 | 备注 | |-------|--------|---------|-------| -| `hugging-face-dataset-viewer`, `hugging-face-datasets`, `hugging-face-evaluation`, `hugging-face-model-trainer`, `hugging-face-paper-publisher`, `hugging-face-tool-builder` | [huggingface/skills](https://github.com/huggingface/skills) | Compatible | HuggingFace ML工具 | +| `hugging-face-datasets`, 
`hugging-face-evaluation`, `hugging-face-tool-builder` | [huggingface/skills](https://github.com/huggingface/skills) | Compatible | Hugging Face 生态扩展 | | `numpy`, `pandas`, `scipy`, `matplotlib`, `scikit-learn`, `jupyter-workflow` | [K-Dense-AI/claude-scientific-skills](https://github.com/K-Dense-AI/claude-scientific-skills) | Compatible | 数据科学基础 | | `biopython`, `scanpy`, `uniprot-database`, `pubmed-database` | [K-Dense-AI/claude-scientific-skills](https://github.com/K-Dense-AI/claude-scientific-skills) | Compatible | 生物信息学工具 | @@ -234,4 +234,4 @@ - 🐛 [GitHub Issues](https://github.com/sickn33/antigravity-awesome-skills/issues) - 💬 [GitHub Discussions](https://github.com/sickn33/antigravity-awesome-skills/discussions) -我们致力于确保每个贡献都得到适当的认可和尊重。感谢您为AI开发社区做出的贡献!🙏 \ No newline at end of file +我们致力于确保每个贡献都得到适当的认可和尊重。感谢您为AI开发社区做出的贡献!🙏 diff --git a/package.json b/package.json index bcf49fac..ecbb1362 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "antigravity-awesome-skills", "version": "9.1.0", - "description": "1,332+ agentic skills for Claude Code, Gemini CLI, Cursor, Antigravity & more. Installer CLI.", + "description": "1,340+ agentic skills for Claude Code, Gemini CLI, Cursor, Antigravity & more. 
Installer CLI.", "license": "MIT", "scripts": { "validate": "node tools/scripts/run-python.js tools/scripts/validate_skills.py", diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-cli/SKILL.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-cli/SKILL.md index eb68c478..11665159 100644 --- a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-cli/SKILL.md +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-cli/SKILL.md @@ -1,199 +1,194 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/hf-cli" name: hugging-face-cli -description: "The hf CLI provides direct terminal access to the Hugging Face Hub for downloading, uploading, and managing repositories, cache, and compute resources." -risk: safe -source: "https://github.com/huggingface/skills/tree/main/skills/hugging-face-cli" -date_added: "2026-02-27" +description: "Use the Hugging Face Hub CLI (`hf`) to download, upload, and manage models, datasets, and Spaces." +risk: unknown --- -# Hugging Face CLI +Install: `curl -LsSf https://hf.co/cli/install.sh | bash -s`. -The `hf` CLI provides direct terminal access to the Hugging Face Hub for downloading, uploading, and managing repositories, cache, and compute resources. +## When to Use -## When to Use This Skill +Use this skill when you need the `hf` CLI for Hub authentication, downloads, uploads, repo management, or basic compute operations. -Use this skill when: -- User needs to download models, datasets, or spaces -- Uploading files to Hub repositories -- Creating Hugging Face repositories -- Managing local cache -- Running compute jobs on HF infrastructure -- Working with Hugging Face Hub authentication +The Hugging Face Hub CLI tool `hf` is available. IMPORTANT: The `hf` command replaces the deprecated `huggingface-cli` command. -## Quick Command Reference +Use `hf --help` to view available functions. Note that auth commands are now all under `hf auth` e.g. `hf auth whoami`. 
-| Task | Command | -|------|---------| -| Login | `hf auth login` | -| Download model | `hf download ` | -| Download to folder | `hf download --local-dir ./path` | -| Upload folder | `hf upload . .` | -| Create repo | `hf repo create ` | -| Create tag | `hf repo tag create ` | -| Delete files | `hf repo-files delete ` | -| List cache | `hf cache ls` | -| Remove from cache | `hf cache rm ` | -| List models | `hf models ls` | -| Get model info | `hf models info ` | -| List datasets | `hf datasets ls` | -| Get dataset info | `hf datasets info ` | -| List spaces | `hf spaces ls` | -| Get space info | `hf spaces info ` | -| List endpoints | `hf endpoints ls` | -| Run GPU job | `hf jobs run --flavor a10g-small ` | -| Environment info | `hf env` | +Generated with `huggingface_hub v1.8.0`. Run `hf skills add --force` to regenerate. -## Core Commands +## Commands -### Authentication -```bash -hf auth login # Interactive login -hf auth login --token $HF_TOKEN # Non-interactive -hf auth whoami # Check current user -hf auth list # List stored tokens -hf auth switch # Switch between tokens -hf auth logout # Log out -``` +- `hf download REPO_ID` — Download files from the Hub. `[--type CHOICE --revision TEXT --include TEXT --exclude TEXT --cache-dir TEXT --local-dir TEXT --force-download --dry-run --quiet --max-workers INTEGER]` +- `hf env` — Print information about the environment. +- `hf sync` — Sync files between local directory and a bucket. `[--delete --ignore-times --ignore-sizes --plan TEXT --apply TEXT --dry-run --include TEXT --exclude TEXT --filter-from TEXT --existing --ignore-existing --verbose --quiet]` +- `hf upload REPO_ID` — Upload a file or a folder to the Hub. Recommended for single-commit uploads. `[--type CHOICE --revision TEXT --private --include TEXT --exclude TEXT --delete TEXT --commit-message TEXT --commit-description TEXT --create-pr --every FLOAT --quiet]` +- `hf upload-large-folder REPO_ID LOCAL_PATH` — Upload a large folder to the Hub. 
Recommended for resumable uploads. `[--type CHOICE --revision TEXT --private --include TEXT --exclude TEXT --num-workers INTEGER --no-report --no-bars]` +- `hf version` — Print information about the hf version. -### Download -```bash -hf download # Full repo to cache -hf download file.safetensors # Specific file -hf download --local-dir ./models # To local directory -hf download --include "*.safetensors" # Filter by pattern -hf download --repo-type dataset # Dataset -hf download --revision v1.0 # Specific version -``` +### `hf auth` — Manage authentication (login, logout, etc.). -### Upload -```bash -hf upload . . # Current dir to root -hf upload ./models /weights # Folder to path -hf upload model.safetensors # Single file -hf upload . . --repo-type dataset # Dataset -hf upload . . --create-pr # Create PR -hf upload . . --commit-message="msg" # Custom message -``` +- `hf auth list` — List all stored access tokens. +- `hf auth login` — Login using a token from huggingface.co/settings/tokens. `[--add-to-git-credential --force]` +- `hf auth logout` — Logout from a specific token. `[--token-name TEXT]` +- `hf auth switch` — Switch between access tokens. `[--token-name TEXT --add-to-git-credential]` +- `hf auth whoami` — Find out which huggingface.co account you are logged in as. `[--format CHOICE]` -### Repository Management -```bash -hf repo create # Create model repo -hf repo create --repo-type dataset # Create dataset -hf repo create --private # Private repo -hf repo create --repo-type space --space_sdk gradio # Gradio space -hf repo delete # Delete repo -hf repo move # Move repo to new namespace -hf repo settings --private true # Update repo settings -hf repo list --repo-type model # List repos -hf repo branch create release-v1 # Create branch -hf repo branch delete release-v1 # Delete branch -hf repo tag create v1.0 # Create tag -hf repo tag list # List tags -hf repo tag delete v1.0 # Delete tag -``` +### `hf buckets` — Commands to interact with buckets. 
-### Delete Files from Repo -```bash -hf repo-files delete folder/ # Delete folder -hf repo-files delete "*.txt" # Delete with pattern -``` +- `hf buckets cp SRC` — Copy a single file to or from a bucket. `[--quiet]` +- `hf buckets create BUCKET_ID` — Create a new bucket. `[--private --exist-ok --quiet]` +- `hf buckets delete BUCKET_ID` — Delete a bucket. `[--yes --missing-ok --quiet]` +- `hf buckets info BUCKET_ID` — Get info about a bucket. `[--quiet]` +- `hf buckets list` — List buckets or files in a bucket. `[--human-readable --tree --recursive --format CHOICE --quiet]` +- `hf buckets move FROM_ID TO_ID` — Move (rename) a bucket to a new name or namespace. +- `hf buckets remove ARGUMENT` — Remove files from a bucket. `[--recursive --yes --dry-run --include TEXT --exclude TEXT --quiet]` +- `hf buckets sync` — Sync files between local directory and a bucket. `[--delete --ignore-times --ignore-sizes --plan TEXT --apply TEXT --dry-run --include TEXT --exclude TEXT --filter-from TEXT --existing --ignore-existing --verbose --quiet]` -### Cache Management -```bash -hf cache ls # List cached repos -hf cache ls --revisions # Include individual revisions -hf cache rm model/gpt2 # Remove cached repo -hf cache rm # Remove cached revision -hf cache prune # Remove detached revisions -hf cache verify gpt2 # Verify checksums from cache -``` +### `hf cache` — Manage local cache directory. -### Browse Hub -```bash -# Models -hf models ls # List top trending models -hf models ls --search "MiniMax" --author MiniMaxAI # Search models -hf models ls --filter "text-generation" --limit 20 # Filter by task -hf models info MiniMaxAI/MiniMax-M2.1 # Get model info +- `hf cache list` — List cached repositories or revisions. `[--cache-dir TEXT --revisions --filter TEXT --format CHOICE --quiet --sort CHOICE --limit INTEGER]` +- `hf cache prune` — Remove detached revisions from the cache. `[--cache-dir TEXT --yes --dry-run]` +- `hf cache rm TARGETS` — Remove cached repositories or revisions. 
`[--cache-dir TEXT --yes --dry-run]` +- `hf cache verify REPO_ID` — Verify checksums for a single repo revision from cache or a local directory. `[--type CHOICE --revision TEXT --cache-dir TEXT --local-dir TEXT --fail-on-missing-files --fail-on-extra-files]` -# Datasets -hf datasets ls # List top trending datasets -hf datasets ls --search "finepdfs" --sort downloads # Search datasets -hf datasets info HuggingFaceFW/finepdfs # Get dataset info +### `hf collections` — Interact with collections on the Hub. -# Spaces -hf spaces ls # List top trending spaces -hf spaces ls --filter "3d" --limit 10 # Filter by 3D modeling spaces -hf spaces info enzostvs/deepsite # Get space info -``` +- `hf collections add-item COLLECTION_SLUG ITEM_ID ITEM_TYPE` — Add an item to a collection. `[--note TEXT --exists-ok]` +- `hf collections create TITLE` — Create a new collection on the Hub. `[--namespace TEXT --description TEXT --private --exists-ok]` +- `hf collections delete COLLECTION_SLUG` — Delete a collection from the Hub. `[--missing-ok]` +- `hf collections delete-item COLLECTION_SLUG ITEM_OBJECT_ID` — Delete an item from a collection. `[--missing-ok]` +- `hf collections info COLLECTION_SLUG` — Get info about a collection on the Hub. Output is in JSON format. +- `hf collections list` — List collections on the Hub. `[--owner TEXT --item TEXT --sort CHOICE --limit INTEGER --format CHOICE --quiet]` +- `hf collections update COLLECTION_SLUG` — Update a collection's metadata on the Hub. `[--title TEXT --description TEXT --position INTEGER --private --theme TEXT]` +- `hf collections update-item COLLECTION_SLUG ITEM_OBJECT_ID` — Update an item in a collection. 
`[--note TEXT --position INTEGER]` -### Jobs (Cloud Compute) -```bash -hf jobs run python:3.12 python script.py # Run on CPU -hf jobs run --flavor a10g-small # Run on GPU -hf jobs run --secrets HF_TOKEN # With HF token -hf jobs ps # List jobs -hf jobs logs # View logs -hf jobs cancel # Cancel job -``` +### `hf datasets` — Interact with datasets on the Hub. -### Inference Endpoints -```bash -hf endpoints ls # List endpoints -hf endpoints deploy my-endpoint \ - --repo openai/gpt-oss-120b \ - --framework vllm \ - --accelerator gpu \ - --instance-size x4 \ - --instance-type nvidia-a10g \ - --region us-east-1 \ - --vendor aws -hf endpoints describe my-endpoint # Show endpoint details -hf endpoints pause my-endpoint # Pause endpoint -hf endpoints resume my-endpoint # Resume endpoint -hf endpoints scale-to-zero my-endpoint # Scale to zero -hf endpoints delete my-endpoint --yes # Delete endpoint -``` -**GPU Flavors:** `cpu-basic`, `cpu-upgrade`, `cpu-xl`, `t4-small`, `t4-medium`, `l4x1`, `l4x4`, `l40sx1`, `l40sx4`, `l40sx8`, `a10g-small`, `a10g-large`, `a10g-largex2`, `a10g-largex4`, `a100-large`, `h100`, `h100x8` +- `hf datasets info DATASET_ID` — Get info about a dataset on the Hub. Output is in JSON format. `[--revision TEXT --expand TEXT]` +- `hf datasets list` — List datasets on the Hub. `[--search TEXT --author TEXT --filter TEXT --sort CHOICE --limit INTEGER --expand TEXT --format CHOICE --quiet]` +- `hf datasets parquet DATASET_ID` — List parquet file URLs available for a dataset. `[--subset TEXT --split TEXT --format CHOICE --quiet]` +- `hf datasets sql SQL` — Execute a raw SQL query with DuckDB against dataset parquet URLs. `[--format CHOICE]` -## Common Patterns +### `hf discussions` — Manage discussions and pull requests on the Hub. -### Download and Use Model Locally -```bash -# Download to local directory for deployment -hf download meta-llama/Llama-3.2-1B-Instruct --local-dir ./model +- `hf discussions close REPO_ID NUM` — Close a discussion or pull request. 
`[--comment TEXT --yes --type CHOICE]` +- `hf discussions comment REPO_ID NUM` — Comment on a discussion or pull request. `[--body TEXT --body-file PATH --type CHOICE]` +- `hf discussions create REPO_ID --title TEXT` — Create a new discussion or pull request on a repo. `[--body TEXT --body-file PATH --pull-request --type CHOICE]` +- `hf discussions diff REPO_ID NUM` — Show the diff of a pull request. `[--type CHOICE]` +- `hf discussions info REPO_ID NUM` — Get info about a discussion or pull request. `[--comments --diff --no-color --type CHOICE --format CHOICE]` +- `hf discussions list REPO_ID` — List discussions and pull requests on a repo. `[--status CHOICE --kind CHOICE --author TEXT --limit INTEGER --type CHOICE --format CHOICE --quiet]` +- `hf discussions merge REPO_ID NUM` — Merge a pull request. `[--comment TEXT --yes --type CHOICE]` +- `hf discussions rename REPO_ID NUM NEW_TITLE` — Rename a discussion or pull request. `[--type CHOICE]` +- `hf discussions reopen REPO_ID NUM` — Reopen a closed discussion or pull request. `[--comment TEXT --yes --type CHOICE]` -# Or use cache and get path -MODEL_PATH=$(hf download meta-llama/Llama-3.2-1B-Instruct --quiet) -``` +### `hf endpoints` — Manage Hugging Face Inference Endpoints. -### Publish Model/Dataset -```bash -hf repo create my-username/my-model --private -hf upload my-username/my-model ./output . --commit-message="Initial release" -hf repo tag create my-username/my-model v1.0 -``` +- `hf endpoints catalog deploy --repo TEXT` — Deploy an Inference Endpoint from the Model Catalog. `[--name TEXT --accelerator TEXT --namespace TEXT]` +- `hf endpoints catalog list` — List available Catalog models. +- `hf endpoints delete NAME` — Delete an Inference Endpoint permanently. `[--namespace TEXT --yes]` +- `hf endpoints deploy NAME --repo TEXT --framework TEXT --accelerator TEXT --instance-size TEXT --instance-type TEXT --region TEXT --vendor TEXT` — Deploy an Inference Endpoint from a Hub repository. 
`[--namespace TEXT --task TEXT --min-replica INTEGER --max-replica INTEGER --scale-to-zero-timeout INTEGER --scaling-metric CHOICE --scaling-threshold FLOAT]` +- `hf endpoints describe NAME` — Get information about an existing endpoint. `[--namespace TEXT]` +- `hf endpoints list` — Lists all Inference Endpoints for the given namespace. `[--namespace TEXT --format CHOICE --quiet]` +- `hf endpoints pause NAME` — Pause an Inference Endpoint. `[--namespace TEXT]` +- `hf endpoints resume NAME` — Resume an Inference Endpoint. `[--namespace TEXT --fail-if-already-running]` +- `hf endpoints scale-to-zero NAME` — Scale an Inference Endpoint to zero. `[--namespace TEXT]` +- `hf endpoints update NAME` — Update an existing endpoint. `[--namespace TEXT --repo TEXT --accelerator TEXT --instance-size TEXT --instance-type TEXT --framework TEXT --revision TEXT --task TEXT --min-replica INTEGER --max-replica INTEGER --scale-to-zero-timeout INTEGER --scaling-metric CHOICE --scaling-threshold FLOAT]` -### Sync Space with Local -```bash -hf upload my-username/my-space . . --repo-type space \ - --exclude="logs/*" --delete="*" --commit-message="Sync" -``` +### `hf extensions` — Manage hf CLI extensions. -### Check Cache Usage -```bash -hf cache ls # See all cached repos and sizes -hf cache rm model/gpt2 # Remove a repo from cache -``` +- `hf extensions exec NAME` — Execute an installed extension. +- `hf extensions install REPO_ID` — Install an extension from a public GitHub repository. `[--force]` +- `hf extensions list` — List installed extension commands. `[--format CHOICE --quiet]` +- `hf extensions remove NAME` — Remove an installed extension. +- `hf extensions search` — Search extensions available on GitHub (tagged with 'hf-extension' topic). `[--format CHOICE --quiet]` -## Key Options +### `hf jobs` — Run and manage Jobs on the Hub. 
-- `--repo-type`: `model` (default), `dataset`, `space` -- `--revision`: Branch, tag, or commit hash -- `--token`: Override authentication -- `--quiet`: Output only essential info (paths/URLs) +- `hf jobs cancel JOB_ID` — Cancel a Job `[--namespace TEXT]` +- `hf jobs hardware` — List available hardware options for Jobs +- `hf jobs inspect JOB_IDS` — Display detailed information on one or more Jobs `[--namespace TEXT]` +- `hf jobs logs JOB_ID` — Fetch the logs of a Job. `[--follow --tail INTEGER --namespace TEXT]` +- `hf jobs ps` — List Jobs. `[--all --namespace TEXT --filter TEXT --format TEXT --quiet]` +- `hf jobs run IMAGE COMMAND` — Run a Job. `[--env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --flavor CHOICE --timeout TEXT --detach --namespace TEXT]` +- `hf jobs scheduled delete SCHEDULED_JOB_ID` — Delete a scheduled Job. `[--namespace TEXT]` +- `hf jobs scheduled inspect SCHEDULED_JOB_IDS` — Display detailed information on one or more scheduled Jobs `[--namespace TEXT]` +- `hf jobs scheduled ps` — List scheduled Jobs `[--all --namespace TEXT --filter TEXT --format TEXT --quiet]` +- `hf jobs scheduled resume SCHEDULED_JOB_ID` — Resume (unpause) a scheduled Job. `[--namespace TEXT]` +- `hf jobs scheduled run SCHEDULE IMAGE COMMAND` — Schedule a Job. `[--suspend --concurrency --env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --flavor CHOICE --timeout TEXT --namespace TEXT]` +- `hf jobs scheduled suspend SCHEDULED_JOB_ID` — Suspend (pause) a scheduled Job. 
`[--namespace TEXT]` +- `hf jobs scheduled uv run SCHEDULE SCRIPT` — Run a UV script (local file or URL) on HF infrastructure `[--suspend --concurrency --image TEXT --flavor CHOICE --env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --timeout TEXT --namespace TEXT --with TEXT --python TEXT]` +- `hf jobs stats` — Fetch the resource usage statistics and metrics of Jobs `[--namespace TEXT]` +- `hf jobs uv run SCRIPT` — Run a UV script (local file or URL) on HF infrastructure `[--image TEXT --flavor CHOICE --env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --timeout TEXT --detach --namespace TEXT --with TEXT --python TEXT]` -## References +### `hf models` — Interact with models on the Hub. -- **Complete command reference**: See references/commands.md -- **Workflow examples**: See references/examples.md +- `hf models info MODEL_ID` — Get info about a model on the Hub. Output is in JSON format. `[--revision TEXT --expand TEXT]` +- `hf models list` — List models on the Hub. `[--search TEXT --author TEXT --filter TEXT --num-parameters TEXT --sort CHOICE --limit INTEGER --expand TEXT --format CHOICE --quiet]` + +### `hf papers` — Interact with papers on the Hub. + +- `hf papers info PAPER_ID` — Get info about a paper on the Hub. Output is in JSON format. +- `hf papers list` — List daily papers on the Hub. `[--date TEXT --week TEXT --month TEXT --submitter TEXT --sort CHOICE --limit INTEGER --format CHOICE --quiet]` +- `hf papers read PAPER_ID` — Read a paper as markdown. +- `hf papers search QUERY` — Search papers on the Hub. `[--limit INTEGER --format CHOICE --quiet]` + +### `hf repos` — Manage repos on the Hub. + +- `hf repos branch create REPO_ID BRANCH` — Create a new branch for a repo on the Hub. `[--revision TEXT --type CHOICE --exist-ok]` +- `hf repos branch delete REPO_ID BRANCH` — Delete a branch from a repo on the Hub. 
`[--type CHOICE]` +- `hf repos create REPO_ID` — Create a new repo on the Hub. `[--type CHOICE --space-sdk TEXT --private --public --protected --exist-ok --resource-group-id TEXT --flavor TEXT --storage TEXT --sleep-time INTEGER --secrets TEXT --secrets-file TEXT --env TEXT --env-file TEXT]` +- `hf repos delete REPO_ID` — Delete a repo from the Hub. This is an irreversible operation. `[--type CHOICE --missing-ok]` +- `hf repos delete-files REPO_ID PATTERNS` — Delete files from a repo on the Hub. `[--type CHOICE --revision TEXT --commit-message TEXT --commit-description TEXT --create-pr]` +- `hf repos duplicate FROM_ID` — Duplicate a repo on the Hub (model, dataset, or Space). `[--type CHOICE --private --public --protected --exist-ok --flavor TEXT --storage TEXT --sleep-time INTEGER --secrets TEXT --secrets-file TEXT --env TEXT --env-file TEXT]` +- `hf repos move FROM_ID TO_ID` — Move a repository from a namespace to another namespace. `[--type CHOICE]` +- `hf repos settings REPO_ID` — Update the settings of a repository. `[--gated CHOICE --private --public --protected --type CHOICE]` +- `hf repos tag create REPO_ID TAG` — Create a tag for a repo. `[--message TEXT --revision TEXT --type CHOICE]` +- `hf repos tag delete REPO_ID TAG` — Delete a tag for a repo. `[--yes --type CHOICE]` +- `hf repos tag list REPO_ID` — List tags for a repo. `[--type CHOICE]` + +### `hf skills` — Manage skills for AI assistants. + +- `hf skills add` — Download a skill and install it for an AI assistant. `[--claude --codex --cursor --opencode --global --dest PATH --force]` +- `hf skills preview` — Print the generated SKILL.md to stdout. + +### `hf spaces` — Interact with spaces on the Hub. + +- `hf spaces dev-mode SPACE_ID` — Enable or disable dev mode on a Space. `[--stop]` +- `hf spaces hot-reload SPACE_ID` — Hot-reload any Python file of a Space without a full rebuild + restart. 
`[--local-file TEXT --skip-checks --skip-summary]` +- `hf spaces info SPACE_ID` — Get info about a space on the Hub. Output is in JSON format. `[--revision TEXT --expand TEXT]` +- `hf spaces list` — List spaces on the Hub. `[--search TEXT --author TEXT --filter TEXT --sort CHOICE --limit INTEGER --expand TEXT --format CHOICE --quiet]` + +### `hf webhooks` — Manage webhooks on the Hub. + +- `hf webhooks create --watch TEXT` — Create a new webhook. `[--url TEXT --job-id TEXT --domain CHOICE --secret TEXT]` +- `hf webhooks delete WEBHOOK_ID` — Delete a webhook permanently. `[--yes]` +- `hf webhooks disable WEBHOOK_ID` — Disable an active webhook. +- `hf webhooks enable WEBHOOK_ID` — Enable a disabled webhook. +- `hf webhooks info WEBHOOK_ID` — Show full details for a single webhook as JSON. +- `hf webhooks list` — List all webhooks for the current user. `[--format CHOICE --quiet]` +- `hf webhooks update WEBHOOK_ID` — Update an existing webhook. Only provided options are changed. `[--url TEXT --watch TEXT --domain CHOICE --secret TEXT]` + +## Common options + +- `--format` — Output format: `--format json` (or `--json`) or `--format table` (default). +- `-q / --quiet` — Minimal output. +- `--revision` — Git revision id which can be a branch name, a tag, or a commit hash. +- `--token` — Use a User Access Token. Prefer setting `HF_TOKEN` env var instead of passing `--token`. +- `--type` — The type of repository (model, dataset, or space). + +## Mounting repos as local filesystems + +To mount Hub repositories or buckets as local filesystems — no download, no copy, no waiting — use `hf-mount`. Files are fetched on demand. 
GitHub: https://github.com/huggingface/hf-mount + +Install: `curl -fsSL https://raw.githubusercontent.com/huggingface/hf-mount/main/install.sh | sh` + +Some command examples: +- `hf-mount start repo openai-community/gpt2 /tmp/gpt2` — mount a repo (read-only) +- `hf-mount start --hf-token $HF_TOKEN bucket myuser/my-bucket /tmp/data` — mount a bucket (read-write) +- `hf-mount status` / `hf-mount stop /tmp/data` — list or unmount + +## Tips + +- Use `hf --help` for full options, descriptions, usage, and real-world examples +- Authenticate with `HF_TOKEN` env var (recommended) or with `--token` diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/SKILL.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/SKILL.md new file mode 100644 index 00000000..05bc57ab --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/SKILL.md @@ -0,0 +1,213 @@ +--- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-community-evals" +name: hugging-face-community-evals +description: Run local evaluations for Hugging Face Hub models with inspect-ai or lighteval. +risk: unknown +--- + +# Overview + +## When to Use + +Use this skill for local model evaluation, backend selection, and GPU smoke tests outside the Hugging Face Jobs workflow. + +This skill is for **running evaluations against models on the Hugging Face Hub on local hardware**. 
+ +It covers: +- `inspect-ai` with local inference +- `lighteval` with local inference +- choosing between `vllm`, Hugging Face Transformers, and `accelerate` +- smoke tests, task selection, and backend fallback strategy + +It does **not** cover: +- Hugging Face Jobs orchestration +- model-card or `model-index` edits +- README table extraction +- Artificial Analysis imports +- `.eval_results` generation or publishing +- PR creation or community-evals automation + +If the user wants to **run the same eval remotely on Hugging Face Jobs**, hand off to the `hugging-face-jobs` skill and pass it one of the local scripts in this skill. + +If the user wants to **publish results into the community evals workflow**, stop after generating the evaluation run and hand off that publishing step to `~/code/community-evals`. + +> All paths below are relative to the directory containing this `SKILL.md`. + +# When To Use Which Script + +| Use case | Script | +|---|---| +| Local `inspect-ai` eval on a Hub model via inference providers | `scripts/inspect_eval_uv.py` | +| Local GPU eval with `inspect-ai` using `vllm` or Transformers | `scripts/inspect_vllm_uv.py` | +| Local GPU eval with `lighteval` using `vllm` or `accelerate` | `scripts/lighteval_vllm_uv.py` | +| Extra command patterns | `examples/USAGE_EXAMPLES.md` | + +# Prerequisites + +- Prefer `uv run` for local execution. +- Set `HF_TOKEN` for gated/private models. +- For local GPU runs, verify GPU access before starting: + +```bash +uv --version +printenv HF_TOKEN >/dev/null +nvidia-smi +``` + +If `nvidia-smi` is unavailable, either: +- use `scripts/inspect_eval_uv.py` for lighter provider-backed evaluation, or +- hand off to the `hugging-face-jobs` skill if the user wants remote compute. + +# Core Workflow + +1. Choose the evaluation framework. + - Use `inspect-ai` when you want explicit task control and inspect-native flows. 
+ - Use `lighteval` when the benchmark is naturally expressed as a lighteval task string, especially leaderboard-style tasks. +2. Choose the inference backend. + - Prefer `vllm` for throughput on supported architectures. + - Use Hugging Face Transformers (`--backend hf`) or `accelerate` as compatibility fallbacks. +3. Start with a smoke test. + - `inspect-ai`: add `--limit 10` or similar. + - `lighteval`: add `--max-samples 10`. +4. Scale up only after the smoke test passes. +5. If the user wants remote execution, hand off to `hugging-face-jobs` with the same script + args. + +# Quick Start + +## Option A: inspect-ai with local inference providers path + +Best when the model is already supported by Hugging Face Inference Providers and you want the lowest local setup overhead. + +```bash +uv run scripts/inspect_eval_uv.py \ + --model meta-llama/Llama-3.2-1B \ + --task mmlu \ + --limit 20 +``` + +Use this path when: +- you want a quick local smoke test +- you do not need direct GPU control +- the task already exists in `inspect-evals` + +## Option B: inspect-ai on Local GPU + +Best when you need to load the Hub model directly, use `vllm`, or fall back to Transformers for unsupported architectures. + +Local GPU: + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model meta-llama/Llama-3.2-1B \ + --task gsm8k \ + --limit 20 +``` + +Transformers fallback: + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model microsoft/phi-2 \ + --task mmlu \ + --backend hf \ + --trust-remote-code \ + --limit 20 +``` + +## Option C: lighteval on Local GPU + +Best when the task is naturally expressed as a `lighteval` task string, especially Open LLM Leaderboard style benchmarks. 
+ +Local GPU: + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model meta-llama/Llama-3.2-3B-Instruct \ + --tasks "leaderboard|mmlu|5,leaderboard|gsm8k|5" \ + --max-samples 20 \ + --use-chat-template +``` + +`accelerate` fallback: + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model microsoft/phi-2 \ + --tasks "leaderboard|mmlu|5" \ + --backend accelerate \ + --trust-remote-code \ + --max-samples 20 +``` + +# Remote Execution Boundary + +This skill intentionally stops at **local execution and backend selection**. + +If the user wants to: +- run these scripts on Hugging Face Jobs +- pick remote hardware +- pass secrets to remote jobs +- schedule recurring runs +- inspect / cancel / monitor jobs + +then switch to the **`hugging-face-jobs`** skill and pass it one of these scripts plus the chosen arguments. + +# Task Selection + +`inspect-ai` examples: +- `mmlu` +- `gsm8k` +- `hellaswag` +- `arc_challenge` +- `truthfulqa` +- `winogrande` +- `humaneval` + +`lighteval` task strings use `suite|task|num_fewshot`: +- `leaderboard|mmlu|5` +- `leaderboard|gsm8k|5` +- `leaderboard|arc_challenge|25` +- `lighteval|hellaswag|0` + +Multiple `lighteval` tasks can be comma-separated in `--tasks`. + +# Backend Selection + +- Prefer `inspect_vllm_uv.py --backend vllm` for fast GPU inference on supported architectures. +- Use `inspect_vllm_uv.py --backend hf` when `vllm` does not support the model. +- Prefer `lighteval_vllm_uv.py --backend vllm` for throughput on supported models. +- Use `lighteval_vllm_uv.py --backend accelerate` as the compatibility fallback. +- Use `inspect_eval_uv.py` when Inference Providers already cover the model and you do not need direct GPU control. 
+ +# Hardware Guidance + +| Model size | Suggested local hardware | +|---|---| +| `< 3B` | consumer GPU / Apple Silicon / small dev GPU | +| `3B - 13B` | stronger local GPU | +| `13B+` | high-memory local GPU or hand off to `hugging-face-jobs` | + +For smoke tests, prefer cheaper local runs plus `--limit` or `--max-samples`. + +# Troubleshooting + +- CUDA or vLLM OOM: + - reduce `--batch-size` + - reduce `--gpu-memory-utilization` + - switch to a smaller model for the smoke test + - if necessary, hand off to `hugging-face-jobs` +- Model unsupported by `vllm`: + - switch to `--backend hf` for `inspect-ai` + - switch to `--backend accelerate` for `lighteval` +- Gated/private repo access fails: + - verify `HF_TOKEN` +- Custom model code required: + - add `--trust-remote-code` + +# Examples + +See: +- `examples/USAGE_EXAMPLES.md` for local command patterns +- `scripts/inspect_eval_uv.py` +- `scripts/inspect_vllm_uv.py` +- `scripts/lighteval_vllm_uv.py` diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/examples/.env.example b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/examples/.env.example new file mode 100644 index 00000000..26d9b9b4 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/examples/.env.example @@ -0,0 +1,3 @@ +# Hugging Face Token (required for gated/private models) +# Get your token at: https://huggingface.co/settings/tokens +HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxx diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md new file mode 100644 index 00000000..64c24334 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md @@ -0,0 +1,101 @@ +# Usage Examples + +This document provides practical examples 
for **running evaluations locally** against Hugging Face Hub models. + +## What this skill covers + +- `inspect-ai` local runs +- `inspect-ai` with `vllm` or Transformers backends +- `lighteval` local runs with `vllm` or `accelerate` +- smoke tests and backend fallback patterns + +## What this skill does NOT cover + +- `model-index` +- `.eval_results` +- community eval publication workflows +- model-card PR creation +- Hugging Face Jobs orchestration + +If you want to run these same scripts remotely, use the `hugging-face-jobs` skill and pass one of the scripts in `scripts/`. + +## Setup + +```bash +cd skills/hugging-face-community-evals +export HF_TOKEN=hf_xxx +uv --version +``` + +For local GPU runs: + +```bash +nvidia-smi +``` + +## inspect-ai examples + +### Quick smoke test + +```bash +uv run scripts/inspect_eval_uv.py \ + --model meta-llama/Llama-3.2-1B \ + --task mmlu \ + --limit 10 +``` + +### Local GPU with vLLM + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model meta-llama/Llama-3.1-8B-Instruct \ + --task gsm8k \ + --limit 20 +``` + +### Transformers fallback + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model microsoft/phi-2 \ + --task mmlu \ + --backend hf \ + --trust-remote-code \ + --limit 20 +``` + +## lighteval examples + +### Single task + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model meta-llama/Llama-3.2-3B-Instruct \ + --tasks "leaderboard|mmlu|5" \ + --max-samples 20 +``` + +### Multiple tasks + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model meta-llama/Llama-3.2-3B-Instruct \ + --tasks "leaderboard|mmlu|5,leaderboard|gsm8k|5" \ + --max-samples 20 \ + --use-chat-template +``` + +### accelerate fallback + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model microsoft/phi-2 \ + --tasks "leaderboard|mmlu|5" \ + --backend accelerate \ + --trust-remote-code \ + --max-samples 20 +``` + +## Hand-off to Hugging Face Jobs + +When local hardware is not enough, switch to the `hugging-face-jobs` skill and run one of these 
scripts remotely. Keep the script path and args; move the orchestration there. diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/inspect_eval_uv.py b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/inspect_eval_uv.py new file mode 100644 index 00000000..d398bc60 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/inspect_eval_uv.py @@ -0,0 +1,104 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "inspect-ai>=0.3.0", +# "inspect-evals", +# "openai", +# ] +# /// + +""" +Entry point script for running inspect-ai evaluations against Hugging Face inference providers. +""" + +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from pathlib import Path +from typing import Optional + + +def _inspect_evals_tasks_root() -> Optional[Path]: + """Return the installed inspect_evals package path if available.""" + try: + import inspect_evals + + return Path(inspect_evals.__file__).parent + except Exception: + return None + + +def _normalize_task(task: str) -> str: + """Allow lighteval-style `suite|task|shots` strings by keeping the task name.""" + if "|" in task: + parts = task.split("|") + if len(parts) >= 2 and parts[1]: + return parts[1] + return task + + +def main() -> None: + parser = argparse.ArgumentParser(description="Inspect-ai job runner") + parser.add_argument("--model", required=True, help="Model ID on Hugging Face Hub") + parser.add_argument("--task", required=True, help="inspect-ai task to execute") + parser.add_argument("--limit", type=int, default=None, help="Limit number of samples to evaluate") + parser.add_argument( + "--tasks-root", + default=None, + help="Optional path to inspect task files. 
Defaults to the installed inspect_evals package.", + ) + parser.add_argument( + "--sandbox", + default="local", + help="Sandbox backend to use (default: local for HF jobs without Docker).", + ) + args = parser.parse_args() + + # Ensure downstream libraries can read the token passed as a secret + hf_token = os.getenv("HF_TOKEN") + if hf_token: + os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token) + os.environ.setdefault("HF_HUB_TOKEN", hf_token) + + task = _normalize_task(args.task) + tasks_root = Path(args.tasks_root) if args.tasks_root else _inspect_evals_tasks_root() + if tasks_root and not tasks_root.exists(): + tasks_root = None + + cmd = [ + "inspect", + "eval", + task, + "--model", + f"hf-inference-providers/{args.model}", + "--log-level", + "info", + # Reduce batch size to avoid OOM errors (default is 32) + "--max-connections", + "1", + # Set a small positive temperature (HF doesn't allow temperature=0) + "--temperature", + "0.001", + ] + + if args.sandbox: + cmd.extend(["--sandbox", args.sandbox]) + + if args.limit: + cmd.extend(["--limit", str(args.limit)]) + + try: + subprocess.run(cmd, check=True, cwd=tasks_root) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + location = f" (cwd={tasks_root})" if tasks_root else "" + print(f"Evaluation failed with exit code {exc.returncode}{location}", file=sys.stderr) + raise + + +if __name__ == "__main__": + main() + diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py new file mode 100644 index 00000000..f1454c5a --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py @@ -0,0 +1,306 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "inspect-ai>=0.3.0", +# "inspect-evals", +# "vllm>=0.4.0", +# "torch>=2.0.0", +# 
"transformers>=4.40.0", +# ] +# /// + +""" +Entry point script for running inspect-ai evaluations with vLLM or HuggingFace Transformers backend. + +This script runs evaluations on custom HuggingFace models using local GPU inference, +separate from inference provider scripts (which use external APIs). + +Usage (standalone): + uv run scripts/inspect_vllm_uv.py --model "meta-llama/Llama-3.2-1B" --task "mmlu" + +Model backends: + - vllm: Fast inference with vLLM (recommended for large models) + - hf: HuggingFace Transformers backend (broader model compatibility) +""" + +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from typing import Optional + + +def setup_environment() -> None: + """Configure environment variables for HuggingFace authentication.""" + hf_token = os.getenv("HF_TOKEN") + if hf_token: + os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token) + os.environ.setdefault("HF_HUB_TOKEN", hf_token) + + +def run_inspect_vllm( + model_id: str, + task: str, + limit: Optional[int] = None, + max_connections: int = 4, + temperature: float = 0.0, + tensor_parallel_size: int = 1, + gpu_memory_utilization: float = 0.8, + dtype: str = "auto", + trust_remote_code: bool = False, + log_level: str = "info", +) -> None: + """ + Run inspect-ai evaluation with vLLM backend. 
+ + Args: + model_id: HuggingFace model ID + task: inspect-ai task to execute (e.g., "mmlu", "gsm8k") + limit: Limit number of samples to evaluate + max_connections: Maximum concurrent connections + temperature: Sampling temperature + tensor_parallel_size: Number of GPUs for tensor parallelism + gpu_memory_utilization: GPU memory fraction + dtype: Data type (auto, float16, bfloat16) + trust_remote_code: Allow remote code execution + log_level: Logging level + """ + setup_environment() + + model_spec = f"vllm/{model_id}" + cmd = [ + "inspect", + "eval", + task, + "--model", + model_spec, + "--log-level", + log_level, + "--max-connections", + str(max_connections), + ] + + # vLLM supports temperature=0 unlike HF inference providers + cmd.extend(["--temperature", str(temperature)]) + + # Older inspect-ai CLI versions do not support --model-args; rely on defaults + # and let vLLM choose sensible settings for small models. + if tensor_parallel_size != 1: + cmd.extend(["--tensor-parallel-size", str(tensor_parallel_size)]) + if gpu_memory_utilization != 0.8: + cmd.extend(["--gpu-memory-utilization", str(gpu_memory_utilization)]) + if dtype != "auto": + cmd.extend(["--dtype", dtype]) + if trust_remote_code: + cmd.append("--trust-remote-code") + + if limit: + cmd.extend(["--limit", str(limit)]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def run_inspect_hf( + model_id: str, + task: str, + limit: Optional[int] = None, + max_connections: int = 1, + temperature: float = 0.001, + device: str = "auto", + dtype: str = "auto", + trust_remote_code: bool = False, + log_level: str = "info", +) -> None: + """ + Run inspect-ai evaluation with HuggingFace Transformers backend. + + Use this when vLLM doesn't support the model architecture. 
+ + Args: + model_id: HuggingFace model ID + task: inspect-ai task to execute + limit: Limit number of samples + max_connections: Maximum concurrent connections (keep low for memory) + temperature: Sampling temperature + device: Device to use (auto, cuda, cpu) + dtype: Data type + trust_remote_code: Allow remote code execution + log_level: Logging level + """ + setup_environment() + + model_spec = f"hf/{model_id}" + + cmd = [ + "inspect", + "eval", + task, + "--model", + model_spec, + "--log-level", + log_level, + "--max-connections", + str(max_connections), + "--temperature", + str(temperature), + ] + + if device != "auto": + cmd.extend(["--device", device]) + if dtype != "auto": + cmd.extend(["--dtype", dtype]) + if trust_remote_code: + cmd.append("--trust-remote-code") + + if limit: + cmd.extend(["--limit", str(limit)]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Run inspect-ai evaluations with vLLM or HuggingFace Transformers on custom models", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Run MMLU with vLLM backend + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu + + # Run with HuggingFace Transformers backend + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu --backend hf + + # Run with limited samples for testing + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu --limit 10 + + # Run on multiple GPUs with tensor parallelism + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-70B --task mmlu --tensor-parallel-size 4 + +Available tasks (from inspect-evals): + - mmlu: Massive Multitask Language Understanding + - gsm8k: Grade 
School Math + - hellaswag: Common sense reasoning + - arc_challenge: AI2 Reasoning Challenge + - truthfulqa: TruthfulQA benchmark + - winogrande: Winograd Schema Challenge + - humaneval: Code generation (HumanEval) + + """, + ) + + parser.add_argument( + "--model", + required=True, + help="HuggingFace model ID (e.g., meta-llama/Llama-3.2-1B)", + ) + parser.add_argument( + "--task", + required=True, + help="inspect-ai task to execute (e.g., mmlu, gsm8k)", + ) + parser.add_argument( + "--backend", + choices=["vllm", "hf"], + default="vllm", + help="Model backend (default: vllm)", + ) + parser.add_argument( + "--limit", + type=int, + default=None, + help="Limit number of samples to evaluate", + ) + parser.add_argument( + "--max-connections", + type=int, + default=None, + help="Maximum concurrent connections (default: 4 for vllm, 1 for hf)", + ) + parser.add_argument( + "--temperature", + type=float, + default=None, + help="Sampling temperature (default: 0.0 for vllm, 0.001 for hf)", + ) + parser.add_argument( + "--tensor-parallel-size", + type=int, + default=1, + help="Number of GPUs for tensor parallelism (vLLM only, default: 1)", + ) + parser.add_argument( + "--gpu-memory-utilization", + type=float, + default=0.8, + help="GPU memory fraction to use (vLLM only, default: 0.8)", + ) + parser.add_argument( + "--dtype", + default="auto", + choices=["auto", "float16", "bfloat16", "float32"], + help="Data type for model weights (default: auto)", + ) + parser.add_argument( + "--device", + default="auto", + help="Device for HF backend (auto, cuda, cpu)", + ) + parser.add_argument( + "--trust-remote-code", + action="store_true", + help="Allow executing remote code from model repository", + ) + parser.add_argument( + "--log-level", + default="info", + choices=["debug", "info", "warning", "error"], + help="Logging level (default: info)", + ) + + args = parser.parse_args() + + if args.backend == "vllm": + run_inspect_vllm( + model_id=args.model, + task=args.task, + 
limit=args.limit, + max_connections=args.max_connections or 4, + temperature=args.temperature if args.temperature is not None else 0.0, + tensor_parallel_size=args.tensor_parallel_size, + gpu_memory_utilization=args.gpu_memory_utilization, + dtype=args.dtype, + trust_remote_code=args.trust_remote_code, + log_level=args.log_level, + ) + else: + run_inspect_hf( + model_id=args.model, + task=args.task, + limit=args.limit, + max_connections=args.max_connections or 1, + temperature=args.temperature if args.temperature is not None else 0.001, + device=args.device, + dtype=args.dtype, + trust_remote_code=args.trust_remote_code, + log_level=args.log_level, + ) + + +if __name__ == "__main__": + main() diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py new file mode 100644 index 00000000..91ba83b3 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py @@ -0,0 +1,297 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "lighteval[accelerate,vllm]>=0.6.0", +# "torch>=2.0.0", +# "transformers>=4.40.0", +# "accelerate>=0.30.0", +# "vllm>=0.4.0", +# ] +# /// + +""" +Entry point script for running lighteval evaluations with local GPU backends. + +This script runs evaluations using vLLM or accelerate on custom HuggingFace models. +It is separate from inference provider scripts and evaluates models directly on local hardware. 
+ +Usage (standalone): + uv run scripts/lighteval_vllm_uv.py --model "meta-llama/Llama-3.2-1B" --tasks "leaderboard|mmlu|5" + +""" + +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from typing import Optional + + +def setup_environment() -> None: + """Configure environment variables for HuggingFace authentication.""" + hf_token = os.getenv("HF_TOKEN") + if hf_token: + os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token) + os.environ.setdefault("HF_HUB_TOKEN", hf_token) + + +def run_lighteval_vllm( + model_id: str, + tasks: str, + output_dir: Optional[str] = None, + max_samples: Optional[int] = None, + batch_size: int = 1, + tensor_parallel_size: int = 1, + gpu_memory_utilization: float = 0.8, + dtype: str = "auto", + trust_remote_code: bool = False, + use_chat_template: bool = False, + system_prompt: Optional[str] = None, +) -> None: + """ + Run lighteval with vLLM backend for efficient GPU inference. + + Args: + model_id: HuggingFace model ID (e.g., "meta-llama/Llama-3.2-1B") + tasks: Task specification (e.g., "leaderboard|mmlu|5" or "lighteval|hellaswag|0") + output_dir: Directory for evaluation results + max_samples: Limit number of samples per task + batch_size: Batch size for evaluation + tensor_parallel_size: Number of GPUs for tensor parallelism + gpu_memory_utilization: GPU memory fraction to use (0.0-1.0) + dtype: Data type for model weights (auto, float16, bfloat16) + trust_remote_code: Allow executing remote code from model repo + use_chat_template: Apply chat template for conversational models + system_prompt: System prompt for chat models + """ + setup_environment() + + # Build lighteval vllm command + cmd = [ + "lighteval", + "vllm", + model_id, + tasks, + "--batch-size", str(batch_size), + "--tensor-parallel-size", str(tensor_parallel_size), + "--gpu-memory-utilization", str(gpu_memory_utilization), + "--dtype", dtype, + ] + + if output_dir: + cmd.extend(["--output-dir", output_dir]) + + if 
max_samples: + cmd.extend(["--max-samples", str(max_samples)]) + + if trust_remote_code: + cmd.append("--trust-remote-code") + + if use_chat_template: + cmd.append("--use-chat-template") + + if system_prompt: + cmd.extend(["--system-prompt", system_prompt]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def run_lighteval_accelerate( + model_id: str, + tasks: str, + output_dir: Optional[str] = None, + max_samples: Optional[int] = None, + batch_size: int = 1, + dtype: str = "bfloat16", + trust_remote_code: bool = False, + use_chat_template: bool = False, + system_prompt: Optional[str] = None, +) -> None: + """ + Run lighteval with accelerate backend for multi-GPU distributed inference. + + Use this backend when vLLM is not available or for models not supported by vLLM. 
+ + Args: + model_id: HuggingFace model ID + tasks: Task specification + output_dir: Directory for evaluation results + max_samples: Limit number of samples per task + batch_size: Batch size for evaluation + dtype: Data type for model weights + trust_remote_code: Allow executing remote code + use_chat_template: Apply chat template + system_prompt: System prompt for chat models + """ + setup_environment() + + # Build lighteval accelerate command + cmd = [ + "lighteval", + "accelerate", + model_id, + tasks, + "--batch-size", str(batch_size), + "--dtype", dtype, + ] + + if output_dir: + cmd.extend(["--output-dir", output_dir]) + + if max_samples: + cmd.extend(["--max-samples", str(max_samples)]) + + if trust_remote_code: + cmd.append("--trust-remote-code") + + if use_chat_template: + cmd.append("--use-chat-template") + + if system_prompt: + cmd.extend(["--system-prompt", system_prompt]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Run lighteval evaluations with vLLM or accelerate backend on custom HuggingFace models", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Run MMLU evaluation with vLLM + uv run scripts/lighteval_vllm_uv.py --model meta-llama/Llama-3.2-1B --tasks "leaderboard|mmlu|5" + + # Run with accelerate backend instead of vLLM + uv run scripts/lighteval_vllm_uv.py --model meta-llama/Llama-3.2-1B --tasks "leaderboard|mmlu|5" --backend accelerate + + # Run with chat template for instruction-tuned models + uv run scripts/lighteval_vllm_uv.py --model meta-llama/Llama-3.2-1B-Instruct --tasks "leaderboard|mmlu|5" --use-chat-template + + # Run with limited samples for testing + uv run scripts/lighteval_vllm_uv.py --model 
meta-llama/Llama-3.2-1B --tasks "leaderboard|mmlu|5" --max-samples 10 + +Task format: + Tasks use the format: "suite|task|num_fewshot" + - leaderboard|mmlu|5 (MMLU with 5-shot) + - lighteval|hellaswag|0 (HellaSwag zero-shot) + - leaderboard|gsm8k|5 (GSM8K with 5-shot) + - Multiple tasks: "leaderboard|mmlu|5,leaderboard|gsm8k|5" + """, + ) + + parser.add_argument( + "--model", + required=True, + help="HuggingFace model ID (e.g., meta-llama/Llama-3.2-1B)", + ) + parser.add_argument( + "--tasks", + required=True, + help="Task specification (e.g., 'leaderboard|mmlu|5')", + ) + parser.add_argument( + "--backend", + choices=["vllm", "accelerate"], + default="vllm", + help="Inference backend to use (default: vllm)", + ) + parser.add_argument( + "--output-dir", + default=None, + help="Directory for evaluation results", + ) + parser.add_argument( + "--max-samples", + type=int, + default=None, + help="Limit number of samples per task (useful for testing)", + ) + parser.add_argument( + "--batch-size", + type=int, + default=1, + help="Batch size for evaluation (default: 1)", + ) + parser.add_argument( + "--tensor-parallel-size", + type=int, + default=1, + help="Number of GPUs for tensor parallelism (vLLM only, default: 1)", + ) + parser.add_argument( + "--gpu-memory-utilization", + type=float, + default=0.8, + help="GPU memory fraction to use (vLLM only, default: 0.8)", + ) + parser.add_argument( + "--dtype", + default="auto", + choices=["auto", "float16", "bfloat16", "float32"], + help="Data type for model weights (default: auto)", + ) + parser.add_argument( + "--trust-remote-code", + action="store_true", + help="Allow executing remote code from model repository", + ) + parser.add_argument( + "--use-chat-template", + action="store_true", + help="Apply chat template for instruction-tuned/chat models", + ) + parser.add_argument( + "--system-prompt", + default=None, + help="System prompt for chat models", + ) + + args = parser.parse_args() + + if args.backend == "vllm": + 
run_lighteval_vllm( + model_id=args.model, + tasks=args.tasks, + output_dir=args.output_dir, + max_samples=args.max_samples, + batch_size=args.batch_size, + tensor_parallel_size=args.tensor_parallel_size, + gpu_memory_utilization=args.gpu_memory_utilization, + dtype=args.dtype, + trust_remote_code=args.trust_remote_code, + use_chat_template=args.use_chat_template, + system_prompt=args.system_prompt, + ) + else: + run_lighteval_accelerate( + model_id=args.model, + tasks=args.tasks, + output_dir=args.output_dir, + max_samples=args.max_samples, + batch_size=args.batch_size, + dtype=args.dtype if args.dtype != "auto" else "bfloat16", + trust_remote_code=args.trust_remote_code, + use_chat_template=args.use_chat_template, + system_prompt=args.system_prompt, + ) + + +if __name__ == "__main__": + main() diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-dataset-viewer/SKILL.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-dataset-viewer/SKILL.md index 410eb832..624bc78d 100644 --- a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-dataset-viewer/SKILL.md +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-dataset-viewer/SKILL.md @@ -1,127 +1,127 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-datasets" name: hugging-face-dataset-viewer -description: Use this skill for Hugging Face Dataset Viewer API workflows that fetch subset/split metadata, paginate rows, search text, apply filters, download parquet URLs, and read size or statistics. +description: Query Hugging Face datasets through the Dataset Viewer API for splits, rows, search, filters, and parquet links. risk: unknown -source: community --- - + # Hugging Face Dataset Viewer -Use this skill to execute read-only Dataset Viewer API calls for dataset exploration and extraction. - -## Core workflow - -1. Optionally validate dataset availability with `/is-valid`. -2. Resolve `config` + `split` with `/splits`. -3. 
Preview with `/first-rows`. -4. Paginate content with `/rows` using `offset` and `length` (max 100). -5. Use `/search` for text matching and `/filter` for row predicates. -6. Retrieve parquet links via `/parquet` and totals/metadata via `/size` and `/statistics`. - -## Defaults - -- Base URL: `https://datasets-server.huggingface.co` -- Default API method: `GET` -- Query params should be URL-encoded. -- `offset` is 0-based. -- `length` max is usually `100` for row-like endpoints. -- Gated/private datasets require `Authorization: Bearer `. - -## Dataset Viewer - -- `Validate dataset`: `/is-valid?dataset=` -- `List subsets and splits`: `/splits?dataset=` -- `Preview first rows`: `/first-rows?dataset=&config=&split=` -- `Paginate rows`: `/rows?dataset=&config=&split=&offset=&length=` -- `Search text`: `/search?dataset=&config=&split=&query=&offset=&length=` -- `Filter with predicates`: `/filter?dataset=&config=&split=&where=&orderby=&offset=&length=` -- `List parquet shards`: `/parquet?dataset=` -- `Get size totals`: `/size?dataset=` -- `Get column statistics`: `/statistics?dataset=&config=&split=` -- `Get Croissant metadata (if available)`: `/croissant?dataset=` - -Pagination pattern: - -```bash -curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=0&length=100" -curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=100&length=100" -``` - -When pagination is partial, use response fields such as `num_rows_total`, `num_rows_per_page`, and `partial` to drive continuation logic. - -Search/filter notes: - -- `/search` matches string columns (full-text style behavior is internal to the API). -- `/filter` requires predicate syntax in `where` and optional sort in `orderby`. -- Keep filtering and searches read-only and side-effect free. - -## Querying Datasets - -Use `npx parquetlens` with Hub parquet alias paths for SQL querying. 
- -Parquet alias shape: - -```text -hf://datasets//@~parquet///.parquet -``` - -Derive ``, ``, and `` from Dataset Viewer `/parquet`: - -```bash -curl -s "https://datasets-server.huggingface.co/parquet?dataset=cfahlgren1/hub-stats" \ - | jq -r '.parquet_files[] | "hf://datasets/\(.dataset)@~parquet/\(.config)/\(.split)/\(.filename)"' -``` - -Run SQL query: - -```bash -npx -y -p parquetlens -p @parquetlens/sql parquetlens \ - "hf://datasets//@~parquet///.parquet" \ - --sql "SELECT * FROM data LIMIT 20" -``` - -### SQL export - -- CSV: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.csv' (FORMAT CSV, HEADER, DELIMITER ',')"` -- JSON: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.json' (FORMAT JSON)"` -- Parquet: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.parquet' (FORMAT PARQUET)"` - -## Creating and Uploading Datasets - -Use one of these flows depending on dependency constraints. - -Zero local dependencies (Hub UI): - -- Create dataset repo in browser: `https://huggingface.co/new-dataset` -- Upload parquet files in the repo "Files and versions" page. -- Verify shards appear in Dataset Viewer: - -```bash -curl -s "https://datasets-server.huggingface.co/parquet?dataset=/" -``` - -Low dependency CLI flow (`npx @huggingface/hub` / `hfjs`): - -- Set auth token: - -```bash -export HF_TOKEN= -``` - -- Upload parquet folder to a dataset repo (auto-creates repo if missing): - -```bash -npx -y @huggingface/hub upload datasets// ./local/parquet-folder data -``` - -- Upload as private repo on creation: - -```bash -npx -y @huggingface/hub upload datasets// ./local/parquet-folder data --private -``` - -After upload, call `/parquet` to discover `//` values for querying with `@~parquet`. - - ## When to Use -Use this skill when tackling tasks related to its primary domain or functionality as described above. + +Use this skill when you need read-only exploration of a Hugging Face dataset through the Dataset Viewer API. 
+ +Use this skill to execute read-only Dataset Viewer API calls for dataset exploration and extraction. + +## Core workflow + +1. Optionally validate dataset availability with `/is-valid`. +2. Resolve `config` + `split` with `/splits`. +3. Preview with `/first-rows`. +4. Paginate content with `/rows` using `offset` and `length` (max 100). +5. Use `/search` for text matching and `/filter` for row predicates. +6. Retrieve parquet links via `/parquet` and totals/metadata via `/size` and `/statistics`. + +## Defaults + +- Base URL: `https://datasets-server.huggingface.co` +- Default API method: `GET` +- Query params should be URL-encoded. +- `offset` is 0-based. +- `length` max is usually `100` for row-like endpoints. +- Gated/private datasets require `Authorization: Bearer `. + +## Dataset Viewer + +- `Validate dataset`: `/is-valid?dataset=` +- `List subsets and splits`: `/splits?dataset=` +- `Preview first rows`: `/first-rows?dataset=&config=&split=` +- `Paginate rows`: `/rows?dataset=&config=&split=&offset=&length=` +- `Search text`: `/search?dataset=&config=&split=&query=&offset=&length=` +- `Filter with predicates`: `/filter?dataset=&config=&split=&where=&orderby=&offset=&length=` +- `List parquet shards`: `/parquet?dataset=` +- `Get size totals`: `/size?dataset=` +- `Get column statistics`: `/statistics?dataset=&config=&split=` +- `Get Croissant metadata (if available)`: `/croissant?dataset=` + +Pagination pattern: + +```bash +curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=0&length=100" +curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=100&length=100" +``` + +When pagination is partial, use response fields such as `num_rows_total`, `num_rows_per_page`, and `partial` to drive continuation logic. + +Search/filter notes: + +- `/search` matches string columns (full-text style behavior is internal to the API). 
+- `/filter` requires predicate syntax in `where` and optional sort in `orderby`. +- Keep filtering and searches read-only and side-effect free. + +## Querying Datasets + +Use `npx parquetlens` with Hub parquet alias paths for SQL querying. + +Parquet alias shape: + +```text +hf://datasets//@~parquet///.parquet +``` + +Derive ``, ``, and `` from Dataset Viewer `/parquet`: + +```bash +curl -s "https://datasets-server.huggingface.co/parquet?dataset=cfahlgren1/hub-stats" \ + | jq -r '.parquet_files[] | "hf://datasets/\(.dataset)@~parquet/\(.config)/\(.split)/\(.filename)"' +``` + +Run SQL query: + +```bash +npx -y -p parquetlens -p @parquetlens/sql parquetlens \ + "hf://datasets//@~parquet///.parquet" \ + --sql "SELECT * FROM data LIMIT 20" +``` + +### SQL export + +- CSV: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.csv' (FORMAT CSV, HEADER, DELIMITER ',')"` +- JSON: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.json' (FORMAT JSON)"` +- Parquet: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.parquet' (FORMAT PARQUET)"` + +## Creating and Uploading Datasets + +Use one of these flows depending on dependency constraints. + +Zero local dependencies (Hub UI): + +- Create dataset repo in browser: `https://huggingface.co/new-dataset` +- Upload parquet files in the repo "Files and versions" page. +- Verify shards appear in Dataset Viewer: + +```bash +curl -s "https://datasets-server.huggingface.co/parquet?dataset=/" +``` + +Low dependency CLI flow (`npx @huggingface/hub` / `hfjs`): + +- Set auth token: + +```bash +export HF_TOKEN= +``` + +- Upload parquet folder to a dataset repo (auto-creates repo if missing): + +```bash +npx -y @huggingface/hub upload datasets// ./local/parquet-folder data +``` + +- Upload as private repo on creation: + +```bash +npx -y @huggingface/hub upload datasets// ./local/parquet-folder data --private +``` + +After upload, call `/parquet` to discover `//` values for querying with `@~parquet`. 
diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-gradio/SKILL.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-gradio/SKILL.md new file mode 100644 index 00000000..b15c3a39 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-gradio/SKILL.md @@ -0,0 +1,304 @@ +--- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-gradio" +name: hugging-face-gradio +description: Build or edit Gradio apps, layouts, components, and chat interfaces in Python. +risk: unknown +--- + +# Gradio + +## When to Use + +Use this skill when a user wants a Gradio demo, UI prototype, or Python-based ML interface. + +Gradio is a Python library for building interactive web UIs and ML demos. This skill covers the core API, patterns, and examples. + +## Guides + +Detailed guides on specific topics (read these when relevant): + +- [Quickstart](https://www.gradio.app/guides/quickstart) +- [The Interface Class](https://www.gradio.app/guides/the-interface-class) +- [Blocks and Event Listeners](https://www.gradio.app/guides/blocks-and-event-listeners) +- [Controlling Layout](https://www.gradio.app/guides/controlling-layout) +- [More Blocks Features](https://www.gradio.app/guides/more-blocks-features) +- [Custom CSS and JS](https://www.gradio.app/guides/custom-CSS-and-JS) +- [Streaming Outputs](https://www.gradio.app/guides/streaming-outputs) +- [Streaming Inputs](https://www.gradio.app/guides/streaming-inputs) +- [Sharing Your App](https://www.gradio.app/guides/sharing-your-app) +- [Custom HTML Components](https://www.gradio.app/guides/custom-HTML-components) +- [Getting Started with the Python Client](https://www.gradio.app/guides/getting-started-with-the-python-client) +- [Getting Started with the JS Client](https://www.gradio.app/guides/getting-started-with-the-js-client) + +## Core Patterns + +**Interface** (high-level): wraps a function with input/output components. 
+ +```python +import gradio as gr + +def greet(name): + return f"Hello {name}!" + +gr.Interface(fn=greet, inputs="text", outputs="text").launch() +``` + +**Blocks** (low-level): flexible layout with explicit event wiring. + +```python +import gradio as gr + +with gr.Blocks() as demo: + name = gr.Textbox(label="Name") + output = gr.Textbox(label="Greeting") + btn = gr.Button("Greet") + btn.click(fn=lambda n: f"Hello {n}!", inputs=name, outputs=output) + +demo.launch() +``` + +**ChatInterface**: high-level wrapper for chatbot UIs. + +```python +import gradio as gr + +def respond(message, history): + return f"You said: {message}" + +gr.ChatInterface(fn=respond).launch() +``` + +## Key Component Signatures + +### `Textbox(value: str | I18nData | Callable | None = None, type: Literal['text', 'password', 'email'] = "text", lines: int = 1, max_lines: int | None = None, placeholder: str | I18nData | None = None, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, autofocus: bool = False, autoscroll: bool = True, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", text_align: Literal['left', 'right'] | None = None, rtl: bool = False, buttons: list[Literal['copy'] | Button] | None = None, max_length: int | None = None, submit_btn: str | bool | None = False, stop_btn: str | bool | None = False, html_attributes: InputHTMLAttributes | None = None)` +Creates a textarea for user to enter string input or display string output.. 
+ +### `Number(value: float | Callable | None = None, label: str | I18nData | None = None, placeholder: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", buttons: list[Button] | None = None, precision: int | None = None, minimum: float | None = None, maximum: float | None = None, step: float = 1)` +Creates a numeric field for user to enter numbers as input or display numeric output.. + +### `Slider(minimum: float = 0, maximum: float = 100, value: float | Callable | None = None, step: float | None = None, precision: int | None = None, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", randomize: bool = False, buttons: list[Literal['reset']] | None = None)` +Creates a slider that ranges from {minimum} to {maximum} with a step size of {step}.. 
+ +### `Checkbox(value: bool | Callable = False, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", buttons: list[Button] | None = None)` +Creates a checkbox that can be set to `True` or `False`. + +### `Dropdown(choices: Sequence[str | int | float | tuple[str, str | int | float]] | None = None, value: str | int | float | Sequence[str | int | float] | Callable | DefaultValue | None = DefaultValue(), type: Literal['value', 'index'] = "value", multiselect: bool | None = None, allow_custom_value: bool = False, max_choices: int | None = None, filterable: bool = True, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", buttons: list[Button] | None = None)` +Creates a dropdown of choices from which a single entry or multiple entries can be selected (as an input component) or displayed (as an output component).. 
+ +### `Radio(choices: Sequence[str | int | float | tuple[str, str | int | float]] | None = None, value: str | int | float | Callable | None = None, type: Literal['value', 'index'] = "value", label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", rtl: bool = False, buttons: list[Button] | None = None)` +Creates a set of (string or numeric type) radio buttons of which only one can be selected.. + +### `Image(value: str | PIL.Image.Image | np.ndarray | Callable | None = None, format: str = "webp", height: int | str | None = None, width: int | str | None = None, image_mode: Literal['1', 'L', 'P', 'RGB', 'RGBA', 'CMYK', 'YCbCr', 'LAB', 'HSV', 'I', 'F'] | None = "RGB", sources: list[Literal['upload', 'webcam', 'clipboard']] | Literal['upload', 'webcam', 'clipboard'] | None = None, type: Literal['numpy', 'pil', 'filepath'] = "numpy", label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, buttons: list[Literal['download', 'share', 'fullscreen'] | Button] | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, streaming: bool = False, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] 
| None = None, preserved_by_key: list[str] | str | None = "value", webcam_options: WebcamOptions | None = None, placeholder: str | None = None, watermark: WatermarkOptions | None = None)` +Creates an image component that can be used to upload images (as an input) or display images (as an output).. + +### `Audio(value: str | Path | tuple[int, np.ndarray] | Callable | None = None, sources: list[Literal['upload', 'microphone']] | Literal['upload', 'microphone'] | None = None, type: Literal['numpy', 'filepath'] = "numpy", label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, streaming: bool = False, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", format: Literal['wav', 'mp3'] | None = None, autoplay: bool = False, editable: bool = True, buttons: list[Literal['download', 'share'] | Button] | None = None, waveform_options: WaveformOptions | dict | None = None, loop: bool = False, recording: bool = False, subtitles: str | Path | list[dict[str, Any]] | None = None, playback_position: float = 0)` +Creates an audio component that can be used to upload/record audio (as an input) or display audio (as an output).. 
+ +### `Video(value: str | Path | Callable | None = None, format: str | None = None, sources: list[Literal['upload', 'webcam']] | Literal['upload', 'webcam'] | None = None, height: int | str | None = None, width: int | str | None = None, label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", webcam_options: WebcamOptions | None = None, include_audio: bool | None = None, autoplay: bool = False, buttons: list[Literal['download', 'share'] | Button] | None = None, loop: bool = False, streaming: bool = False, watermark: WatermarkOptions | None = None, subtitles: str | Path | list[dict[str, Any]] | None = None, playback_position: float = 0)` +Creates a video component that can be used to upload/record videos (as an input) or display videos (as an output). + +### `File(value: str | list[str] | Callable | None = None, file_count: Literal['single', 'multiple', 'directory'] = "single", file_types: list[str] | None = None, type: Literal['filepath', 'binary'] = "filepath", label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, height: int | str | float | None = None, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] 
| None = None, preserved_by_key: list[str] | str | None = "value", allow_reordering: bool = False, buttons: list[Button] | None = None)` +Creates a file component that allows uploading one or more generic files (when used as an input) or displaying generic files or URLs for download (as output). + +### `Chatbot(value: list[MessageDict | Message] | Callable | None = None, label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, autoscroll: bool = True, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", height: int | str | None = 400, resizable: bool = False, max_height: int | str | None = None, min_height: int | str | None = None, editable: Literal['user', 'all'] | None = None, latex_delimiters: list[dict[str, str | bool]] | None = None, rtl: bool = False, buttons: list[Literal['share', 'copy', 'copy_all'] | Button] | None = None, watermark: str | None = None, avatar_images: tuple[str | Path | None, str | Path | None] | None = None, sanitize_html: bool = True, render_markdown: bool = True, feedback_options: list[str] | tuple[str, ...] | None = ('Like', 'Dislike'), feedback_value: Sequence[str | None] | None = None, line_breaks: bool = True, layout: Literal['panel', 'bubble'] | None = None, placeholder: str | None = None, examples: list[ExampleMessage] | None = None, allow_file_downloads: bool = True, group_consecutive_messages: bool = True, allow_tags: list[str] | bool = True, reasoning_tags: list[tuple[str, str]] | None = None, like_user_message: bool = False)` +Creates a chatbot that displays user-submitted messages and responses.
+ +### `Button(value: str | I18nData | Callable = "Run", every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, variant: Literal['primary', 'secondary', 'stop', 'huggingface'] = "secondary", size: Literal['sm', 'md', 'lg'] = "lg", icon: str | Path | None = None, link: str | None = None, link_target: Literal['_self', '_blank', '_parent', '_top'] = "_self", visible: bool | Literal['hidden'] = True, interactive: bool = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", scale: int | None = None, min_width: int | None = None)` +Creates a button that can be assigned arbitrary .click() events. + +### `Markdown(value: str | I18nData | Callable | None = None, label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, rtl: bool = False, latex_delimiters: list[dict[str, str | bool]] | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", sanitize_html: bool = True, line_breaks: bool = False, header_links: bool = False, height: int | str | None = None, max_height: int | str | None = None, min_height: int | str | None = None, buttons: list[Literal['copy']] | None = None, container: bool = False, padding: bool = False)` +Used to render arbitrary Markdown output. 
+ +### `HTML(value: Any | Callable | None = None, label: str | I18nData | None = None, html_template: str = "${value}", css_template: str = "", js_on_load: str | None = "element.addEventListener('click', function() { trigger('click') });", apply_default_css: bool = True, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool = False, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", min_height: int | None = None, max_height: int | None = None, container: bool = False, padding: bool = False, autoscroll: bool = False, buttons: list[Button] | None = None, server_functions: list[Callable] | None = None, props: Any)` +Creates a component with arbitrary HTML. + + +## Custom HTML Components + +If a task requires significant customization of an existing component or a component that doesn't exist in Gradio, you can create one with `gr.HTML`. It supports `html_template` (with `${}` JS expressions and `{{}}` Handlebars syntax), `css_template` for scoped styles, and `js_on_load` for interactivity — where `props.value` updates the component value and `trigger('event_name')` fires Gradio events. For reuse, subclass `gr.HTML` and define `api_info()` for API/MCP support. See the [full guide](https://www.gradio.app/guides/custom-HTML-components). + +Here's an example that shows how to create and use these kinds of components: + +```python +import gradio as gr + +class StarRating(gr.HTML): + def __init__(self, label, value=0, **kwargs): + html_template = """ +

${label} rating:

+ ${Array.from({length: 5}, (_, i) => ``).join('')} + """ + css_template = """ + img { height: 50px; display: inline-block; cursor: pointer; } + .faded { filter: grayscale(100%); opacity: 0.3; } + """ + js_on_load = """ + const imgs = element.querySelectorAll('img'); + imgs.forEach((img, index) => { + img.addEventListener('click', () => { + props.value = index + 1; + }); + }); + """ + super().__init__(value=value, label=label, html_template=html_template, css_template=css_template, js_on_load=js_on_load, **kwargs) + + def api_info(self): + return {"type": "integer", "minimum": 0, "maximum": 5} + + +with gr.Blocks() as demo: + gr.Markdown("# Restaurant Review") + food_rating = StarRating(label="Food", value=3) + service_rating = StarRating(label="Service", value=3) + ambience_rating = StarRating(label="Ambience", value=3) + average_btn = gr.Button("Calculate Average Rating") + rating_output = StarRating(label="Average", value=3) + def calculate_average(food, service, ambience): + return round((food + service + ambience) / 3) + average_btn.click( + fn=calculate_average, + inputs=[food_rating, service_rating, ambience_rating], + outputs=rating_output + ) + +demo.launch() +``` + +## Event Listeners + +All event listeners share the same signature: + +```python +component.event_name( + fn: Callable | None | Literal["decorator"] = "decorator", + inputs: Component | Sequence[Component] | set[Component] | None = None, + outputs: Component | Sequence[Component] | set[Component] | None = None, + api_name: str | None = None, + api_description: str | None | Literal[False] = None, + scroll_to_output: bool = False, + show_progress: Literal["full", "minimal", "hidden"] = "full", + show_progress_on: Component | Sequence[Component] | None = None, + queue: bool = True, + batch: bool = False, + max_batch_size: int = 4, + preprocess: bool = True, + postprocess: bool = True, + cancels: dict[str, Any] | list[dict[str, Any]] | None = None, + trigger_mode: Literal["once", "multiple", 
"always_last"] | None = None, + js: str | Literal[True] | None = None, + concurrency_limit: int | None | Literal["default"] = "default", + concurrency_id: str | None = None, + api_visibility: Literal["public", "private", "undocumented"] = "public", + time_limit: int | None = None, + stream_every: float = 0.5, + key: int | str | tuple[int | str, ...] | None = None, + validator: Callable | None = None, +) -> Dependency +``` + +Supported events per component: + +- **AnnotatedImage**: select +- **Audio**: stream, change, clear, play, pause, stop, start_recording, pause_recording, stop_recording, upload, input +- **BarPlot**: select, double_click +- **BrowserState**: change +- **Button**: click +- **Chatbot**: change, select, like, retry, undo, example_select, option_select, clear, copy, edit +- **Checkbox**: change, input, select +- **CheckboxGroup**: change, input, select +- **ClearButton**: click +- **Code**: change, input, focus, blur +- **ColorPicker**: change, input, submit, focus, blur +- **Dataframe**: change, input, select, edit +- **Dataset**: click, select +- **DateTime**: change, submit +- **DeepLinkButton**: click +- **Dialogue**: change, input, submit +- **DownloadButton**: click +- **Dropdown**: change, input, select, focus, blur, key_up +- **DuplicateButton**: click +- **File**: change, select, clear, upload, delete, download +- **FileExplorer**: change, input, select +- **Gallery**: select, upload, change, delete, preview_close, preview_open +- **HTML**: change, input, click, double_click, submit, stop, edit, clear, play, pause, end, start_recording, pause_recording, stop_recording, focus, blur, upload, release, select, stream, like, example_select, option_select, load, key_up, apply, delete, tick, undo, retry, expand, collapse, download, copy +- **HighlightedText**: change, select +- **Image**: clear, change, stream, select, upload, input +- **ImageEditor**: clear, change, input, select, upload, apply +- **ImageSlider**: clear, change, stream,
select, upload, input +- **JSON**: change +- **Label**: change, select +- **LinePlot**: select, double_click +- **LoginButton**: click +- **Markdown**: change, copy +- **Model3D**: change, upload, edit, clear +- **MultimodalTextbox**: change, input, select, submit, focus, blur, stop +- **Navbar**: change +- **Number**: change, input, submit, focus, blur +- **ParamViewer**: change, upload +- **Plot**: change +- **Radio**: select, change, input +- **ScatterPlot**: select, double_click +- **SimpleImage**: clear, change, upload +- **Slider**: change, input, release +- **State**: change +- **Textbox**: change, input, select, submit, focus, blur, stop, copy +- **Timer**: tick +- **UploadButton**: click, upload +- **Video**: change, clear, start_recording, stop_recording, stop, play, pause, end, upload, input + +## Prediction CLI + +The `gradio` CLI includes `info` and `predict` commands for interacting with Gradio apps programmatically. These are especially useful for coding agents that need to use Spaces in their workflows. + +### `gradio info` — Discover endpoints and parameters + +```bash +gradio info +``` + +Returns a JSON payload describing all endpoints, their parameters (with types and defaults), and return values. + +```bash +gradio info gradio/calculator +# { +# "/predict": { +# "parameters": [ +# {"name": "num1", "required": true, "default": null, "type": {"type": "number"}}, +# {"name": "operation", "required": true, "default": null, "type": {"enum": ["add", "subtract", "multiply", "divide"], "type": "string"}}, +# {"name": "num2", "required": true, "default": null, "type": {"type": "number"}} +# ], +# "returns": [{"name": "output", "type": {"type": "number"}}], +# "description": "" +# } +# } +``` + +File-type parameters show `"type": "filepath"` with instructions to include `"meta": {"_type": "gradio.FileData"}` — this signals the file will be uploaded to the remote server. 
+ +### `gradio predict` — Send predictions + +```bash +gradio predict +``` + +Returns a JSON object with named output keys. + +```bash +# Simple numeric prediction +gradio predict gradio/calculator /predict '{"num1": 5, "operation": "multiply", "num2": 3}' +# {"output": 15} + +# Image generation +gradio predict black-forest-labs/FLUX.2-dev /infer '{"prompt": "A majestic dragon"}' +# {"Result": "/tmp/gradio/.../image.webp", "Seed": 1117868604} + +# File upload (must include meta key) +gradio predict gradio/image_mod /predict '{"image": {"path": "/path/to/image.png", "meta": {"_type": "gradio.FileData"}}}' +# {"output": "/tmp/gradio/.../output.png"} +``` + +Both commands accept `--token` for accessing private Spaces. + +## Additional Reference + +- [End-to-End Examples](examples.md) — complete working apps diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-gradio/examples.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-gradio/examples.md new file mode 100644 index 00000000..b48c4cdc --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-gradio/examples.md @@ -0,0 +1,613 @@ +# Gradio End-to-End Examples + +Complete working Gradio apps for reference. + +## Blocks Essay Simple + +```python +import gradio as gr + +def change_textbox(choice): + if choice == "short": + return gr.Textbox(lines=2, visible=True) + elif choice == "long": + return gr.Textbox(lines=8, visible=True, value="Lorem ipsum dolor sit amet") + else: + return gr.Textbox(visible=False) + +with gr.Blocks() as demo: + radio = gr.Radio( + ["short", "long", "none"], label="What kind of essay would you like to write?" 
+ ) + text = gr.Textbox(lines=2, interactive=True, buttons=["copy"]) + radio.change(fn=change_textbox, inputs=radio, outputs=text) + +demo.launch() +``` + +## Blocks Flipper + +```python +import numpy as np +import gradio as gr + +def flip_text(x): + return x[::-1] + +def flip_image(x): + return np.fliplr(x) + +with gr.Blocks() as demo: + gr.Markdown("Flip text or image files using this demo.") + with gr.Tab("Flip Text"): + text_input = gr.Textbox() + text_output = gr.Textbox() + text_button = gr.Button("Flip") + with gr.Tab("Flip Image"): + with gr.Row(): + image_input = gr.Image() + image_output = gr.Image() + image_button = gr.Button("Flip") + + with gr.Accordion("Open for More!", open=False): + gr.Markdown("Look at me...") + temp_slider = gr.Slider( + 0, 1, + value=0.1, + step=0.1, + interactive=True, + label="Slide me", + ) + + text_button.click(flip_text, inputs=text_input, outputs=text_output) + image_button.click(flip_image, inputs=image_input, outputs=image_output) + +demo.launch() +``` + +## Blocks Form + +```python +import gradio as gr + +with gr.Blocks() as demo: + name_box = gr.Textbox(label="Name") + age_box = gr.Number(label="Age", minimum=0, maximum=100) + symptoms_box = gr.CheckboxGroup(["Cough", "Fever", "Runny Nose"]) + submit_btn = gr.Button("Submit") + + with gr.Column(visible=False) as output_col: + diagnosis_box = gr.Textbox(label="Diagnosis") + patient_summary_box = gr.Textbox(label="Patient Summary") + + def submit(name, age, symptoms): + return { + submit_btn: gr.Button(visible=False), + output_col: gr.Column(visible=True), + diagnosis_box: "covid" if "Cough" in symptoms else "flu", + patient_summary_box: f"{name}, {age} y/o", + } + + submit_btn.click( + submit, + [name_box, age_box, symptoms_box], + [submit_btn, diagnosis_box, patient_summary_box, output_col], + ) + +demo.launch() +``` + +## Blocks Hello + +```python +import gradio as gr + +def welcome(name): + return f"Welcome to Gradio, {name}!" 
+ +with gr.Blocks() as demo: + gr.Markdown( + """ + # Hello World! + Start typing below to see the output. + """) + inp = gr.Textbox(placeholder="What is your name?") + out = gr.Textbox() + inp.change(welcome, inp, out) + +demo.launch() +``` + +## Blocks Layout + +```python +import gradio as gr + +demo = gr.Blocks() + +with demo: + with gr.Row(): + gr.Image(interactive=True, scale=2) + gr.Image() + with gr.Row(): + gr.Textbox(label="Text") + gr.Number(label="Count", scale=2) + gr.Radio(choices=["One", "Two"]) + with gr.Row(): + gr.Button("500", scale=0, min_width=500) + gr.Button("A", scale=0) + gr.Button("grow") + with gr.Row(): + gr.Textbox() + gr.Textbox() + gr.Button() + with gr.Row(): + with gr.Row(): + with gr.Column(): + gr.Textbox(label="Text") + gr.Number(label="Count") + gr.Radio(choices=["One", "Two"]) + gr.Image() + with gr.Column(): + gr.Image(interactive=True) + gr.Image() + gr.Image() + gr.Textbox(label="Text") + gr.Number(label="Count") + gr.Radio(choices=["One", "Two"]) + +demo.launch() +``` + +## Calculator + +```python +import gradio as gr + +def calculator(num1, operation, num2): + if operation == "add": + return num1 + num2 + elif operation == "subtract": + return num1 - num2 + elif operation == "multiply": + return num1 * num2 + elif operation == "divide": + if num2 == 0: + raise gr.Error("Cannot divide by zero!") + return num1 / num2 + +demo = gr.Interface( + calculator, + [ + "number", + gr.Radio(["add", "subtract", "multiply", "divide"]), + "number" + ], + "number", + examples=[ + [45, "add", 3], + [3.14, "divide", 2], + [144, "multiply", 2.5], + [0, "subtract", 1.2], + ], + title="Toy Calculator", + description="Here's a sample toy calculator.", + api_name="predict" +) + +demo.launch() +``` + +## Chatbot Simple + +```python +import gradio as gr +import random +import time + +with gr.Blocks() as demo: + chatbot = gr.Chatbot() + msg = gr.Textbox() + clear = gr.ClearButton([msg, chatbot]) + + def respond(message, chat_history): + bot_message 
= random.choice(["How are you?", "Today is a great day", "I'm very hungry"]) + chat_history.append({"role": "user", "content": message}) + chat_history.append({"role": "assistant", "content": bot_message}) + time.sleep(2) + return "", chat_history + + msg.submit(respond, [msg, chatbot], [msg, chatbot]) + +demo.launch() +``` + +## Chatbot Streaming + +```python +import gradio as gr +import random +import time + +with gr.Blocks() as demo: + chatbot = gr.Chatbot() + msg = gr.Textbox() + clear = gr.Button("Clear") + + def user(user_message, history: list): + return "", history + [{"role": "user", "content": user_message}] + + def bot(history: list): + bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"]) + history.append({"role": "assistant", "content": ""}) + for character in bot_message: + history[-1]['content'] += character + time.sleep(0.05) + yield history + + msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then( + bot, chatbot, chatbot + ) + clear.click(lambda: None, None, chatbot, queue=False) + +demo.launch() +``` + +## Custom Css + +```python +import gradio as gr + +with gr.Blocks() as demo: + with gr.Column(elem_classes="cool-col"): + gr.Markdown("### Gradio Demo with Custom CSS", elem_classes="darktest") + gr.Markdown( + elem_classes="markdown", + value="Resize the browser window to see the CSS media query in action.", + ) + +if __name__ == "__main__": + demo.launch(css_paths=["demo/custom_css/custom_css.css"]) +``` + +## Fake Diffusion + +```python +import gradio as gr +import numpy as np +import time + +def fake_diffusion(steps): + rng = np.random.default_rng() + for i in range(steps): + time.sleep(1) + image = rng.random(size=(600, 600, 3)) + yield image + image = np.ones((1000,1000,3), np.uint8) + image[:] = [255, 124, 0] + yield image + +demo = gr.Interface(fake_diffusion, + inputs=gr.Slider(1, 10, 3, step=1), + outputs="image", + api_name="predict") + +demo.launch() +``` + +## Hello World + +```python +import 
gradio as gr + + +def greet(name): + return "Hello " + name + "!" + + +demo = gr.Interface(fn=greet, inputs="textbox", outputs="textbox", api_name="predict") + +demo.launch() +``` + +## Image Editor + +```python +import gradio as gr +import time + + +def sleep(im): + time.sleep(5) + return [im["background"], im["layers"][0], im["layers"][1], im["composite"]] + + +def predict(im): + return im["composite"] + + +with gr.Blocks() as demo: + with gr.Row(): + im = gr.ImageEditor( + type="numpy", + ) + im_preview = gr.Image() + n_upload = gr.Number(0, label="Number of upload events", step=1) + n_change = gr.Number(0, label="Number of change events", step=1) + n_input = gr.Number(0, label="Number of input events", step=1) + + im.upload(lambda x: x + 1, outputs=n_upload, inputs=n_upload) + im.change(lambda x: x + 1, outputs=n_change, inputs=n_change) + im.input(lambda x: x + 1, outputs=n_input, inputs=n_input) + im.change(predict, outputs=im_preview, inputs=im, show_progress="hidden") + +demo.launch() +``` + +## On Listener Decorator + +```python +import gradio as gr + +with gr.Blocks() as demo: + name = gr.Textbox(label="Name") + output = gr.Textbox(label="Output Box") + greet_btn = gr.Button("Greet") + + @gr.on(triggers=[name.submit, greet_btn.click], inputs=name, outputs=output) + def greet(name): + return "Hello " + name + "!" 
+ +demo.launch() +``` + +## Render Merge + +```python +import gradio as gr +import time + +with gr.Blocks() as demo: + text_count = gr.Slider(1, 5, value=1, step=1, label="Textbox Count") + + @gr.render(inputs=text_count) + def render_count(count): + boxes = [] + for i in range(count): + box = gr.Textbox(label=f"Box {i}") + boxes.append(box) + + def merge(*args): + time.sleep(0.2) # simulate a delay + return " ".join(args) + + merge_btn.click(merge, boxes, output) + + def clear(): + time.sleep(0.2) # simulate a delay + return [" "] * count + + clear_btn.click(clear, None, boxes) + + def countup(): + time.sleep(0.2) # simulate a delay + return list(range(count)) + + count_btn.click(countup, None, boxes, queue=False) + + with gr.Row(): + merge_btn = gr.Button("Merge") + clear_btn = gr.Button("Clear") + count_btn = gr.Button("Count") + + output = gr.Textbox() + +demo.launch() +``` + +## Reverse Audio 2 + +```python +import gradio as gr +import numpy as np + +def reverse_audio(audio): + sr, data = audio + return (sr, np.flipud(data)) + +demo = gr.Interface(fn=reverse_audio, + inputs="microphone", + outputs="audio", api_name="predict") + +demo.launch() +``` + +## Sepia Filter + +```python +import numpy as np +import gradio as gr + +def sepia(input_img): + sepia_filter = np.array([ + [0.393, 0.769, 0.189], + [0.349, 0.686, 0.168], + [0.272, 0.534, 0.131] + ]) + sepia_img = input_img.dot(sepia_filter.T) + sepia_img /= sepia_img.max() + return sepia_img + +demo = gr.Interface(sepia, gr.Image(), "image", api_name="predict") +demo.launch() +``` + +## Sort Records + +```python +import gradio as gr + +def sort_records(records): + return records.sort("Quantity") + +demo = gr.Interface( + sort_records, + gr.Dataframe( + headers=["Item", "Quantity"], + datatype=["str", "number"], + row_count=3, + column_count=2, + column_limits=(2, 2), + type="polars" + ), + "dataframe", + description="Sort by Quantity" +) + +demo.launch() +``` + +## Streaming Simple + +```python +import gradio 
as gr + +with gr.Blocks() as demo: + with gr.Row(): + with gr.Column(): + input_img = gr.Image(label="Input", sources="webcam") + with gr.Column(): + output_img = gr.Image(label="Output") + input_img.stream(lambda s: s, input_img, output_img, time_limit=15, stream_every=0.1, concurrency_limit=30) + +if __name__ == "__main__": + + demo.launch() +``` + +## Tabbed Interface Lite + +```python +import gradio as gr + +hello_world = gr.Interface(lambda name: "Hello " + name, "text", "text", api_name="predict") +bye_world = gr.Interface(lambda name: "Bye " + name, "text", "text", api_name="predict") +chat = gr.ChatInterface(lambda *args: "Hello " + args[0], api_name="chat") + +demo = gr.TabbedInterface([hello_world, bye_world, chat], ["Hello World", "Bye World", "Chat"]) + +demo.launch() +``` + +## Tax Calculator + +```python +import gradio as gr + +def tax_calculator(income, marital_status, assets): + tax_brackets = [(10, 0), (25, 8), (60, 12), (120, 20), (250, 30)] + total_deductible = sum(cost for cost, deductible in zip(assets["Cost"], assets["Deductible"]) if deductible) + taxable_income = income - total_deductible + + total_tax = 0 + for bracket, rate in tax_brackets: + if taxable_income > bracket: + total_tax += (taxable_income - bracket) * rate / 100 + + if marital_status == "Married": + total_tax *= 0.75 + elif marital_status == "Divorced": + total_tax *= 0.8 + + return round(total_tax) + +demo = gr.Interface( + tax_calculator, + [ + "number", + gr.Radio(["Single", "Married", "Divorced"]), + gr.Dataframe( + headers=["Item", "Cost", "Deductible"], + datatype=["str", "number", "bool"], + label="Assets Purchased this Year", + ), + ], + gr.Number(label="Tax due"), + examples=[ + [10000, "Married", [["Suit", 5000, True], ["Laptop (for work)", 800, False], ["Car", 1800, True]]], + [80000, "Single", [["Suit", 800, True], ["Watch", 1800, True], ["Food", 800, True]]], + ], + live=True, + api_name="predict" +) + +demo.launch() +``` + +## Timer Simple + +```python +import 
gradio as gr +import random +import time + +with gr.Blocks() as demo: + timer = gr.Timer(1) + timestamp = gr.Number(label="Time") + timer.tick(lambda: round(time.time()), outputs=timestamp, api_name="timestamp") + + number = gr.Number(lambda: random.randint(1, 10), every=timer, label="Random Number") + with gr.Row(): + gr.Button("Start").click(lambda: gr.Timer(active=True), None, timer) + gr.Button("Stop").click(lambda: gr.Timer(active=False), None, timer) + gr.Button("Go Fast").click(lambda: 0.2, None, timer) + +if __name__ == "__main__": + demo.launch() +``` + +## Variable Outputs + +```python +import gradio as gr + +max_textboxes = 10 + +def variable_outputs(k): + k = int(k) + return [gr.Textbox(visible=True)]*k + [gr.Textbox(visible=False)]*(max_textboxes-k) + +with gr.Blocks() as demo: + s = gr.Slider(1, max_textboxes, value=max_textboxes, step=1, label="How many textboxes to show:") + textboxes = [] + for i in range(max_textboxes): + t = gr.Textbox(f"Textbox {i}") + textboxes.append(t) + + s.change(variable_outputs, s, textboxes) + +if __name__ == "__main__": + demo.launch() +``` + +## Video Identity + +```python +import gradio as gr +from gradio.media import get_video + +def video_identity(video): + return video + +# get_video() returns file paths to sample media included with Gradio +demo = gr.Interface(video_identity, + gr.Video(), + "playable_video", + examples=[ + get_video("world.mp4") + ], + cache_examples=True, + api_name="predict",) + +demo.launch() +``` diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/SKILL.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/SKILL.md index 9da285b8..ed9f90f0 100644 --- a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/SKILL.md +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/SKILL.md @@ -1,9 +1,9 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-jobs" name: hugging-face-jobs -description: 
"Run any workload on fully managed Hugging Face infrastructure. No local setup required—jobs run on cloud CPUs, GPUs, or TPUs and can persist results to the Hugging Face Hub." -risk: safe -source: "https://github.com/huggingface/skills/tree/main/skills/hugging-face-jobs" -date_added: "2026-02-27" +description: Run workloads on Hugging Face Jobs with managed CPUs, GPUs, TPUs, secrets, and Hub persistence. +license: Complete terms in LICENSE.txt +risk: unknown --- # Running Workloads on Hugging Face Jobs @@ -66,12 +66,15 @@ Before starting any job, verify: **How to provide tokens:** ```python -{ - "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Recommended: automatic token -} +# hf_jobs MCP tool — $HF_TOKEN is auto-replaced with real token: +{"secrets": {"HF_TOKEN": "$HF_TOKEN"}} + +# HfApi().run_uv_job() — MUST pass actual token: +from huggingface_hub import get_token +secrets={"HF_TOKEN": get_token()} ``` -**⚠️ CRITICAL:** The `$HF_TOKEN` placeholder is automatically replaced with your logged-in token. Never hardcode tokens in scripts. +**⚠️ CRITICAL:** The `$HF_TOKEN` placeholder is ONLY auto-replaced by the `hf_jobs` MCP tool. When using `HfApi().run_uv_job()`, you MUST pass the real token via `get_token()`. Passing the literal string `"$HF_TOKEN"` results in a 9-character invalid token and 401 errors. ## Token Usage Guide @@ -539,9 +542,12 @@ requests.post("https://your-api.com/results", json=results) **In job submission:** ```python -{ - "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Enables authentication -} +# hf_jobs MCP tool: +{"secrets": {"HF_TOKEN": "$HF_TOKEN"}} # auto-replaced + +# HfApi().run_uv_job(): +from huggingface_hub import get_token +secrets={"HF_TOKEN": get_token()} # must pass real token ``` **In script:** @@ -560,7 +566,7 @@ api.upload_file(...) 
Before submitting: - [ ] Results persistence method chosen -- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` if using Hub +- [ ] Token in secrets if using Hub (MCP: `"$HF_TOKEN"`, Python API: `get_token()`) - [ ] Script handles missing token gracefully - [ ] Test persistence path works @@ -950,7 +956,7 @@ hf_jobs("uv", { ### Hub Push Failures **Fix:** -1. Add to job: `secrets={"HF_TOKEN": "$HF_TOKEN"}` +1. Add token to secrets: MCP uses `"$HF_TOKEN"` (auto-replaced), Python API uses `get_token()` (must pass real token) 2. Verify token in script: `assert "HF_TOKEN" in os.environ` 3. Check token permissions 4. Verify repo exists or can be created @@ -969,7 +975,7 @@ Add to PEP 723 header: **Fix:** 1. Check `hf_whoami()` works locally -2. Verify `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +2. Verify token in secrets — MCP: `"$HF_TOKEN"`, Python API: `get_token()` (NOT `"$HF_TOKEN"`) 3. Re-login: `hf auth login` 4. Check token has required permissions @@ -1017,7 +1023,7 @@ Add to PEP 723 header: 2. **Jobs are asynchronous** - Don't wait/poll; let user check when ready 3. **Always set timeout** - Default 30 min may be insufficient; set appropriate timeout 4. **Always persist results** - Environment is ephemeral; without persistence, all work is lost -5. **Use tokens securely** - Always use `secrets={"HF_TOKEN": "$HF_TOKEN"}` for Hub operations +5. **Use tokens securely** - MCP: `secrets={"HF_TOKEN": "$HF_TOKEN"}`, Python API: `secrets={"HF_TOKEN": get_token()}` — `"$HF_TOKEN"` only works with MCP tool 6. **Choose appropriate hardware** - Start small, scale up based on needs (see hardware guide) 7. **Use UV scripts** - Default to `hf_jobs("uv", {...})` with inline scripts for Python workloads 8. 
**Handle authentication** - Verify tokens are available before Hub operations @@ -1033,6 +1039,7 @@ Add to PEP 723 header: | List jobs | `hf_jobs("ps")` | `hf jobs ps` | `list_jobs()` | | View logs | `hf_jobs("logs", {...})` | `hf jobs logs ` | `fetch_job_logs(job_id)` | | Cancel job | `hf_jobs("cancel", {...})` | `hf jobs cancel ` | `cancel_job(job_id)` | -| Schedule UV | `hf_jobs("scheduled uv", {...})` | - | `create_scheduled_uv_job()` | -| Schedule Docker | `hf_jobs("scheduled run", {...})` | - | `create_scheduled_job()` | - +| Schedule UV | `hf_jobs("scheduled uv", {...})` | `hf jobs scheduled uv run SCHEDULE script.py` | `create_scheduled_uv_job()` | +| Schedule Docker | `hf_jobs("scheduled run", {...})` | `hf jobs scheduled run SCHEDULE image cmd` | `create_scheduled_job()` | +| List scheduled | `hf_jobs("scheduled ps")` | `hf jobs scheduled ps` | `list_scheduled_jobs()` | +| Delete scheduled | `hf_jobs("scheduled delete", {...})` | `hf jobs scheduled delete ` | `delete_scheduled_job()` | diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/index.html b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/index.html new file mode 100644 index 00000000..6db24014 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/index.html @@ -0,0 +1,216 @@ + + + + + + hf-jobs - Run Workloads on Hugging Face Jobs + + + +
+

Agent Skill : hf-jobs

+ +
+

Run any workload on Hugging Face Jobs.

+

Use this skill when you want to run GPU/CPU workloads (batch inference, synthetic data generation, dataset stats, experiments) on Hugging Face Jobs, with correct token handling and result persistence back to the Hub.

+
+ + + +
+

Overview

+

This skill focuses on running real workloads via Hugging Face Jobs. It includes ready-to-run UV scripts and guides for authentication (HF tokens), secrets vs env vars, timeouts, hardware selection, and pushing results to the Hub.

+
+ +
+

Core Documentation

+
    +
  • + SKILL.md +
    hf-jobs/SKILL.md
    +
    Complete skill documentation (how to submit jobs, tokens/secrets, timeouts, persistence, and how to use the bundled scripts)
    +
  • +
+
+ +
+

References

+
    +
  • + token_usage.md +
    hf-jobs/references/token_usage.md
    +
    Token best practices: secrets vs env, permissions, common errors (401/403), and secure patterns
    +
  • +
  • + hub_saving.md +
    hf-jobs/references/hub_saving.md
    +
    How to persist results: push datasets/models/files to the Hub (ephemeral job filesystem)
    +
  • +
  • + hardware_guide.md +
    hf-jobs/references/hardware_guide.md
    +
    Flavor selection guidance for CPU/GPU/TPU workloads
    +
  • +
  • + troubleshooting.md +
    hf-jobs/references/troubleshooting.md
    +
    Common failure modes (timeouts, missing deps, OOM, auth) and fixes
    +
  • +
+
+ +
+

Scripts

+
    +
  • + generate-responses.py +
    hf-jobs/scripts/generate-responses.py
    +
    vLLM batch generation: load prompts/messages from a dataset, generate responses, push dataset + card to Hub
    +
  • +
  • + cot-self-instruct.py +
    hf-jobs/scripts/cot-self-instruct.py
    +
    CoT Self-Instruct synthetic data generation (reasoning/instruction) + optional filtering, pushes dataset + card
    +
  • +
  • + finepdfs-stats.py +
    hf-jobs/scripts/finepdfs-stats.py
    +
    Polars streaming stats over Hub parquet (finepdfs-edu); optional upload of computed stats to a dataset repo
    +
  • +
+
+
+ + + + + + diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/hardware_guide.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/hardware_guide.md new file mode 100644 index 00000000..0a846338 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/hardware_guide.md @@ -0,0 +1,336 @@ +# Hardware Selection Guide + +Choosing the right hardware (flavor) is critical for cost-effective workloads. + +> **Reference:** [HF Jobs Hardware Documentation](https://huggingface.co/docs/hub/en/spaces-config-reference) (updated 07/2025) + +## Available Hardware + +### CPU Flavors +| Flavor | Description | Use Case | +|--------|-------------|----------| +| `cpu-basic` | Basic CPU instance | Testing, lightweight scripts | +| `cpu-upgrade` | Enhanced CPU instance | Data processing, parallel workloads | + +**Use cases:** Data processing, testing scripts, lightweight workloads +**Not recommended for:** Model training, GPU-accelerated workloads + +### GPU Flavors + +| Flavor | GPU | VRAM | Use Case | +|--------|-----|------|----------| +| `t4-small` | NVIDIA T4 | 16GB | <1B models, demos, quick tests | +| `t4-medium` | NVIDIA T4 | 16GB | 1-3B models, development | +| `l4x1` | NVIDIA L4 | 24GB | 3-7B models, efficient workloads | +| `l4x4` | 4x NVIDIA L4 | 96GB | Multi-GPU, parallel workloads | +| `a10g-small` | NVIDIA A10G | 24GB | 3-7B models, production | +| `a10g-large` | NVIDIA A10G | 24GB | 7-13B models, batch inference | +| `a10g-largex2` | 2x NVIDIA A10G | 48GB | Multi-GPU, large models | +| `a10g-largex4` | 4x NVIDIA A10G | 96GB | Multi-GPU, very large models | +| `a100-large` | NVIDIA A100 | 40GB | 13B+ models, fastest GPU option | + +### TPU Flavors + +| Flavor | Configuration | Use Case | +|--------|---------------|----------| +| `v5e-1x1` | TPU v5e (1x1) | Small TPU workloads | +| `v5e-2x2` | TPU v5e (2x2) | Medium TPU workloads | +| `v5e-2x4` | TPU v5e (2x4) | 
Large TPU workloads | + +**TPU Use Cases:** +- JAX/Flax model training +- Large-scale inference +- TPU-optimized workloads + +## Selection Guidelines + +### By Workload Type + +**Data Processing** +- **Recommended:** `cpu-upgrade` or `l4x1` +- **Use case:** Transform, filter, analyze datasets +- **Batch size:** Depends on data size +- **Time:** Varies by dataset size + +**Batch Inference** +- **Recommended:** `a10g-large` or `a100-large` +- **Use case:** Run inference on thousands of samples +- **Batch size:** 8-32 depending on model +- **Time:** Depends on number of samples + +**Experiments & Benchmarks** +- **Recommended:** `a10g-small` or `a10g-large` +- **Use case:** Reproducible ML experiments +- **Batch size:** Varies +- **Time:** Depends on experiment complexity + +**Model Training** (see `model-trainer` skill for details) +- **Recommended:** See model-trainer skill +- **Use case:** Fine-tuning models +- **Batch size:** Depends on model size +- **Time:** Hours to days + +**Synthetic Data Generation** +- **Recommended:** `a10g-large` or `a100-large` +- **Use case:** Generate datasets using LLMs +- **Batch size:** Depends on generation method +- **Time:** Hours for large datasets + +### By Budget + +**Minimal Budget (<$5 total)** +- Use `cpu-basic` or `t4-small` +- Process small datasets +- Quick tests and demos + +**Small Budget ($5-20)** +- Use `t4-medium` or `a10g-small` +- Process medium datasets +- Run experiments + +**Medium Budget ($20-50)** +- Use `a10g-small` or `a10g-large` +- Process large datasets +- Production workloads + +**Large Budget ($50-200)** +- Use `a10g-large` or `a100-large` +- Large-scale processing +- Multiple experiments + +### By Model Size (for inference/processing) + +**Tiny Models (<1B parameters)** +- **Recommended:** `t4-small` +- **Example:** Qwen2.5-0.5B, TinyLlama +- **Batch size:** 8-16 + +**Small Models (1-3B parameters)** +- **Recommended:** `t4-medium` or `a10g-small` +- **Example:** Qwen2.5-1.5B, Phi-2 +- **Batch size:** 
4-8 + +**Medium Models (3-7B parameters)** +- **Recommended:** `a10g-small` or `a10g-large` +- **Example:** Qwen2.5-7B, Mistral-7B +- **Batch size:** 2-4 + +**Large Models (7-13B parameters)** +- **Recommended:** `a10g-large` or `a100-large` +- **Example:** Llama-3-8B +- **Batch size:** 1-2 + +**Very Large Models (13B+ parameters)** +- **Recommended:** `a100-large` +- **Example:** Llama-3-13B, Llama-3-70B +- **Batch size:** 1 + +## Memory Considerations + +### Estimating Memory Requirements + +**For inference:** +``` +Memory (GB) ≈ (Model params in billions) × 2-4 +``` + +**For training:** +``` +Memory (GB) ≈ (Model params in billions) × 20 (full) or × 4 (LoRA) +``` + +**Examples:** +- Qwen2.5-0.5B inference: ~1-2GB ✅ fits t4-small +- Qwen2.5-7B inference: ~14-28GB ✅ fits a10g-large +- Qwen2.5-7B training: ~140GB ❌ not feasible without LoRA + +### Memory Optimization + +If hitting memory limits: + +1. **Reduce batch size** + ```python + batch_size = 1 + ``` + +2. **Process in chunks** + ```python + for chunk in chunks: + process(chunk) + ``` + +3. **Use smaller models** + - Use quantized models + - Use LoRA adapters + +4. **Upgrade hardware** + - cpu → t4 → a10g → a100 + +## Cost Estimation + +### Formula + +``` +Total Cost = (Hours of runtime) × (Cost per hour) +``` + +### Example Calculations + +**Data processing:** +- Hardware: cpu-upgrade ($0.50/hour) +- Time: 1 hour +- Cost: $0.50 + +**Batch inference:** +- Hardware: a10g-large ($5/hour) +- Time: 2 hours +- Cost: $10.00 + +**Experiments:** +- Hardware: a10g-small ($3.50/hour) +- Time: 4 hours +- Cost: $14.00 + +### Cost Optimization Tips + +1. **Start small:** Test on cpu-basic or t4-small +2. **Monitor runtime:** Set appropriate timeouts +3. **Optimize code:** Reduce unnecessary compute +4. **Choose right hardware:** Don't over-provision +5. **Use checkpoints:** Resume if job fails +6. 
**Monitor costs:** Check running jobs regularly + +## Multi-GPU Workloads + +Multi-GPU flavors automatically distribute workloads: + +**Multi-GPU flavors:** +- `l4x4` - 4x L4 GPUs (96GB total VRAM) +- `a10g-largex2` - 2x A10G GPUs (48GB total VRAM) +- `a10g-largex4` - 4x A10G GPUs (96GB total VRAM) + +**When to use:** +- Large models (>13B parameters) +- Need faster processing (linear speedup) +- Large datasets (>100K samples) +- Parallel workloads +- Tensor parallelism for inference + +**MCP Tool Example:** +```python +hf_jobs("uv", { + "script": "process.py", + "flavor": "a10g-largex2", # 2 GPUs + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**CLI Equivalent:** +```bash +hf jobs uv run process.py --flavor a10g-largex2 --timeout 4h +``` + +## Choosing Between Options + +### CPU vs GPU + +**Choose CPU when:** +- No GPU acceleration needed +- Data processing only +- Budget constrained +- Simple workloads + +**Choose GPU when:** +- Model inference/training +- GPU-accelerated libraries +- Need faster processing +- Large models + +### a10g vs a100 + +**Choose a10g when:** +- Model <13B parameters +- Budget conscious +- Processing time not critical + +**Choose a100 when:** +- Model 13B+ parameters +- Need fastest processing +- Memory requirements high +- Budget allows + +### Single vs Multi-GPU + +**Choose single GPU when:** +- Model <7B parameters +- Budget constrained +- Simpler debugging + +**Choose multi-GPU when:** +- Model >13B parameters +- Need faster processing +- Large batch sizes required +- Cost-effective for large jobs + +## Quick Reference + +### All Available Flavors + +```python +# Official flavor list (updated 07/2025) +FLAVORS = { + # CPU + "cpu-basic", # Testing, lightweight + "cpu-upgrade", # Data processing + + # GPU - Single + "t4-small", # 16GB - <1B models + "t4-medium", # 16GB - 1-3B models + "l4x1", # 24GB - 3-7B models + "a10g-small", # 24GB - 3-7B production + "a10g-large", # 24GB - 7-13B models + "a100-large", # 40GB - 
13B+ models + + # GPU - Multi + "l4x4", # 4x L4 (96GB total) + "a10g-largex2", # 2x A10G (48GB total) + "a10g-largex4", # 4x A10G (96GB total) + + # TPU + "v5e-1x1", # TPU v5e 1x1 + "v5e-2x2", # TPU v5e 2x2 + "v5e-2x4", # TPU v5e 2x4 +} +``` + +### Workload → Hardware Mapping + +```python +HARDWARE_MAP = { + "data_processing": "cpu-upgrade", + "batch_inference_small": "t4-small", + "batch_inference_medium": "a10g-large", + "batch_inference_large": "a100-large", + "experiments": "a10g-small", + "tpu_workloads": "v5e-1x1", + "training": "see model-trainer skill" +} +``` + +### CLI Examples + +```bash +# CPU job +hf jobs run python:3.12 python script.py + +# GPU job +hf jobs run --flavor a10g-large pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel python script.py + +# TPU job +hf jobs run --flavor v5e-1x1 your-tpu-image python script.py + +# UV script with GPU +hf jobs uv run --flavor a10g-small my_script.py +``` + diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/hub_saving.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/hub_saving.md new file mode 100644 index 00000000..e2af0283 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/hub_saving.md @@ -0,0 +1,352 @@ +# Saving Results to Hugging Face Hub + +**⚠️ CRITICAL:** Job environments are ephemeral. ALL results are lost when a job completes unless persisted to the Hub or external storage. 
+ +## Why Persistence is Required + +When running on Hugging Face Jobs: +- Environment is temporary +- All files deleted on job completion +- No local disk persistence +- Cannot access results after job ends + +**Without persistence, all work is permanently lost.** + +## Persistence Options + +### Option 1: Push to Hugging Face Hub (Recommended) + +**For models:** +```python +from transformers import AutoModel +model.push_to_hub("username/model-name", token=os.environ.get("HF_TOKEN")) +``` + +**For datasets:** +```python +from datasets import Dataset +dataset.push_to_hub("username/dataset-name", token=os.environ.get("HF_TOKEN")) +``` + +**For files/artifacts:** +```python +from huggingface_hub import HfApi +api = HfApi(token=os.environ.get("HF_TOKEN")) +api.upload_file( + path_or_fileobj="results.json", + path_in_repo="results.json", + repo_id="username/results", + repo_type="dataset" +) +``` + +### Option 2: External Storage + +**S3:** +```python +import boto3 +s3 = boto3.client('s3') +s3.upload_file('results.json', 'my-bucket', 'results.json') +``` + +**Google Cloud Storage:** +```python +from google.cloud import storage +client = storage.Client() +bucket = client.bucket('my-bucket') +blob = bucket.blob('results.json') +blob.upload_from_filename('results.json') +``` + +### Option 3: API Endpoint + +```python +import requests +requests.post("https://your-api.com/results", json=results) +``` + +## Required Configuration for Hub Push + +### Job Configuration + +**Always include HF_TOKEN:** +```python +hf_jobs("uv", { + "script": "your_script.py", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Required for Hub operations +}) +``` + +### Script Configuration + +**Verify token exists:** +```python +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN required for Hub operations!" 
+``` + +**Use token for Hub operations:** +```python +from huggingface_hub import HfApi + +# Auto-detects HF_TOKEN from environment +api = HfApi() + +# Or explicitly pass token +api = HfApi(token=os.environ.get("HF_TOKEN")) +``` + +## Complete Examples + +### Example 1: Push Dataset + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["datasets", "huggingface-hub"] +# /// + +import os +from datasets import Dataset +from huggingface_hub import HfApi + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Process data +data = {"text": ["Sample 1", "Sample 2"]} +dataset = Dataset.from_dict(data) + +# Push to Hub +dataset.push_to_hub("username/my-dataset") +print("✅ Dataset pushed!") +""", + "flavor": "cpu-basic", + "timeout": "30m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +### Example 2: Push Model + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["transformers"] +# /// + +import os +from transformers import AutoModel, AutoTokenizer + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Load and process model +model = AutoModel.from_pretrained("base-model") +tokenizer = AutoTokenizer.from_pretrained("base-model") +# ... process model ... + +# Push to Hub +model.push_to_hub("username/my-model") +tokenizer.push_to_hub("username/my-model") +print("✅ Model pushed!") +""", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +### Example 3: Push Artifacts + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["huggingface-hub", "pandas"] +# /// + +import os +import json +import pandas as pd +from huggingface_hub import HfApi + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" 
+
+# Generate results
+results = {"accuracy": 0.95, "loss": 0.05}
+df = pd.DataFrame([results])
+
+# Save files
+with open("results.json", "w") as f:
+    json.dump(results, f)
+df.to_csv("results.csv", index=False)
+
+# Push to Hub (upload_file parameters are keyword-only)
+api = HfApi()
+api.upload_file(path_or_fileobj="results.json", path_in_repo="results.json", repo_id="username/results", repo_type="dataset")
+api.upload_file(path_or_fileobj="results.csv", path_in_repo="results.csv", repo_id="username/results", repo_type="dataset")
+print("✅ Results pushed!")
+""",
+    "flavor": "cpu-basic",
+    "timeout": "30m",
+    "secrets": {"HF_TOKEN": "$HF_TOKEN"}
+})
+```
+
+## Authentication Methods
+
+### Method 1: Automatic Token (Recommended)
+
+```python
+"secrets": {"HF_TOKEN": "$HF_TOKEN"}
+```
+
+Uses your logged-in Hugging Face token automatically.
+
+### Method 2: Explicit Token
+
+```python
+"secrets": {"HF_TOKEN": "hf_abc123..."}
+```
+
+Provide token explicitly (not recommended for security).
+
+### Method 3: Environment Variable
+
+```python
+"env": {"HF_TOKEN": "hf_abc123..."}
+```
+
+Pass as regular environment variable (less secure than secrets).
+
+**Always prefer Method 1** for security and convenience.
+
+## Verification Checklist
+
+Before submitting any job that saves to Hub, verify:
+
+- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config
+- [ ] Script checks for token: `assert "HF_TOKEN" in os.environ`
+- [ ] Hub push code included in script
+- [ ] Repository name doesn't conflict with existing repos
+- [ ] You have write access to the target namespace
+
+## Repository Setup
+
+### Automatic Creation
+
+If repository doesn't exist, it's created automatically when first pushing (if token has write permissions). 
+ +### Manual Creation + +Create repository before pushing: + +```python +from huggingface_hub import HfApi + +api = HfApi() +api.create_repo( + repo_id="username/repo-name", + repo_type="model", # or "dataset" + private=False, # or True for private repo +) +``` + +### Repository Naming + +**Valid names:** +- `username/my-model` +- `username/model-name` +- `organization/model-name` + +**Invalid names:** +- `model-name` (missing username) +- `username/model name` (spaces not allowed) +- `username/MODEL` (uppercase discouraged) + +## Troubleshooting + +### Error: 401 Unauthorized + +**Cause:** HF_TOKEN not provided or invalid + +**Solutions:** +1. Verify `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +2. Check you're logged in: `hf_whoami()` +3. Re-login: `hf auth login` + +### Error: 403 Forbidden + +**Cause:** No write access to repository + +**Solutions:** +1. Check repository namespace matches your username +2. Verify you're a member of organization (if using org namespace) +3. Check token has write permissions + +### Error: Repository not found + +**Cause:** Repository doesn't exist and auto-creation failed + +**Solutions:** +1. Manually create repository first +2. Check repository name format +3. Verify namespace exists + +### Error: Push failed + +**Cause:** Network issues or Hub unavailable + +**Solutions:** +1. Check logs for specific error +2. Verify token is valid +3. Retry push operation + +## Best Practices + +1. **Always verify token exists** before Hub operations +2. **Use descriptive repo names** (e.g., `my-experiment-results` not `results`) +3. **Push incrementally** for large results (use checkpoints) +4. **Verify push success** in logs before job completes +5. **Use appropriate repo types** (model vs dataset) +6. **Add README** with result descriptions +7. 
**Tag repos** with relevant tags + +## Monitoring Push Progress + +Check logs for push progress: + +**MCP Tool:** +```python +hf_jobs("logs", {"job_id": "your-job-id"}) +``` + +**CLI:** +```bash +hf jobs logs +``` + +**Python API:** +```python +from huggingface_hub import fetch_job_logs +for log in fetch_job_logs(job_id="your-job-id"): + print(log) +``` + +**Look for:** +``` +Pushing to username/repo-name... +Upload file results.json: 100% +✅ Push successful +``` + +## Key Takeaway + +**Without `secrets={"HF_TOKEN": "$HF_TOKEN"}` and persistence code, all results are permanently lost.** + +Always verify both are configured before submitting any job that produces results. + diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/token_usage.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/token_usage.md new file mode 100644 index 00000000..89d675d4 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/token_usage.md @@ -0,0 +1,570 @@ +# Token Usage Guide for Hugging Face Jobs + +**⚠️ CRITICAL:** Proper token usage is essential for any job that interacts with the Hugging Face Hub. + +## Overview + +Hugging Face tokens are authentication credentials that allow your jobs to interact with the Hub. 
They're required for: +- Pushing models/datasets to Hub +- Accessing private repositories +- Creating new repositories +- Using Hub APIs programmatically +- Any authenticated Hub operations + +## Token Types + +### Read Token +- **Permissions:** Download models/datasets, read private repos +- **Use case:** Jobs that only need to download/read content +- **Creation:** https://huggingface.co/settings/tokens + +### Write Token +- **Permissions:** Push models/datasets, create repos, modify content +- **Use case:** Jobs that need to upload results (most common) +- **Creation:** https://huggingface.co/settings/tokens +- **⚠️ Required for:** Pushing models, datasets, or any uploads + +### Organization Token +- **Permissions:** Act on behalf of an organization +- **Use case:** Jobs running under organization namespace +- **Creation:** Organization settings → Tokens + +## Providing Tokens to Jobs + +### Method 1: `hf_jobs` MCP tool with `$HF_TOKEN` (Recommended) ⭐ + +```python +hf_jobs("uv", { + "script": "your_script.py", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Automatic replacement +}) +``` + +**How it works:** +1. `$HF_TOKEN` is a placeholder that gets replaced with your actual token +2. Uses the token from your logged-in session (`hf auth login`) +3. Token is encrypted server-side when passed as a secret +4. Most secure and convenient method + +**Benefits:** +- ✅ No token exposure in code +- ✅ Uses your current login session +- ✅ Automatically updated if you re-login +- ✅ Works seamlessly with MCP tools +- ✅ Token encrypted server-side + +**Requirements:** +- Must be logged in: `hf auth login` or `hf_whoami()` works +- Token must have required permissions + +**⚠️ CRITICAL:** `$HF_TOKEN` auto-replacement is an `hf_jobs` MCP tool feature ONLY. It does NOT work with `HfApi().run_uv_job()` — see Method 1b below. 
+ +### Method 1b: `HfApi().run_uv_job()` with `get_token()` (Required for Python API) + +```python +from huggingface_hub import HfApi, get_token +api = HfApi() +api.run_uv_job( + script="your_script.py", + secrets={"HF_TOKEN": get_token()}, # ✅ Passes actual token value +) +``` + +**How it works:** +1. `get_token()` retrieves the token from your logged-in session +2. The actual token value is passed to the `secrets` parameter +3. Token is encrypted server-side + +**Why `"$HF_TOKEN"` fails with `HfApi().run_uv_job()`:** +- The Python API passes the literal string `"$HF_TOKEN"` (9 characters) as the token +- The Jobs server receives this invalid string instead of a real token +- Result: `401 Unauthorized` errors when the script tries to authenticate +- You MUST use `get_token()` from `huggingface_hub` to get the real token + +### Method 2: Explicit Token (Not Recommended) + +```python +hf_jobs("uv", { + "script": "your_script.py", + "secrets": {"HF_TOKEN": "hf_abc123..."} # ⚠️ Hardcoded token +}) +``` + +**When to use:** +- Only if automatic token doesn't work +- Testing with a specific token +- Organization tokens (use with caution) + +**Security concerns:** +- ❌ Token visible in code/logs +- ❌ Must manually update if token rotates +- ❌ Risk of token exposure +- ❌ Not recommended for production + +### Method 3: Environment Variable (Less Secure) + +```python +hf_jobs("uv", { + "script": "your_script.py", + "env": {"HF_TOKEN": "hf_abc123..."} # ⚠️ Less secure than secrets +}) +``` + +**Difference from secrets:** +- `env` variables are visible in job logs +- `secrets` are encrypted server-side +- Always prefer `secrets` for tokens + +**When to use:** +- Only for non-sensitive configuration +- Never use for tokens (use `secrets` instead) + +## Using Tokens in Scripts + +### Accessing Tokens + +Tokens passed via `secrets` are available as environment variables in your script: + +```python +import os + +# Get token from environment +token = os.environ.get("HF_TOKEN") + 
+# Verify token exists +if not token: + raise ValueError("HF_TOKEN not found in environment!") +``` + +### Using with Hugging Face Hub + +**Option 1: Explicit token parameter** +```python +from huggingface_hub import HfApi + +api = HfApi(token=os.environ.get("HF_TOKEN")) +api.upload_file(...) +``` + +**Option 2: Auto-detection (Recommended)** +```python +from huggingface_hub import HfApi + +# Automatically uses HF_TOKEN env var +api = HfApi() # ✅ Simpler, uses token from environment +api.upload_file(...) +``` + +**Option 3: With transformers/datasets** +```python +from transformers import AutoModel +from datasets import load_dataset + +# Auto-detects HF_TOKEN from environment +model = AutoModel.from_pretrained("username/model") +dataset = load_dataset("username/dataset") + +# For push operations, token is auto-detected +model.push_to_hub("username/new-model") +dataset.push_to_hub("username/new-dataset") +``` + +### Complete Example + +```python +# /// script +# dependencies = ["huggingface-hub", "datasets"] +# /// + +import os +from huggingface_hub import HfApi +from datasets import Dataset + +# Verify token is available +assert "HF_TOKEN" in os.environ, "HF_TOKEN required for Hub operations!" 
+ +# Use token for Hub operations +api = HfApi() # Auto-detects HF_TOKEN + +# Create and push dataset +data = {"text": ["Hello", "World"]} +dataset = Dataset.from_dict(data) + +# Push to Hub (token auto-detected) +dataset.push_to_hub("username/my-dataset") + +print("✅ Dataset pushed successfully!") +``` + +## Token Verification + +### Check Authentication Locally + +```python +from huggingface_hub import whoami + +try: + user_info = whoami() + print(f"✅ Logged in as: {user_info['name']}") +except Exception as e: + print(f"❌ Not authenticated: {e}") +``` + +### Verify Token in Job + +```python +import os + +# Check token exists +if "HF_TOKEN" not in os.environ: + raise ValueError("HF_TOKEN not found in environment!") + +token = os.environ["HF_TOKEN"] + +# Verify token format (should start with "hf_") +if not token.startswith("hf_"): + raise ValueError(f"Invalid token format: {token[:10]}...") + +# Test token works +from huggingface_hub import whoami +try: + user_info = whoami(token=token) + print(f"✅ Token valid for user: {user_info['name']}") +except Exception as e: + raise ValueError(f"Token validation failed: {e}") +``` + +## Common Token Issues + +### Error: 401 Unauthorized + +**Symptoms:** +``` +401 Client Error: Unauthorized for url: https://huggingface.co/api/... +``` + +**Causes:** +1. Token missing from job +2. Token invalid or expired +3. Token not passed correctly + +**Solutions:** +1. Add `secrets={"HF_TOKEN": "$HF_TOKEN"}` to job config +2. Verify `hf_whoami()` works locally +3. Re-login: `hf auth login` +4. Check token hasn't expired + +**Verification:** +```python +# In your script +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN missing!" +``` + +### Error: 403 Forbidden + +**Symptoms:** +``` +403 Client Error: Forbidden for url: https://huggingface.co/api/... +``` + +**Causes:** +1. Token lacks required permissions (read-only token used for write) +2. No access to private repository +3. 
Organization permissions insufficient + +**Solutions:** +1. Ensure token has write permissions +2. Check token type at https://huggingface.co/settings/tokens +3. Verify access to target repository +4. Use organization token if needed + +**Check token permissions:** +```python +from huggingface_hub import whoami + +user_info = whoami() +print(f"User: {user_info['name']}") +print(f"Type: {user_info.get('type', 'user')}") +``` + +### Error: Token not found in environment + +**Symptoms:** +``` +KeyError: 'HF_TOKEN' +ValueError: HF_TOKEN not found +``` + +**Causes:** +1. `secrets` not passed in job config +2. Wrong key name (should be `HF_TOKEN`) +3. Using `env` instead of `secrets` + +**Solutions:** +1. Use `secrets={"HF_TOKEN": "$HF_TOKEN"}` (not `env`) +2. Verify key name is exactly `HF_TOKEN` +3. Check job config syntax + +**Correct configuration:** +```python +# ✅ Correct +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) + +# ❌ Wrong - using env instead of secrets +hf_jobs("uv", { + "script": "...", + "env": {"HF_TOKEN": "$HF_TOKEN"} # Less secure +}) + +# ❌ Wrong - wrong key name +hf_jobs("uv", { + "script": "...", + "secrets": {"TOKEN": "$HF_TOKEN"} # Wrong key +}) +``` + +### Error: Repository access denied + +**Symptoms:** +``` +403 Client Error: Forbidden +Repository not found or access denied +``` + +**Causes:** +1. Token doesn't have access to private repo +2. Repository doesn't exist and can't be created +3. Wrong namespace + +**Solutions:** +1. Use token from account with access +2. Verify repo visibility (public vs private) +3. Check namespace matches token owner +4. Create repo first if needed + +**Check repository access:** +```python +from huggingface_hub import HfApi + +api = HfApi() +try: + repo_info = api.repo_info("username/repo-name") + print(f"✅ Access granted: {repo_info.id}") +except Exception as e: + print(f"❌ Access denied: {e}") +``` + +## Token Security Best Practices + +### 1. 
Never Commit Tokens + +**❌ Bad:** +```python +# Never do this! +token = "hf_abc123xyz..." +api = HfApi(token=token) +``` + +**✅ Good:** +```python +# Use environment variable +token = os.environ.get("HF_TOKEN") +api = HfApi(token=token) +``` + +### 2. Use Secrets, Not Environment Variables + +**❌ Bad:** +```python +hf_jobs("uv", { + "script": "...", + "env": {"HF_TOKEN": "$HF_TOKEN"} # Visible in logs +}) +``` + +**✅ Good:** +```python +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Encrypted server-side +}) +``` + +### 3. Use Automatic Token Replacement + +**❌ Bad:** +```python +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "hf_abc123..."} # Hardcoded +}) +``` + +**✅ Good:** +```python +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Automatic +}) +``` + +### 4. Rotate Tokens Regularly + +- Generate new tokens periodically +- Revoke old tokens +- Update job configurations +- Monitor token usage + +### 5. Use Minimal Permissions + +- Create tokens with only needed permissions +- Use read tokens when write isn't needed +- Don't use admin tokens for regular jobs + +### 6. Don't Share Tokens + +- Each user should use their own token +- Don't commit tokens to repositories +- Don't share tokens in logs or messages + +### 7. Monitor Token Usage + +- Check token activity in Hub settings +- Review job logs for token issues +- Set up alerts for unauthorized access + +## Token Workflow Examples + +### Example 1: Push Model to Hub + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["transformers"] +# /// + +import os +from transformers import AutoModel, AutoTokenizer + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Load and process model +model = AutoModel.from_pretrained("base-model") +# ... process model ... 
+ +# Push to Hub (token auto-detected) +model.push_to_hub("username/my-model") +print("✅ Model pushed!") +""", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Token provided +}) +``` + +### Example 2: Access Private Dataset + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["datasets"] +# /// + +import os +from datasets import load_dataset + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Load private dataset (token auto-detected) +dataset = load_dataset("private-org/private-dataset") +print(f"✅ Loaded {len(dataset)} examples") +""", + "flavor": "cpu-basic", + "timeout": "30m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Token provided +}) +``` + +### Example 3: Create and Push Dataset + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["datasets", "huggingface-hub"] +# /// + +import os +from datasets import Dataset +from huggingface_hub import HfApi + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" 
+ +# Create dataset +data = {"text": ["Sample 1", "Sample 2"]} +dataset = Dataset.from_dict(data) + +# Push to Hub +api = HfApi() # Auto-detects HF_TOKEN +dataset.push_to_hub("username/my-dataset") +print("✅ Dataset pushed!") +""", + "flavor": "cpu-basic", + "timeout": "30m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Token provided +}) +``` + +## Quick Reference + +### Token Checklist + +Before submitting a job that uses Hub: + +- [ ] Job includes `secrets={"HF_TOKEN": "$HF_TOKEN"}` +- [ ] Script checks for token: `assert "HF_TOKEN" in os.environ` +- [ ] Token has required permissions (read/write) +- [ ] User is logged in: `hf_whoami()` works +- [ ] Token not hardcoded in script +- [ ] Using `secrets` not `env` for token + +### Common Patterns + +**Pattern 1: Auto-detect token** +```python +from huggingface_hub import HfApi +api = HfApi() # Uses HF_TOKEN from environment +``` + +**Pattern 2: Explicit token** +```python +import os +from huggingface_hub import HfApi +api = HfApi(token=os.environ.get("HF_TOKEN")) +``` + +**Pattern 3: Verify token** +```python +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" +``` + +## Key Takeaways + +1. **Always use `secrets={"HF_TOKEN": "$HF_TOKEN"}`** for Hub operations +2. **Never hardcode tokens** in scripts or job configs +3. **Verify token exists** in script before Hub operations +4. **Use auto-detection** when possible (`HfApi()` without token parameter) +5. **Check permissions** - ensure token has required access +6. **Monitor token usage** - review activity regularly +7. 
**Rotate tokens** - generate new tokens periodically + diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/troubleshooting.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/troubleshooting.md new file mode 100644 index 00000000..338b6894 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/references/troubleshooting.md @@ -0,0 +1,475 @@ +# Troubleshooting Guide + +Common issues and solutions for Hugging Face Jobs. + +## Authentication Issues + +### Error: 401 Unauthorized + +**Symptoms:** +``` +401 Client Error: Unauthorized for url: https://huggingface.co/api/... +``` + +**Causes:** +- Token missing from job +- Token invalid or expired +- Token not passed correctly + +**Solutions:** +1. Add token to secrets: `hf_jobs` MCP uses `"$HF_TOKEN"` (auto-replaced); `HfApi().run_uv_job()` MUST use `get_token()` from `huggingface_hub` (the literal string `"$HF_TOKEN"` will NOT work with the Python API) +2. Verify `hf_whoami()` works locally +3. Re-login: `hf auth login` +4. Check token hasn't expired + +**Verification:** +```python +# In your script +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN missing!" +``` + +### Error: 403 Forbidden + +**Symptoms:** +``` +403 Client Error: Forbidden for url: https://huggingface.co/api/... +``` + +**Causes:** +- Token lacks required permissions +- No access to private repository +- Organization permissions insufficient + +**Solutions:** +1. Ensure token has write permissions +2. Check token type at https://huggingface.co/settings/tokens +3. Verify access to target repository +4. Use organization token if needed + +### Error: Token not found in environment + +**Symptoms:** +``` +KeyError: 'HF_TOKEN' +ValueError: HF_TOKEN not found +``` + +**Causes:** +- `secrets` not passed in job config +- Wrong key name (should be `HF_TOKEN`) +- Using `env` instead of `secrets` + +**Solutions:** +1. 
Use `secrets` (not `env`) — with `hf_jobs` MCP: `"$HF_TOKEN"`; with `HfApi().run_uv_job()`: `get_token()` +2. Verify key name is exactly `HF_TOKEN` +3. Check job config syntax + +## Job Execution Issues + +### Error: Job Timeout + +**Symptoms:** +- Job stops unexpectedly +- Status shows "TIMEOUT" +- Partial results only + +**Causes:** +- Default 30min timeout exceeded +- Job takes longer than expected +- No timeout specified + +**Solutions:** +1. Check logs for actual runtime +2. Increase timeout with buffer: `"timeout": "3h"` +3. Optimize code for faster execution +4. Process data in chunks +5. Add 20-30% buffer to estimated time + +**MCP Tool Example:** +```python +hf_jobs("uv", { + "script": "...", + "timeout": "2h" # Set appropriate timeout +}) +``` + +**Python API Example:** +```python +from huggingface_hub import run_uv_job, inspect_job, fetch_job_logs + +job = run_uv_job("script.py", timeout="4h") + +# Check if job failed +job_info = inspect_job(job_id=job.id) +if job_info.status.stage == "ERROR": + print(f"Job failed: {job_info.status.message}") + # Check logs for details + for log in fetch_job_logs(job_id=job.id): + print(log) +``` + +### Error: Out of Memory (OOM) + +**Symptoms:** +``` +RuntimeError: CUDA out of memory +MemoryError: Unable to allocate array +``` + +**Causes:** +- Batch size too large +- Model too large for hardware +- Insufficient GPU memory + +**Solutions:** +1. Reduce batch size +2. Process data in smaller chunks +3. Upgrade hardware: cpu → t4 → a10g → a100 +4. Use smaller models or quantization +5. Enable gradient checkpointing (for training) + +**Example:** +```python +# Reduce batch size +batch_size = 1 + +# Process in chunks +for chunk in chunks: + process(chunk) +``` + +### Error: Missing Dependencies + +**Symptoms:** +``` +ModuleNotFoundError: No module named 'package_name' +ImportError: cannot import name 'X' +``` + +**Causes:** +- Package not in dependencies +- Wrong package name +- Version mismatch + +**Solutions:** +1. 
Add to PEP 723 header: + ```python + # /// script + # dependencies = ["package-name>=1.0.0"] + # /// + ``` +2. Check package name spelling +3. Specify version if needed +4. Check package availability + +### Error: Script Not Found + +**Symptoms:** +``` +FileNotFoundError: script.py not found +``` + +**Causes:** +- Local file path used (not supported) +- URL incorrect +- Script not accessible + +**Solutions:** +1. Use inline script (recommended) +2. Use publicly accessible URL +3. Upload script to Hub first +4. Check URL is correct + +**Correct approaches:** +```python +# ✅ Inline code +hf_jobs("uv", {"script": "# /// script\n# dependencies = [...]\n# ///\n\n"}) + +# ✅ From URL +hf_jobs("uv", {"script": "https://huggingface.co/user/repo/resolve/main/script.py"}) +``` + +## Hub Push Issues + +### Error: Push Failed + +**Symptoms:** +``` +Error pushing to Hub +Upload failed +``` + +**Causes:** +- Network issues +- Token missing or invalid +- Repository access denied +- File too large + +**Solutions:** +1. Check token: `assert "HF_TOKEN" in os.environ` +2. Verify repository exists or can be created +3. Check network connectivity in logs +4. Retry push operation +5. Split large files into chunks + +### Error: Repository Not Found + +**Symptoms:** +``` +404 Client Error: Not Found +Repository not found +``` + +**Causes:** +- Repository doesn't exist +- Wrong repository name +- No access to private repo + +**Solutions:** +1. Create repository first: + ```python + from huggingface_hub import HfApi + api = HfApi() + api.create_repo("username/repo-name", repo_type="dataset") + ``` +2. Check repository name format +3. Verify namespace exists +4. Check repository visibility + +### Error: Results Not Saved + +**Symptoms:** +- Job completes successfully +- No results visible on Hub +- Files not persisted + +**Causes:** +- No persistence code in script +- Push code not executed +- Push failed silently + +**Solutions:** +1. Add persistence code to script +2. 
Verify push executes successfully +3. Check logs for push errors +4. Add error handling around push + +**Example:** +```python +try: + dataset.push_to_hub("username/dataset") + print("✅ Push successful") +except Exception as e: + print(f"❌ Push failed: {e}") + raise +``` + +## Hardware Issues + +### Error: GPU Not Available + +**Symptoms:** +``` +CUDA not available +No GPU found +``` + +**Causes:** +- CPU flavor used instead of GPU +- GPU not requested +- CUDA not installed in image + +**Solutions:** +1. Use GPU flavor: `"flavor": "a10g-large"` +2. Check image has CUDA support +3. Verify GPU availability in logs + +### Error: Slow Performance + +**Symptoms:** +- Job takes longer than expected +- Low GPU utilization +- CPU bottleneck + +**Causes:** +- Wrong hardware selected +- Inefficient code +- Data loading bottleneck + +**Solutions:** +1. Upgrade hardware +2. Optimize code +3. Use batch processing +4. Profile code to find bottlenecks + +## General Issues + +### Error: Job Status Unknown + +**Symptoms:** +- Can't check job status +- Status API returns error + +**Solutions:** +1. Use job URL: `https://huggingface.co/jobs/username/job-id` +2. Check logs: `hf_jobs("logs", {"job_id": "..."})` +3. Inspect job: `hf_jobs("inspect", {"job_id": "..."})` + +### Error: Logs Not Available + +**Symptoms:** +- No logs visible +- Logs delayed + +**Causes:** +- Job just started (logs delayed 30-60s) +- Job failed before logging +- Logs not yet generated + +**Solutions:** +1. Wait 30-60 seconds after job start +2. Check job status first +3. Use job URL for web interface + +### Error: Cost Unexpectedly High + +**Symptoms:** +- Job costs more than expected +- Longer runtime than estimated + +**Causes:** +- Job ran longer than timeout +- Wrong hardware selected +- Inefficient code + +**Solutions:** +1. Monitor job runtime +2. Set appropriate timeout +3. Optimize code +4. Choose right hardware +5. Check cost estimates before running + +## Debugging Tips + +### 1. 
Add Logging + +```python +import logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +logger.info("Starting processing...") +logger.info(f"Processed {count} items") +``` + +### 2. Verify Environment + +```python +import os +print(f"Python version: {os.sys.version}") +print(f"CUDA available: {torch.cuda.is_available()}") +print(f"HF_TOKEN present: {'HF_TOKEN' in os.environ}") +``` + +### 3. Test Locally First + +Run script locally before submitting to catch errors early: +```bash +python script.py +# Or with uv +uv run script.py +``` + +### 4. Check Job Logs + +**MCP Tool:** +```python +# View logs +hf_jobs("logs", {"job_id": "your-job-id"}) +``` + +**CLI:** +```bash +hf jobs logs +``` + +**Python API:** +```python +from huggingface_hub import fetch_job_logs +for log in fetch_job_logs(job_id="your-job-id"): + print(log) +``` + +**Or use job URL:** `https://huggingface.co/jobs/username/job-id` + +### 5. Add Error Handling + +```python +try: + # Your code + process_data() +except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() + raise +``` + +### 6. Check Job Status Programmatically + +```python +from huggingface_hub import inspect_job, fetch_job_logs + +job_info = inspect_job(job_id="your-job-id") +print(f"Status: {job_info.status.stage}") +print(f"Message: {job_info.status.message}") + +if job_info.status.stage == "ERROR": + print("Job failed! 
Logs:") + for log in fetch_job_logs(job_id="your-job-id"): + print(log) +``` + +## Quick Reference + +### Common Error Codes + +| Code | Meaning | Solution | +|------|---------|----------| +| 401 | Unauthorized | Add token to secrets: MCP uses `"$HF_TOKEN"`, Python API uses `get_token()` | +| 403 | Forbidden | Check token permissions | +| 404 | Not Found | Verify repository exists | +| 500 | Server Error | Retry or contact support | + +### Checklist Before Submitting + +- [ ] Token configured: MCP uses `secrets={"HF_TOKEN": "$HF_TOKEN"}`, Python API uses `secrets={"HF_TOKEN": get_token()}` +- [ ] Script checks for token: `assert "HF_TOKEN" in os.environ` +- [ ] Timeout set appropriately +- [ ] Hardware selected correctly +- [ ] Dependencies listed in PEP 723 header +- [ ] Persistence code included +- [ ] Error handling added +- [ ] Logging added for debugging + +## Getting Help + +If issues persist: + +1. **Check logs** - Most errors include detailed messages +2. **Review documentation** - See main SKILL.md +3. **Check Hub status** - https://status.huggingface.co +4. **Community forums** - https://discuss.huggingface.co +5. **GitHub issues** - For bugs in huggingface_hub + +## Key Takeaways + +1. **Always include token** - MCP: `secrets={"HF_TOKEN": "$HF_TOKEN"}`, Python API: `secrets={"HF_TOKEN": get_token()}` +2. **Set appropriate timeout** - Default 30min may be insufficient +3. **Verify persistence** - Results won't persist without code +4. **Check logs** - Most issues visible in job logs +5. **Test locally** - Catch errors before submitting +6. **Add error handling** - Better debugging information +7. 
**Monitor costs** - Set timeouts to avoid unexpected charges + diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/cot-self-instruct.py b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/cot-self-instruct.py new file mode 100644 index 00000000..5388438b --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/cot-self-instruct.py @@ -0,0 +1,718 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "datasets", +# "transformers", +# "vllm>=0.6.5", +# "huggingface-hub[hf_transfer]", +# "torch", +# "numpy", +# "tqdm", +# "scikit-learn", +# ] +# /// +""" +Generate high-quality synthetic data using Chain-of-Thought Self-Instruct methodology. + +This script implements the CoT-Self-Instruct approach from the paper "CoT-Self-Instruct: +Building high-quality synthetic prompts for reasoning and non-reasoning tasks" (2025). + +It supports two modes: +1. Reasoning tasks: Generates both questions and answers with Chain-of-Thought +2. Instruction tasks: Generates diverse prompts for general instruction following + +Example usage: + # Reasoning tasks with Answer-Consistency filtering + uv run cot-self-instruct.py \\ + --seed-dataset davanstrien/s1k-reasoning \\ + --output-dataset username/synthetic-math \\ + --task-type reasoning \\ + --num-samples 5000 \\ + --filter-method answer-consistency + + # Instruction tasks with RIP filtering + uv run cot-self-instruct.py \\ + --seed-dataset wildchat-filtered \\ + --output-dataset username/synthetic-prompts \\ + --task-type instruction \\ + --filter-method rip \\ + --reward-model Nexusflow/Athene-RM-8B + + # HF Jobs execution + hf jobs uv run --flavor l4x4 \\ + --image vllm/vllm-openai \\ + -e HF_TOKEN=$(python3 -c "from huggingface_hub import get_token; print(get_token())") \\ + https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\ + [args...] 
+""" + +import argparse +import json +import logging +import os +import random +import re +import sys +from collections import Counter +from datetime import datetime +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import torch +from datasets import Dataset, load_dataset +from huggingface_hub import DatasetCard, login +from sklearn.cluster import KMeans +from tqdm.auto import tqdm +from transformers import AutoTokenizer +from vllm import LLM, SamplingParams + +# Enable HF Transfer for faster downloads +os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# Prompt templates from the paper +REASONING_PROMPT_TEMPLATE = """You are a reasoning question generator assistant. Your goal is to create a novel, and challenging reasoning question. You are provided the following seed questions: +Seed Question 1: {seed1} +Seed Question 2: {seed2} +Your task is to: +1. Write a brand-new, self-contained reasoning question that meets the following requirements: +(a) The question draws inspiration from the seed question without copying it verbatim, remaining novel and of comparable difficulty. +(b) The question's final answer should be a single, unambiguous scalar value (e.g., an integer, reduced fraction, exact radical), or another answer type that can be verified in one step (e.g., 'yes/no,' a choice from A to D). +2. Then reason step by step, solve the new question and format your output as follows: +[New Question Begin]{{your_generated_question}}[New Question End] +[Final Answer to New Question Begin]\\boxed{{your_final_answer}}[Final Answer to New Question End]""" + +INSTRUCTION_PROMPT_TEMPLATE = """You are a prompt generator assistant. Your goal is to create diverse and creative synthetic prompts. +Please follow the steps below to create synthetic prompts. +Step 1: Carefully read #Prompt 1# and #Prompt 2#. 
Identify and list all the common elements between these two prompts. If no common elements are found, list the main elements from each prompt. +Step 2: Develop a comprehensive plan based on the #Common Elements List# or #Main Elements List# from Step 1. This plan will guide the generation of new synthetic prompts that are similar to the original prompts. +Step 3: Execute the plan step by step and provide one #Synthetic Prompt#. +Please reply strictly in the following format: +- Step 1 #Common Elements List# or #Main Elements List#: +- Step 2 #Plan#: +- Step 3 #Synthetic Prompt#: +#Prompt 1#: +{prompt1} +#Prompt 2#: +{prompt2}""" + + +def check_gpu_availability() -> int: + """Check if CUDA is available and return the number of GPUs.""" + if not torch.cuda.is_available(): + logger.error("CUDA is not available. This script requires a GPU.") + logger.error( + "Please run on a machine with NVIDIA GPU or use HF Jobs with GPU flavor." + ) + sys.exit(1) + + num_gpus = torch.cuda.device_count() + for i in range(num_gpus): + gpu_name = torch.cuda.get_device_name(i) + gpu_memory = torch.cuda.get_device_properties(i).total_memory / 1024**3 + logger.info(f"GPU {i}: {gpu_name} with {gpu_memory:.1f} GB memory") + + return num_gpus + + +def parse_thinking_output(text: str) -> str: + """Remove thinking tokens from model output.""" + # Remove ... 
blocks + text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL) + return text.strip() + + +def extract_reasoning_output(text: str) -> Tuple[Optional[str], Optional[str]]: + """Extract question and answer from reasoning task output.""" + text = parse_thinking_output(text) + + # Extract question + question_match = re.search(r'\[New Question Begin\](.*?)\[New Question End\]', text, re.DOTALL) + if not question_match: + return None, None + question = question_match.group(1).strip() + + # Extract answer + answer_match = re.search(r'\[Final Answer to New Question Begin\]\\?boxed\{(.*?)\}\[Final Answer to New Question End\]', text, re.DOTALL) + if not answer_match: + # Try without \boxed + answer_match = re.search(r'\[Final Answer to New Question Begin\](.*?)\[Final Answer to New Question End\]', text, re.DOTALL) + + if not answer_match: + return question, None + + answer = answer_match.group(1).strip() + return question, answer + + +def extract_instruction_output(text: str) -> Optional[str]: + """Extract synthetic prompt from instruction task output.""" + text = parse_thinking_output(text) + + # Look for the synthetic prompt after "Step 3 #Synthetic Prompt#:" + match = re.search(r'Step 3 #Synthetic Prompt#:\s*(.+)', text, re.DOTALL) + if match: + return match.group(1).strip() + return None + + +def categorize_prompts(prompts: List[str], num_categories: int = 8) -> Dict[int, List[int]]: + """Categorize prompts using clustering for instruction tasks.""" + from transformers import AutoModel + + logger.info(f"Categorizing {len(prompts)} prompts into {num_categories} categories...") + + # Use a small model for embeddings + tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") + model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") + + # Get embeddings + embeddings = [] + for prompt in tqdm(prompts, desc="Computing embeddings"): + inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512) + with 
torch.no_grad(): + outputs = model(**inputs) + embedding = outputs.last_hidden_state.mean(dim=1).numpy() + embeddings.append(embedding[0]) + + # Cluster + kmeans = KMeans(n_clusters=num_categories, random_state=42) + labels = kmeans.fit_predict(embeddings) + + # Group by category + categories = {} + for idx, label in enumerate(labels): + if label not in categories: + categories[label] = [] + categories[label].append(idx) + + return categories + + +def generate_synthetic_data( + llm: LLM, + seed_data: List[Dict], + task_type: str, + num_samples: int, + categories: Optional[Dict[int, List[int]]] = None, +) -> List[Dict]: + """Generate synthetic data using CoT-Self-Instruct.""" + synthetic_data = [] + + # Set up progress bar + pbar = tqdm(total=num_samples, desc="Generating synthetic data") + + while len(synthetic_data) < num_samples: + # Sample seed data + if task_type == "reasoning": + # Random sampling for reasoning tasks + seeds = random.sample(seed_data, min(2, len(seed_data))) + prompt = REASONING_PROMPT_TEMPLATE.format( + seed1=seeds[0].get("question", seeds[0].get("prompt", "")), + seed2=seeds[1].get("question", seeds[1].get("prompt", "")) if len(seeds) > 1 else seeds[0].get("question", seeds[0].get("prompt", "")) + ) + else: + # Category-aware sampling for instruction tasks + if categories: + # Pick a random category + category = random.choice(list(categories.keys())) + category_indices = categories[category] + indices = random.sample(category_indices, min(2, len(category_indices))) + seeds = [seed_data[i] for i in indices] + else: + seeds = random.sample(seed_data, min(2, len(seed_data))) + + prompt = INSTRUCTION_PROMPT_TEMPLATE.format( + prompt1=seeds[0].get("prompt", seeds[0].get("question", "")), + prompt2=seeds[1].get("prompt", seeds[1].get("question", "")) if len(seeds) > 1 else seeds[0].get("prompt", seeds[0].get("question", "")) + ) + + # Generate + sampling_params = SamplingParams( + temperature=0.7 if task_type == "reasoning" else 0.8, + top_p=0.95 
if task_type == "reasoning" else 0.9, + max_tokens=2048, + ) + + outputs = llm.generate([prompt], sampling_params) + output_text = outputs[0].outputs[0].text + + # Parse output + if task_type == "reasoning": + question, answer = extract_reasoning_output(output_text) + if question and answer: + synthetic_data.append({ + "question": question, + "answer": answer, + "seed_indices": [seed_data.index(s) for s in seeds], + }) + pbar.update(1) + else: + synthetic_prompt = extract_instruction_output(output_text) + if synthetic_prompt: + synthetic_data.append({ + "prompt": synthetic_prompt, + "seed_indices": [seed_data.index(s) for s in seeds], + }) + pbar.update(1) + + pbar.close() + return synthetic_data + + +def answer_consistency_filter( + llm: LLM, + synthetic_data: List[Dict], + k_responses: int = 16, + threshold: float = 0.5, +) -> List[Dict]: + """Filter reasoning tasks using Answer-Consistency.""" + logger.info(f"Applying Answer-Consistency filter with K={k_responses}") + + filtered_data = [] + + for item in tqdm(synthetic_data, desc="Answer-Consistency filtering"): + question = item["question"] + original_answer = item["answer"] + + # Generate K responses + prompts = [question] * k_responses + sampling_params = SamplingParams( + temperature=0.6, + top_p=0.95, + max_tokens=1024, + ) + + outputs = llm.generate(prompts, sampling_params) + + # Extract answers + answers = [] + for output in outputs: + text = output.outputs[0].text + # Try to extract boxed answer + match = re.search(r'\\boxed\{(.*?)\}', text) + if match: + answers.append(match.group(1).strip()) + + if not answers: + continue + + # Get majority answer + answer_counts = Counter(answers) + if answer_counts: + majority_answer, count = answer_counts.most_common(1)[0] + + # Check if majority answer matches original and meets threshold + if (majority_answer == original_answer and + count / len(answers) >= threshold): + item["consistency_score"] = count / len(answers) + filtered_data.append(item) + + 
logger.info(f"Answer-Consistency: kept {len(filtered_data)}/{len(synthetic_data)} examples") + return filtered_data + + +def rip_filter( + llm: LLM, + synthetic_data: List[Dict], + reward_model_id: str, + k_responses: int = 32, + threshold: float = 0.5, +) -> List[Dict]: + """Filter using Rejecting Instruction Preferences (RIP).""" + logger.info(f"Applying RIP filter with K={k_responses} and reward model {reward_model_id}") + + # Note: In a full implementation, you would load and use the actual reward model + # For this example, we'll use a placeholder scoring mechanism + logger.warning("RIP filtering requires a reward model implementation - using placeholder") + + filtered_data = [] + + for item in tqdm(synthetic_data, desc="RIP filtering"): + prompt = item.get("prompt", item.get("question", "")) + + # Generate K responses + prompts = [prompt] * k_responses + sampling_params = SamplingParams( + temperature=1.0, + top_p=1.0, + max_tokens=1024, + ) + + outputs = llm.generate(prompts, sampling_params) + + # In real implementation: score each response with reward model + # For now, use length as a proxy (longer responses often score higher) + scores = [len(output.outputs[0].text) for output in outputs] + + # Use minimum score as quality indicator + min_score = min(scores) if scores else 0 + normalized_score = min_score / 1000 # Normalize to 0-1 range + + if normalized_score >= threshold: + item["rip_score"] = normalized_score + filtered_data.append(item) + + logger.info(f"RIP filter: kept {len(filtered_data)}/{len(synthetic_data)} examples") + return filtered_data + + +def create_dataset_card( + task_type: str, + source_dataset: str, + generation_model: str, + filter_method: str, + num_generated: int, + num_filtered: int, + generation_time: str, + additional_info: Dict = None, +) -> str: + """Create a comprehensive dataset card.""" + filter_info = "" + if filter_method == "answer-consistency": + filter_info = """ +### Answer-Consistency Filtering + +This dataset was 
filtered using Answer-Consistency: +- Generated K responses for each synthetic question +- Kept only examples where majority answer matched the generated answer +- Ensures high-quality, correctly solved problems""" + elif filter_method == "rip": + filter_info = """ +### RIP (Rejecting Instruction Preferences) Filtering + +This dataset was filtered using RIP: +- Generated K responses for each synthetic prompt +- Scored responses using a reward model +- Kept only prompts with high minimum scores""" + + return f"""--- +tags: +- synthetic-data +- cot-self-instruct +- {task_type} +- uv-script +--- + +# CoT-Self-Instruct Synthetic Data + +This dataset contains synthetic {task_type} data generated using the Chain-of-Thought Self-Instruct methodology. + +## Generation Details + +- **Source Dataset**: [{source_dataset}](https://huggingface.co/datasets/{source_dataset}) +- **Generation Model**: [{generation_model}](https://huggingface.co/{generation_model}) +- **Task Type**: {task_type} +- **Filter Method**: {filter_method} +- **Generated Examples**: {num_generated:,} +- **After Filtering**: {num_filtered:,} ({(num_filtered/num_generated)*100:.1f}% acceptance rate) +- **Generation Date**: {generation_time} +{filter_info} + +## Methodology + +Generated using CoT-Self-Instruct, which: +1. Uses Chain-of-Thought reasoning to analyze seed examples +2. Generates new synthetic examples of similar quality and complexity +3. Applies quality filtering to ensure high-quality outputs + +Based on the paper: "CoT-Self-Instruct: Building high-quality synthetic prompts for reasoning and non-reasoning tasks" (2025) + +## Generation Script + +Generated using the CoT-Self-Instruct script from [uv-scripts/synthetic-data](https://huggingface.co/datasets/uv-scripts/synthetic-data). 
+ +To reproduce: +```bash +uv run https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\ + --seed-dataset {source_dataset} \\ + --output-dataset \\ + --task-type {task_type} \\ + --generation-model {generation_model} \\ + --filter-method {filter_method} +``` +""" + + +def main(): + parser = argparse.ArgumentParser( + description="Generate synthetic data using CoT-Self-Instruct", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + # Dataset arguments + parser.add_argument( + "--seed-dataset", + type=str, + required=True, + help="HuggingFace dataset ID containing seed examples", + ) + parser.add_argument( + "--output-dataset", + type=str, + required=True, + help="HuggingFace dataset ID for output", + ) + + # Task configuration + parser.add_argument( + "--task-type", + type=str, + choices=["reasoning", "instruction", "auto"], + default="auto", + help="Type of task (reasoning generates Q&A, instruction generates prompts)", + ) + parser.add_argument( + "--task-column", + type=str, + default=None, + help="Column name containing tasks (auto-detected if not specified)", + ) + + # Model configuration + parser.add_argument( + "--generation-model", + type=str, + default="Qwen/Qwen3-30B-A3B-Thinking-2507", + help="Model for synthetic data generation", + ) + parser.add_argument( + "--filter-model", + type=str, + default=None, + help="Model for filtering (defaults to generation model)", + ) + parser.add_argument( + "--reward-model", + type=str, + default="Nexusflow/Athene-RM-8B", + help="Reward model for RIP filtering", + ) + + # Generation parameters + parser.add_argument( + "--num-samples", + type=int, + default=5000, + help="Number of synthetic examples to generate", + ) + parser.add_argument( + "--batch-size", + type=int, + default=1, + help="Batch size for generation", + ) + + # Filtering parameters + parser.add_argument( + "--filter-method", + type=str, + choices=["answer-consistency", "rip", "both", 
"none"], + default="answer-consistency", + help="Quality filtering method", + ) + parser.add_argument( + "--k-responses", + type=int, + default=16, + help="Number of responses for filtering", + ) + parser.add_argument( + "--quality-threshold", + type=float, + default=0.5, + help="Minimum quality threshold for filtering", + ) + + # GPU configuration + parser.add_argument( + "--tensor-parallel-size", + type=int, + default=None, + help="Number of GPUs for tensor parallelism (auto-detected if not set)", + ) + parser.add_argument( + "--gpu-memory-utilization", + type=float, + default=0.9, + help="GPU memory utilization", + ) + + # Other arguments + parser.add_argument( + "--hf-token", + type=str, + default=None, + help="HuggingFace API token", + ) + parser.add_argument( + "--seed", + type=int, + default=42, + help="Random seed", + ) + + args = parser.parse_args() + + # Set random seeds + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + + # Check GPU + num_gpus = check_gpu_availability() + tensor_parallel_size = args.tensor_parallel_size or num_gpus + + # Authentication + hf_token = args.hf_token or os.environ.get("HF_TOKEN") + if hf_token: + login(token=hf_token) + + # Load seed dataset + logger.info(f"Loading seed dataset: {args.seed_dataset}") + seed_dataset = load_dataset(args.seed_dataset, split="train") + + # Auto-detect task type and column if needed + if args.task_type == "auto": + columns = seed_dataset.column_names + if "question" in columns and "answer" in columns: + args.task_type = "reasoning" + logger.info("Auto-detected task type: reasoning") + else: + args.task_type = "instruction" + logger.info("Auto-detected task type: instruction") + + if not args.task_column: + if args.task_type == "reasoning": + args.task_column = "question" + else: + # Try to find prompt column + for col in ["prompt", "instruction", "text", "input"]: + if col in seed_dataset.column_names: + args.task_column = col + break + + logger.info(f"Using 
task column: {args.task_column}") + + # Convert to list of dicts + seed_data = seed_dataset.to_list() + + # Categorize prompts for instruction tasks + categories = None + if args.task_type == "instruction" and len(seed_data) > 100: + prompts = [item.get(args.task_column, "") for item in seed_data] + categories = categorize_prompts(prompts) + + # Initialize generation model + logger.info(f"Loading generation model: {args.generation_model}") + generation_llm = LLM( + model=args.generation_model, + tensor_parallel_size=tensor_parallel_size, + gpu_memory_utilization=args.gpu_memory_utilization, + ) + + # Generate synthetic data + start_time = datetime.now() + synthetic_data = generate_synthetic_data( + generation_llm, + seed_data, + args.task_type, + args.num_samples, + categories, + ) + + # Apply filtering + filter_llm = generation_llm + if args.filter_model and args.filter_model != args.generation_model: + logger.info(f"Loading filter model: {args.filter_model}") + # Clean up generation model + del generation_llm + torch.cuda.empty_cache() + + filter_llm = LLM( + model=args.filter_model, + tensor_parallel_size=tensor_parallel_size, + gpu_memory_utilization=args.gpu_memory_utilization, + ) + + filtered_data = synthetic_data + if args.filter_method != "none": + if args.filter_method == "answer-consistency" and args.task_type == "reasoning": + filtered_data = answer_consistency_filter( + filter_llm, + synthetic_data, + args.k_responses, + args.quality_threshold, + ) + elif args.filter_method == "rip": + filtered_data = rip_filter( + filter_llm, + synthetic_data, + args.reward_model, + args.k_responses, + args.quality_threshold, + ) + elif args.filter_method == "both": + if args.task_type == "reasoning": + filtered_data = answer_consistency_filter( + filter_llm, + synthetic_data, + args.k_responses, + args.quality_threshold, + ) + filtered_data = rip_filter( + filter_llm, + filtered_data, + args.reward_model, + args.k_responses, + args.quality_threshold, + ) + + # Create 
HuggingFace dataset + logger.info(f"Creating dataset with {len(filtered_data)} examples") + dataset = Dataset.from_list(filtered_data) + + # Create dataset card + generation_time = start_time.strftime("%Y-%m-%d %H:%M:%S UTC") + dataset_card = create_dataset_card( + args.task_type, + args.seed_dataset, + args.generation_model, + args.filter_method, + len(synthetic_data), + len(filtered_data), + generation_time, + ) + + # Push to hub + logger.info(f"Pushing dataset to: {args.output_dataset}") + # Create dataset card + card = DatasetCard(dataset_card) + dataset.push_to_hub(args.output_dataset) + # Push card separately + card.push_to_hub(args.output_dataset) + + logger.info("Done! Dataset available at: https://huggingface.co/datasets/" + args.output_dataset) + + # Print example HF Jobs command if running locally + if len(sys.argv) > 1: + print("\nTo run on HF Jobs:") + print(f"""hf jobs uv run --flavor l4x4 \\ + --image vllm/vllm-openai \\ + -e HF_TOKEN=$(python3 -c "from huggingface_hub import get_token; print(get_token())") \\ + https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\ + --seed-dataset {args.seed_dataset} \\ + --output-dataset {args.output_dataset} \\ + --task-type {args.task_type} \\ + --generation-model {args.generation_model} \\ + --filter-method {args.filter_method} \\ + --num-samples {args.num_samples}""") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/finepdfs-stats.py b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/finepdfs-stats.py new file mode 100644 index 00000000..989732b6 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/finepdfs-stats.py @@ -0,0 +1,546 @@ +# /// script +# requires-python = ">=3.12" +# dependencies = [ +# "polars>=1.31.0", +# "huggingface-hub", +# "datasets", +# "ascii-graph", +# ] +# /// +""" +Analyze 
educational quality trends across CommonCrawl dumps using Polars streaming. + +Answers: "Is the web getting more educational over time?" + +Demonstrates Polars HF Hub integration - process 50M+ docs without downloading 300GB+. + +Example usage: + # Analyze English PDFs (default) + uv run finepdfs-stats.py + + # Analyze all 70+ languages + uv run finepdfs-stats.py --all-languages + + # Quick test + uv run finepdfs-stats.py --limit 10000 --show-plan + + # Save results to HF Hub + uv run finepdfs-stats.py --output-repo username/finepdfs-temporal-stats + + # Run on HF Jobs + hf jobs uv run \\ + -s HF_TOKEN \\ + -e HF_XET_HIGH_PERFORMANCE=1 \\ + https://huggingface.co/datasets/uv-scripts/dataset-stats/raw/main/finepdfs-stats.py \\ + -- --output-repo username/stats +""" + +import argparse +import logging +import os +import sys +import time +from pathlib import Path + +import polars as pl +from ascii_graph import Pyasciigraph +from datasets import Dataset +from huggingface_hub import HfApi, create_repo, list_repo_tree, login + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# Common language+script codes for finepdfs-edu +COMMON_LANGUAGES = { + "eng_Latn": "English (Latin script)", + "fra_Latn": "French (Latin script)", + "deu_Latn": "German (Latin script)", + "spa_Latn": "Spanish (Latin script)", + "por_Latn": "Portuguese (Latin script)", + "ita_Latn": "Italian (Latin script)", + "nld_Latn": "Dutch (Latin script)", + "pol_Latn": "Polish (Latin script)", + "rus_Cyrl": "Russian (Cyrillic script)", + "zho_Hans": "Chinese (Simplified)", + "zho_Hant": "Chinese (Traditional)", + "jpn_Jpan": "Japanese", + "kor_Hang": "Korean", + "ara_Arab": "Arabic", + "hin_Deva": "Hindi (Devanagari)", +} + + +def list_available_languages(dataset_id: str) -> list[str]: + """List available language subsets in the dataset.""" + try: + tree = list_repo_tree(dataset_id, path_in_repo="data", 
repo_type="dataset") + languages = [ + item.path.replace("data/", "") + for item in tree + if item.path.startswith("data/") + and "/" not in item.path.replace("data/", "") + ] + return sorted(languages) + except Exception as e: + logger.warning(f"Could not list languages: {e}") + return list(COMMON_LANGUAGES.keys()) + + +def compute_temporal_stats(df: pl.LazyFrame, output_path: Path) -> pl.DataFrame: + """Single scan: compute stats grouped by dump for temporal analysis.""" + query = df.group_by("dump").agg( + pl.len().alias("doc_count"), + pl.col("token_count").sum().alias("total_tokens"), + pl.col("fw_edu_scores").list.mean().mean().alias("avg_edu_score"), + (pl.col("fw_edu_scores").list.mean() >= 3).sum().alias("high_edu_count"), + ) + query.sink_parquet(output_path, engine="streaming") + return pl.read_parquet(output_path) + + +def compute_global_stats(temporal: pl.DataFrame) -> pl.DataFrame: + """Compute global stats from temporal breakdown.""" + total = temporal["doc_count"].sum() + return pl.DataFrame( + { + "total_docs": [total], + "total_tokens": [temporal["total_tokens"].sum()], + "avg_edu_score": [ + (temporal["avg_edu_score"] * temporal["doc_count"]).sum() / total + ], + "high_edu_rate": [temporal["high_edu_count"].sum() / total], + "num_dumps": [len(temporal)], + } + ) + + +def format_temporal_stats(temporal: pl.DataFrame) -> pl.DataFrame: + """Format temporal stats with high_edu_rate, sorted chronologically.""" + return ( + temporal.with_columns( + (pl.col("high_edu_count") / pl.col("doc_count")).alias("high_edu_rate") + ) + .select(["dump", "doc_count", "avg_edu_score", "high_edu_rate"]) + .sort( + "dump" + ) # Chronological order (CC-MAIN-2017-xx comes before CC-MAIN-2024-xx) + ) + + +def create_ascii_charts(temporal_stats: pl.DataFrame) -> str: + """Create ASCII bar charts showing temporal trends.""" + # Extract year from dump name (CC-MAIN-2024-42 -> 2024) + # Group by year and average the values for cleaner display + yearly = ( + 
temporal_stats.with_columns( + pl.col("dump").str.extract(r"CC-MAIN-(\d{4})", 1).alias("year") + ) + .group_by("year") + .agg( + pl.col("doc_count").sum(), + pl.col("avg_edu_score").mean(), + pl.col("high_edu_rate").mean(), + ) + .sort("year") + ) + + lines = [] + + # High edu rate chart (more dramatic differences) + data_rate = [ + (row["year"], row["high_edu_rate"] * 100) + for row in yearly.iter_rows(named=True) + ] + graph = Pyasciigraph(line_length=60, float_format="{0:.1f}%") + lines.extend(graph.graph("High Educational Content (edu >= 3)", data_rate)) + + lines.append("") + + # Avg edu score chart + data_score = [ + (row["year"], row["avg_edu_score"]) for row in yearly.iter_rows(named=True) + ] + graph2 = Pyasciigraph(line_length=60, float_format="{0:.2f}") + lines.extend(graph2.graph("Average Educational Score", data_score)) + + return "\n".join(lines) + + +def create_readme( + args, + global_stats: pl.DataFrame, + temporal_stats: pl.DataFrame, + scan_time: float, + ascii_charts: str, +) -> str: + """Create README content for the stats dataset.""" + stats = global_stats.to_dicts()[0] + total_docs = stats.get("total_docs", 0) + docs_per_sec = total_docs / scan_time if scan_time > 0 else 0 + + # Get first and last year averages for trend (more representative than single dumps) + yearly = ( + temporal_stats.with_columns( + pl.col("dump").str.extract(r"CC-MAIN-(\d{4})", 1).alias("year") + ) + .group_by("year") + .agg( + pl.col("doc_count").sum(), + pl.col("avg_edu_score").mean(), + pl.col("high_edu_rate").mean(), + ) + .sort("year") + ) + first_year = yearly.head(1).to_dicts()[0] + last_year = yearly.tail(1).to_dicts()[0] + + scope = ( + "all languages" + if args.all_languages + else COMMON_LANGUAGES.get(args.lang, args.lang) + ) + + return f"""--- +tags: + - uv-script + - statistics + - polars + - finepdfs-edu + - temporal-analysis +license: odc-by +configs: + - config_name: global_stats + data_files: global_stats/train-*.parquet + - config_name: 
temporal_stats + data_files: temporal_stats/train-*.parquet +default_viewer_config: temporal_stats +--- + +# Is the Web Getting More Educational? + +Temporal analysis of educational quality in **{scope}** across {stats.get("num_dumps", 0)} CommonCrawl dumps. + +## Trend + +``` +{ascii_charts} +``` + +## Key Finding + +| Year | Avg Edu Score | High Edu Rate | +|------|---------------|---------------| +| {first_year["year"]} | {first_year["avg_edu_score"]:.2f} | {first_year["high_edu_rate"] * 100:.1f}% | +| {last_year["year"]} | {last_year["avg_edu_score"]:.2f} | {last_year["high_edu_rate"] * 100:.1f}% | + +## Performance + +- **{total_docs:,} documents** processed in **{scan_time:.0f} seconds** +- **{docs_per_sec:,.0f} docs/sec** using Polars streaming +- Single scan, no full dataset download required + +## Summary + +| Metric | Value | +|--------|-------| +| Scope | {scope} | +| Total Documents | {total_docs:,} | +| Total Tokens | {stats.get("total_tokens", 0):,} | +| Avg Edu Score | {stats.get("avg_edu_score", 0):.3f} | +| High Edu Rate | {stats.get("high_edu_rate", 0) * 100:.1f}% | +| CommonCrawl Dumps | {stats.get("num_dumps", 0)} | + +## Files + +- `global_stats` - Overall summary +- `temporal_stats` - Per-dump breakdown (sorted chronologically) + +## Reproduce + +```bash +uv run https://huggingface.co/datasets/uv-scripts/dataset-stats/raw/main/finepdfs-stats.py \\ + {"--all-languages" if args.all_languages else f"--lang {args.lang}"} --output-repo your-username/stats +``` + +## Source + +- **Dataset**: [{args.source_dataset}](https://huggingface.co/datasets/{args.source_dataset}) +- **Script**: [uv-scripts/dataset-stats](https://huggingface.co/datasets/uv-scripts/dataset-stats) +""" + + +def main(): + parser = argparse.ArgumentParser( + description="Analyze educational quality trends across CommonCrawl dumps", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + parser.add_argument( + "--source-dataset", + type=str, + 
default="HuggingFaceFW/finepdfs-edu", + help="Source dataset (default: HuggingFaceFW/finepdfs-edu)", + ) + + parser.add_argument( + "--lang", + type=str, + default="eng_Latn", + help="Language+script code (default: eng_Latn)", + ) + + parser.add_argument( + "--all-languages", + action="store_true", + help="Analyze all languages (70+) instead of single language", + ) + + parser.add_argument( + "--show-plan", + action="store_true", + help="Show Polars query plan (demonstrates optimization)", + ) + + parser.add_argument( + "--list-languages", + action="store_true", + help="List available languages and exit", + ) + + parser.add_argument( + "--limit", + type=int, + help="Limit to first N rows (for testing)", + ) + + parser.add_argument( + "--output-repo", + type=str, + help="HuggingFace dataset repository to upload results", + ) + + parser.add_argument( + "--output-dir", + type=str, + default="./stats_output", + help="Local directory for output files", + ) + + parser.add_argument( + "--hf-token", + type=str, + help="HuggingFace API token (or set HF_TOKEN env var)", + ) + + parser.add_argument( + "--private", + action="store_true", + help="Make the output dataset private", + ) + + args = parser.parse_args() + + # Check for high-performance mode + if os.environ.get("HF_XET_HIGH_PERFORMANCE"): + logger.info("High-performance mode enabled (HF_XET_HIGH_PERFORMANCE=1)") + + # List languages mode + if args.list_languages: + print(f"Available language+script codes for {args.source_dataset}:\n") + print("Common languages:") + for code, name in COMMON_LANGUAGES.items(): + print(f" {code:12} - {name}") + print("\nFetching full list from HF Hub...") + all_langs = list_available_languages(args.source_dataset) + print(f"\nAll available ({len(all_langs)} total):") + for lang in all_langs[:30]: # Show first 30 + name = COMMON_LANGUAGES.get(lang, "") + print(f" {lang:12} {name}") + if len(all_langs) > 30: + print(f" ... 
and {len(all_langs) - 30} more") + sys.exit(0) + + # Build the parquet path + if args.all_languages: + source_path = f"hf://datasets/{args.source_dataset}/data/*/train/*.parquet" + scope_desc = "all languages" + else: + source_path = ( + f"hf://datasets/{args.source_dataset}/data/{args.lang}/train/*.parquet" + ) + scope_desc = f"{args.lang} ({COMMON_LANGUAGES.get(args.lang, 'unknown')})" + + logger.info(f"Scanning: {source_path}") + logger.info(f"Scope: {scope_desc}") + + # Create lazy frame - this doesn't load any data yet! + logger.info("Creating lazy query plan...") + df = pl.scan_parquet(source_path) + + # Apply limit if specified + if args.limit: + logger.info(f"Limiting to first {args.limit:,} rows") + df = df.head(args.limit) + + # Show query plan if requested + if args.show_plan: + # Build a sample query to show the plan + sample_query = df.select( + pl.len(), + pl.col("token_count").sum(), + pl.col("language").n_unique(), + ) + print("\nQuery Plan (showing Polars optimization):") + print("=" * 60) + print(sample_query.explain()) + print("=" * 60) + print("\nNote: Polars uses projection pushdown - only reads columns needed!") + print("The 'text' column is never loaded, making this very fast.\n") + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Single scan: compute temporal stats + logger.info("Computing temporal stats (single scan)...") + start = time.perf_counter() + temporal_path = output_dir / "temporal_stats.parquet" + temporal_raw = compute_temporal_stats(df, temporal_path) + scan_time = time.perf_counter() - start + logger.info(f"Scan complete in {scan_time:.2f}s - {len(temporal_raw)} dumps") + + # Compute stats + global_stats = compute_global_stats(temporal_raw) + temporal_stats = format_temporal_stats(temporal_raw) + + # Save + global_stats.write_parquet(output_dir / "global_stats.parquet") + temporal_stats.write_parquet(output_dir / "temporal_stats.parquet") + + # Print results 
+ total_docs = global_stats["total_docs"][0] + docs_per_sec = total_docs / scan_time if scan_time > 0 else 0 + + print("\n" + "=" * 70) + print("IS THE WEB GETTING MORE EDUCATIONAL?") + print("=" * 70) + + print(f"\nScope: {scope_desc}") + print(f"Dataset: {args.source_dataset}") + + print("\n" + "-" * 70) + print("GLOBAL STATS") + print("-" * 70) + print(global_stats) + + print("\n" + "-" * 70) + print(f"TEMPORAL TREND ({len(temporal_stats)} CommonCrawl dumps)") + print("-" * 70) + # Show first 5 and last 5 + if len(temporal_stats) > 10: + print("Earliest dumps:") + print(temporal_stats.head(5)) + print("\n...") + print("\nLatest dumps:") + print(temporal_stats.tail(5)) + else: + print(temporal_stats) + + # Create ASCII charts + ascii_charts = create_ascii_charts(temporal_stats) + print("\n" + "-" * 70) + print("TREND VISUALIZATION") + print("-" * 70) + print(ascii_charts) + + print("\n" + "-" * 70) + print("PERFORMANCE") + print("-" * 70) + print(f"Scan time: {scan_time:.2f}s") + print(f"Documents: {total_docs:,}") + print(f"Throughput: {docs_per_sec:,.0f} docs/sec") + + logger.info(f"Results saved to: {output_dir}") + + # Upload to HF Hub if requested + if args.output_repo: + hf_token = args.hf_token or os.environ.get("HF_TOKEN") + if hf_token: + login(token=hf_token) + + api = HfApi(token=hf_token) + + logger.info(f"Creating/updating dataset repository: {args.output_repo}") + create_repo( + args.output_repo, + repo_type="dataset", + private=args.private, + token=hf_token, + exist_ok=True, + ) + + # Upload each as a dataset config + configs = [ + ("global_stats", global_stats), + ("temporal_stats", temporal_stats), + ] + + for config_name, stats_df in configs: + logger.info(f"Uploading {config_name}...") + ds = Dataset.from_polars(stats_df) + ds.push_to_hub( + args.output_repo, + config_name=config_name, + token=hf_token, + private=args.private, + ) + time.sleep(1) # Avoid 409 conflicts + + # Upload README + readme_content = create_readme( + args, global_stats, 
temporal_stats, scan_time, ascii_charts + ) + api.upload_file( + path_or_fileobj=readme_content.encode(), + path_in_repo="README.md", + repo_id=args.output_repo, + repo_type="dataset", + token=hf_token, + ) + + dataset_url = f"https://huggingface.co/datasets/{args.output_repo}" + logger.info(f"Dataset uploaded: {dataset_url}") + print(f"\nResults uploaded to: {dataset_url}") + + +if __name__ == "__main__": + if len(sys.argv) == 1: + print("Is the Web Getting More Educational?") + print("=" * 40) + print("\nAnalyze educational quality trends across CommonCrawl dumps") + print("using Polars streaming - no download needed!\n") + print("Example commands:\n") + print("# Quick test:") + print("uv run finepdfs-stats.py --limit 10000\n") + print("# Analyze English PDFs:") + print("uv run finepdfs-stats.py\n") + print("# Analyze ALL 70+ languages:") + print("uv run finepdfs-stats.py --all-languages\n") + print("# Show query plan (see Polars optimization):") + print("uv run finepdfs-stats.py --show-plan --limit 1000\n") + print("# Save results to HF Hub:") + print("uv run finepdfs-stats.py --output-repo username/temporal-stats\n") + print("# Run on HF Jobs:") + print("hf jobs uv run \\") + print(" -s HF_TOKEN \\") + print(" -e HF_XET_HIGH_PERFORMANCE=1 \\") + print( + " https://huggingface.co/datasets/uv-scripts/dataset-stats/raw/main/finepdfs-stats.py \\" + ) + print(" -- --output-repo username/stats") + sys.exit(0) + + main() diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/generate-responses.py b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/generate-responses.py new file mode 100644 index 00000000..1496f449 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-jobs/scripts/generate-responses.py @@ -0,0 +1,587 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "datasets", +# "flashinfer-python", +# "huggingface-hub[hf_transfer]", +# "hf-xet>= 1.1.7", +# 
"torch", +# "transformers", +# "vllm>=0.8.5", +# ] +# +# /// +""" +Generate responses for prompts in a dataset using vLLM for efficient GPU inference. + +This script loads a dataset from Hugging Face Hub containing chat-formatted messages, +applies the model's chat template, generates responses using vLLM, and saves the +results back to the Hub with a comprehensive dataset card. + +Example usage: + # Local execution with auto GPU detection + uv run generate-responses.py \\ + username/input-dataset \\ + username/output-dataset \\ + --messages-column messages + + # With custom model and sampling parameters + uv run generate-responses.py \\ + username/input-dataset \\ + username/output-dataset \\ + --model-id meta-llama/Llama-3.1-8B-Instruct \\ + --temperature 0.9 \\ + --top-p 0.95 \\ + --max-tokens 2048 + + # HF Jobs execution (see script output for full command) + hf jobs uv run --flavor a100x4 ... +""" + +import argparse +import logging +import os +import sys +from datetime import datetime +from typing import Optional + +from datasets import load_dataset +from huggingface_hub import DatasetCard, get_token, login +from torch import cuda +from tqdm.auto import tqdm +from transformers import AutoTokenizer +from vllm import LLM, SamplingParams + +# Enable HF Transfer for faster downloads +os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + + +def check_gpu_availability() -> int: + """Check if CUDA is available and return the number of GPUs.""" + if not cuda.is_available(): + logger.error("CUDA is not available. This script requires a GPU.") + logger.error( + "Please run on a machine with NVIDIA GPU or use HF Jobs with GPU flavor." 
+ ) + sys.exit(1) + + num_gpus = cuda.device_count() + for i in range(num_gpus): + gpu_name = cuda.get_device_name(i) + gpu_memory = cuda.get_device_properties(i).total_memory / 1024**3 + logger.info(f"GPU {i}: {gpu_name} with {gpu_memory:.1f} GB memory") + + return num_gpus + + +def create_dataset_card( + source_dataset: str, + model_id: str, + messages_column: str, + prompt_column: Optional[str], + sampling_params: SamplingParams, + tensor_parallel_size: int, + num_examples: int, + generation_time: str, + num_skipped: int = 0, + max_model_len_used: Optional[int] = None, +) -> str: + """Create a comprehensive dataset card documenting the generation process.""" + filtering_section = "" + if num_skipped > 0: + skip_percentage = (num_skipped / num_examples) * 100 + processed = num_examples - num_skipped + filtering_section = f""" + +### Filtering Statistics + +- **Total Examples**: {num_examples:,} +- **Processed**: {processed:,} ({100 - skip_percentage:.1f}%) +- **Skipped (too long)**: {num_skipped:,} ({skip_percentage:.1f}%) +- **Max Model Length Used**: {max_model_len_used:,} tokens + +Note: Prompts exceeding the maximum model length were skipped and have empty responses.""" + + return f"""--- +tags: +- generated +- vllm +- uv-script +--- + +# Generated Responses Dataset + +This dataset contains generated responses for prompts from [{source_dataset}](https://huggingface.co/datasets/{source_dataset}). 
+ +## Generation Details + +- **Source Dataset**: [{source_dataset}](https://huggingface.co/datasets/{source_dataset}) +- **Input Column**: `{prompt_column if prompt_column else messages_column}` ({"plain text prompts" if prompt_column else "chat messages"}) +- **Model**: [{model_id}](https://huggingface.co/{model_id}) +- **Number of Examples**: {num_examples:,} +- **Generation Date**: {generation_time}{filtering_section} + +### Sampling Parameters + +- **Temperature**: {sampling_params.temperature} +- **Top P**: {sampling_params.top_p} +- **Top K**: {sampling_params.top_k} +- **Min P**: {sampling_params.min_p} +- **Max Tokens**: {sampling_params.max_tokens} +- **Repetition Penalty**: {sampling_params.repetition_penalty} + +### Hardware Configuration + +- **Tensor Parallel Size**: {tensor_parallel_size} +- **GPU Configuration**: {tensor_parallel_size} GPU(s) + +## Dataset Structure + +The dataset contains all columns from the source dataset plus: +- `response`: The generated response from the model + +## Generation Script + +Generated using the vLLM inference script from [uv-scripts/vllm](https://huggingface.co/datasets/uv-scripts/vllm). 
+ +To reproduce this generation: + +```bash +uv run https://huggingface.co/datasets/uv-scripts/vllm/raw/main/generate-responses.py \\ + {source_dataset} \\ + \\ + --model-id {model_id} \\ + {"--prompt-column " + prompt_column if prompt_column else "--messages-column " + messages_column} \\ + --temperature {sampling_params.temperature} \\ + --top-p {sampling_params.top_p} \\ + --top-k {sampling_params.top_k} \\ + --max-tokens {sampling_params.max_tokens}{f" \\\\\\n --max-model-len {max_model_len_used}" if max_model_len_used else ""} +``` +""" + + +def main( + src_dataset_hub_id: str, + output_dataset_hub_id: str, + model_id: str = "Qwen/Qwen3-30B-A3B-Instruct-2507", + messages_column: str = "messages", + prompt_column: Optional[str] = None, + output_column: str = "response", + temperature: float = 0.7, + top_p: float = 0.8, + top_k: int = 20, + min_p: float = 0.0, + max_tokens: int = 16384, + repetition_penalty: float = 1.0, + gpu_memory_utilization: float = 0.90, + max_model_len: Optional[int] = None, + tensor_parallel_size: Optional[int] = None, + skip_long_prompts: bool = True, + max_samples: Optional[int] = None, + hf_token: Optional[str] = None, +): + """ + Main generation pipeline. 
+ + Args: + src_dataset_hub_id: Input dataset on Hugging Face Hub + output_dataset_hub_id: Where to save results on Hugging Face Hub + model_id: Hugging Face model ID for generation + messages_column: Column name containing chat messages + prompt_column: Column name containing plain text prompts (alternative to messages_column) + output_column: Column name for generated responses + temperature: Sampling temperature + top_p: Top-p sampling parameter + top_k: Top-k sampling parameter + min_p: Minimum probability threshold + max_tokens: Maximum tokens to generate + repetition_penalty: Repetition penalty parameter + gpu_memory_utilization: GPU memory utilization factor + max_model_len: Maximum model context length (None uses model default) + tensor_parallel_size: Number of GPUs to use (auto-detect if None) + skip_long_prompts: Skip prompts exceeding max_model_len instead of failing + max_samples: Maximum number of samples to process (None for all) + hf_token: Hugging Face authentication token + """ + generation_start_time = datetime.now().isoformat() + + # GPU check and configuration + num_gpus = check_gpu_availability() + if tensor_parallel_size is None: + tensor_parallel_size = num_gpus + logger.info( + f"Auto-detected {num_gpus} GPU(s), using tensor_parallel_size={tensor_parallel_size}" + ) + else: + logger.info(f"Using specified tensor_parallel_size={tensor_parallel_size}") + if tensor_parallel_size > num_gpus: + logger.warning( + f"Requested {tensor_parallel_size} GPUs but only {num_gpus} available" + ) + + # Authentication - try multiple methods + HF_TOKEN = hf_token or os.environ.get("HF_TOKEN") or get_token() + + if not HF_TOKEN: + logger.error("No HuggingFace token found. Please provide token via:") + logger.error(" 1. --hf-token argument") + logger.error(" 2. HF_TOKEN environment variable") + logger.error(" 3. 
Run 'hf auth login' or use login() in Python") + sys.exit(1) + + logger.info("HuggingFace token found, authenticating...") + login(token=HF_TOKEN) + + # Initialize vLLM + logger.info(f"Loading model: {model_id}") + vllm_kwargs = { + "model": model_id, + "tensor_parallel_size": tensor_parallel_size, + "gpu_memory_utilization": gpu_memory_utilization, + } + if max_model_len is not None: + vllm_kwargs["max_model_len"] = max_model_len + logger.info(f"Using max_model_len={max_model_len}") + + llm = LLM(**vllm_kwargs) + + # Load tokenizer for chat template + logger.info("Loading tokenizer...") + tokenizer = AutoTokenizer.from_pretrained(model_id) + + # Create sampling parameters + sampling_params = SamplingParams( + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + max_tokens=max_tokens, + repetition_penalty=repetition_penalty, + ) + + # Load dataset + logger.info(f"Loading dataset: {src_dataset_hub_id}") + dataset = load_dataset(src_dataset_hub_id, split="train") + + # Apply max_samples if specified + if max_samples is not None and max_samples < len(dataset): + logger.info(f"Limiting dataset to {max_samples} samples") + dataset = dataset.select(range(max_samples)) + + total_examples = len(dataset) + logger.info(f"Dataset loaded with {total_examples:,} examples") + + # Determine which column to use and validate + if prompt_column: + # Use prompt column mode + if prompt_column not in dataset.column_names: + logger.error( + f"Column '{prompt_column}' not found. Available columns: {dataset.column_names}" + ) + sys.exit(1) + logger.info(f"Using prompt column mode with column: '{prompt_column}'") + use_messages = False + else: + # Use messages column mode + if messages_column not in dataset.column_names: + logger.error( + f"Column '{messages_column}' not found. 
Available columns: {dataset.column_names}" + ) + sys.exit(1) + logger.info(f"Using messages column mode with column: '{messages_column}'") + use_messages = True + + # Get effective max length for filtering + if max_model_len is not None: + effective_max_len = max_model_len + else: + # Get model's default max length + effective_max_len = llm.llm_engine.model_config.max_model_len + logger.info(f"Using effective max model length: {effective_max_len}") + + # Process messages and apply chat template + logger.info("Preparing prompts...") + all_prompts = [] + valid_prompts = [] + valid_indices = [] + skipped_info = [] + + for i, example in enumerate(tqdm(dataset, desc="Processing prompts")): + if use_messages: + # Messages mode: use existing chat messages + messages = example[messages_column] + # Apply chat template + prompt = tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + else: + # Prompt mode: convert plain text to messages format + user_prompt = example[prompt_column] + messages = [{"role": "user", "content": user_prompt}] + # Apply chat template + prompt = tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + + all_prompts.append(prompt) + + # Count tokens if filtering is enabled + if skip_long_prompts: + tokens = tokenizer.encode(prompt) + if len(tokens) <= effective_max_len: + valid_prompts.append(prompt) + valid_indices.append(i) + else: + skipped_info.append((i, len(tokens))) + else: + valid_prompts.append(prompt) + valid_indices.append(i) + + # Log filtering results + if skip_long_prompts and skipped_info: + logger.warning( + f"Skipped {len(skipped_info)} prompts that exceed max_model_len ({effective_max_len} tokens)" + ) + logger.info("Skipped prompt details (first 10):") + for idx, (prompt_idx, token_count) in enumerate(skipped_info[:10]): + logger.info( + f" - Example {prompt_idx}: {token_count} tokens (exceeds by {token_count - effective_max_len})" + ) + if len(skipped_info) > 
10: + logger.info(f" ... and {len(skipped_info) - 10} more") + + skip_percentage = (len(skipped_info) / total_examples) * 100 + if skip_percentage > 10: + logger.warning(f"WARNING: {skip_percentage:.1f}% of prompts were skipped!") + + if not valid_prompts: + logger.error("No valid prompts to process after filtering!") + sys.exit(1) + + # Generate responses - vLLM handles batching internally + logger.info(f"Starting generation for {len(valid_prompts):,} valid prompts...") + logger.info("vLLM will handle batching and scheduling automatically") + + outputs = llm.generate(valid_prompts, sampling_params) + + # Extract generated text and create full response list + logger.info("Extracting generated responses...") + responses = [""] * total_examples # Initialize with empty strings + + for idx, output in enumerate(outputs): + original_idx = valid_indices[idx] + response = output.outputs[0].text.strip() + responses[original_idx] = response + + # Add responses to dataset + logger.info("Adding responses to dataset...") + dataset = dataset.add_column(output_column, responses) + + # Create dataset card + logger.info("Creating dataset card...") + card_content = create_dataset_card( + source_dataset=src_dataset_hub_id, + model_id=model_id, + messages_column=messages_column, + prompt_column=prompt_column, + sampling_params=sampling_params, + tensor_parallel_size=tensor_parallel_size, + num_examples=total_examples, + generation_time=generation_start_time, + num_skipped=len(skipped_info) if skip_long_prompts else 0, + max_model_len_used=effective_max_len if skip_long_prompts else None, + ) + + # Push dataset to hub + logger.info(f"Pushing dataset to: {output_dataset_hub_id}") + dataset.push_to_hub(output_dataset_hub_id, token=HF_TOKEN) + + # Push dataset card + card = DatasetCard(card_content) + card.push_to_hub(output_dataset_hub_id, token=HF_TOKEN) + + logger.info("✅ Generation complete!") + logger.info( + f"Dataset available at: 
https://huggingface.co/datasets/{output_dataset_hub_id}" + ) + + +if __name__ == "__main__": + if len(sys.argv) > 1: + parser = argparse.ArgumentParser( + description="Generate responses for dataset prompts using vLLM", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Basic usage with default Qwen model + uv run generate-responses.py input-dataset output-dataset + + # With custom model and parameters + uv run generate-responses.py input-dataset output-dataset \\ + --model-id meta-llama/Llama-3.1-8B-Instruct \\ + --temperature 0.9 \\ + --max-tokens 2048 + + # Force specific GPU configuration + uv run generate-responses.py input-dataset output-dataset \\ + --tensor-parallel-size 2 \\ + --gpu-memory-utilization 0.95 + + # Using environment variable for token + HF_TOKEN=hf_xxx uv run generate-responses.py input-dataset output-dataset + """, + ) + + parser.add_argument( + "src_dataset_hub_id", + help="Input dataset on Hugging Face Hub (e.g., username/dataset-name)", + ) + parser.add_argument( + "output_dataset_hub_id", help="Output dataset name on Hugging Face Hub" + ) + parser.add_argument( + "--model-id", + type=str, + default="Qwen/Qwen3-30B-A3B-Instruct-2507", + help="Model to use for generation (default: Qwen3-30B-A3B-Instruct-2507)", + ) + parser.add_argument( + "--messages-column", + type=str, + default="messages", + help="Column containing chat messages (default: messages)", + ) + parser.add_argument( + "--prompt-column", + type=str, + help="Column containing plain text prompts (alternative to --messages-column)", + ) + parser.add_argument( + "--output-column", + type=str, + default="response", + help="Column name for generated responses (default: response)", + ) + parser.add_argument( + "--max-samples", + type=int, + help="Maximum number of samples to process (default: all)", + ) + parser.add_argument( + "--temperature", + type=float, + default=0.7, + help="Sampling temperature (default: 0.7)", + ) + parser.add_argument( + 
"--top-p", + type=float, + default=0.8, + help="Top-p sampling parameter (default: 0.8)", + ) + parser.add_argument( + "--top-k", + type=int, + default=20, + help="Top-k sampling parameter (default: 20)", + ) + parser.add_argument( + "--min-p", + type=float, + default=0.0, + help="Minimum probability threshold (default: 0.0)", + ) + parser.add_argument( + "--max-tokens", + type=int, + default=16384, + help="Maximum tokens to generate (default: 16384)", + ) + parser.add_argument( + "--repetition-penalty", + type=float, + default=1.0, + help="Repetition penalty (default: 1.0)", + ) + parser.add_argument( + "--gpu-memory-utilization", + type=float, + default=0.90, + help="GPU memory utilization factor (default: 0.90)", + ) + parser.add_argument( + "--max-model-len", + type=int, + help="Maximum model context length (default: model's default)", + ) + parser.add_argument( + "--tensor-parallel-size", + type=int, + help="Number of GPUs to use (default: auto-detect)", + ) + parser.add_argument( + "--hf-token", + type=str, + help="Hugging Face token (can also use HF_TOKEN env var)", + ) + parser.add_argument( + "--skip-long-prompts", + action="store_true", + default=True, + help="Skip prompts that exceed max_model_len instead of failing (default: True)", + ) + parser.add_argument( + "--no-skip-long-prompts", + dest="skip_long_prompts", + action="store_false", + help="Fail on prompts that exceed max_model_len", + ) + + args = parser.parse_args() + + main( + src_dataset_hub_id=args.src_dataset_hub_id, + output_dataset_hub_id=args.output_dataset_hub_id, + model_id=args.model_id, + messages_column=args.messages_column, + prompt_column=args.prompt_column, + output_column=args.output_column, + temperature=args.temperature, + top_p=args.top_p, + top_k=args.top_k, + min_p=args.min_p, + max_tokens=args.max_tokens, + repetition_penalty=args.repetition_penalty, + gpu_memory_utilization=args.gpu_memory_utilization, + max_model_len=args.max_model_len, + 
tensor_parallel_size=args.tensor_parallel_size, + skip_long_prompts=args.skip_long_prompts, + max_samples=args.max_samples, + hf_token=args.hf_token, + ) + else: + # Show HF Jobs example when run without arguments + print(""" +vLLM Response Generation Script +============================== + +This script requires arguments. For usage information: + uv run generate-responses.py --help + +Example HF Jobs command with multi-GPU: + # If you're logged in with hf auth, token will be auto-detected + hf jobs uv run \\ + --flavor l4x4 \\ + https://huggingface.co/datasets/uv-scripts/vllm/raw/main/generate-responses.py \\ + username/input-dataset \\ + username/output-dataset \\ + --messages-column messages \\ + --model-id Qwen/Qwen3-30B-A3B-Instruct-2507 \\ + --temperature 0.7 \\ + --max-tokens 16384 + """) diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/SKILL.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/SKILL.md index 95994b17..d6d5f742 100644 --- a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/SKILL.md +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/SKILL.md @@ -1,9 +1,9 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-llm-trainer" name: hugging-face-model-trainer -description: "Train language models using TRL (Transformer Reinforcement Learning) on fully managed Hugging Face infrastructure. No local GPU setup required—models train on cloud GPUs and results are automatically saved to the Hugging Face Hub." +description: Train or fine-tune TRL language models on Hugging Face Jobs, including SFT, DPO, GRPO, and GGUF export. license: Complete terms in LICENSE.txt risk: unknown -source: community --- # TRL Training on Hugging Face Jobs @@ -60,11 +60,12 @@ When assisting with training jobs: 4. 
**Use example scripts as templates** - Reference `scripts/train_sft_example.py`, `scripts/train_dpo_example.py`, etc. as starting points. -## Local Script Dependencies +## Local Script Execution -To run scripts locally (like `estimate_cost.py`), install dependencies: +Repository scripts use PEP 723 inline dependencies. Run them with `uv run`: ```bash -pip install -r requirements.txt +uv run scripts/estimate_cost.py --help +uv run scripts/dataset_inspector.py --help ``` ## Prerequisites Checklist @@ -240,8 +241,8 @@ hf_jobs("uv", {"script": "https://gist.githubusercontent.com/user/id/raw/train.p **To use local scripts:** Upload to HF Hub first: ```bash -huggingface-cli repo create my-training-scripts --type model -huggingface-cli upload my-training-scripts ./train.py train.py +hf repos create my-training-scripts --type model +hf upload my-training-scripts ./train.py train.py # Use: https://huggingface.co/USERNAME/my-training-scripts/resolve/main/train.py ``` @@ -331,13 +332,10 @@ hf jobs cancel # Cancel a job The `trl-jobs` package provides optimized defaults and one-liner training. 
```bash -# Install -pip install trl-jobs - -# Train with SFT (simplest possible) -trl-jobs sft \ +uvx trl-jobs sft \ --model_name Qwen/Qwen2.5-0.5B \ --dataset_name trl-lib/Capybara + ``` **Benefits:** Pre-configured settings, automatic Trackio integration, automatic Hub push, one-line commands @@ -685,6 +683,7 @@ Add to PEP 723 header: - `references/hardware_guide.md` - Hardware specs and selection - `references/hub_saving.md` - Hub authentication troubleshooting - `references/troubleshooting.md` - Common issues and solutions +- `references/local_training_macos.md` - Local training on macOS ### Scripts (In This Skill) - `scripts/train_sft_example.py` - Production SFT template diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/gguf_conversion.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/gguf_conversion.md new file mode 100644 index 00000000..a99ea0e8 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/gguf_conversion.md @@ -0,0 +1,296 @@ +# GGUF Conversion Guide + +After training models with TRL on Hugging Face Jobs, convert them to **GGUF format** for use with llama.cpp, Ollama, LM Studio, and other local inference tools. + +**This guide provides production-ready, tested code based on successful conversions.** All critical dependencies and build steps are included. + +## What is GGUF? 
+ +**GGUF** (GPT-Generated Unified Format): +- Optimized format for CPU/GPU inference with llama.cpp +- Supports quantization (4-bit, 5-bit, 8-bit) to reduce model size +- Compatible with: Ollama, LM Studio, Jan, GPT4All, llama.cpp +- Typically 2-8GB for 7B models (vs 14GB unquantized) + +## When to Convert to GGUF + +**Convert when:** +- Running models locally with Ollama or LM Studio +- Using CPU-optimized inference +- Reducing model size with quantization +- Deploying to edge devices +- Sharing models for local-first use + +## Critical Success Factors + +Based on production testing, these are **essential** for reliable conversion: + +### 1. ✅ Install Build Tools FIRST +**Before cloning llama.cpp**, install build dependencies: +```python +subprocess.run(["apt-get", "update", "-qq"], check=True, capture_output=True) +subprocess.run(["apt-get", "install", "-y", "-qq", "build-essential", "cmake"], check=True, capture_output=True) +``` + +**Why:** The quantization tool requires gcc and cmake. Installing after cloning doesn't help. + +### 2. ✅ Use CMake (Not Make) +**Build the quantize tool with CMake:** +```python +# Create build directory +os.makedirs("/tmp/llama.cpp/build", exist_ok=True) + +# Configure +subprocess.run([ + "cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp", + "-DGGML_CUDA=OFF" # Faster build, CUDA not needed for quantization +], check=True, capture_output=True, text=True) + +# Build +subprocess.run([ + "cmake", "--build", "/tmp/llama.cpp/build", + "--target", "llama-quantize", "-j", "4" +], check=True, capture_output=True, text=True) + +# Binary path +quantize_bin = "/tmp/llama.cpp/build/bin/llama-quantize" +``` + +**Why:** CMake is more reliable than `make` and produces consistent binary paths. + +### 3. 
✅ Include All Dependencies +**PEP 723 header must include:** +```python +# /// script +# dependencies = [ +# "transformers>=4.36.0", +# "peft>=0.7.0", +# "torch>=2.0.0", +# "accelerate>=0.24.0", +# "huggingface_hub>=0.20.0", +# "sentencepiece>=0.1.99", # Required for tokenizer +# "protobuf>=3.20.0", # Required for tokenizer +# "numpy", +# "gguf", +# ] +# /// +``` + +**Why:** `sentencepiece` and `protobuf` are critical for tokenizer conversion. Missing them causes silent failures. + +### 4. ✅ Verify Names Before Use +**Always verify repos exist:** +```python +# Before submitting job, verify: +hub_repo_details([ADAPTER_MODEL], repo_type="model") +hub_repo_details([BASE_MODEL], repo_type="model") +``` + +**Why:** Non-existent dataset/model names cause job failures that could be caught in seconds. + +## Complete Conversion Script + +See `scripts/convert_to_gguf.py` for the complete, production-ready script. + +**Key features:** +- ✅ All dependencies in PEP 723 header +- ✅ Build tools installed automatically +- ✅ CMake build process (reliable) +- ✅ Comprehensive error handling +- ✅ Environment variable configuration +- ✅ Automatic README generation + +## Quick Conversion Job + +```python +# Before submitting: VERIFY MODELS EXIST +hub_repo_details(["username/my-finetuned-model"], repo_type="model") +hub_repo_details(["Qwen/Qwen2.5-0.5B"], repo_type="model") + +# Submit conversion job +hf_jobs("uv", { + "script": open("trl/scripts/convert_to_gguf.py").read(), # Or inline the script + "flavor": "a10g-large", + "timeout": "45m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"}, + "env": { + "ADAPTER_MODEL": "username/my-finetuned-model", + "BASE_MODEL": "Qwen/Qwen2.5-0.5B", + "OUTPUT_REPO": "username/my-model-gguf", + "HF_USERNAME": "username" # Optional, for README + } +}) +``` + +## Conversion Process + +The script performs these steps: + +1. **Load and Merge** - Load base model and LoRA adapter, merge them +2. 
**Install Build Tools** - Install gcc, cmake (CRITICAL: before cloning llama.cpp) +3. **Setup llama.cpp** - Clone repo, install Python dependencies +4. **Convert to GGUF** - Create FP16 GGUF using llama.cpp converter +5. **Build Quantize Tool** - Use CMake to build `llama-quantize` +6. **Quantize** - Create Q4_K_M, Q5_K_M, Q8_0 versions +7. **Upload** - Upload all versions + README to Hub + +## Quantization Options + +Common quantization formats (from smallest to largest): + +| Format | Size | Quality | Use Case | +|--------|------|---------|----------| +| **Q4_K_M** | ~300MB | Good | **Recommended** - best balance of size/quality | +| **Q5_K_M** | ~350MB | Better | Higher quality, slightly larger | +| **Q8_0** | ~500MB | Very High | Near-original quality | +| **F16** | ~1GB | Original | Full precision, largest file | + +**Recommendation:** Create Q4_K_M, Q5_K_M, and Q8_0 versions to give users options. + +## Hardware Requirements + +**For conversion:** +- Small models (<1B): CPU-basic works, but slow +- Medium models (1-7B): a10g-large recommended +- Large models (7B+): a10g-large or a100-large + +**Time estimates:** +- 0.5B model: ~15-25 minutes on A10G +- 3B model: ~30-45 minutes on A10G +- 7B model: ~45-60 minutes on A10G + +## Using GGUF Models + +**GGUF models work on both CPU and GPU.** They're optimized for CPU inference but can also leverage GPU acceleration when available. + +### With Ollama (auto-detects GPU) +```bash +# Download GGUF +hf download username/my-model-gguf model-q4_k_m.gguf + +# Create Modelfile +echo "FROM ./model-q4_k_m.gguf" > Modelfile + +# Create and run (uses GPU automatically if available) +ollama create my-model -f Modelfile +ollama run my-model +``` + +### With llama.cpp +```bash +# CPU only +./llama-cli -m model-q4_k_m.gguf -p "Your prompt" + +# With GPU acceleration (offload 32 layers to GPU) +./llama-cli -m model-q4_k_m.gguf -ngl 32 -p "Your prompt" +``` + +### With LM Studio +1. Download the `.gguf` file +2. 
Import into LM Studio +3. Start chatting + +## Best Practices + +### ✅ DO: +1. **Verify repos exist** before submitting jobs (use `hub_repo_details`) +2. **Install build tools FIRST** before cloning llama.cpp +3. **Use CMake** for building quantize tool (not make) +4. **Include all dependencies** in PEP 723 header (especially sentencepiece, protobuf) +5. **Create multiple quantizations** - Give users choice +6. **Test on known models** before production use +7. **Use A10G GPU** for faster conversion + +### ❌ DON'T: +1. **Assume repos exist** - Always verify with hub tools +2. **Use make** instead of CMake - Less reliable +3. **Remove dependencies** to "simplify" - They're all needed +4. **Skip build tools** - Quantization will fail silently +5. **Use default paths** - CMake puts binaries in build/bin/ + +## Common Issues + +### Out of memory during merge +**Fix:** +- Use larger GPU (a10g-large or a100-large) +- Ensure `device_map="auto"` for automatic placement +- Use `dtype=torch.float16` or `torch.bfloat16` + +### Conversion fails with architecture error +**Fix:** +- Ensure llama.cpp supports the model architecture +- Check for standard architecture (Qwen, Llama, Mistral, etc.) 
+- Re-clone the latest llama.cpp (each fresh clone picks up new architecture support): `git clone --depth 1 https://github.com/ggerganov/llama.cpp.git`
+- All dependencies in PEP 723 header +- All build steps included +- Clear error messages + +**Prevented failures:** Missing tokenizer libraries, build tool failures + +## References + +**In this skill:** +- `scripts/convert_to_gguf.py` - Complete, production-ready script + +**External:** +- [llama.cpp Repository](https://github.com/ggerganov/llama.cpp) +- [GGUF Specification](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md) +- [Ollama Documentation](https://ollama.ai) +- [LM Studio](https://lmstudio.ai) + +## Summary + +**Critical checklist for GGUF conversion:** +- [ ] Verify adapter and base models exist on Hub +- [ ] Use production script from `scripts/convert_to_gguf.py` +- [ ] All dependencies in PEP 723 header (including sentencepiece, protobuf) +- [ ] Build tools installed before cloning llama.cpp +- [ ] CMake used for building quantize tool (not make) +- [ ] Correct binary path: `/tmp/llama.cpp/build/bin/llama-quantize` +- [ ] A10G GPU selected for reasonable conversion time +- [ ] Timeout set to 45m minimum +- [ ] HF_TOKEN in secrets for Hub upload + +**The script in `scripts/convert_to_gguf.py` incorporates all these lessons and has been tested successfully in production.** diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/hardware_guide.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/hardware_guide.md new file mode 100644 index 00000000..22eba945 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/hardware_guide.md @@ -0,0 +1,283 @@ +# Hardware Selection Guide + +Choosing the right hardware (flavor) is critical for cost-effective training. 
+ +## Available Hardware + +### CPU +- `cpu-basic` - Basic CPU, testing only +- `cpu-upgrade` - Enhanced CPU + +**Use cases:** Dataset validation, preprocessing, testing scripts +**Not recommended for training:** Too slow for any meaningful training + +### GPU Options + +| Flavor | GPU | Memory | Use Case | Cost/hour | +|--------|-----|--------|----------|-----------| +| `t4-small` | NVIDIA T4 | 16GB | <1B models, demos | ~$0.50-1 | +| `t4-medium` | NVIDIA T4 | 16GB | 1-3B models, development | ~$1-2 | +| `l4x1` | NVIDIA L4 | 24GB | 3-7B models, efficient training | ~$2-3 | +| `l4x4` | 4x NVIDIA L4 | 96GB | Multi-GPU training | ~$8-12 | +| `a10g-small` | NVIDIA A10G | 24GB | 3-7B models, production | ~$3-4 | +| `a10g-large` | NVIDIA A10G | 24GB | 7-13B models | ~$4-6 | +| `a10g-largex2` | 2x NVIDIA A10G | 48GB | Multi-GPU, large models | ~$8-12 | +| `a10g-largex4` | 4x NVIDIA A10G | 96GB | Multi-GPU, very large models | ~$16-24 | +| `a100-large` | NVIDIA A100 | 40GB | 13B+ models, fast training | ~$8-12 | + +### TPU Options + +| Flavor | Type | Use Case | +|--------|------|----------| +| `v5e-1x1` | TPU v5e | Small TPU workloads | +| `v5e-2x2` | 4x TPU v5e | Medium TPU workloads | +| `v5e-2x4` | 8x TPU v5e | Large TPU workloads | + +**Note:** TPUs require TPU-optimized code. Most TRL training uses GPUs. 
+ +## Selection Guidelines + +### By Model Size + +**Tiny Models (<1B parameters)** +- **Recommended:** `t4-small` +- **Example:** Qwen2.5-0.5B, TinyLlama +- **Batch size:** 4-8 +- **Training time:** 1-2 hours for 1K examples + +**Small Models (1-3B parameters)** +- **Recommended:** `t4-medium` or `a10g-small` +- **Example:** Qwen2.5-1.5B, Phi-2 +- **Batch size:** 2-4 +- **Training time:** 2-4 hours for 10K examples + +**Medium Models (3-7B parameters)** +- **Recommended:** `a10g-small` or `a10g-large` +- **Example:** Qwen2.5-7B, Mistral-7B +- **Batch size:** 1-2 (or LoRA with 4-8) +- **Training time:** 4-8 hours for 10K examples + +**Large Models (7-13B parameters)** +- **Recommended:** `a10g-large` or `a100-large` +- **Example:** Llama-3-8B, Mixtral-8x7B (with LoRA) +- **Batch size:** 1 (full fine-tuning) or 2-4 (LoRA) +- **Training time:** 6-12 hours for 10K examples +- **Note:** Always use LoRA/PEFT + +**Very Large Models (13B+ parameters)** +- **Recommended:** `a100-large` with LoRA +- **Example:** Llama-3-13B, Llama-3-70B (LoRA only) +- **Batch size:** 1-2 with LoRA +- **Training time:** 8-24 hours for 10K examples +- **Note:** Full fine-tuning not feasible, use LoRA/PEFT + +### By Budget + +**Minimal Budget (<$5 total)** +- Use `t4-small` +- Train on subset of data (100-500 examples) +- Limit to 1-2 epochs +- Use small model (<1B) + +**Small Budget ($5-20)** +- Use `t4-medium` or `a10g-small` +- Train on 1K-5K examples +- 2-3 epochs +- Model up to 3B parameters + +**Medium Budget ($20-50)** +- Use `a10g-small` or `a10g-large` +- Train on 5K-20K examples +- 3-5 epochs +- Model up to 7B parameters + +**Large Budget ($50-200)** +- Use `a10g-large` or `a100-large` +- Full dataset training +- Multiple epochs +- Model up to 13B parameters with LoRA + +### By Training Type + +**Quick Demo/Experiment** +- `t4-small` +- 50-100 examples +- 5-10 steps +- ~10-15 minutes + +**Development/Iteration** +- `t4-medium` or `a10g-small` +- 1K examples +- 1 epoch +- ~30-60 
minutes + +**Production Training** +- `a10g-large` or `a100-large` +- Full dataset +- 3-5 epochs +- 4-12 hours + +**Research/Experimentation** +- `a100-large` +- Multiple runs +- Various hyperparameters +- Budget for 20-50 hours + +## Memory Considerations + +### Estimating Memory Requirements + +**Full fine-tuning:** +``` +Memory (GB) ≈ (Model params in billions) × 20 +``` + +**LoRA fine-tuning:** +``` +Memory (GB) ≈ (Model params in billions) × 4 +``` + +**Examples:** +- Qwen2.5-0.5B full: ~10GB ✅ fits t4-small +- Qwen2.5-1.5B full: ~30GB ❌ exceeds most GPUs +- Qwen2.5-1.5B LoRA: ~6GB ✅ fits t4-small +- Qwen2.5-7B full: ~140GB ❌ not feasible +- Qwen2.5-7B LoRA: ~28GB ✅ fits a10g-large + +### Memory Optimization + +If hitting memory limits: + +1. **Use LoRA/PEFT** + ```python + peft_config=LoraConfig(r=16, lora_alpha=32) + ``` + +2. **Reduce batch size** + ```python + per_device_train_batch_size=1 + ``` + +3. **Increase gradient accumulation** + ```python + gradient_accumulation_steps=8 # Effective batch size = 1×8 + ``` + +4. **Enable gradient checkpointing** + ```python + gradient_checkpointing=True + ``` + +5. **Use mixed precision** + ```python + bf16=True # or fp16=True + ``` + +6. **Upgrade to larger GPU** + - t4 → a10g → a100 + +## Cost Estimation + +### Formula + +``` +Total Cost = (Hours of training) × (Cost per hour) +``` + +### Example Calculations + +**Quick demo:** +- Hardware: t4-small ($0.75/hour) +- Time: 15 minutes (0.25 hours) +- Cost: $0.19 + +**Development training:** +- Hardware: a10g-small ($3.50/hour) +- Time: 2 hours +- Cost: $7.00 + +**Production training:** +- Hardware: a10g-large ($5/hour) +- Time: 6 hours +- Cost: $30.00 + +**Large model with LoRA:** +- Hardware: a100-large ($10/hour) +- Time: 8 hours +- Cost: $80.00 + +### Cost Optimization Tips + +1. **Start small:** Test on t4-small with subset +2. **Use LoRA:** 4-5x cheaper than full fine-tuning +3. **Optimize hyperparameters:** Fewer epochs if possible +4. 
**Set appropriate timeout:** Don't waste compute on stalled jobs +5. **Use checkpointing:** Resume if job fails +6. **Monitor costs:** Check running jobs regularly + +## Multi-GPU Training + +TRL automatically handles multi-GPU training with Accelerate when using multi-GPU flavors. + +**Multi-GPU flavors:** +- `l4x4` - 4x L4 GPUs +- `a10g-largex2` - 2x A10G GPUs +- `a10g-largex4` - 4x A10G GPUs + +**When to use:** +- Models >13B parameters +- Need faster training (linear speedup) +- Large datasets (>50K examples) + +**Example:** +```python +hf_jobs("uv", { + "script": "train.py", + "flavor": "a10g-largex2", # 2 GPUs + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +No code changes needed—TRL/Accelerate handles distribution automatically. + +## Choosing Between Options + +### a10g vs a100 + +**Choose a10g when:** +- Model <13B parameters +- Budget conscious +- Training time not critical + +**Choose a100 when:** +- Model 13B+ parameters +- Need fastest training +- Memory requirements high +- Budget allows + +### Single vs Multi-GPU + +**Choose single GPU when:** +- Model <7B parameters +- Budget constrained +- Simpler debugging + +**Choose multi-GPU when:** +- Model >13B parameters +- Need faster training +- Large batch sizes required +- Cost-effective for large jobs + +## Quick Reference + +```python +# Model size → Hardware selection +HARDWARE_MAP = { + "<1B": "t4-small", + "1-3B": "a10g-small", + "3-7B": "a10g-large", + "7-13B": "a10g-large (LoRA) or a100-large", + ">13B": "a100-large (LoRA required)" +} +``` diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/hub_saving.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/hub_saving.md new file mode 100644 index 00000000..734e49b5 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/hub_saving.md @@ -0,0 +1,364 @@ +# Saving Training Results to Hugging 
Face Hub + +**⚠️ CRITICAL:** Training environments are ephemeral. ALL results are lost when a job completes unless pushed to the Hub. + +## Why Hub Push is Required + +When running on Hugging Face Jobs: +- Environment is temporary +- All files deleted on job completion +- No local disk persistence +- Cannot access results after job ends + +**Without Hub push, training is completely wasted.** + +## Required Configuration + +### 1. Training Configuration + +In your SFTConfig or trainer config: + +```python +SFTConfig( + push_to_hub=True, # Enable Hub push + hub_model_id="username/model-name", # Target repository +) +``` + +### 2. Job Configuration + +When submitting the job: + +```python +hf_jobs("uv", { + "script": "train.py", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Provide authentication +}) +``` + +**The `$HF_TOKEN` placeholder is automatically replaced with your Hugging Face token.** + +## Complete Example + +```python +# train.py +# /// script +# dependencies = ["trl"] +# /// + +from trl import SFTTrainer, SFTConfig +from datasets import load_dataset + +dataset = load_dataset("trl-lib/Capybara", split="train") + +# Configure with Hub push +config = SFTConfig( + output_dir="my-model", + num_train_epochs=3, + + # ✅ CRITICAL: Hub push configuration + push_to_hub=True, + hub_model_id="myusername/my-trained-model", + + # Optional: Push strategy + push_to_hub_model_id="myusername/my-trained-model", + push_to_hub_organization=None, + push_to_hub_token=None, # Uses environment token +) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + args=config, +) + +trainer.train() + +# ✅ Push final model +trainer.push_to_hub() + +print("✅ Model saved to: https://huggingface.co/myusername/my-trained-model") +``` + +**Submit with authentication:** + +```python +hf_jobs("uv", { + "script": "train.py", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Required! 
+}) +``` + +## What Gets Saved + +When `push_to_hub=True`: + +1. **Model weights** - Final trained parameters +2. **Tokenizer** - Associated tokenizer +3. **Configuration** - Model config (config.json) +4. **Training arguments** - Hyperparameters used +5. **Model card** - Auto-generated documentation +6. **Checkpoints** - If `save_strategy="steps"` enabled + +## Checkpoint Saving + +Save intermediate checkpoints during training: + +```python +SFTConfig( + output_dir="my-model", + push_to_hub=True, + hub_model_id="username/my-model", + + # Checkpoint configuration + save_strategy="steps", + save_steps=100, # Save every 100 steps + save_total_limit=3, # Keep only last 3 checkpoints +) +``` + +**Benefits:** +- Resume training if job fails +- Compare checkpoint performance +- Use intermediate models + +**Checkpoints are pushed to:** `username/my-model` (same repo) + +## Authentication Methods + +### Method 1: Automatic Token (Recommended) + +```python +"secrets": {"HF_TOKEN": "$HF_TOKEN"} +``` + +Uses your logged-in Hugging Face token automatically. + +### Method 2: Explicit Token + +```python +"secrets": {"HF_TOKEN": "hf_abc123..."} +``` + +Provide token explicitly (not recommended for security). + +### Method 3: Environment Variable + +```python +"env": {"HF_TOKEN": "hf_abc123..."} +``` + +Pass as regular environment variable (less secure than secrets). + +**Always prefer Method 1** for security and convenience. + +## Verification Checklist + +Before submitting any training job, verify: + +- [ ] `push_to_hub=True` in training config +- [ ] `hub_model_id` is specified (format: `username/model-name`) +- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +- [ ] Repository name doesn't conflict with existing repos +- [ ] You have write access to the target namespace + +## Repository Setup + +### Automatic Creation + +If repository doesn't exist, it's created automatically when first pushing. 
+ +### Manual Creation + +Create repository before training: + +```python +from huggingface_hub import HfApi + +api = HfApi() +api.create_repo( + repo_id="username/model-name", + repo_type="model", + private=False, # or True for private repo +) +``` + +### Repository Naming + +**Valid names:** +- `username/my-model` +- `username/model-name` +- `organization/model-name` + +**Invalid names:** +- `model-name` (missing username) +- `username/model name` (spaces not allowed) +- `username/MODEL` (uppercase discouraged) + +## Troubleshooting + +### Error: 401 Unauthorized + +**Cause:** HF_TOKEN not provided or invalid + +**Solutions:** +1. Verify `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +2. Check you're logged in: `hf auth whoami` +3. Re-login: `hf auth login` + +### Error: 403 Forbidden + +**Cause:** No write access to repository + +**Solutions:** +1. Check repository namespace matches your username +2. Verify you're a member of organization (if using org namespace) +3. Check repository isn't private (if accessing org repo) + +### Error: Repository not found + +**Cause:** Repository doesn't exist and auto-creation failed + +**Solutions:** +1. Manually create repository first +2. Check repository name format +3. Verify namespace exists + +### Error: Push failed during training + +**Cause:** Network issues or Hub unavailable + +**Solutions:** +1. Training continues but final push fails +2. Checkpoints may be saved +3. Re-run push manually after job completes + +### Issue: Model saved but not visible + +**Possible causes:** +1. Repository is private—check https://huggingface.co/username +2. Wrong namespace—verify `hub_model_id` matches login +3. 
Push still in progress—wait a few minutes + +## Manual Push After Training + +If training completes but push fails, push manually: + +```python +from transformers import AutoModel, AutoTokenizer + +# Load from local checkpoint +model = AutoModel.from_pretrained("./output_dir") +tokenizer = AutoTokenizer.from_pretrained("./output_dir") + +# Push to Hub +model.push_to_hub("username/model-name", token="hf_abc123...") +tokenizer.push_to_hub("username/model-name", token="hf_abc123...") +``` + +**Note:** Only possible if job hasn't completed (files still exist). + +## Best Practices + +1. **Always enable `push_to_hub=True`** +2. **Use checkpoint saving** for long training runs +3. **Verify Hub push** in logs before job completes +4. **Set appropriate `save_total_limit`** to avoid excessive checkpoints +5. **Use descriptive repo names** (e.g., `qwen-capybara-sft` not `model1`) +6. **Add model card** with training details +7. **Tag models** with relevant tags (e.g., `text-generation`, `fine-tuned`) + +## Monitoring Push Progress + +Check logs for push progress: + +```python +hf_jobs("logs", {"job_id": "your-job-id"}) +``` + +**Look for:** +``` +Pushing model to username/model-name... +Upload file pytorch_model.bin: 100% +✅ Model pushed successfully +``` + +## Example: Full Production Setup + +```python +# production_train.py +# /// script +# dependencies = ["trl>=0.12.0", "peft>=0.7.0"] +# /// + +from datasets import load_dataset +from peft import LoraConfig +from trl import SFTTrainer, SFTConfig +import os + +# Verify token is available +assert "HF_TOKEN" in os.environ, "HF_TOKEN not found in environment!" 
+ +# Load dataset +dataset = load_dataset("trl-lib/Capybara", split="train") +print(f"✅ Dataset loaded: {len(dataset)} examples") + +# Configure with comprehensive Hub settings +config = SFTConfig( + output_dir="qwen-capybara-sft", + + # Hub configuration + push_to_hub=True, + hub_model_id="myusername/qwen-capybara-sft", + hub_strategy="checkpoint", # Push checkpoints + + # Checkpoint configuration + save_strategy="steps", + save_steps=100, + save_total_limit=3, + + # Training settings + num_train_epochs=3, + per_device_train_batch_size=4, + + # Logging + logging_steps=10, + logging_first_step=True, +) + +# Train with LoRA +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + args=config, + peft_config=LoraConfig(r=16, lora_alpha=32), +) + +print("🚀 Starting training...") +trainer.train() + +print("💾 Pushing final model to Hub...") +trainer.push_to_hub() + +print("✅ Training complete!") +print(f"Model available at: https://huggingface.co/myusername/qwen-capybara-sft") +``` + +**Submit:** + +```python +hf_jobs("uv", { + "script": "production_train.py", + "flavor": "a10g-large", + "timeout": "6h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +## Key Takeaway + +**Without `push_to_hub=True` and `secrets={"HF_TOKEN": "$HF_TOKEN"}`, all training results are permanently lost.** + +Always verify both are configured before submitting any training job. diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/local_training_macos.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/local_training_macos.md new file mode 100644 index 00000000..fdf5dede --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/local_training_macos.md @@ -0,0 +1,231 @@ +# Local Training on macOS (Apple Silicon) + +Run small LoRA fine-tuning jobs locally on Mac for smoke tests and quick iteration before submitting to HF Jobs. 
+ +## When to Use Local Mac vs HF Jobs + +| Local Mac | HF Jobs / Cloud GPU | +|-----------|-------------------| +| Model ≤3B, text-only | Model 7B+ | +| LoRA/PEFT only | QLoRA 4-bit (CUDA/bitsandbytes) | +| Short context (≤1024) | Long context / full fine-tuning | +| Smoke tests, dataset validation | Production runs, VLMs | + +**Typical workflow:** local smoke test → HF Jobs with same config → export/quantize ([gguf_conversion.md](gguf_conversion.md)) + +## Recommended Defaults + +| Setting | Value | Notes | +|---------|-------|-------| +| Model size | 0.5B–1.5B first run | Scale up after verifying | +| Max seq length | 512–1024 | Lower = less memory | +| Batch size | 1 | Scale via gradient accumulation | +| Gradient accumulation | 8–16 | Effective batch = 8–16 | +| LoRA rank (r) | 8–16 | alpha = 2×r | +| Dtype | float32 | fp16 causes NaN on MPS; bf16 only on M1 Pro+ and M2/M3/M4 | + +### Memory by hardware + +| Unified RAM | Max Model Size | +|-------------|---------------| +| 16 GB | ~0.5B–1.5B | +| 32 GB | ~1.5B–3B | +| 64 GB | ~3B (short context) | + +## Setup + +```bash +xcode-select --install +python3 -m venv .venv && source .venv/bin/activate +pip install -U "torch>=2.2" "transformers>=4.40" "trl>=0.12" "peft>=0.10" \ + datasets accelerate safetensors huggingface_hub +``` + +Verify MPS: +```bash +python -c "import torch; print(torch.__version__, '| MPS:', torch.backends.mps.is_available())" +``` + +Optional — configure Accelerate for local Mac (no distributed, no mixed precision, MPS device): +```bash +accelerate config +``` + +## Training Script + +
+train_lora_sft.py + +```python +import os +from dataclasses import dataclass +from typing import Optional +import torch +from datasets import load_dataset +from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed +from peft import LoraConfig +from trl import SFTTrainer, SFTConfig + +set_seed(42) + +@dataclass +class Cfg: + model_id: str = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-0.5B-Instruct") + dataset_id: str = os.environ.get("DATASET_ID", "HuggingFaceH4/ultrachat_200k") + dataset_split: str = os.environ.get("DATASET_SPLIT", "train_sft[:500]") + data_files: Optional[str] = os.environ.get("DATA_FILES", None) + text_field: str = os.environ.get("TEXT_FIELD", "") + messages_field: str = os.environ.get("MESSAGES_FIELD", "messages") + out_dir: str = os.environ.get("OUT_DIR", "outputs/local-lora") + max_seq_length: int = int(os.environ.get("MAX_SEQ_LENGTH", "512")) + max_steps: int = int(os.environ.get("MAX_STEPS", "-1")) + +cfg = Cfg() +device = "mps" if torch.backends.mps.is_available() else "cpu" + +tokenizer = AutoTokenizer.from_pretrained(cfg.model_id, use_fast=True) +if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token +tokenizer.padding_side = "right" + +model = AutoModelForCausalLM.from_pretrained(cfg.model_id, torch_dtype=torch.float32) +model.to(device) +model.config.use_cache = False + +if cfg.data_files: + ds = load_dataset("json", data_files=cfg.data_files, split="train") +else: + ds = load_dataset(cfg.dataset_id, split=cfg.dataset_split) + +def format_example(ex): + if cfg.text_field and isinstance(ex.get(cfg.text_field), str): + ex["text"] = ex[cfg.text_field] + return ex + msgs = ex.get(cfg.messages_field) + if isinstance(msgs, list): + if hasattr(tokenizer, "apply_chat_template"): + try: + ex["text"] = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=False) + return ex + except Exception: + pass + ex["text"] = "\n".join([str(m) for m in msgs]) + return ex + ex["text"] = str(ex) + return ex 
+ +ds = ds.map(format_example) +ds = ds.remove_columns([c for c in ds.column_names if c != "text"]) + +lora = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, bias="none", + task_type="CAUSAL_LM", target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]) + +sft_kwargs = dict( + output_dir=cfg.out_dir, per_device_train_batch_size=1, gradient_accumulation_steps=8, + learning_rate=2e-4, logging_steps=10, save_steps=200, save_total_limit=2, + gradient_checkpointing=True, report_to="none", fp16=False, bf16=False, + max_seq_length=cfg.max_seq_length, dataset_text_field="text", +) +if cfg.max_steps > 0: + sft_kwargs["max_steps"] = cfg.max_steps +else: + sft_kwargs["num_train_epochs"] = 1 + +trainer = SFTTrainer(model=model, train_dataset=ds, peft_config=lora, + args=SFTConfig(**sft_kwargs), processing_class=tokenizer) +trainer.train() +trainer.save_model(cfg.out_dir) +print(f"✅ Saved to: {cfg.out_dir}") +``` + +
+ +### Run + +```bash +python train_lora_sft.py +``` + +**Env overrides:** + +```bash +MODEL_ID="Qwen/Qwen2.5-1.5B-Instruct" python train_lora_sft.py # different model +MAX_STEPS=50 python train_lora_sft.py # quick 50-step test +DATA_FILES="my_data.jsonl" python train_lora_sft.py # local JSONL file +PYTORCH_ENABLE_MPS_FALLBACK=1 python train_lora_sft.py # MPS op fallback to CPU +PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 python train_lora_sft.py # disable MPS memory limit (use with caution) +``` + +**Local JSONL format** — chat messages or plain text: +```jsonl +{"messages": [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi!"}]} +``` +```jsonl +{"text": "User: Hello\nAssistant: Hi!"} +``` +For plain text: `DATA_FILES="file.jsonl" TEXT_FIELD="text" MESSAGES_FIELD="" python train_lora_sft.py` + +### Verify Success + +- Loss decreases over steps +- `outputs/local-lora/` contains `adapter_config.json` + `*.safetensors` + +## Quick Evaluation + +
+eval_generate.py + +```python +import os, torch +from transformers import AutoTokenizer, AutoModelForCausalLM +from peft import PeftModel + +BASE = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-0.5B-Instruct") +ADAPTER = os.environ.get("ADAPTER_DIR", "outputs/local-lora") +device = "mps" if torch.backends.mps.is_available() else "cpu" + +tokenizer = AutoTokenizer.from_pretrained(BASE, use_fast=True) +model = AutoModelForCausalLM.from_pretrained(BASE, torch_dtype=torch.float32) +model.to(device) +model = PeftModel.from_pretrained(model, ADAPTER) + +prompt = os.environ.get("PROMPT", "Explain gradient accumulation in 3 bullet points.") +inputs = tokenizer(prompt, return_tensors="pt").to(model.device) +with torch.no_grad(): + out = model.generate(**inputs, max_new_tokens=120, do_sample=True, temperature=0.7, top_p=0.9) +print(tokenizer.decode(out[0], skip_special_tokens=True)) +``` + +
+ +## Troubleshooting (macOS-Specific) + +For general training issues, see [troubleshooting.md](troubleshooting.md). + +| Problem | Fix | +|---------|-----| +| MPS unsupported op / crash | `PYTORCH_ENABLE_MPS_FALLBACK=1` | +| OOM / system instability | Reduce `MAX_SEQ_LENGTH`, use smaller model, set `PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0` (caution) | +| fp16 NaN / loss explosion | Keep `fp16=False` (default), lower learning rate | +| LoRA "module not found" | Print `model.named_modules()` to find correct target names | +| TRL TypeError on args | Check TRL version; script uses `SFTConfig` + `processing_class` (TRL ≥0.12) | +| Intel Mac | No MPS — use HF Jobs instead | + +**Common LoRA target modules by architecture:** + +| Architecture | target_modules | +|-------------|---------------| +| Llama/Qwen/Mistral | `q_proj`, `k_proj`, `v_proj`, `o_proj` | +| GPT-2/GPT-J | `c_attn`, `c_proj` | +| BLOOM | `query_key_value`, `dense` | + +## MLX Alternative + +[MLX](https://github.com/ml-explore/mlx) offers tighter Apple Silicon integration but has a smaller ecosystem and less mature training APIs. For this skill's workflow (local validation → HF Jobs), PyTorch + MPS is recommended for consistency. See [mlx-lm](https://github.com/ml-explore/mlx-lm) for MLX-based fine-tuning. 
+ +## See Also + +- [troubleshooting.md](troubleshooting.md) — General TRL troubleshooting +- [hardware_guide.md](hardware_guide.md) — GPU selection for HF Jobs +- [gguf_conversion.md](gguf_conversion.md) — Export for on-device inference +- [training_methods.md](training_methods.md) — SFT, DPO, GRPO overview diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/reliability_principles.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/reliability_principles.md new file mode 100644 index 00000000..bf2f7458 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/reliability_principles.md @@ -0,0 +1,371 @@ +# Reliability Principles for Training Jobs + +These principles are derived from real production failures and successful fixes. Following them prevents common failure modes and ensures reliable job execution. + +## Principle 1: Always Verify Before Use + +**Rule:** Never assume repos, datasets, or resources exist. Verify with tools first. 
+ +### What It Prevents + +- **Non-existent datasets** - Jobs fail immediately when dataset doesn't exist +- **Typos in names** - Simple mistakes like "argilla-dpo-mix-7k" vs "ultrafeedback_binarized" +- **Incorrect paths** - Old or moved repos, renamed files +- **Missing dependencies** - Undocumented requirements + +### How to Apply + +**Before submitting ANY job:** + +```python +# Verify dataset exists +dataset_search({"query": "dataset-name", "author": "author-name", "limit": 5}) +hub_repo_details(["author/dataset-name"], repo_type="dataset") + +# Verify model exists +hub_repo_details(["org/model-name"], repo_type="model") + +# Check script/file paths (for URL-based scripts) +# Verify before using: https://github.com/user/repo/blob/main/script.py +``` + +**Examples that would have caught errors:** + +```python +# ❌ WRONG: Assumed dataset exists +hf_jobs("uv", { + "script": """...""", + "env": {"DATASET": "trl-lib/argilla-dpo-mix-7k"} # Doesn't exist! +}) + +# ✅ CORRECT: Verify first +dataset_search({"query": "argilla dpo", "author": "trl-lib"}) +# Would show: "trl-lib/ultrafeedback_binarized" is the correct name + +hub_repo_details(["trl-lib/ultrafeedback_binarized"], repo_type="dataset") +# Confirms it exists before using +``` + +### Implementation Checklist + +- [ ] Check dataset exists before training +- [ ] Verify base model exists before fine-tuning +- [ ] Confirm adapter model exists before GGUF conversion +- [ ] Test script URLs are valid before submitting +- [ ] Validate file paths in repositories +- [ ] Check for recent updates/renames of resources + +**Time cost:** 5-10 seconds +**Time saved:** Hours of failed job time + debugging + +--- + +## Principle 2: Prioritize Reliability Over Performance + +**Rule:** Default to what is most likely to succeed, not what is theoretically fastest. 
+ +### What It Prevents + +- **Hardware incompatibilities** - Features that fail on certain GPUs +- **Unstable optimizations** - Speed-ups that cause crashes +- **Complex configurations** - More failure points +- **Build system issues** - Unreliable compilation methods + +### How to Apply + +**Choose reliability:** + +```python +# ❌ RISKY: Aggressive optimization that may fail +SFTConfig( + torch_compile=True, # Can fail on T4, A10G GPUs + optim="adamw_bnb_8bit", # Requires specific setup + fp16=False, # May cause training instability + ... +) + +# ✅ SAFE: Proven defaults +SFTConfig( + # torch_compile=True, # Commented with note: "Enable on H100 for 20% speedup" + optim="adamw_torch", # Standard, always works + fp16=True, # Stable and fast + ... +) +``` + +**For build processes:** + +```python +# ❌ UNRELIABLE: Uses make (platform-dependent) +subprocess.run(["make", "-C", "/tmp/llama.cpp", "llama-quantize"], check=True) + +# ✅ RELIABLE: Uses CMake (consistent, documented) +subprocess.run([ + "cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp", + "-DGGML_CUDA=OFF" # Disable CUDA for faster, more reliable build +], check=True) + +subprocess.run([ + "cmake", "--build", "/tmp/llama.cpp/build", + "--target", "llama-quantize", "-j", "4" +], check=True) +``` + +### Real-World Example + +**The `torch.compile` failure:** +- Added for "20% speedup" on H100 +- **Failed fatally on T4-medium** with cryptic error +- Misdiagnosed as dataset issue (cost hours) +- **Fix:** Disable by default, add as optional comment + +**Result:** Reliability > 20% performance gain + +### Implementation Checklist + +- [ ] Use proven, standard configurations by default +- [ ] Comment out performance optimizations with hardware notes +- [ ] Use stable build systems (CMake > make) +- [ ] Test on target hardware before production +- [ ] Document known incompatibilities +- [ ] Provide "safe" and "fast" variants when needed + +**Performance loss:** 10-20% in best case +**Reliability gain:** 95%+ 
success rate vs 60-70% + +--- + +## Principle 3: Create Atomic, Self-Contained Scripts + +**Rule:** Scripts should work as complete, independent units. Don't remove parts to "simplify." + +### What It Prevents + +- **Missing dependencies** - Removed "unnecessary" packages that are actually required +- **Incomplete processes** - Skipped steps that seem redundant +- **Environment assumptions** - Scripts that need pre-setup +- **Partial failures** - Some parts work, others fail silently + +### How to Apply + +**Complete dependency specifications:** + +```python +# ❌ INCOMPLETE: "Simplified" by removing dependencies +# /// script +# dependencies = [ +# "transformers", +# "peft", +# "torch", +# ] +# /// + +# ✅ COMPLETE: All dependencies explicit +# /// script +# dependencies = [ +# "transformers>=4.36.0", +# "peft>=0.7.0", +# "torch>=2.0.0", +# "accelerate>=0.24.0", +# "huggingface_hub>=0.20.0", +# "sentencepiece>=0.1.99", # Required for tokenizers +# "protobuf>=3.20.0", # Required for tokenizers +# "numpy", +# "gguf", +# ] +# /// +``` + +**Complete build processes:** + +```python +# ❌ INCOMPLETE: Assumes build tools exist +subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", "/tmp/llama.cpp"]) +subprocess.run(["make", "-C", "/tmp/llama.cpp", "llama-quantize"]) # FAILS: no gcc/make + +# ✅ COMPLETE: Installs all requirements +subprocess.run(["apt-get", "update", "-qq"], check=True) +subprocess.run(["apt-get", "install", "-y", "-qq", "build-essential", "cmake"], check=True) +subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", "/tmp/llama.cpp"]) +# ... 
then build +``` + +### Real-World Example + +**The `sentencepiece` failure:** +- Original script had it: worked fine +- "Simplified" version removed it: "doesn't look necessary" +- **GGUF conversion failed silently** - tokenizer couldn't convert +- Hard to debug: no obvious error message +- **Fix:** Restore all original dependencies + +**Result:** Don't remove dependencies without thorough testing + +### Implementation Checklist + +- [ ] All dependencies in PEP 723 header with version pins +- [ ] All system packages installed by script +- [ ] No assumptions about pre-existing environment +- [ ] No "optional" steps that are actually required +- [ ] Test scripts in clean environment +- [ ] Document why each dependency is needed + +**Complexity:** Slightly longer scripts +**Reliability:** Scripts "just work" every time + +--- + +## Principle 4: Provide Clear Error Context + +**Rule:** When things fail, make it obvious what went wrong and how to fix it. + +### How to Apply + +**Wrap subprocess calls:** + +```python +# ❌ UNCLEAR: Silent failure +subprocess.run([...], check=True, capture_output=True) + +# ✅ CLEAR: Shows what failed +try: + result = subprocess.run( + [...], + check=True, + capture_output=True, + text=True + ) + print(result.stdout) + if result.stderr: + print("Warnings:", result.stderr) +except subprocess.CalledProcessError as e: + print(f"❌ Command failed!") + print("STDOUT:", e.stdout) + print("STDERR:", e.stderr) + raise +``` + +**Validate inputs:** + +```python +# ❌ UNCLEAR: Fails later with cryptic error +model = load_model(MODEL_NAME) + +# ✅ CLEAR: Fails fast with clear message +if not MODEL_NAME: + raise ValueError("MODEL_NAME environment variable not set!") + +print(f"Loading model: {MODEL_NAME}") +try: + model = load_model(MODEL_NAME) + print(f"✅ Model loaded successfully") +except Exception as e: + print(f"❌ Failed to load model: {MODEL_NAME}") + print(f"Error: {e}") + print("Hint: Check that model exists on Hub") + raise +``` + +### 
Implementation Checklist + +- [ ] Wrap external calls with try/except +- [ ] Print stdout/stderr on failure +- [ ] Validate environment variables early +- [ ] Add progress indicators (✅, ❌, 🔄) +- [ ] Include hints for common failures +- [ ] Log configuration at start + +--- + +## Principle 5: Test the Happy Path on Known-Good Inputs + +**Rule:** Before using new code in production, test with inputs you know work. + +### How to Apply + +**Known-good test inputs:** + +```python +# For training +TEST_DATASET = "trl-lib/Capybara" # Small, well-formatted, widely used +TEST_MODEL = "Qwen/Qwen2.5-0.5B" # Small, fast, reliable + +# For GGUF conversion +TEST_ADAPTER = "evalstate/qwen-capybara-medium" # Known working model +TEST_BASE = "Qwen/Qwen2.5-0.5B" # Compatible base +``` + +**Testing workflow:** + +1. Test with known-good inputs first +2. If that works, try production inputs +3. If production fails, you know it's the inputs (not code) +4. Isolate the difference + +### Implementation Checklist + +- [ ] Maintain list of known-good test models/datasets +- [ ] Test new scripts with test inputs first +- [ ] Document what makes inputs "good" +- [ ] Keep test jobs cheap (small models, short timeouts) +- [ ] Only move to production after test succeeds + +**Time cost:** 5-10 minutes for test run +**Debugging time saved:** Hours + +--- + +## Summary: The Reliability Checklist + +Before submitting ANY job: + +### Pre-Flight Checks +- [ ] **Verified** all repos/datasets exist (hub_repo_details) +- [ ] **Tested** with known-good inputs if new code +- [ ] **Using** proven hardware/configuration +- [ ] **Included** all dependencies in PEP 723 header +- [ ] **Installed** system requirements (build tools, etc.) 
+- [ ] **Set** appropriate timeout (not default 30m) +- [ ] **Configured** Hub push with HF_TOKEN +- [ ] **Added** clear error handling + +### Script Quality +- [ ] Self-contained (no external setup needed) +- [ ] Complete dependencies listed +- [ ] Build tools installed by script +- [ ] Progress indicators included +- [ ] Error messages are clear +- [ ] Configuration logged at start + +### Job Configuration +- [ ] Timeout > expected runtime + 30% buffer +- [ ] Hardware appropriate for model size +- [ ] Secrets include HF_TOKEN +- [ ] Environment variables set correctly +- [ ] Cost estimated and acceptable + +**Following these principles transforms job success rate from ~60-70% to ~95%+** + +--- + +## When Principles Conflict + +Sometimes reliability and performance conflict. Here's how to choose: + +| Scenario | Choose | Rationale | +|----------|--------|-----------| +| Demo/test | Reliability | Fast failure is worse than slow success | +| Production (first run) | Reliability | Prove it works before optimizing | +| Production (proven) | Performance | Safe to optimize after validation | +| Time-critical | Reliability | Failures cause more delay than slow runs | +| Cost-critical | Balanced | Test with small model, then optimize | + +**General rule:** Reliability first, optimize second. 
+ +--- + +## Further Reading + +- `troubleshooting.md` - Common issues and fixes +- `training_patterns.md` - Proven training configurations +- `gguf_conversion.md` - Production GGUF workflow diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/trackio_guide.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/trackio_guide.md new file mode 100644 index 00000000..342045ee --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/trackio_guide.md @@ -0,0 +1,189 @@ +# Trackio Integration for TRL Training + +**Trackio** is an experiment tracking library that provides real-time metrics visualization for remote training on Hugging Face Jobs infrastructure. + +⚠️ **IMPORTANT**: For Jobs training (remote cloud GPUs): +- Training happens on ephemeral cloud runners (not your local machine) +- Trackio syncs metrics to a Hugging Face Space for real-time monitoring +- Without a Space, metrics are lost when the job completes +- The Space dashboard persists your training metrics permanently + +## Setting Up Trackio for Jobs + +**Step 1: Add trackio dependency** +```python +# /// script +# dependencies = [ +# "trl>=0.12.0", +# "trackio", # Required! +# ] +# /// +``` + +**Step 2: Create a Trackio Space (one-time setup)** + +**Option A: Let Trackio auto-create (Recommended)** +Pass a `space_id` to `trackio.init()` and Trackio will automatically create the Space if it doesn't exist. + +**Option B: Create manually** +- Create Space via Hub UI at https://huggingface.co/new-space +- Select Gradio SDK +- OR use command: `hf repos create my-trackio-dashboard --type space --space-sdk gradio` + +**Step 3: Initialize Trackio with space_id** +```python +import trackio + +trackio.init( + project="my-training", + space_id="username/trackio", # CRITICAL for Jobs! 
Replace 'username' with your HF username + config={ + "model": "Qwen/Qwen2.5-0.5B", + "dataset": "trl-lib/Capybara", + "learning_rate": 2e-5, + } +) +``` + +**Step 4: Configure TRL to use Trackio** +```python +SFTConfig( + report_to="trackio", + # ... other config +) +``` + +**Step 5: Finish tracking** +```python +trainer.train() +trackio.finish() # Ensures final metrics are synced +``` + +## What Trackio Tracks + +Trackio automatically logs: +- ✅ Training loss +- ✅ Learning rate +- ✅ GPU utilization +- ✅ Memory usage +- ✅ Training throughput +- ✅ Custom metrics + +## How It Works with Jobs + +1. **Training runs** → Metrics logged to local SQLite DB +2. **Every 5 minutes** → Trackio syncs DB to HF Dataset (Parquet) +3. **Space dashboard** → Reads from Dataset, displays metrics in real-time +4. **Job completes** → Final sync ensures all metrics persisted + +## Default Configuration Pattern + +**Use sensible defaults for trackio configuration unless user requests otherwise.** + +### Recommended Defaults + +```python +import trackio + +trackio.init( + project="qwen-capybara-sft", + name="baseline-run", # Descriptive name user will recognize + space_id="username/trackio", # Default space: {username}/trackio + config={ + # Keep config minimal - hyperparameters and model/dataset info only + "model": "Qwen/Qwen2.5-0.5B", + "dataset": "trl-lib/Capybara", + "learning_rate": 2e-5, + "num_epochs": 3, + } +) +``` + +**Key principles:** +- **Space ID**: Use `{username}/trackio` with "trackio" as default space name +- **Run naming**: Unless otherwise specified, name the run in a way the user will recognize +- **Config**: Keep minimal - don't automatically capture job metadata unless requested +- **Grouping**: Optional - only use if user requests organizing related experiments + +## Grouping Runs (Optional) + +The `group` parameter helps organize related runs together in the dashboard sidebar. 
This is useful when user is running multiple experiments with different configurations but wants to compare them together: + +```python +# Example: Group runs by experiment type +trackio.init(project="my-project", run_name="baseline-run-1", group="baseline") +trackio.init(project="my-project", run_name="augmented-run-1", group="augmented") +trackio.init(project="my-project", run_name="tuned-run-1", group="tuned") +``` + +Runs with the same group name can be grouped together in the sidebar, making it easier to compare related experiments. You can group by any configuration parameter: + +```python +# Hyperparameter sweep - group by learning rate +trackio.init(project="hyperparam-sweep", run_name="lr-0.001-run", group="lr_0.001") +trackio.init(project="hyperparam-sweep", run_name="lr-0.01-run", group="lr_0.01") +``` + +## Environment Variables for Jobs + +You can configure trackio using environment variables instead of passing parameters to `trackio.init()`. This is useful for managing configuration across multiple jobs. + + + +**`HF_TOKEN`** +Required for creating Spaces and writing to datasets (passed via `secrets`): +```python +hf_jobs("uv", { + "script": "...", + "secrets": { + "HF_TOKEN": "$HF_TOKEN" # Enables Space creation and Hub push + } +}) +``` + +### Example with Environment Variables + +```python +hf_jobs("uv", { + "script": """ +# Training script - trackio config from environment +import trackio +from datetime import datetime + +# Auto-generate run name +timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M") +run_name = f"sft_qwen25_{timestamp}" + +# Project and space_id can come from environment variables +trackio.init(run_name=run_name, group="SFT") + +# ... training code ... 
+trackio.finish() +""", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**When to use environment variables:** +- Managing multiple jobs with same configuration +- Keeping training scripts portable across projects +- Separating configuration from code + +**When to use direct parameters:** +- Single job with specific configuration +- When clarity in code is preferred +- When each job has different project/space + +## Viewing the Dashboard + +After starting training: +1. Navigate to the Space: `https://huggingface.co/spaces/username/trackio` +2. The Gradio dashboard shows all tracked experiments +3. Filter by project, compare runs, view charts with smoothing + +## Recommendation + +- **Trackio**: Best for real-time monitoring during long training runs +- **Weights & Biases**: Best for team collaboration, requires account diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/training_methods.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/training_methods.md new file mode 100644 index 00000000..2393d773 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/training_methods.md @@ -0,0 +1,150 @@ +# TRL Training Methods Overview + +TRL (Transformer Reinforcement Learning) provides multiple training methods for fine-tuning and aligning language models. This reference provides a brief overview of each method. + +## Supervised Fine-Tuning (SFT) + +**What it is:** Standard instruction tuning with supervised learning on demonstration data. 
+ +**When to use:** +- Initial fine-tuning of base models on task-specific data +- Teaching new capabilities or domains +- Most common starting point for fine-tuning + +**Dataset format:** Conversational format with "messages" field, OR text field, OR prompt/completion pairs + +**Example:** +```python +from trl import SFTTrainer, SFTConfig + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + args=SFTConfig( + output_dir="my-model", + push_to_hub=True, + hub_model_id="username/my-model", + eval_strategy="no", # Disable eval for simple example + # max_length=1024 is the default - only set if you need different length + ) +) +trainer.train() +``` + +**Note:** For production training with evaluation monitoring, see `scripts/train_sft_example.py` + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/sft_trainer")` + +## Direct Preference Optimization (DPO) + +**What it is:** Alignment method that trains directly on preference pairs (chosen vs rejected responses) without requiring a reward model. 
+ +**When to use:** +- Aligning models to human preferences +- Improving response quality after SFT +- Have paired preference data (chosen/rejected responses) + +**Dataset format:** Preference pairs with "chosen" and "rejected" fields + +**Example:** +```python +from trl import DPOTrainer, DPOConfig + +trainer = DPOTrainer( + model="Qwen/Qwen2.5-0.5B-Instruct", # Use instruct model + train_dataset=dataset, + args=DPOConfig( + output_dir="dpo-model", + beta=0.1, # KL penalty coefficient + eval_strategy="no", # Disable eval for simple example + # max_length=1024 is the default - only set if you need different length + ) +) +trainer.train() +``` + +**Note:** For production training with evaluation monitoring, see `scripts/train_dpo_example.py` + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/dpo_trainer")` + +## Group Relative Policy Optimization (GRPO) + +**What it is:** Online RL method that optimizes relative to group performance, useful for tasks with verifiable rewards. + +**When to use:** +- Tasks with automatic reward signals (code execution, math verification) +- Online learning scenarios +- When DPO offline data is insufficient + +**Dataset format:** Prompt-only format (model generates responses, reward computed online) + +**Example:** +```python +# Use TRL maintained script +hf_jobs("uv", { + "script": "https://raw.githubusercontent.com/huggingface/trl/main/examples/scripts/grpo.py", + "script_args": [ + "--model_name_or_path", "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset_name", "trl-lib/math_shepherd", + "--output_dir", "grpo-model" + ], + "flavor": "a10g-large", + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/grpo_trainer")` + +## Reward Modeling + +**What it is:** Train a reward model to score responses, used as a component in RLHF pipelines. 
+ +**When to use:** +- Building RLHF pipeline +- Need automatic quality scoring +- Creating reward signals for PPO training + +**Dataset format:** Preference pairs with "chosen" and "rejected" responses + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/reward_trainer")` + +## Method Selection Guide + +| Method | Complexity | Data Required | Use Case | +|--------|-----------|---------------|----------| +| **SFT** | Low | Demonstrations | Initial fine-tuning | +| **DPO** | Medium | Paired preferences | Post-SFT alignment | +| **GRPO** | Medium | Prompts + reward fn | Online RL with automatic rewards | +| **Reward** | Medium | Paired preferences | Building RLHF pipeline | + +## Recommended Pipeline + +**For most use cases:** +1. **Start with SFT** - Fine-tune base model on task data +2. **Follow with DPO** - Align to preferences using paired data +3. **Optional: GGUF conversion** - Deploy for local inference + +**For advanced RL scenarios:** +1. **Start with SFT** - Fine-tune base model +2. 
**Train reward model** - On preference data + +## Dataset Format Reference + +For complete dataset format specifications, use: +```python +hf_doc_fetch("https://huggingface.co/docs/trl/dataset_formats") +``` + +Or validate your dataset: +```bash +uv run https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py \ + --dataset your/dataset --split train +``` + +## See Also + +- `references/training_patterns.md` - Common training patterns and examples +- `scripts/train_sft_example.py` - Complete SFT template +- `scripts/train_dpo_example.py` - Complete DPO template +- [Dataset Inspector](https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py) - Dataset format validation tool diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/training_patterns.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/training_patterns.md new file mode 100644 index 00000000..2101e12a --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/training_patterns.md @@ -0,0 +1,203 @@ +# Common Training Patterns + +This guide provides common training patterns and use cases for TRL on Hugging Face Jobs. + +## Multi-GPU Training + +Automatic distributed training across multiple GPUs. 
TRL/Accelerate handles distribution automatically: + +```python +hf_jobs("uv", { + "script": """ +# Your training script here (same as single GPU) +# No changes needed - Accelerate detects multiple GPUs +""", + "flavor": "a10g-largex2", # 2x A10G GPUs + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**Tips for multi-GPU:** +- No code changes needed +- Use `per_device_train_batch_size` (per GPU, not total) +- Effective batch size = `per_device_train_batch_size` × `num_gpus` × `gradient_accumulation_steps` +- Monitor GPU utilization to ensure both GPUs are being used + +## DPO Training (Preference Learning) + +Train with preference data for alignment: + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["trl>=0.12.0", "trackio"] +# /// + +from datasets import load_dataset +from trl import DPOTrainer, DPOConfig +import trackio + +dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train") + +# Create train/eval split +dataset_split = dataset.train_test_split(test_size=0.1, seed=42) + +config = DPOConfig( + output_dir="dpo-model", + push_to_hub=True, + hub_model_id="username/dpo-model", + num_train_epochs=1, + beta=0.1, # KL penalty coefficient + eval_strategy="steps", + eval_steps=50, + report_to="trackio", + run_name="baseline_run", # use a meaningful run name + # max_length=1024, # Default - only set if you need different sequence length +) + +trainer = DPOTrainer( + model="Qwen/Qwen2.5-0.5B-Instruct", # Use instruct model as base + train_dataset=dataset_split["train"], + eval_dataset=dataset_split["test"], # IMPORTANT: Provide eval_dataset when eval_strategy is enabled + args=config, +) + +trainer.train() +trainer.push_to_hub() +trackio.finish() +""", + "flavor": "a10g-large", + "timeout": "3h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**For DPO documentation:** Use `hf_doc_fetch("https://huggingface.co/docs/trl/dpo_trainer")` + +## GRPO Training (Online RL) + +Group Relative Policy Optimization 
for online reinforcement learning: + +```python +hf_jobs("uv", { + "script": "https://raw.githubusercontent.com/huggingface/trl/main/examples/scripts/grpo.py", + "script_args": [ + "--model_name_or_path", "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset_name", "trl-lib/math_shepherd", + "--output_dir", "grpo-model", + "--push_to_hub", + "--hub_model_id", "username/grpo-model" + ], + "flavor": "a10g-large", + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**For GRPO documentation:** Use `hf_doc_fetch("https://huggingface.co/docs/trl/grpo_trainer")` + +## Trackio Configuration + +**Use sensible defaults for trackio setup.** See `references/trackio_guide.md` for complete documentation including grouping runs for experiments. + +### Basic Pattern + +```python +import trackio + +trackio.init( + project="my-training", + run_name="baseline-run", # Descriptive name user will recognize + space_id="username/trackio", # Default space: {username}/trackio + config={ + # Keep config minimal - hyperparameters and model/dataset info only + "model": "Qwen/Qwen2.5-0.5B", + "dataset": "trl-lib/Capybara", + "learning_rate": 2e-5, + } +) + +# Your training code... 
+ +trackio.finish() +``` + +### Grouping for Experiments (Optional) + +When user wants to compare related runs, use the `group` parameter: + +```python +# Hyperparameter sweep +trackio.init(project="hyperparam-sweep", run_name="lr-0.001", group="lr_0.001") +trackio.init(project="hyperparam-sweep", run_name="lr-0.01", group="lr_0.01") +``` + +## Pattern Selection Guide + +| Use Case | Pattern | Hardware | Time | +|----------|---------|----------|------| +| SFT training | `scripts/train_sft_example.py` | a10g-large | 2-6 hours | +| Large dataset (>10K) | Multi-GPU | a10g-largex2 | 4-12 hours | +| Preference learning | DPO Training | a10g-large | 2-4 hours | +| Online RL | GRPO Training | a10g-large | 3-6 hours | + +## Critical: Evaluation Dataset Requirements + +**⚠️ IMPORTANT**: If you set `eval_strategy="steps"` or `eval_strategy="epoch"`, you **MUST** provide an `eval_dataset` to the trainer, or the training will hang. + +### ✅ CORRECT - With eval dataset: +```python +dataset_split = dataset.train_test_split(test_size=0.1, seed=42) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset_split["train"], + eval_dataset=dataset_split["test"], # ← MUST provide when eval_strategy is enabled + args=SFTConfig(eval_strategy="steps", ...), +) +``` + +### ❌ WRONG - Will hang: +```python +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + # NO eval_dataset but eval_strategy="steps" ← WILL HANG + args=SFTConfig(eval_strategy="steps", ...), +) +``` + +### Option: Disable evaluation if no eval dataset +```python +config = SFTConfig( + eval_strategy="no", # ← Explicitly disable evaluation + # ... other config +) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + # No eval_dataset needed + args=config, +) +``` + +## Best Practices + +1. **Use train/eval splits** - Create evaluation split for monitoring progress +2. **Enable Trackio** - Monitor progress in real-time +3. 
**Add 20-30% buffer to timeout** - Account for loading/saving overhead +4. **Test with TRL official scripts first** - Use maintained examples before custom code +5. **Always provide eval_dataset** - When using eval_strategy, or set to "no" +6. **Use multi-GPU for large models** - 7B+ models benefit significantly + +## See Also + +- `scripts/train_sft_example.py` - Complete SFT template with Trackio and eval split +- `scripts/train_dpo_example.py` - Complete DPO template +- `scripts/train_grpo_example.py` - Complete GRPO template +- `references/hardware_guide.md` - Detailed hardware specifications +- `references/training_methods.md` - Overview of all TRL training methods +- `references/troubleshooting.md` - Common issues and solutions diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/troubleshooting.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/troubleshooting.md new file mode 100644 index 00000000..430816ce --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/troubleshooting.md @@ -0,0 +1,282 @@ +# Troubleshooting TRL Training Jobs + +Common issues and solutions when training with TRL on Hugging Face Jobs. + +## Training Hangs at "Starting training..." Step + +**Problem:** Job starts but hangs at the training step - never progresses, never times out, just sits there. + +**Root Cause:** Using `eval_strategy="steps"` or `eval_strategy="epoch"` without providing an `eval_dataset` to the trainer. + +**Solution:** + +**Option A: Provide eval_dataset (recommended)** +```python +# Create train/eval split +dataset_split = dataset.train_test_split(test_size=0.1, seed=42) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset_split["train"], + eval_dataset=dataset_split["test"], # ← MUST provide when eval_strategy is enabled + args=SFTConfig( + eval_strategy="steps", + eval_steps=50, + ... 
+ ), +) +``` + +**Option B: Disable evaluation** +```python +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + # No eval_dataset + args=SFTConfig( + eval_strategy="no", # ← Explicitly disable + ... + ), +) +``` + +**Prevention:** +- Always create train/eval split for better monitoring +- Use `dataset.train_test_split(test_size=0.1, seed=42)` +- Check example scripts: `scripts/train_sft_example.py` includes proper eval setup + +## Job Times Out + +**Problem:** Job terminates before training completes, all progress lost. + +**Solutions:** +- Increase timeout parameter (e.g., `"timeout": "4h"`) +- Reduce `num_train_epochs` or use smaller dataset slice +- Use smaller model or enable LoRA/PEFT to speed up training +- Add 20-30% buffer to estimated time for loading/saving overhead + +**Prevention:** +- Always start with a quick demo run to estimate timing +- Use `scripts/estimate_cost.py` to get time estimates +- Monitor first runs closely via Trackio or logs + +## Model Not Saved to Hub + +**Problem:** Training completes but model doesn't appear on Hub - all work lost. + +**Check:** +- [ ] `push_to_hub=True` in training config +- [ ] `hub_model_id` specified with username (e.g., `"username/model-name"`) +- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job submission +- [ ] User has write access to target repo +- [ ] Token has write permissions (check at https://huggingface.co/settings/tokens) +- [ ] Training script calls `trainer.push_to_hub()` at the end + +**See:** `references/hub_saving.md` for detailed Hub authentication troubleshooting + +## Out of Memory (OOM) + +**Problem:** Job fails with CUDA out of memory error. + +**Solutions (in order of preference):** +1. **Reduce batch size:** Lower `per_device_train_batch_size` (try 4 → 2 → 1) +2. **Increase gradient accumulation:** Raise `gradient_accumulation_steps` to maintain effective batch size +3. 
**Disable evaluation:** Remove `eval_dataset` and `eval_strategy` (saves ~40% memory, good for demos) +4. **Enable LoRA/PEFT:** Use `peft_config=LoraConfig(r=8, lora_alpha=16)` to train adapters only (smaller rank = less memory) +5. **Use larger GPU:** Switch from `t4-small` → `l4x1` → `a10g-large` → `a100-large` +6. **Enable gradient checkpointing:** Set `gradient_checkpointing=True` in config (slower but saves memory) +7. **Use smaller model:** Try a smaller variant (e.g., 0.5B instead of 3B) + +**Memory guidelines:** +- T4 (16GB): <1B models with LoRA +- A10G (24GB): 1-3B models with LoRA, <1B full fine-tune +- A100 (40GB/80GB): 7B+ models with LoRA, 3B full fine-tune + +## Parameter Naming Issues + +**Problem:** `TypeError: SFTConfig.__init__() got an unexpected keyword argument 'max_seq_length'` + +**Cause:** TRL config classes use `max_length`, not `max_seq_length`. + +**Solution:** +```python +# ✅ CORRECT - TRL uses max_length +SFTConfig(max_length=512) +DPOConfig(max_length=512) + +# ❌ WRONG - This will fail +SFTConfig(max_seq_length=512) +``` + +**Note:** Most TRL configs don't require explicit max_length - the default (1024) works well. Only set if you need a specific value. + +## Dataset Format Error + +**Problem:** Training fails with dataset format errors or missing fields. + +**Solutions:** +1. **Check format documentation:** + ```python + hf_doc_fetch("https://huggingface.co/docs/trl/dataset_formats") + ``` + +2. **Validate dataset before training:** + ```bash + uv run https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py \ + --dataset --split train + ``` + Or via hf_jobs: + ```python + hf_jobs("uv", { + "script": "https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py", + "script_args": ["--dataset", "dataset-name", "--split", "train"] + }) + ``` + +3. 
**Verify field names:** + - **SFT:** Needs "messages" field (conversational), OR "text" field, OR "prompt"/"completion" + - **DPO:** Needs "chosen" and "rejected" fields + - **GRPO:** Needs prompt-only format + +4. **Check dataset split:** + - Ensure split exists (e.g., `split="train"`) + - Preview dataset: `load_dataset("name", split="train[:5]")` + +## Import/Module Errors + +**Problem:** Job fails with "ModuleNotFoundError" or import errors. + +**Solutions:** +1. **Add PEP 723 header with dependencies:** + ```python + # /// script + # dependencies = [ + # "trl>=0.12.0", + # "peft>=0.7.0", + # "transformers>=4.36.0", + # ] + # /// + ``` + +2. **Verify exact format:** + - Must have `# ///` delimiters (with space after `#`) + - Dependencies must be valid PyPI package names + - Check spelling and version constraints + +3. **Test locally first:** + ```bash + uv run train.py # Tests if dependencies are correct + ``` + +## Authentication Errors + +**Problem:** Job fails with authentication or permission errors when pushing to Hub. + +**Solutions:** +1. **Verify authentication:** + ```python + mcp__huggingface__hf_whoami() # Check who's authenticated + ``` + +2. **Check token permissions:** + - Go to https://huggingface.co/settings/tokens + - Ensure token has "write" permission + - Token must not be "read-only" + +3. **Verify token in job:** + ```python + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Must be in job config + ``` + +4. **Check repo permissions:** + - User must have write access to target repo + - If org repo, user must be member with write access + - Repo must exist or user must have permission to create + +## Job Stuck or Not Starting + +**Problem:** Job shows "pending" or "starting" for extended period. 
+ +**Solutions:** +- Check Jobs dashboard for status: https://huggingface.co/jobs +- Verify hardware availability (some GPU types may have queues) +- Try different hardware flavor if one is heavily utilized +- Check for account billing issues (Jobs requires paid plan) + +**Typical startup times:** +- CPU jobs: 10-30 seconds +- GPU jobs: 30-90 seconds +- If >3 minutes: likely queued or stuck + +## Training Loss Not Decreasing + +**Problem:** Training runs but loss stays flat or doesn't improve. + +**Solutions:** +1. **Check learning rate:** May be too low (try 2e-5 to 5e-5) or too high (try 1e-6) +2. **Verify dataset quality:** Inspect examples to ensure they're reasonable +3. **Check model size:** Very small models may not have capacity for task +4. **Increase training steps:** May need more epochs or larger dataset +5. **Verify dataset format:** Wrong format may cause degraded training + +## Logs Not Appearing + +**Problem:** Cannot see training logs or progress. + +**Solutions:** +1. **Wait 30-60 seconds:** Initial logs can be delayed +2. **Check logs via MCP tool:** + ```python + hf_jobs("logs", {"job_id": "your-job-id"}) + ``` +3. **Use Trackio for real-time monitoring:** See `references/trackio_guide.md` +4. **Verify job is actually running:** + ```python + hf_jobs("inspect", {"job_id": "your-job-id"}) + ``` + +## Checkpoint/Resume Issues + +**Problem:** Cannot resume from checkpoint or checkpoint not saved. + +**Solutions:** +1. **Enable checkpoint saving:** + ```python + SFTConfig( + save_strategy="steps", + save_steps=100, + hub_strategy="every_save", # Push each checkpoint + ) + ``` + +2. **Verify checkpoints pushed to Hub:** Check model repo for checkpoint folders + +3. **Resume from checkpoint:** + ```python + trainer = SFTTrainer( + model="username/model-name", # Can be checkpoint path + resume_from_checkpoint="username/model-name/checkpoint-1000", + ) + ``` + +## Getting Help + +If issues persist: + +1. 
**Check TRL documentation:** + ```python + hf_doc_search("your issue", product="trl") + ``` + +2. **Check Jobs documentation:** + ```python + hf_doc_fetch("https://huggingface.co/docs/huggingface_hub/guides/jobs") + ``` + +3. **Review related guides:** + - `references/hub_saving.md` - Hub authentication issues + - `references/hardware_guide.md` - Hardware selection and specs + - `references/training_patterns.md` - Eval dataset requirements + - SKILL.md "Working with Scripts" section - Script format and URL issues + +4. **Ask in HF forums:** https://discuss.huggingface.co/ diff --git a/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/unsloth.md b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/unsloth.md new file mode 100644 index 00000000..83e1e116 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/hugging-face-model-trainer/references/unsloth.md @@ -0,0 +1,313 @@ +# Unsloth: Fast Fine-Tuning with Memory Optimization + +**Unsloth** is a fine-tuning library that provides ~2x faster training and ~60% less VRAM usage for LLM training. It's particularly useful when working with limited GPU memory or when speed is critical. 
+ +- **GitHub**: [unslothai/unsloth](https://github.com/unslothai/unsloth) +- **Docs**: [unsloth.ai/docs](https://unsloth.ai/docs) + +## When to Use Unsloth + +Use Unsloth if instructed to do so, or one of the following use cases applies: + +| Use Case | Recommendation | +|----------|----------------| +| Standard text LLM fine-tuning | TRL is sufficient, but Unsloth is faster | +| Limited GPU memory | **Use Unsloth** - 60% less VRAM | +| Need maximum speed | **Use Unsloth** - 2x faster | +| Large models (>13B) | **Use Unsloth** - memory efficiency critical | + +## Supported Models + +Unsloth supports many popular models including: +- **Text LLMs**: Llama 3/3.1/3.2/3.3, Qwen 2.5/3, Mistral, Phi-4, Gemma 2/3, LFM2/2.5 +- **Vision LLMs**: Qwen3-VL, Gemma 3, Llama 3.2 Vision, Pixtral + +Use Unsloth's pre-optimized model variants when available: +```python +# Unsloth-optimized models load faster and use less memory +model_id = "unsloth/LFM2.5-1.2B-Instruct" # 4-bit quantized +model_id = "unsloth/gemma-3-4b-pt" # Vision model +model_id = "unsloth/Qwen3-VL-8B-Instruct" # Vision model +``` + +## Installation + +```python +# /// script +# dependencies = [ +# "unsloth", +# "trl", +# "datasets", +# "trackio", +# ] +# /// +``` + +## Basic Usage: Text LLM + +```python +from unsloth import FastLanguageModel +from trl import SFTTrainer, SFTConfig +from datasets import load_dataset + +# Load model with Unsloth optimizations +model, tokenizer = FastLanguageModel.from_pretrained( + model_name="LiquidAI/LFM2.5-1.2B-Instruct", + max_seq_length=4096, +) + +# Add LoRA adapters +model = FastLanguageModel.get_peft_model( + model, + r=16, + lora_alpha=16, + target_modules=["q_proj", "k_proj", "v_proj", "out_proj", "in_proj", "w1", "w2", "w3"], + lora_dropout=0, + bias="none", + use_gradient_checkpointing="unsloth", + random_state=3407, +) + +# Load dataset +dataset = load_dataset("trl-lib/Capybara", split="train") + +# Train with TRL +trainer = SFTTrainer( + model=model, + 
tokenizer=tokenizer, + train_dataset=dataset, + args=SFTConfig( + output_dir="./output", + per_device_train_batch_size=2, + gradient_accumulation_steps=4, + max_steps=500, + learning_rate=2e-4, + report_to="trackio", + ), +) + +trainer.train() +``` + +## LFM2.5 Specific Settings + +For LFM2.5 inference, use these recommended generation parameters: + +**Instruct models:** +```python +temperature = 0.1 +top_k = 50 +top_p = 0.1 +repetition_penalty = 1.05 +``` + +**Thinking models:** +```python +temperature = 0.05 +top_k = 50 +repetition_penalty = 1.05 +``` + +## Vision-Language Models (VLMs) + +Unsloth provides specialized support for VLMs with `FastVisionModel`: + +```python +from unsloth import FastVisionModel, get_chat_template +from unsloth.trainer import UnslothVisionDataCollator +from trl import SFTTrainer, SFTConfig +from datasets import load_dataset + +# Load VLM with Unsloth +model, processor = FastVisionModel.from_pretrained( + "unsloth/gemma-3-4b-pt", # or "unsloth/Qwen3-VL-8B-Instruct" + load_in_4bit=True, + use_gradient_checkpointing="unsloth", +) + +# Add LoRA for all modalities +model = FastVisionModel.get_peft_model( + model, + finetune_vision_layers=True, # Train vision encoder + finetune_language_layers=True, # Train language model + finetune_attention_modules=True, # Train attention + finetune_mlp_modules=True, # Train MLPs + r=16, + lora_alpha=32, + target_modules="all-linear", +) + +# Apply chat template (required for base models) +processor = get_chat_template(processor, "gemma-3") + +# Load VLM dataset (with images and messages) +dataset = load_dataset("your-vlm-dataset", split="train", streaming=True) + +# Enable training mode +FastVisionModel.for_training(model) + +# Train with VLM-specific collator +trainer = SFTTrainer( + model=model, + train_dataset=dataset, + processing_class=processor.tokenizer, + data_collator=UnslothVisionDataCollator(model, processor), + args=SFTConfig( + output_dir="./vlm-output", + per_device_train_batch_size=2, + 
gradient_accumulation_steps=4, + max_steps=500, + learning_rate=2e-4, + # VLM-specific settings + remove_unused_columns=False, + dataset_text_field="", + dataset_kwargs={"skip_prepare_dataset": True}, + report_to="trackio", + ), +) + +trainer.train() +``` + +## Key Differences from Standard TRL + +| Aspect | Standard TRL | Unsloth | +|--------|--------------|---------| +| Model loading | `AutoModelForCausalLM.from_pretrained()` | `FastLanguageModel.from_pretrained()` | +| LoRA setup | `PeftModel` / `LoraConfig` | `FastLanguageModel.get_peft_model()` | +| VLM loading | Limited support | `FastVisionModel.from_pretrained()` | +| VLM collator | Manual | `UnslothVisionDataCollator` | +| Memory usage | Standard | ~60% less | +| Training speed | Standard | ~2x faster | + +## VLM Dataset Format + +VLM datasets should have: +- `images`: List of PIL images or image paths +- `messages`: Conversation format with image references + +```python +{ + "images": [, ...], + "messages": [ + {"role": "user", "content": [ + {"type": "image"}, + {"type": "text", "text": "Describe this image"} + ]}, + {"role": "assistant", "content": "This image shows..."} + ] +} +``` + +## Streaming Datasets + +For large VLM datasets, use streaming to avoid disk space issues: + +```python +dataset = load_dataset( + "your-vlm-dataset", + split="train", + streaming=True, # Stream from Hub +) + +# Must use max_steps with streaming (no epoch-based training) +SFTConfig(max_steps=500, ...) 
+``` + +## Saving Models + +### Save LoRA Adapter + +```python +model.save_pretrained("./adapter") +processor.save_pretrained("./adapter") + +# Push to Hub +model.push_to_hub("username/my-vlm-adapter") +processor.push_to_hub("username/my-vlm-adapter") +``` + +### Merge and Save Full Model + +```python +# Merge LoRA weights into base model +model = model.merge_and_unload() + +# Save merged model +model.save_pretrained("./merged") +tokenizer.save_pretrained("./merged") +``` + +### Convert to GGUF + +Unsloth models can be converted to GGUF for llama.cpp/Ollama: + +```python +# Save in 16-bit for GGUF conversion +model.save_pretrained_gguf("./gguf", tokenizer, quantization_method="f16") + +# Or directly quantize +model.save_pretrained_gguf("./gguf", tokenizer, quantization_method="q4_k_m") +``` + +## Qwen3-VL Specific Settings + +For Qwen3-VL models, use these recommended settings: + +**Instruct models:** +```python +temperature = 0.7 +top_p = 0.8 +presence_penalty = 1.5 +``` + +**Thinking models:** +```python +temperature = 1.0 +top_p = 0.95 +presence_penalty = 0.0 +``` + +## Hardware Requirements + +| Model | Min VRAM (Unsloth 4-bit) | Recommended GPU | +|-------|--------------------------|-----------------| +| 2B-4B | 8GB | T4, L4 | +| 7B-8B | 16GB | A10G, L4x4 | +| 13B | 24GB | A10G-large | +| 30B+ | 48GB+ | A100 | + +## Example: Full VLM Training Script + +See `scripts/unsloth_sft_example.py` for a complete production-ready example that includes: +- Unsloth VLM setup +- Streaming dataset support +- Trackio monitoring +- Hub push +- CLI arguments + +Run locally: +```bash +uv run scripts/unsloth_sft_example.py \ + --dataset trl-lib/Capybara \ + --max-steps 500 \ + --output-repo username/my-model +``` + +Run on HF Jobs: +```python +hf_jobs("uv", { + "script": " +``` + +## Core Concepts + +### 1. Pipeline API +The pipeline API is the easiest way to use models. 
It groups together preprocessing, model inference, and postprocessing: + +```javascript +import { pipeline } from '@huggingface/transformers'; + +// Create a pipeline for a specific task +const pipe = await pipeline('sentiment-analysis'); + +// Use the pipeline +const result = await pipe('I love transformers!'); +// Output: [{ label: 'POSITIVE', score: 0.999817686 }] + +// IMPORTANT: Always dispose when done to free memory +await pipe.dispose(); +``` + +**⚠️ Memory Management:** All pipelines must be disposed with `pipe.dispose()` when finished to prevent memory leaks. See examples in [Code Examples](./references/EXAMPLES.md) for cleanup patterns across different environments. + +### 2. Model Selection +You can specify a custom model as the second argument: + +```javascript +const pipe = await pipeline( + 'sentiment-analysis', + 'Xenova/bert-base-multilingual-uncased-sentiment' +); +``` + +**Finding Models:** + +Browse available Transformers.js models on Hugging Face Hub: +- **All models**: https://huggingface.co/models?library=transformers.js&sort=trending +- **By task**: Add `pipeline_tag` parameter + - Text generation: https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending + - Image classification: https://huggingface.co/models?pipeline_tag=image-classification&library=transformers.js&sort=trending + - Speech recognition: https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers.js&sort=trending + +**Tip:** Filter by task type, sort by trending/downloads, and check model cards for performance metrics and usage examples. + +### 3. Device Selection +Choose where to run the model: + +```javascript +// Run on CPU (default for WASM) +const pipe = await pipeline('sentiment-analysis', 'model-id'); + +// Run on GPU (WebGPU - experimental) +const pipe = await pipeline('sentiment-analysis', 'model-id', { + device: 'webgpu', +}); +``` + +### 4. 
Quantization Options +Control model precision vs. performance: + +```javascript +// Use quantized model (faster, smaller) +const pipe = await pipeline('sentiment-analysis', 'model-id', { + dtype: 'q4', // Options: 'fp32', 'fp16', 'q8', 'q4' +}); +``` + +## Supported Tasks + +**Note:** All examples below show basic usage. + +### Natural Language Processing + +#### Text Classification +```javascript +const classifier = await pipeline('text-classification'); +const result = await classifier('This movie was amazing!'); +``` + +#### Named Entity Recognition (NER) +```javascript +const ner = await pipeline('token-classification'); +const entities = await ner('My name is John and I live in New York.'); +``` + +#### Question Answering +```javascript +const qa = await pipeline('question-answering'); +const answer = await qa({ + question: 'What is the capital of France?', + context: 'Paris is the capital and largest city of France.' +}); +``` + +#### Text Generation +```javascript +const generator = await pipeline('text-generation', 'onnx-community/gemma-3-270m-it-ONNX'); +const text = await generator('Once upon a time', { + max_new_tokens: 100, + temperature: 0.7 +}); +``` + +**For streaming and chat:** See **[Text Generation Guide](./references/TEXT_GENERATION.md)** for: +- Streaming token-by-token output with `TextStreamer` +- Chat/conversation format with system/user/assistant roles +- Generation parameters (temperature, top_k, top_p) +- Browser and Node.js examples +- React components and API endpoints + +#### Translation +```javascript +const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M'); +const output = await translator('Hello, how are you?', { + src_lang: 'eng_Latn', + tgt_lang: 'fra_Latn' +}); +``` + +#### Summarization +```javascript +const summarizer = await pipeline('summarization'); +const summary = await summarizer(longText, { + max_length: 100, + min_length: 30 +}); +``` + +#### Zero-Shot Classification +```javascript +const 
classifier = await pipeline('zero-shot-classification'); +const result = await classifier('This is a story about sports.', ['politics', 'sports', 'technology']); +``` + +### Computer Vision + +#### Image Classification +```javascript +const classifier = await pipeline('image-classification'); +const result = await classifier('https://example.com/image.jpg'); +// Or with a local file / object URL +const localResult = await classifier(imageUrl); +``` + +#### Object Detection +```javascript +const detector = await pipeline('object-detection'); +const objects = await detector('https://example.com/image.jpg'); +// Returns: [{ label: 'person', score: 0.95, box: { xmin, ymin, xmax, ymax } }, ...] +``` + +#### Image Segmentation +```javascript +const segmenter = await pipeline('image-segmentation'); +const segments = await segmenter('https://example.com/image.jpg'); +``` + +#### Depth Estimation +```javascript +const depthEstimator = await pipeline('depth-estimation'); +const depth = await depthEstimator('https://example.com/image.jpg'); +``` + +#### Zero-Shot Image Classification +```javascript +const classifier = await pipeline('zero-shot-image-classification'); +const result = await classifier('image.jpg', ['cat', 'dog', 'bird']); +``` + +### Audio Processing + +#### Automatic Speech Recognition +```javascript +const transcriber = await pipeline('automatic-speech-recognition'); +const result = await transcriber('audio.wav'); +// Returns: { text: 'transcribed text here' } +``` + +#### Audio Classification +```javascript +const classifier = await pipeline('audio-classification'); +const result = await classifier('audio.wav'); +``` + +#### Text-to-Speech +```javascript +const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts'); +const audio = await synthesizer('Hello, this is a test.', { + speaker_embeddings: speakerEmbeddings +}); +``` + +### Multimodal + +#### Image-to-Text (Image Captioning) +```javascript +const captioner = await pipeline('image-to-text'); +const caption 
= await captioner('image.jpg'); +``` + +#### Document Question Answering +```javascript +const docQA = await pipeline('document-question-answering'); +const answer = await docQA('document-image.jpg', 'What is the total amount?'); +``` + +#### Zero-Shot Object Detection +```javascript +const detector = await pipeline('zero-shot-object-detection'); +const objects = await detector('image.jpg', ['person', 'car', 'tree']); +``` + +### Feature Extraction (Embeddings) + +```javascript +const extractor = await pipeline('feature-extraction'); +const embeddings = await extractor('This is a sentence to embed.'); +// Returns: tensor of shape [1, sequence_length, hidden_size] + +// For sentence embeddings (mean pooling) +const extractor = await pipeline('feature-extraction', 'onnx-community/all-MiniLM-L6-v2-ONNX'); +const embeddings = await extractor('Text to embed', { pooling: 'mean', normalize: true }); +``` + +## Finding and Choosing Models + +### Browsing the Hugging Face Hub + +Discover compatible Transformers.js models on Hugging Face Hub: + +**Base URL (all models):** +``` +https://huggingface.co/models?library=transformers.js&sort=trending +``` + +**Filter by task** using the `pipeline_tag` parameter: + +| Task | URL | +|------|-----| +| **Text Generation** | https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending | +| **Text Classification** | https://huggingface.co/models?pipeline_tag=text-classification&library=transformers.js&sort=trending | +| **Translation** | https://huggingface.co/models?pipeline_tag=translation&library=transformers.js&sort=trending | +| **Summarization** | https://huggingface.co/models?pipeline_tag=summarization&library=transformers.js&sort=trending | +| **Question Answering** | https://huggingface.co/models?pipeline_tag=question-answering&library=transformers.js&sort=trending | +| **Image Classification** | 
https://huggingface.co/models?pipeline_tag=image-classification&library=transformers.js&sort=trending | +| **Object Detection** | https://huggingface.co/models?pipeline_tag=object-detection&library=transformers.js&sort=trending | +| **Image Segmentation** | https://huggingface.co/models?pipeline_tag=image-segmentation&library=transformers.js&sort=trending | +| **Speech Recognition** | https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers.js&sort=trending | +| **Audio Classification** | https://huggingface.co/models?pipeline_tag=audio-classification&library=transformers.js&sort=trending | +| **Image-to-Text** | https://huggingface.co/models?pipeline_tag=image-to-text&library=transformers.js&sort=trending | +| **Feature Extraction** | https://huggingface.co/models?pipeline_tag=feature-extraction&library=transformers.js&sort=trending | +| **Zero-Shot Classification** | https://huggingface.co/models?pipeline_tag=zero-shot-classification&library=transformers.js&sort=trending | + +**Sort options:** +- `&sort=trending` - Most popular recently +- `&sort=downloads` - Most downloaded overall +- `&sort=likes` - Most liked by community +- `&sort=modified` - Recently updated + +### Choosing the Right Model + +Consider these factors when selecting a model: + +**1. Model Size** +- **Small (< 100MB)**: Fast, suitable for browsers, limited accuracy +- **Medium (100MB - 500MB)**: Balanced performance, good for most use cases +- **Large (> 500MB)**: High accuracy, slower, better for Node.js or powerful devices + +**2. Quantization** +Models are often available in different quantization levels: +- `fp32` - Full precision (largest, most accurate) +- `fp16` - Half precision (smaller, still accurate) +- `q8` - 8-bit quantized (much smaller, slight accuracy loss) +- `q4` - 4-bit quantized (smallest, noticeable accuracy loss) + +**3. 
Task Compatibility** +Check the model card for: +- Supported tasks (some models support multiple tasks) +- Input/output formats +- Language support (multilingual vs. English-only) +- License restrictions + +**4. Performance Metrics** +Model cards typically show: +- Accuracy scores +- Benchmark results +- Inference speed +- Memory requirements + +### Example: Finding a Text Generation Model + +```javascript +// 1. Visit: https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending + +// 2. Browse and select a model (e.g., onnx-community/gemma-3-270m-it-ONNX) + +// 3. Check model card for: +// - Model size: ~270M parameters +// - Quantization: q4 available +// - Language: English +// - Use case: Instruction-following chat + +// 4. Use the model: +import { pipeline } from '@huggingface/transformers'; + +const generator = await pipeline( + 'text-generation', + 'onnx-community/gemma-3-270m-it-ONNX', + { dtype: 'q4' } // Use quantized version for faster inference +); + +const output = await generator('Explain quantum computing in simple terms.', { + max_new_tokens: 100 +}); + +await generator.dispose(); +``` + +### Tips for Model Selection + +1. **Start Small**: Test with a smaller model first, then upgrade if needed +2. **Check ONNX Support**: Ensure the model has ONNX files (look for `onnx` folder in model repo) +3. **Read Model Cards**: Model cards contain usage examples, limitations, and benchmarks +4. **Test Locally**: Benchmark inference speed and memory usage in your environment +5. **Community Models**: Look for models by `Xenova` (Transformers.js maintainer) or `onnx-community` +6. **Version Pin**: Use specific git commits in production for stability: + ```javascript + const pipe = await pipeline('task', 'model-id', { revision: 'abc123' }); + ``` + +## Advanced Configuration + +### Environment Configuration (`env`) + +The `env` object provides comprehensive control over Transformers.js execution, caching, and model loading. 
+ +**Quick Overview:** + +```javascript +import { env } from '@huggingface/transformers'; + +// View version +console.log(env.version); // e.g., '3.8.1' + +// Common settings +env.allowRemoteModels = true; // Load from Hugging Face Hub +env.allowLocalModels = false; // Load from file system +env.localModelPath = '/models/'; // Local model directory +env.useFSCache = true; // Cache models on disk (Node.js) +env.useBrowserCache = true; // Cache models in browser +env.cacheDir = './.cache'; // Cache directory location +``` + +**Configuration Patterns:** + +```javascript +// Development: Fast iteration with remote models +env.allowRemoteModels = true; +env.useFSCache = true; + +// Production: Local models only +env.allowRemoteModels = false; +env.allowLocalModels = true; +env.localModelPath = '/app/models/'; + +// Custom CDN +env.remoteHost = 'https://cdn.example.com/models'; + +// Disable caching (testing) +env.useFSCache = false; +env.useBrowserCache = false; +``` + +For complete documentation on all configuration options, caching strategies, cache management, pre-downloading models, and more, see: + +**→ [Configuration Reference](./references/CONFIGURATION.md)** + +### Working with Tensors + +```javascript +import { AutoTokenizer, AutoModel } from '@huggingface/transformers'; + +// Load tokenizer and model separately for more control +const tokenizer = await AutoTokenizer.from_pretrained('bert-base-uncased'); +const model = await AutoModel.from_pretrained('bert-base-uncased'); + +// Tokenize input +const inputs = await tokenizer('Hello world!'); + +// Run model +const outputs = await model(inputs); +``` + +### Batch Processing + +```javascript +const classifier = await pipeline('sentiment-analysis'); + +// Process multiple texts +const results = await classifier([ + 'I love this!', + 'This is terrible.', + 'It was okay.' 
+]); +``` + +## Browser-Specific Considerations + +### WebGPU Usage +WebGPU provides GPU acceleration in browsers: + +```javascript +const pipe = await pipeline('text-generation', 'onnx-community/gemma-3-270m-it-ONNX', { + device: 'webgpu', + dtype: 'fp32' +}); +``` + +**Note**: WebGPU is experimental. Check browser compatibility and file issues if problems occur. + +### WASM Performance +Default browser execution uses WASM: + +```javascript +// Optimized for browsers with quantization +const pipe = await pipeline('sentiment-analysis', 'model-id', { + dtype: 'q8' // or 'q4' for even smaller size +}); +``` + +### Progress Tracking & Loading Indicators + +Models can be large (ranging from a few MB to several GB) and consist of multiple files. Track download progress by passing a callback to the `pipeline()` function: + +```javascript +import { pipeline } from '@huggingface/transformers'; + +// Track progress for each file +const fileProgress = {}; + +function onProgress(info) { + console.log(`${info.status}: ${info.file}`); + + if (info.status === 'progress') { + fileProgress[info.file] = info.progress; + console.log(`${info.file}: ${info.progress.toFixed(1)}%`); + } + + if (info.status === 'done') { + console.log(`✓ ${info.file} complete`); + } +} + +// Pass callback to pipeline +const classifier = await pipeline('sentiment-analysis', null, { + progress_callback: onProgress +}); +``` + +**Progress Info Properties:** + +```typescript +interface ProgressInfo { + status: 'initiate' | 'download' | 'progress' | 'done' | 'ready'; + name: string; // Model id or path + file: string; // File being processed + progress?: number; // Percentage (0-100, only for 'progress' status) + loaded?: number; // Bytes downloaded (only for 'progress' status) + total?: number; // Total bytes (only for 'progress' status) +} +``` + +For complete examples including browser UIs, React components, CLI progress bars, and retry logic, see: + +**→ [Pipeline Options - Progress 
Callback](./references/PIPELINE_OPTIONS.md#progress-callback)** + +## Error Handling + +```javascript +try { + const pipe = await pipeline('sentiment-analysis', 'model-id'); + const result = await pipe('text to analyze'); +} catch (error) { + if (error.message.includes('fetch')) { + console.error('Model download failed. Check internet connection.'); + } else if (error.message.includes('ONNX')) { + console.error('Model execution failed. Check model compatibility.'); + } else { + console.error('Unknown error:', error); + } +} +``` + +## Performance Tips + +1. **Reuse Pipelines**: Create pipeline once, reuse for multiple inferences +2. **Use Quantization**: Start with `q8` or `q4` for faster inference +3. **Batch Processing**: Process multiple inputs together when possible +4. **Cache Models**: Models are cached automatically (see **[Caching Reference](./references/CACHE.md)** for details on browser Cache API, Node.js filesystem cache, and custom implementations) +5. **WebGPU for Large Models**: Use WebGPU for models that benefit from GPU acceleration +6. **Prune Context**: For text generation, limit `max_new_tokens` to avoid memory issues +7. **Clean Up Resources**: Call `pipe.dispose()` when done to free memory + +## Memory Management + +**IMPORTANT:** Always call `pipe.dispose()` when finished to prevent memory leaks. + +```javascript +const pipe = await pipeline('sentiment-analysis'); +const result = await pipe('Great product!'); +await pipe.dispose(); // ✓ Free memory (100MB - several GB per model) +``` + +**When to dispose:** +- Application shutdown or component unmount +- Before loading a different model +- After batch processing in long-running apps + +Models consume significant memory and hold GPU/CPU resources. Disposal is critical for browser memory limits and server stability. 
+ +For detailed patterns (React cleanup, servers, browser), see **[Code Examples](./references/EXAMPLES.md)** + +## Troubleshooting + +### Model Not Found +- Verify model exists on Hugging Face Hub +- Check model name spelling +- Ensure model has ONNX files (look for `onnx` folder in model repo) + +### Memory Issues +- Use smaller models or quantized versions (`dtype: 'q4'`) +- Reduce batch size +- Limit sequence length with `max_length` + +### WebGPU Errors +- Check browser compatibility (Chrome 113+, Edge 113+) +- Try `dtype: 'fp16'` if `fp32` fails +- Fall back to WASM if WebGPU unavailable + +## Reference Documentation + +### This Skill +- **[Pipeline Options](./references/PIPELINE_OPTIONS.md)** - Configure `pipeline()` with `progress_callback`, `device`, `dtype`, etc. +- **[Configuration Reference](./references/CONFIGURATION.md)** - Global `env` configuration for caching and model loading +- **[Caching Reference](./references/CACHE.md)** - Browser Cache API, Node.js filesystem cache, and custom cache implementations +- **[Text Generation Guide](./references/TEXT_GENERATION.md)** - Streaming, chat format, and generation parameters +- **[Model Architectures](./references/MODEL_ARCHITECTURES.md)** - Supported models and selection tips +- **[Code Examples](./references/EXAMPLES.md)** - Real-world implementations for different runtimes + +### Official Transformers.js +- Official docs: https://huggingface.co/docs/transformers.js +- API reference: https://huggingface.co/docs/transformers.js/api/pipelines +- Model hub: https://huggingface.co/models?library=transformers.js +- GitHub: https://github.com/huggingface/transformers.js +- Examples: https://github.com/huggingface/transformers.js/tree/main/examples + +## Best Practices + +1. **Always Dispose Pipelines**: Call `pipe.dispose()` when done - critical for preventing memory leaks +2. **Start with Pipelines**: Use the pipeline API unless you need fine-grained control +3. 
**Test Locally First**: Test models with small inputs before deploying +4. **Monitor Model Sizes**: Be aware of model download sizes for web applications +5. **Handle Loading States**: Show progress indicators for better UX +6. **Version Pin**: Pin specific model versions for production stability +7. **Error Boundaries**: Always wrap pipeline calls in try-catch blocks +8. **Progressive Enhancement**: Provide fallbacks for unsupported browsers +9. **Reuse Models**: Load once, use many times - don't recreate pipelines unnecessarily +10. **Graceful Shutdown**: Dispose models on SIGTERM/SIGINT in servers + +## Quick Reference: Task IDs + +| Task | Task ID | +|------|---------| +| Text classification | `text-classification` or `sentiment-analysis` | +| Token classification | `token-classification` or `ner` | +| Question answering | `question-answering` | +| Fill mask | `fill-mask` | +| Summarization | `summarization` | +| Translation | `translation` | +| Text generation | `text-generation` | +| Text-to-text generation | `text2text-generation` | +| Zero-shot classification | `zero-shot-classification` | +| Image classification | `image-classification` | +| Image segmentation | `image-segmentation` | +| Object detection | `object-detection` | +| Depth estimation | `depth-estimation` | +| Image-to-image | `image-to-image` | +| Zero-shot image classification | `zero-shot-image-classification` | +| Zero-shot object detection | `zero-shot-object-detection` | +| Automatic speech recognition | `automatic-speech-recognition` | +| Audio classification | `audio-classification` | +| Text-to-speech | `text-to-speech` or `text-to-audio` | +| Image-to-text | `image-to-text` | +| Document question answering | `document-question-answering` | +| Feature extraction | `feature-extraction` | +| Sentence similarity | `sentence-similarity` | + +--- + +This skill enables you to integrate state-of-the-art machine learning capabilities directly into JavaScript applications without requiring 
separate ML servers or Python environments. diff --git a/plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/CACHE.md b/plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/CACHE.md new file mode 100644 index 00000000..6f97b2cd --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/CACHE.md @@ -0,0 +1,339 @@ +# Caching Reference + +Complete guide to caching strategies for Transformers.js models across different environments. + +## Table of Contents + +1. [Overview](#overview) +2. [Browser Caching](#browser-caching) +3. [Node.js Caching](#nodejs-caching) +4. [Custom Cache Implementation](#custom-cache-implementation) +5. [Cache Configuration](#cache-configuration) + +## Overview + +Transformers.js models can be large (from a few MB to several GB), so caching is critical for performance. The caching strategy differs based on the environment: + +- **Browser**: Uses the Cache API (browser cache storage) +- **Node.js**: Uses filesystem cache in `~/.cache/huggingface/` +- **Custom**: Implement your own cache (database, cloud storage, etc.) + +### Default Behavior + +```javascript +import { pipeline } from '@huggingface/transformers'; + +// First load: downloads model +const pipe = await pipeline('sentiment-analysis'); + +// Subsequent loads: uses cached model +const pipe2 = await pipeline('sentiment-analysis'); // Fast! +``` + +Caching is **automatic** and enabled by default. Models are cached after the first download. 
 + +## Browser Caching + +### Using the Cache API + +In browser environments, Transformers.js uses the [Cache API](https://developer.mozilla.org/en-US/docs/Web/API/Cache) to store models: + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Browser cache is enabled by default +console.log(env.useBrowserCache); // true + +// Load model (cached automatically) +const classifier = await pipeline('sentiment-analysis'); +``` + +**How it works:** + +1. Model files are downloaded from Hugging Face Hub +2. Files are stored in the browser's Cache Storage +3. Subsequent loads retrieve from cache (no network request) +4. Cache persists across page reloads and browser sessions + +### Cache Location + +Browser caches are stored in: +- **Chrome/Edge**: `Cache Storage` in DevTools → Application tab → Cache storage +- **Firefox**: DevTools → Storage tab → Cache Storage +- **Safari**: Web Inspector → Storage tab + +### Disable Browser Cache + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable browser caching (not recommended) +env.useBrowserCache = false; + +// Models will be re-downloaded on every page load +``` + +**Use case:** Testing, development, or debugging cache issues. 
+ +### Browser Storage Limits + +Browsers impose storage quotas: + +- **Chrome**: ~60% of available disk space (but can evict data) +- **Firefox**: ~50% of available disk space +- **Safari**: ~1GB per origin (prompt for more) + +**Tip:** Monitor storage usage with the [Storage API](https://developer.mozilla.org/en-US/docs/Web/API/Storage_API): + +```javascript +if ('storage' in navigator && 'estimate' in navigator.storage) { + const estimate = await navigator.storage.estimate(); + const percentUsed = (estimate.usage / estimate.quota) * 100; + console.log(`Storage: ${percentUsed.toFixed(2)}% used`); + console.log(`Available: ${((estimate.quota - estimate.usage) / 1024 / 1024).toFixed(2)} MB`); +} +``` + +## Node.js Caching + +### Filesystem Cache + +In Node.js, models are cached to the filesystem: + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Default cache directory (Node.js) +console.log(env.cacheDir); // './.cache' (relative to current directory) + +// Filesystem cache is enabled by default +console.log(env.useFSCache); // true + +// Load model (cached to disk) +const classifier = await pipeline('sentiment-analysis'); +``` + +### Default Cache Location + +**Default behavior:** +- Cache directory: `./.cache` (relative to where Node.js process runs) +- Full default path: `~/.cache/huggingface/` when using Hugging Face tools + +**Note:** The statement "Models are cached automatically in `~/.cache/huggingface/`" from performance tips is specific to Hugging Face's Python tooling convention. In Transformers.js for Node.js, the default is `./.cache` unless configured otherwise. 
+ +### Custom Cache Directory + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Set custom cache directory +env.cacheDir = '/var/cache/transformers'; + +// Or derive it from an environment variable (note: Node.js does not expand '~') +env.cacheDir = process.env.HF_HOME || `${process.env.HOME}/.cache/huggingface`; + +// Now load model +const classifier = await pipeline('sentiment-analysis'); +// Cached to: /var/cache/transformers/models--Xenova--distilbert-base-uncased-finetuned-sst-2-english/ +``` + +**Pattern:** `models--{organization}--{model-name}/` + +### Disable Filesystem Cache + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable filesystem caching (not recommended) +env.useFSCache = false; + +// Models will be re-downloaded on every load +``` + +**Use case:** Testing, CI/CD environments, or containers with ephemeral storage. + +## Custom Cache Implementation + +Implement your own cache for specialized storage backends. + +### Custom Cache Interface + +```typescript +interface CacheInterface { + /** + * Check if a URL is cached + */ + match(url: string): Promise<Response | undefined>; + + /** + * Store a URL and its response + */ + put(url: string, response: Response): Promise<void>; +} +``` + +### Example: Cloud Storage Cache (S3) + +```javascript +import { env, pipeline } from '@huggingface/transformers'; +import { S3Client, GetObjectCommand, PutObjectCommand } from '@aws-sdk/client-s3'; +import { Readable } from 'stream'; + +class S3Cache { + constructor(bucket, region = 'us-east-1') { + this.bucket = bucket; + this.s3 = new S3Client({ region }); + } + + async match(url) { + const key = this.urlToKey(url); + + try { + const command = new GetObjectCommand({ + Bucket: this.bucket, + Key: key + }); + const response = await this.s3.send(command); + + // Convert stream to buffer + const chunks = []; + for await (const chunk of response.Body) { + chunks.push(chunk); + } + const body = Buffer.concat(chunks); + + return new Response(body, { + status: 200, + headers: 
JSON.parse(response.Metadata.headers || '{}') + }); + } catch (error) { + if (error.name === 'NoSuchKey') return undefined; + throw error; + } + } + + async put(url, response) { + const key = this.urlToKey(url); + const clonedResponse = response.clone(); + const body = Buffer.from(await clonedResponse.arrayBuffer()); + const headers = JSON.stringify(Object.fromEntries(response.headers.entries())); + + const command = new PutObjectCommand({ + Bucket: this.bucket, + Key: key, + Body: body, + Metadata: { headers } + }); + + await this.s3.send(command); + } + + urlToKey(url) { + // Convert URL to S3 key (remove protocol, replace slashes) + return url.replace(/^https?:\/\//, '').replace(/\//g, '_'); + } +} + +// Configure S3 cache +env.useCustomCache = true; +env.customCache = new S3Cache('my-transformers-cache', 'us-east-1'); +env.useFSCache = false; + +// Use S3 cache +const classifier = await pipeline('sentiment-analysis'); +``` + +## Cache Configuration + +### Environment Variables + +Use environment variables to configure caching: + +```javascript +import { env } from '@huggingface/transformers'; + +// Configure cache directory from environment +env.cacheDir = process.env.TRANSFORMERS_CACHE || './.cache'; + +// Disable caching in CI/CD +if (process.env.CI === 'true') { + env.useFSCache = false; + env.useBrowserCache = false; +} + +// Production: use pre-cached models +if (process.env.NODE_ENV === 'production') { + env.allowRemoteModels = false; + env.allowLocalModels = true; + env.localModelPath = process.env.MODEL_PATH || '/app/models'; +} +``` + +### Configuration Patterns + +#### Development: Enable All Caching + +```javascript +import { env } from '@huggingface/transformers'; + +env.allowRemoteModels = true; +env.useFSCache = true; // Node.js +env.useBrowserCache = true; // Browser +env.cacheDir = './.cache'; +``` + +#### Production: Local Models Only + +```javascript +import { env } from '@huggingface/transformers'; + +env.allowRemoteModels = false; 
+env.allowLocalModels = true; +env.localModelPath = '/app/models'; +env.useFSCache = true; +``` + +#### Testing: Disable Caching + +```javascript +import { env } from '@huggingface/transformers'; + +env.useFSCache = false; +env.useBrowserCache = false; +env.allowRemoteModels = true; // Download every time +``` + +#### Hybrid: Cache + Remote Fallback + +```javascript +import { env } from '@huggingface/transformers'; + +// Try local cache first, fall back to remote +env.allowRemoteModels = true; +env.allowLocalModels = true; +env.useFSCache = true; +env.localModelPath = './models'; +``` + +--- + +## Summary + +Transformers.js provides flexible caching options: + +- **Browser**: Cache API (automatic, persistent) +- **Node.js**: Filesystem cache (default `./.cache`, configurable) +- **Custom**: Implement your own (database, cloud storage, etc.) + +**Key takeaways:** + +1. Caching is enabled by default and automatic +2. Configure cache **before** loading models +3. Browser uses Cache API, Node.js uses filesystem +4. Custom caches enable advanced storage backends +5. Monitor cache size and implement cleanup strategies +6. Pre-download models for production deployments + +For more configuration options, see: +- [Configuration Reference](./CONFIGURATION.md) +- [Pipeline Options](./PIPELINE_OPTIONS.md) diff --git a/plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/CONFIGURATION.md b/plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/CONFIGURATION.md new file mode 100644 index 00000000..52e18d96 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/CONFIGURATION.md @@ -0,0 +1,390 @@ +# Environment Configuration Reference + +Complete guide to configuring Transformers.js behavior using the `env` object. + +## Table of Contents + +1. [Overview](#overview) +2. [Remote Model Configuration](#remote-model-configuration) +3. [Local Model Configuration](#local-model-configuration) +4. 
[Cache Configuration](#cache-configuration) +5. [WASM Configuration](#wasm-configuration) +6. [Common Configuration Patterns](#common-configuration-patterns) +7. [Environment Best Practices](#environment-best-practices) + +## Overview + +The `env` object provides comprehensive control over Transformers.js execution, caching, and model loading: + +```javascript +import { env } from '@huggingface/transformers'; + +// View current version +console.log(env.version); // e.g., '3.8.1' +``` + +### Available Properties + +```typescript +interface TransformersEnvironment { + // Version info + version: string; + + // Backend configuration + backends: { + onnx: Partial; + }; + + // Remote model settings + allowRemoteModels: boolean; + remoteHost: string; + remotePathTemplate: string; + + // Local model settings + allowLocalModels: boolean; + localModelPath: string; + useFS: boolean; + + // Cache settings + useBrowserCache: boolean; + useFSCache: boolean; + cacheDir: string | null; + useCustomCache: boolean; + customCache: CacheInterface | null; + useWasmCache: boolean; + cacheKey: string; +} +``` + +## Remote Model Configuration + +Control how models are loaded from remote sources (default: Hugging Face Hub). + +### Disable Remote Loading + +```javascript +import { env } from '@huggingface/transformers'; + +// Force local-only mode (no network requests) +env.allowRemoteModels = false; +``` + +**Use case:** Offline applications, security requirements, or air-gapped environments. + +### Custom Model Host + +```javascript +import { env } from '@huggingface/transformers'; + +// Use your own CDN or model server +env.remoteHost = 'https://cdn.example.com/models'; + +// Customize the URL pattern +// Default: '{model}/resolve/{revision}/{file}' +env.remotePathTemplate = 'custom/{model}/{file}'; +``` + +**Use case:** Self-hosting models, using a CDN for faster downloads, or corporate proxies. 
+ +### Example: Private Model Server + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Configure custom model host +env.remoteHost = 'https://models.mycompany.com'; +env.remotePathTemplate = '{model}/{file}'; + +// Models will be loaded from: +// https://models.mycompany.com/my-model/model.onnx +const pipe = await pipeline('sentiment-analysis', 'my-model'); +``` + +## Local Model Configuration + +Control loading models from the local file system. + +### Enable Local Models + +```javascript +import { env } from '@huggingface/transformers'; + +// Enable local file system loading +env.allowLocalModels = true; + +// Set the base path for local models +env.localModelPath = '/path/to/models/'; +``` + +**Default values:** +- Browser: `allowLocalModels = false`, `localModelPath = '/models/'` +- Node.js: `allowLocalModels = true`, `localModelPath = '/models/'` + +### File System Control + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable file system usage entirely (Node.js only) +env.useFS = false; +``` + +### Example: Local Model Directory Structure + +``` +/app/models/ +├── onnx-community/ +│ ├── Supertonic-TTS-ONNX/ +│ │ ├── config.json +│ │ ├── tokenizer.json +│ │ ├── model.onnx +│ │ └── ... +│ └── yolo26l-pose-ONNX/ +│ ├── config.json +│ ├── preprocessor_config.json +│ ├── model.onnx +│ └── ... +``` + +```javascript +env.allowLocalModels = true; +env.localModelPath = '/app/models/'; +env.allowRemoteModels = false; // Offline mode + +const classifier = await pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english'); +``` + +## Cache Configuration + +Transformers.js supports multiple caching strategies to improve performance and reduce network usage. 
+ +### Quick Configuration + +```javascript +import { env } from '@huggingface/transformers'; + +// Browser cache (Cache API) +env.useBrowserCache = true; // default: true +env.cacheKey = 'my-app-transformers-cache'; // default: 'transformers-cache' + +// Node.js filesystem cache +env.useFSCache = true; // default: true +env.cacheDir = './custom-cache-dir'; // default: './.cache' + +// Custom cache implementation +env.useCustomCache = true; +env.customCache = new CustomCache(); // Implement Cache API interface + +// WASM binary caching +env.useWasmCache = true; // default: true +``` + +### Disable Caching + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable all caching (re-download on every load) +env.useFSCache = false; +env.useBrowserCache = false; +env.useWasmCache = false; +env.cacheDir = null; +``` + +For comprehensive caching documentation including: +- Browser Cache API details and storage limits +- Node.js filesystem cache structure and management +- Custom cache implementations (Redis, database, S3) +- Cache clearing and monitoring strategies +- Best practices and troubleshooting + +See **[Caching Reference](./CACHE.md)** + +## WASM Configuration + +Configure ONNX Runtime Web Assembly backend settings. 
+ +### Basic WASM Settings + +```javascript +import { env } from '@huggingface/transformers'; + +// Set custom WASM paths +env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/'; + +// Configure number of threads (Node.js only) +env.backends.onnx.wasm.numThreads = 4; + +// Enable/disable SIMD (single instruction, multiple data) +env.backends.onnx.wasm.simd = true; +``` + +### Proxy Configuration + +```javascript +import { env } from '@huggingface/transformers'; + +// Configure proxy for WASM downloads +env.backends.onnx.wasm.proxy = true; +``` + +### Self-Hosted WASM Files + +```javascript +import { env } from '@huggingface/transformers'; + +// Host WASM files on your own server +env.backends.onnx.wasm.wasmPaths = '/static/wasm/'; +``` + +**Required files:** +- `ort-wasm.wasm` - Main WASM binary +- `ort-wasm-simd.wasm` - SIMD-enabled WASM binary +- `ort-wasm-threaded.wasm` - Multi-threaded WASM binary +- `ort-wasm-simd-threaded.wasm` - SIMD + multi-threaded WASM binary + +## Common Configuration Patterns + +### Development Setup + +```javascript +import { env } from '@huggingface/transformers'; + +// Fast iteration with caching +env.allowRemoteModels = true; +env.useBrowserCache = true; // Browser +env.useFSCache = true; // Node.js +env.cacheDir = './.cache'; +``` + +### Production (Local Models) + +```javascript +import { env } from '@huggingface/transformers'; + +// Secure, offline-capable setup +env.allowRemoteModels = false; +env.allowLocalModels = true; +env.localModelPath = '/app/models/'; +env.useFSCache = false; // Models already local +``` + +### Offline-First Application + +```javascript +import { env } from '@huggingface/transformers'; + +// Try local first, fall back to remote +env.allowLocalModels = true; +env.localModelPath = './models/'; +env.allowRemoteModels = true; +env.useFSCache = true; +env.cacheDir = './cache'; +``` + +### Custom CDN + +```javascript +import { env } from '@huggingface/transformers'; + +// Use 
your own model hosting +env.remoteHost = 'https://cdn.example.com/ml-models'; +env.remotePathTemplate = '{model}/{file}'; +env.useBrowserCache = true; +``` + +### Memory-Constrained Environment + +```javascript +import { env } from '@huggingface/transformers'; + +// Minimize disk/memory usage +env.useFSCache = false; +env.useBrowserCache = false; +env.useWasmCache = false; +env.cacheDir = null; +``` + +### Testing/CI Environment + +```javascript +import { env } from '@huggingface/transformers'; + +// Predictable, isolated testing +env.allowRemoteModels = false; +env.allowLocalModels = true; +env.localModelPath = './test-fixtures/models/'; +env.useFSCache = false; +``` + + + +## Environment Best Practices + +### 1. Configure Early + +Set `env` properties before loading any models: + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// ✓ Good: Configure before loading +env.allowRemoteModels = false; +env.localModelPath = '/app/models/'; +const pipe = await pipeline('sentiment-analysis'); + +// ✗ Bad: Configuring after loading may not take effect +const pipe = await pipeline('sentiment-analysis'); +env.allowRemoteModels = false; // Too late! +``` + +### 2. Use Environment Variables + +```javascript +import { env } from '@huggingface/transformers'; + +// Configure based on environment +env.allowRemoteModels = process.env.NODE_ENV === 'development'; +env.cacheDir = process.env.MODEL_CACHE_DIR || './.cache'; +env.localModelPath = process.env.LOCAL_MODELS_PATH || '/app/models/'; +``` + +### 3. Handle Errors Gracefully + +```javascript +import { pipeline, env } from '@huggingface/transformers'; + +try { + env.allowRemoteModels = false; + const pipe = await pipeline('sentiment-analysis', 'my-model'); +} catch (error) { + if (error.message.includes('not found')) { + console.error('Model not found locally. Enable remote models or download the model.'); + } + throw error; +} +``` + +### 4. 
Log Configuration + +```javascript +import { env } from '@huggingface/transformers'; + +console.log('Transformers.js Configuration:', { + version: env.version, + allowRemoteModels: env.allowRemoteModels, + allowLocalModels: env.allowLocalModels, + localModelPath: env.localModelPath, + cacheDir: env.cacheDir, + useFSCache: env.useFSCache, + useBrowserCache: env.useBrowserCache +}); +``` + +## Related Documentation + +- **[Caching Reference](./CACHE.md)** - Comprehensive caching guide (browser, Node.js, custom implementations) +- [Pipeline Options](./PIPELINE_OPTIONS.md) - Configure pipeline loading with `progress_callback`, `device`, `dtype`, etc. +- [Model Architectures](./MODEL_ARCHITECTURES.md) - Supported models and architectures +- [Examples](./EXAMPLES.md) - Code examples for different runtimes +- [Main Skill Guide](../SKILL.md) - Getting started and common usage diff --git a/plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/EXAMPLES.md b/plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/EXAMPLES.md new file mode 100644 index 00000000..6a6e9b74 --- /dev/null +++ b/plugins/antigravity-awesome-skills-claude/skills/transformers-js/references/EXAMPLES.md @@ -0,0 +1,605 @@ +# Transformers.js Code Examples + +Working examples showing how to use Transformers.js across different runtimes and frameworks. + +All examples use the same task and model for consistency: +- **Task**: `feature-extraction` +- **Model**: `onnx-community/all-MiniLM-L6-v2-ONNX` + +## Table of Contents +1. [Browser (Vanilla JS)](#browser-vanilla-js) +2. [Node.js](#nodejs) +3. [React](#react) +4. [Express API](#express-api) + +## Browser (Vanilla JS) + +### Basic Usage + +```html + + + + Feature Extraction + + +

Text Embedding Generator

+ + +
+ + + + + +``` + +### With Progress Tracking + +```html + + + + Feature Extraction with Progress + + + +

Text Embedding Generator

+
+

Loading model...

+
+
+ + + + + +``` + +## Node.js + +### Basic Script + +```javascript +// embed.js +import { pipeline } from '@huggingface/transformers'; + +async function generateEmbedding(text) { + const extractor = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX' + ); + + const output = await extractor(text, { pooling: 'mean', normalize: true }); + + console.log('Text:', text); + console.log('Embedding dimensions:', output.data.length); + console.log('First 5 values:', Array.from(output.data).slice(0, 5)); + + await extractor.dispose(); +} + +generateEmbedding('Hello, world!'); +``` + +### Batch Processing + +```javascript +// batch-embed.js +import { pipeline } from '@huggingface/transformers'; +import fs from 'fs/promises'; + +async function embedDocuments(documents) { + const extractor = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX' + ); + + console.log(`Processing ${documents.length} documents...`); + + const embeddings = []; + + for (let i = 0; i < documents.length; i++) { + const output = await extractor(documents[i], { + pooling: 'mean', + normalize: true + }); + + embeddings.push({ + text: documents[i], + embedding: Array.from(output.data) + }); + + console.log(`Processed ${i + 1}/${documents.length}`); + } + + await fs.writeFile( + 'embeddings.json', + JSON.stringify(embeddings, null, 2) + ); + + console.log('Saved to embeddings.json'); + + await extractor.dispose(); +} + +const documents = [ + 'The cat sat on the mat', + 'A dog played in the park', + 'Machine learning is fascinating' +]; + +embedDocuments(documents); +``` + +### CLI with Progress + +```javascript +// cli-embed.js +import { pipeline } from '@huggingface/transformers'; + +async function main() { + const text = process.argv[2] || 'Hello, world!'; + + console.log('Loading model...'); + + const fileProgress = {}; + + const extractor = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX', + { + progress_callback: 
(info) => { + if (info.status === 'progress') { + fileProgress[info.file] = info.progress; + + // Show all files progress + const progressLines = Object.entries(fileProgress) + .map(([file, progress]) => ` ${file}: ${progress.toFixed(1)}%`) + .join('\n'); + + process.stdout.write(`\r\x1b[K${progressLines}`); + } + + if (info.status === 'done') { + console.log(`\n✓ ${info.file} complete`); + } + + if (info.status === 'ready') { + console.log('\nModel ready!'); + } + } + } + ); + + console.log('Generating embedding...'); + const output = await extractor(text, { pooling: 'mean', normalize: true }); + + console.log(`\nText: "${text}"`); + console.log(`Dimensions: ${output.data.length}`); + console.log(`First 5 values: ${Array.from(output.data).slice(0, 5).join(', ')}`); + + await extractor.dispose(); +} + +main(); +``` + +## React + +### Basic Component + +```jsx +// EmbeddingGenerator.jsx +import { useState, useRef, useEffect } from 'react'; +import { pipeline } from '@huggingface/transformers'; + +export function EmbeddingGenerator() { + const extractorRef = useRef(null); + const [text, setText] = useState(''); + const [embedding, setEmbedding] = useState(null); + const [loading, setLoading] = useState(false); + + const generate = async () => { + if (!text) return; + + setLoading(true); + + // Load model on first generate + if (!extractorRef.current) { + extractorRef.current = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX' + ); + } + + const output = await extractorRef.current(text, { + pooling: 'mean', + normalize: true + }); + setEmbedding(Array.from(output.data)); + setLoading(false); + }; + + // Cleanup on unmount + useEffect(() => { + return () => { + if (extractorRef.current) { + extractorRef.current.dispose(); + } + }; + }, []); + + return ( +
+

Text Embedding Generator

+ + + +
+ + + + +``` + +### React + +```jsx +import { useState, useRef, useEffect } from 'react'; +import { pipeline, TextStreamer } from '@huggingface/transformers'; + +function StreamingGenerator() { + const generatorRef = useRef(null); + const [output, setOutput] = useState(''); + const [loading, setLoading] = useState(false); + + const handleGenerate = async (prompt) => { + if (!prompt) return; + + setLoading(true); + setOutput(''); + + // Load model on first generate + if (!generatorRef.current) { + generatorRef.current = await pipeline( + 'text-generation', + 'onnx-community/Qwen2.5-0.5B-Instruct', + { dtype: 'q4' } + ); + } + + const streamer = new TextStreamer(generatorRef.current.tokenizer, { + skip_prompt: true, + skip_special_tokens: true, + callback_function: (token) => { + setOutput((prev) => prev + token); + }, + }); + + await generatorRef.current(prompt, { + max_new_tokens: 200, + temperature: 0.7, + streamer, + }); + + setLoading(false); + }; + + // Cleanup on unmount + useEffect(() => { + return () => { + if (generatorRef.current) { + generatorRef.current.dispose(); + } + }; + }, []); + + return ( +
+ +
{output}
+
+ ); +} +``` + +## Chat Format + +Use structured messages for conversations. Works with both basic generation and streaming (just add `streamer` parameter). + +### Single Turn + +```javascript +import { pipeline } from '@huggingface/transformers'; + +const generator = await pipeline( + 'text-generation', + 'onnx-community/Qwen2.5-0.5B-Instruct', + { dtype: 'q4' } +); + +const messages = [ + { role: 'system', content: 'You are a helpful assistant.' }, + { role: 'user', content: 'How do I create an async function?' } +]; + +const result = await generator(messages, { + max_new_tokens: 256, + temperature: 0.7, +}); + +console.log(result[0].generated_text); +``` + +### Multi-turn Conversation + +```javascript +const conversation = [ + { role: 'system', content: 'You are a helpful assistant.' }, + { role: 'user', content: 'What is JavaScript?' }, + { role: 'assistant', content: 'JavaScript is a programming language...' }, + { role: 'user', content: 'Can you show an example?' } +]; + +const result = await generator(conversation, { + max_new_tokens: 200, + temperature: 0.7, +}); + +// To add streaming, just pass a streamer: +// streamer: new TextStreamer(generator.tokenizer, {...}) +``` + +## Generation Parameters + +### Common Parameters + +```javascript +await generator(prompt, { + // Token limits + max_new_tokens: 512, // Maximum tokens to generate + min_new_tokens: 0, // Minimum tokens to generate + + // Sampling + temperature: 0.7, // Randomness (0.0-2.0) + top_k: 50, // Consider top K tokens + top_p: 0.95, // Nucleus sampling + do_sample: true, // Use random sampling (false = always pick most likely token) + + // Repetition control + repetition_penalty: 1.0, // Penalty for repeating (1.0 = no penalty) + no_repeat_ngram_size: 0, // Prevent repeating n-grams + + // Streaming + streamer: streamer, // TextStreamer instance +}); +``` + +### Parameter Effects + +**Temperature:** +- Low (0.1-0.5): More focused and deterministic +- Medium (0.6-0.9): Balanced creativity and 
coherence +- High (1.0-2.0): More creative and random + +```javascript +// Focused output +await generator(prompt, { temperature: 0.3, max_new_tokens: 100 }); + +// Creative output +await generator(prompt, { temperature: 1.2, max_new_tokens: 100 }); +``` + +**Sampling Methods:** + +```javascript +// Greedy (deterministic) +await generator(prompt, { + do_sample: false, + max_new_tokens: 100 +}); + +// Top-k sampling +await generator(prompt, { + top_k: 50, + temperature: 0.7, + max_new_tokens: 100 +}); + +// Top-p (nucleus) sampling +await generator(prompt, { + top_p: 0.95, + temperature: 0.7, + max_new_tokens: 100 +}); +``` + +## Model Selection + +Browse available text generation models on Hugging Face Hub: + +**https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending** + +### Selection Tips + +- **Small models (< 1B params)**: Fast, browser-friendly, use `dtype: 'q4'` +- **Medium models (1-3B params)**: Balanced quality/speed, use `dtype: 'q4'` or `fp16` +- **Large models (> 3B params)**: High quality, slower, best for Node.js with `dtype: 'fp16'` + +Check model cards for: +- Parameter count and model size +- Supported languages +- Benchmark scores +- License restrictions + +## Best Practices + +1. **Model Size**: Use quantized models (`q4`) for browsers, larger models (`fp16`) for servers +2. **Streaming**: Use streaming for better UX - shows progress and feels responsive +3. **Token Limits**: Set `max_new_tokens` to prevent runaway generation +4. **Temperature**: Tune based on use case (creative: 0.8-1.2, factual: 0.3-0.7) +5. **Memory**: Always call `dispose()` when done +6. 
**Caching**: Load model once, reuse for multiple requests + +## Related Documentation + +- [Pipeline Options](./PIPELINE_OPTIONS.md) - Configure pipeline loading +- [Configuration Reference](./CONFIGURATION.md) - Environment settings +- [Code Examples](./EXAMPLES.md) - More examples for different runtimes +- [Main Skill Guide](../SKILL.md) - Getting started guide diff --git a/plugins/antigravity-awesome-skills/.codex-plugin/plugin.json b/plugins/antigravity-awesome-skills/.codex-plugin/plugin.json index 9abbcd50..829c2032 100644 --- a/plugins/antigravity-awesome-skills/.codex-plugin/plugin.json +++ b/plugins/antigravity-awesome-skills/.codex-plugin/plugin.json @@ -19,7 +19,7 @@ "skills": "./skills/", "interface": { "displayName": "Antigravity Awesome Skills", - "shortDescription": "1,303 plugin-safe skills for coding, security, product, and ops workflows.", + "shortDescription": "1,311 plugin-safe skills for coding, security, product, and ops workflows.", "longDescription": "Install a plugin-safe Codex distribution of Antigravity Awesome Skills. Skills that still need hardening or target-specific setup remain available in the repo but are excluded from this plugin.", "developerName": "sickn33 and contributors", "category": "Productivity", diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-cli/SKILL.md b/plugins/antigravity-awesome-skills/skills/hugging-face-cli/SKILL.md index eb68c478..11665159 100644 --- a/plugins/antigravity-awesome-skills/skills/hugging-face-cli/SKILL.md +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-cli/SKILL.md @@ -1,199 +1,194 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/hf-cli" name: hugging-face-cli -description: "The hf CLI provides direct terminal access to the Hugging Face Hub for downloading, uploading, and managing repositories, cache, and compute resources." 
-risk: safe -source: "https://github.com/huggingface/skills/tree/main/skills/hugging-face-cli" -date_added: "2026-02-27" +description: "Use the Hugging Face Hub CLI (`hf`) to download, upload, and manage models, datasets, and Spaces." +risk: unknown --- -# Hugging Face CLI +Install: `curl -LsSf https://hf.co/cli/install.sh | bash -s`. -The `hf` CLI provides direct terminal access to the Hugging Face Hub for downloading, uploading, and managing repositories, cache, and compute resources. +## When to Use -## When to Use This Skill +Use this skill when you need the `hf` CLI for Hub authentication, downloads, uploads, repo management, or basic compute operations. -Use this skill when: -- User needs to download models, datasets, or spaces -- Uploading files to Hub repositories -- Creating Hugging Face repositories -- Managing local cache -- Running compute jobs on HF infrastructure -- Working with Hugging Face Hub authentication +The Hugging Face Hub CLI tool `hf` is available. IMPORTANT: The `hf` command replaces the deprecated `huggingface-cli` command. -## Quick Command Reference +Use `hf --help` to view available functions. Note that auth commands are now all under `hf auth` e.g. `hf auth whoami`. -| Task | Command | -|------|---------| -| Login | `hf auth login` | -| Download model | `hf download ` | -| Download to folder | `hf download --local-dir ./path` | -| Upload folder | `hf upload . 
.` | -| Create repo | `hf repo create ` | -| Create tag | `hf repo tag create ` | -| Delete files | `hf repo-files delete ` | -| List cache | `hf cache ls` | -| Remove from cache | `hf cache rm ` | -| List models | `hf models ls` | -| Get model info | `hf models info ` | -| List datasets | `hf datasets ls` | -| Get dataset info | `hf datasets info ` | -| List spaces | `hf spaces ls` | -| Get space info | `hf spaces info ` | -| List endpoints | `hf endpoints ls` | -| Run GPU job | `hf jobs run --flavor a10g-small ` | -| Environment info | `hf env` | +Generated with `huggingface_hub v1.8.0`. Run `hf skills add --force` to regenerate. -## Core Commands +## Commands -### Authentication -```bash -hf auth login # Interactive login -hf auth login --token $HF_TOKEN # Non-interactive -hf auth whoami # Check current user -hf auth list # List stored tokens -hf auth switch # Switch between tokens -hf auth logout # Log out -``` +- `hf download REPO_ID` — Download files from the Hub. `[--type CHOICE --revision TEXT --include TEXT --exclude TEXT --cache-dir TEXT --local-dir TEXT --force-download --dry-run --quiet --max-workers INTEGER]` +- `hf env` — Print information about the environment. +- `hf sync` — Sync files between local directory and a bucket. `[--delete --ignore-times --ignore-sizes --plan TEXT --apply TEXT --dry-run --include TEXT --exclude TEXT --filter-from TEXT --existing --ignore-existing --verbose --quiet]` +- `hf upload REPO_ID` — Upload a file or a folder to the Hub. Recommended for single-commit uploads. `[--type CHOICE --revision TEXT --private --include TEXT --exclude TEXT --delete TEXT --commit-message TEXT --commit-description TEXT --create-pr --every FLOAT --quiet]` +- `hf upload-large-folder REPO_ID LOCAL_PATH` — Upload a large folder to the Hub. Recommended for resumable uploads. `[--type CHOICE --revision TEXT --private --include TEXT --exclude TEXT --num-workers INTEGER --no-report --no-bars]` +- `hf version` — Print information about the hf version. 
-### Download -```bash -hf download # Full repo to cache -hf download file.safetensors # Specific file -hf download --local-dir ./models # To local directory -hf download --include "*.safetensors" # Filter by pattern -hf download --repo-type dataset # Dataset -hf download --revision v1.0 # Specific version -``` +### `hf auth` — Manage authentication (login, logout, etc.). -### Upload -```bash -hf upload . . # Current dir to root -hf upload ./models /weights # Folder to path -hf upload model.safetensors # Single file -hf upload . . --repo-type dataset # Dataset -hf upload . . --create-pr # Create PR -hf upload . . --commit-message="msg" # Custom message -``` +- `hf auth list` — List all stored access tokens. +- `hf auth login` — Login using a token from huggingface.co/settings/tokens. `[--add-to-git-credential --force]` +- `hf auth logout` — Logout from a specific token. `[--token-name TEXT]` +- `hf auth switch` — Switch between access tokens. `[--token-name TEXT --add-to-git-credential]` +- `hf auth whoami` — Find out which huggingface.co account you are logged in as. `[--format CHOICE]` -### Repository Management -```bash -hf repo create # Create model repo -hf repo create --repo-type dataset # Create dataset -hf repo create --private # Private repo -hf repo create --repo-type space --space_sdk gradio # Gradio space -hf repo delete # Delete repo -hf repo move # Move repo to new namespace -hf repo settings --private true # Update repo settings -hf repo list --repo-type model # List repos -hf repo branch create release-v1 # Create branch -hf repo branch delete release-v1 # Delete branch -hf repo tag create v1.0 # Create tag -hf repo tag list # List tags -hf repo tag delete v1.0 # Delete tag -``` +### `hf buckets` — Commands to interact with buckets. -### Delete Files from Repo -```bash -hf repo-files delete folder/ # Delete folder -hf repo-files delete "*.txt" # Delete with pattern -``` +- `hf buckets cp SRC` — Copy a single file to or from a bucket. 
`[--quiet]` +- `hf buckets create BUCKET_ID` — Create a new bucket. `[--private --exist-ok --quiet]` +- `hf buckets delete BUCKET_ID` — Delete a bucket. `[--yes --missing-ok --quiet]` +- `hf buckets info BUCKET_ID` — Get info about a bucket. `[--quiet]` +- `hf buckets list` — List buckets or files in a bucket. `[--human-readable --tree --recursive --format CHOICE --quiet]` +- `hf buckets move FROM_ID TO_ID` — Move (rename) a bucket to a new name or namespace. +- `hf buckets remove ARGUMENT` — Remove files from a bucket. `[--recursive --yes --dry-run --include TEXT --exclude TEXT --quiet]` +- `hf buckets sync` — Sync files between local directory and a bucket. `[--delete --ignore-times --ignore-sizes --plan TEXT --apply TEXT --dry-run --include TEXT --exclude TEXT --filter-from TEXT --existing --ignore-existing --verbose --quiet]` -### Cache Management -```bash -hf cache ls # List cached repos -hf cache ls --revisions # Include individual revisions -hf cache rm model/gpt2 # Remove cached repo -hf cache rm # Remove cached revision -hf cache prune # Remove detached revisions -hf cache verify gpt2 # Verify checksums from cache -``` +### `hf cache` — Manage local cache directory. -### Browse Hub -```bash -# Models -hf models ls # List top trending models -hf models ls --search "MiniMax" --author MiniMaxAI # Search models -hf models ls --filter "text-generation" --limit 20 # Filter by task -hf models info MiniMaxAI/MiniMax-M2.1 # Get model info +- `hf cache list` — List cached repositories or revisions. `[--cache-dir TEXT --revisions --filter TEXT --format CHOICE --quiet --sort CHOICE --limit INTEGER]` +- `hf cache prune` — Remove detached revisions from the cache. `[--cache-dir TEXT --yes --dry-run]` +- `hf cache rm TARGETS` — Remove cached repositories or revisions. `[--cache-dir TEXT --yes --dry-run]` +- `hf cache verify REPO_ID` — Verify checksums for a single repo revision from cache or a local directory. 
`[--type CHOICE --revision TEXT --cache-dir TEXT --local-dir TEXT --fail-on-missing-files --fail-on-extra-files]` -# Datasets -hf datasets ls # List top trending datasets -hf datasets ls --search "finepdfs" --sort downloads # Search datasets -hf datasets info HuggingFaceFW/finepdfs # Get dataset info +### `hf collections` — Interact with collections on the Hub. -# Spaces -hf spaces ls # List top trending spaces -hf spaces ls --filter "3d" --limit 10 # Filter by 3D modeling spaces -hf spaces info enzostvs/deepsite # Get space info -``` +- `hf collections add-item COLLECTION_SLUG ITEM_ID ITEM_TYPE` — Add an item to a collection. `[--note TEXT --exists-ok]` +- `hf collections create TITLE` — Create a new collection on the Hub. `[--namespace TEXT --description TEXT --private --exists-ok]` +- `hf collections delete COLLECTION_SLUG` — Delete a collection from the Hub. `[--missing-ok]` +- `hf collections delete-item COLLECTION_SLUG ITEM_OBJECT_ID` — Delete an item from a collection. `[--missing-ok]` +- `hf collections info COLLECTION_SLUG` — Get info about a collection on the Hub. Output is in JSON format. +- `hf collections list` — List collections on the Hub. `[--owner TEXT --item TEXT --sort CHOICE --limit INTEGER --format CHOICE --quiet]` +- `hf collections update COLLECTION_SLUG` — Update a collection's metadata on the Hub. `[--title TEXT --description TEXT --position INTEGER --private --theme TEXT]` +- `hf collections update-item COLLECTION_SLUG ITEM_OBJECT_ID` — Update an item in a collection. `[--note TEXT --position INTEGER]` -### Jobs (Cloud Compute) -```bash -hf jobs run python:3.12 python script.py # Run on CPU -hf jobs run --flavor a10g-small # Run on GPU -hf jobs run --secrets HF_TOKEN # With HF token -hf jobs ps # List jobs -hf jobs logs # View logs -hf jobs cancel # Cancel job -``` +### `hf datasets` — Interact with datasets on the Hub. 
-### Inference Endpoints -```bash -hf endpoints ls # List endpoints -hf endpoints deploy my-endpoint \ - --repo openai/gpt-oss-120b \ - --framework vllm \ - --accelerator gpu \ - --instance-size x4 \ - --instance-type nvidia-a10g \ - --region us-east-1 \ - --vendor aws -hf endpoints describe my-endpoint # Show endpoint details -hf endpoints pause my-endpoint # Pause endpoint -hf endpoints resume my-endpoint # Resume endpoint -hf endpoints scale-to-zero my-endpoint # Scale to zero -hf endpoints delete my-endpoint --yes # Delete endpoint -``` -**GPU Flavors:** `cpu-basic`, `cpu-upgrade`, `cpu-xl`, `t4-small`, `t4-medium`, `l4x1`, `l4x4`, `l40sx1`, `l40sx4`, `l40sx8`, `a10g-small`, `a10g-large`, `a10g-largex2`, `a10g-largex4`, `a100-large`, `h100`, `h100x8` +- `hf datasets info DATASET_ID` — Get info about a dataset on the Hub. Output is in JSON format. `[--revision TEXT --expand TEXT]` +- `hf datasets list` — List datasets on the Hub. `[--search TEXT --author TEXT --filter TEXT --sort CHOICE --limit INTEGER --expand TEXT --format CHOICE --quiet]` +- `hf datasets parquet DATASET_ID` — List parquet file URLs available for a dataset. `[--subset TEXT --split TEXT --format CHOICE --quiet]` +- `hf datasets sql SQL` — Execute a raw SQL query with DuckDB against dataset parquet URLs. `[--format CHOICE]` -## Common Patterns +### `hf discussions` — Manage discussions and pull requests on the Hub. -### Download and Use Model Locally -```bash -# Download to local directory for deployment -hf download meta-llama/Llama-3.2-1B-Instruct --local-dir ./model +- `hf discussions close REPO_ID NUM` — Close a discussion or pull request. `[--comment TEXT --yes --type CHOICE]` +- `hf discussions comment REPO_ID NUM` — Comment on a discussion or pull request. `[--body TEXT --body-file PATH --type CHOICE]` +- `hf discussions create REPO_ID --title TEXT` — Create a new discussion or pull request on a repo. 
`[--body TEXT --body-file PATH --pull-request --type CHOICE]` +- `hf discussions diff REPO_ID NUM` — Show the diff of a pull request. `[--type CHOICE]` +- `hf discussions info REPO_ID NUM` — Get info about a discussion or pull request. `[--comments --diff --no-color --type CHOICE --format CHOICE]` +- `hf discussions list REPO_ID` — List discussions and pull requests on a repo. `[--status CHOICE --kind CHOICE --author TEXT --limit INTEGER --type CHOICE --format CHOICE --quiet]` +- `hf discussions merge REPO_ID NUM` — Merge a pull request. `[--comment TEXT --yes --type CHOICE]` +- `hf discussions rename REPO_ID NUM NEW_TITLE` — Rename a discussion or pull request. `[--type CHOICE]` +- `hf discussions reopen REPO_ID NUM` — Reopen a closed discussion or pull request. `[--comment TEXT --yes --type CHOICE]` -# Or use cache and get path -MODEL_PATH=$(hf download meta-llama/Llama-3.2-1B-Instruct --quiet) -``` +### `hf endpoints` — Manage Hugging Face Inference Endpoints. -### Publish Model/Dataset -```bash -hf repo create my-username/my-model --private -hf upload my-username/my-model ./output . --commit-message="Initial release" -hf repo tag create my-username/my-model v1.0 -``` +- `hf endpoints catalog deploy --repo TEXT` — Deploy an Inference Endpoint from the Model Catalog. `[--name TEXT --accelerator TEXT --namespace TEXT]` +- `hf endpoints catalog list` — List available Catalog models. +- `hf endpoints delete NAME` — Delete an Inference Endpoint permanently. `[--namespace TEXT --yes]` +- `hf endpoints deploy NAME --repo TEXT --framework TEXT --accelerator TEXT --instance-size TEXT --instance-type TEXT --region TEXT --vendor TEXT` — Deploy an Inference Endpoint from a Hub repository. `[--namespace TEXT --task TEXT --min-replica INTEGER --max-replica INTEGER --scale-to-zero-timeout INTEGER --scaling-metric CHOICE --scaling-threshold FLOAT]` +- `hf endpoints describe NAME` — Get information about an existing endpoint. 
`[--namespace TEXT]` +- `hf endpoints list` — Lists all Inference Endpoints for the given namespace. `[--namespace TEXT --format CHOICE --quiet]` +- `hf endpoints pause NAME` — Pause an Inference Endpoint. `[--namespace TEXT]` +- `hf endpoints resume NAME` — Resume an Inference Endpoint. `[--namespace TEXT --fail-if-already-running]` +- `hf endpoints scale-to-zero NAME` — Scale an Inference Endpoint to zero. `[--namespace TEXT]` +- `hf endpoints update NAME` — Update an existing endpoint. `[--namespace TEXT --repo TEXT --accelerator TEXT --instance-size TEXT --instance-type TEXT --framework TEXT --revision TEXT --task TEXT --min-replica INTEGER --max-replica INTEGER --scale-to-zero-timeout INTEGER --scaling-metric CHOICE --scaling-threshold FLOAT]` -### Sync Space with Local -```bash -hf upload my-username/my-space . . --repo-type space \ - --exclude="logs/*" --delete="*" --commit-message="Sync" -``` +### `hf extensions` — Manage hf CLI extensions. -### Check Cache Usage -```bash -hf cache ls # See all cached repos and sizes -hf cache rm model/gpt2 # Remove a repo from cache -``` +- `hf extensions exec NAME` — Execute an installed extension. +- `hf extensions install REPO_ID` — Install an extension from a public GitHub repository. `[--force]` +- `hf extensions list` — List installed extension commands. `[--format CHOICE --quiet]` +- `hf extensions remove NAME` — Remove an installed extension. +- `hf extensions search` — Search extensions available on GitHub (tagged with 'hf-extension' topic). `[--format CHOICE --quiet]` -## Key Options +### `hf jobs` — Run and manage Jobs on the Hub. 
-- `--repo-type`: `model` (default), `dataset`, `space` -- `--revision`: Branch, tag, or commit hash -- `--token`: Override authentication -- `--quiet`: Output only essential info (paths/URLs) +- `hf jobs cancel JOB_ID` — Cancel a Job `[--namespace TEXT]` +- `hf jobs hardware` — List available hardware options for Jobs +- `hf jobs inspect JOB_IDS` — Display detailed information on one or more Jobs `[--namespace TEXT]` +- `hf jobs logs JOB_ID` — Fetch the logs of a Job. `[--follow --tail INTEGER --namespace TEXT]` +- `hf jobs ps` — List Jobs. `[--all --namespace TEXT --filter TEXT --format TEXT --quiet]` +- `hf jobs run IMAGE COMMAND` — Run a Job. `[--env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --flavor CHOICE --timeout TEXT --detach --namespace TEXT]` +- `hf jobs scheduled delete SCHEDULED_JOB_ID` — Delete a scheduled Job. `[--namespace TEXT]` +- `hf jobs scheduled inspect SCHEDULED_JOB_IDS` — Display detailed information on one or more scheduled Jobs `[--namespace TEXT]` +- `hf jobs scheduled ps` — List scheduled Jobs `[--all --namespace TEXT --filter TEXT --format TEXT --quiet]` +- `hf jobs scheduled resume SCHEDULED_JOB_ID` — Resume (unpause) a scheduled Job. `[--namespace TEXT]` +- `hf jobs scheduled run SCHEDULE IMAGE COMMAND` — Schedule a Job. `[--suspend --concurrency --env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --flavor CHOICE --timeout TEXT --namespace TEXT]` +- `hf jobs scheduled suspend SCHEDULED_JOB_ID` — Suspend (pause) a scheduled Job. 
`[--namespace TEXT]` +- `hf jobs scheduled uv run SCHEDULE SCRIPT` — Run a UV script (local file or URL) on HF infrastructure `[--suspend --concurrency --image TEXT --flavor CHOICE --env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --timeout TEXT --namespace TEXT --with TEXT --python TEXT]` +- `hf jobs stats` — Fetch the resource usage statistics and metrics of Jobs `[--namespace TEXT]` +- `hf jobs uv run SCRIPT` — Run a UV script (local file or URL) on HF infrastructure `[--image TEXT --flavor CHOICE --env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --timeout TEXT --detach --namespace TEXT --with TEXT --python TEXT]` -## References +### `hf models` — Interact with models on the Hub. -- **Complete command reference**: See references/commands.md -- **Workflow examples**: See references/examples.md +- `hf models info MODEL_ID` — Get info about a model on the Hub. Output is in JSON format. `[--revision TEXT --expand TEXT]` +- `hf models list` — List models on the Hub. `[--search TEXT --author TEXT --filter TEXT --num-parameters TEXT --sort CHOICE --limit INTEGER --expand TEXT --format CHOICE --quiet]` + +### `hf papers` — Interact with papers on the Hub. + +- `hf papers info PAPER_ID` — Get info about a paper on the Hub. Output is in JSON format. +- `hf papers list` — List daily papers on the Hub. `[--date TEXT --week TEXT --month TEXT --submitter TEXT --sort CHOICE --limit INTEGER --format CHOICE --quiet]` +- `hf papers read PAPER_ID` — Read a paper as markdown. +- `hf papers search QUERY` — Search papers on the Hub. `[--limit INTEGER --format CHOICE --quiet]` + +### `hf repos` — Manage repos on the Hub. + +- `hf repos branch create REPO_ID BRANCH` — Create a new branch for a repo on the Hub. `[--revision TEXT --type CHOICE --exist-ok]` +- `hf repos branch delete REPO_ID BRANCH` — Delete a branch from a repo on the Hub. 
`[--type CHOICE]` +- `hf repos create REPO_ID` — Create a new repo on the Hub. `[--type CHOICE --space-sdk TEXT --private --public --protected --exist-ok --resource-group-id TEXT --flavor TEXT --storage TEXT --sleep-time INTEGER --secrets TEXT --secrets-file TEXT --env TEXT --env-file TEXT]` +- `hf repos delete REPO_ID` — Delete a repo from the Hub. This is an irreversible operation. `[--type CHOICE --missing-ok]` +- `hf repos delete-files REPO_ID PATTERNS` — Delete files from a repo on the Hub. `[--type CHOICE --revision TEXT --commit-message TEXT --commit-description TEXT --create-pr]` +- `hf repos duplicate FROM_ID` — Duplicate a repo on the Hub (model, dataset, or Space). `[--type CHOICE --private --public --protected --exist-ok --flavor TEXT --storage TEXT --sleep-time INTEGER --secrets TEXT --secrets-file TEXT --env TEXT --env-file TEXT]` +- `hf repos move FROM_ID TO_ID` — Move a repository from a namespace to another namespace. `[--type CHOICE]` +- `hf repos settings REPO_ID` — Update the settings of a repository. `[--gated CHOICE --private --public --protected --type CHOICE]` +- `hf repos tag create REPO_ID TAG` — Create a tag for a repo. `[--message TEXT --revision TEXT --type CHOICE]` +- `hf repos tag delete REPO_ID TAG` — Delete a tag for a repo. `[--yes --type CHOICE]` +- `hf repos tag list REPO_ID` — List tags for a repo. `[--type CHOICE]` + +### `hf skills` — Manage skills for AI assistants. + +- `hf skills add` — Download a skill and install it for an AI assistant. `[--claude --codex --cursor --opencode --global --dest PATH --force]` +- `hf skills preview` — Print the generated SKILL.md to stdout. + +### `hf spaces` — Interact with spaces on the Hub. + +- `hf spaces dev-mode SPACE_ID` — Enable or disable dev mode on a Space. `[--stop]` +- `hf spaces hot-reload SPACE_ID` — Hot-reload any Python file of a Space without a full rebuild + restart. 
`[--local-file TEXT --skip-checks --skip-summary]` +- `hf spaces info SPACE_ID` — Get info about a space on the Hub. Output is in JSON format. `[--revision TEXT --expand TEXT]` +- `hf spaces list` — List spaces on the Hub. `[--search TEXT --author TEXT --filter TEXT --sort CHOICE --limit INTEGER --expand TEXT --format CHOICE --quiet]` + +### `hf webhooks` — Manage webhooks on the Hub. + +- `hf webhooks create --watch TEXT` — Create a new webhook. `[--url TEXT --job-id TEXT --domain CHOICE --secret TEXT]` +- `hf webhooks delete WEBHOOK_ID` — Delete a webhook permanently. `[--yes]` +- `hf webhooks disable WEBHOOK_ID` — Disable an active webhook. +- `hf webhooks enable WEBHOOK_ID` — Enable a disabled webhook. +- `hf webhooks info WEBHOOK_ID` — Show full details for a single webhook as JSON. +- `hf webhooks list` — List all webhooks for the current user. `[--format CHOICE --quiet]` +- `hf webhooks update WEBHOOK_ID` — Update an existing webhook. Only provided options are changed. `[--url TEXT --watch TEXT --domain CHOICE --secret TEXT]` + +## Common options + +- `--format` — Output format: `--format json` (or `--json`) or `--format table` (default). +- `-q / --quiet` — Minimal output. +- `--revision` — Git revision id which can be a branch name, a tag, or a commit hash. +- `--token` — Use a User Access Token. Prefer setting `HF_TOKEN` env var instead of passing `--token`. +- `--type` — The type of repository (model, dataset, or space). + +## Mounting repos as local filesystems + +To mount Hub repositories or buckets as local filesystems — no download, no copy, no waiting — use `hf-mount`. Files are fetched on demand. 
GitHub: https://github.com/huggingface/hf-mount + +Install: `curl -fsSL https://raw.githubusercontent.com/huggingface/hf-mount/main/install.sh | sh` + +Some command examples: +- `hf-mount start repo openai-community/gpt2 /tmp/gpt2` — mount a repo (read-only) +- `hf-mount start --hf-token $HF_TOKEN bucket myuser/my-bucket /tmp/data` — mount a bucket (read-write) +- `hf-mount status` / `hf-mount stop /tmp/data` — list or unmount + +## Tips + +- Use `hf --help` for full options, descriptions, usage, and real-world examples +- Authenticate with `HF_TOKEN` env var (recommended) or with `--token` diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/SKILL.md b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/SKILL.md new file mode 100644 index 00000000..05bc57ab --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/SKILL.md @@ -0,0 +1,213 @@ +--- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-community-evals" +name: hugging-face-community-evals +description: Run local evaluations for Hugging Face Hub models with inspect-ai or lighteval. +risk: unknown +--- + +# Overview + +## When to Use + +Use this skill for local model evaluation, backend selection, and GPU smoke tests outside the Hugging Face Jobs workflow. + +This skill is for **running evaluations against models on the Hugging Face Hub on local hardware**. 
+ +It covers: +- `inspect-ai` with local inference +- `lighteval` with local inference +- choosing between `vllm`, Hugging Face Transformers, and `accelerate` +- smoke tests, task selection, and backend fallback strategy + +It does **not** cover: +- Hugging Face Jobs orchestration +- model-card or `model-index` edits +- README table extraction +- Artificial Analysis imports +- `.eval_results` generation or publishing +- PR creation or community-evals automation + +If the user wants to **run the same eval remotely on Hugging Face Jobs**, hand off to the `hugging-face-jobs` skill and pass it one of the local scripts in this skill. + +If the user wants to **publish results into the community evals workflow**, stop after generating the evaluation run and hand off that publishing step to `~/code/community-evals`. + +> All paths below are relative to the directory containing this `SKILL.md`. + +# When To Use Which Script + +| Use case | Script | +|---|---| +| Local `inspect-ai` eval on a Hub model via inference providers | `scripts/inspect_eval_uv.py` | +| Local GPU eval with `inspect-ai` using `vllm` or Transformers | `scripts/inspect_vllm_uv.py` | +| Local GPU eval with `lighteval` using `vllm` or `accelerate` | `scripts/lighteval_vllm_uv.py` | +| Extra command patterns | `examples/USAGE_EXAMPLES.md` | + +# Prerequisites + +- Prefer `uv run` for local execution. +- Set `HF_TOKEN` for gated/private models. +- For local GPU runs, verify GPU access before starting: + +```bash +uv --version +printenv HF_TOKEN >/dev/null +nvidia-smi +``` + +If `nvidia-smi` is unavailable, either: +- use `scripts/inspect_eval_uv.py` for lighter provider-backed evaluation, or +- hand off to the `hugging-face-jobs` skill if the user wants remote compute. + +# Core Workflow + +1. Choose the evaluation framework. + - Use `inspect-ai` when you want explicit task control and inspect-native flows. 
+ - Use `lighteval` when the benchmark is naturally expressed as a lighteval task string, especially leaderboard-style tasks. +2. Choose the inference backend. + - Prefer `vllm` for throughput on supported architectures. + - Use Hugging Face Transformers (`--backend hf`) or `accelerate` as compatibility fallbacks. +3. Start with a smoke test. + - `inspect-ai`: add `--limit 10` or similar. + - `lighteval`: add `--max-samples 10`. +4. Scale up only after the smoke test passes. +5. If the user wants remote execution, hand off to `hugging-face-jobs` with the same script + args. + +# Quick Start + +## Option A: inspect-ai with local inference providers path + +Best when the model is already supported by Hugging Face Inference Providers and you want the lowest local setup overhead. + +```bash +uv run scripts/inspect_eval_uv.py \ + --model meta-llama/Llama-3.2-1B \ + --task mmlu \ + --limit 20 +``` + +Use this path when: +- you want a quick local smoke test +- you do not need direct GPU control +- the task already exists in `inspect-evals` + +## Option B: inspect-ai on Local GPU + +Best when you need to load the Hub model directly, use `vllm`, or fall back to Transformers for unsupported architectures. + +Local GPU: + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model meta-llama/Llama-3.2-1B \ + --task gsm8k \ + --limit 20 +``` + +Transformers fallback: + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model microsoft/phi-2 \ + --task mmlu \ + --backend hf \ + --trust-remote-code \ + --limit 20 +``` + +## Option C: lighteval on Local GPU + +Best when the task is naturally expressed as a `lighteval` task string, especially Open LLM Leaderboard style benchmarks. 
+ +Local GPU: + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model meta-llama/Llama-3.2-3B-Instruct \ + --tasks "leaderboard|mmlu|5,leaderboard|gsm8k|5" \ + --max-samples 20 \ + --use-chat-template +``` + +`accelerate` fallback: + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model microsoft/phi-2 \ + --tasks "leaderboard|mmlu|5" \ + --backend accelerate \ + --trust-remote-code \ + --max-samples 20 +``` + +# Remote Execution Boundary + +This skill intentionally stops at **local execution and backend selection**. + +If the user wants to: +- run these scripts on Hugging Face Jobs +- pick remote hardware +- pass secrets to remote jobs +- schedule recurring runs +- inspect / cancel / monitor jobs + +then switch to the **`hugging-face-jobs`** skill and pass it one of these scripts plus the chosen arguments. + +# Task Selection + +`inspect-ai` examples: +- `mmlu` +- `gsm8k` +- `hellaswag` +- `arc_challenge` +- `truthfulqa` +- `winogrande` +- `humaneval` + +`lighteval` task strings use `suite|task|num_fewshot`: +- `leaderboard|mmlu|5` +- `leaderboard|gsm8k|5` +- `leaderboard|arc_challenge|25` +- `lighteval|hellaswag|0` + +Multiple `lighteval` tasks can be comma-separated in `--tasks`. + +# Backend Selection + +- Prefer `inspect_vllm_uv.py --backend vllm` for fast GPU inference on supported architectures. +- Use `inspect_vllm_uv.py --backend hf` when `vllm` does not support the model. +- Prefer `lighteval_vllm_uv.py --backend vllm` for throughput on supported models. +- Use `lighteval_vllm_uv.py --backend accelerate` as the compatibility fallback. +- Use `inspect_eval_uv.py` when Inference Providers already cover the model and you do not need direct GPU control. 
+ +# Hardware Guidance + +| Model size | Suggested local hardware | +|---|---| +| `< 3B` | consumer GPU / Apple Silicon / small dev GPU | +| `3B - 13B` | stronger local GPU | +| `13B+` | high-memory local GPU or hand off to `hugging-face-jobs` | + +For smoke tests, prefer cheaper local runs plus `--limit` or `--max-samples`. + +# Troubleshooting + +- CUDA or vLLM OOM: + - reduce `--batch-size` + - reduce `--gpu-memory-utilization` + - switch to a smaller model for the smoke test + - if necessary, hand off to `hugging-face-jobs` +- Model unsupported by `vllm`: + - switch to `--backend hf` for `inspect-ai` + - switch to `--backend accelerate` for `lighteval` +- Gated/private repo access fails: + - verify `HF_TOKEN` +- Custom model code required: + - add `--trust-remote-code` + +# Examples + +See: +- `examples/USAGE_EXAMPLES.md` for local command patterns +- `scripts/inspect_eval_uv.py` +- `scripts/inspect_vllm_uv.py` +- `scripts/lighteval_vllm_uv.py` diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/examples/.env.example b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/examples/.env.example new file mode 100644 index 00000000..26d9b9b4 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/examples/.env.example @@ -0,0 +1,3 @@ +# Hugging Face Token (required for gated/private models) +# Get your token at: https://huggingface.co/settings/tokens +HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxx diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md new file mode 100644 index 00000000..64c24334 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md @@ -0,0 +1,101 @@ +# Usage Examples + +This document provides practical examples for **running evaluations locally** 
against Hugging Face Hub models. + +## What this skill covers + +- `inspect-ai` local runs +- `inspect-ai` with `vllm` or Transformers backends +- `lighteval` local runs with `vllm` or `accelerate` +- smoke tests and backend fallback patterns + +## What this skill does NOT cover + +- `model-index` +- `.eval_results` +- community eval publication workflows +- model-card PR creation +- Hugging Face Jobs orchestration + +If you want to run these same scripts remotely, use the `hugging-face-jobs` skill and pass one of the scripts in `scripts/`. + +## Setup + +```bash +cd skills/hugging-face-community-evals +export HF_TOKEN=hf_xxx +uv --version +``` + +For local GPU runs: + +```bash +nvidia-smi +``` + +## inspect-ai examples + +### Quick smoke test + +```bash +uv run scripts/inspect_eval_uv.py \ + --model meta-llama/Llama-3.2-1B \ + --task mmlu \ + --limit 10 +``` + +### Local GPU with vLLM + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model meta-llama/Llama-3.1-8B-Instruct \ + --task gsm8k \ + --limit 20 +``` + +### Transformers fallback + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model microsoft/phi-2 \ + --task mmlu \ + --backend hf \ + --trust-remote-code \ + --limit 20 +``` + +## lighteval examples + +### Single task + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model meta-llama/Llama-3.2-3B-Instruct \ + --tasks "leaderboard|mmlu|5" \ + --max-samples 20 +``` + +### Multiple tasks + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model meta-llama/Llama-3.2-3B-Instruct \ + --tasks "leaderboard|mmlu|5,leaderboard|gsm8k|5" \ + --max-samples 20 \ + --use-chat-template +``` + +### accelerate fallback + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model microsoft/phi-2 \ + --tasks "leaderboard|mmlu|5" \ + --backend accelerate \ + --trust-remote-code \ + --max-samples 20 +``` + +## Hand-off to Hugging Face Jobs + +When local hardware is not enough, switch to the `hugging-face-jobs` skill and run one of these scripts remotely.
Keep the script path and args; move the orchestration there. diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/inspect_eval_uv.py b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/inspect_eval_uv.py new file mode 100644 index 00000000..d398bc60 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/inspect_eval_uv.py @@ -0,0 +1,104 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "inspect-ai>=0.3.0", +# "inspect-evals", +# "openai", +# ] +# /// + +""" +Entry point script for running inspect-ai evaluations against Hugging Face inference providers. +""" + +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from pathlib import Path +from typing import Optional + + +def _inspect_evals_tasks_root() -> Optional[Path]: + """Return the installed inspect_evals package path if available.""" + try: + import inspect_evals + + return Path(inspect_evals.__file__).parent + except Exception: + return None + + +def _normalize_task(task: str) -> str: + """Allow lighteval-style `suite|task|shots` strings by keeping the task name.""" + if "|" in task: + parts = task.split("|") + if len(parts) >= 2 and parts[1]: + return parts[1] + return task + + +def main() -> None: + parser = argparse.ArgumentParser(description="Inspect-ai job runner") + parser.add_argument("--model", required=True, help="Model ID on Hugging Face Hub") + parser.add_argument("--task", required=True, help="inspect-ai task to execute") + parser.add_argument("--limit", type=int, default=None, help="Limit number of samples to evaluate") + parser.add_argument( + "--tasks-root", + default=None, + help="Optional path to inspect task files. 
Defaults to the installed inspect_evals package.", + ) + parser.add_argument( + "--sandbox", + default="local", + help="Sandbox backend to use (default: local for HF jobs without Docker).", + ) + args = parser.parse_args() + + # Ensure downstream libraries can read the token passed as a secret + hf_token = os.getenv("HF_TOKEN") + if hf_token: + os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token) + os.environ.setdefault("HF_HUB_TOKEN", hf_token) + + task = _normalize_task(args.task) + tasks_root = Path(args.tasks_root) if args.tasks_root else _inspect_evals_tasks_root() + if tasks_root and not tasks_root.exists(): + tasks_root = None + + cmd = [ + "inspect", + "eval", + task, + "--model", + f"hf-inference-providers/{args.model}", + "--log-level", + "info", + # Reduce batch size to avoid OOM errors (default is 32) + "--max-connections", + "1", + # Set a small positive temperature (HF doesn't allow temperature=0) + "--temperature", + "0.001", + ] + + if args.sandbox: + cmd.extend(["--sandbox", args.sandbox]) + + if args.limit: + cmd.extend(["--limit", str(args.limit)]) + + try: + subprocess.run(cmd, check=True, cwd=tasks_root) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + location = f" (cwd={tasks_root})" if tasks_root else "" + print(f"Evaluation failed with exit code {exc.returncode}{location}", file=sys.stderr) + raise + + +if __name__ == "__main__": + main() + diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py new file mode 100644 index 00000000..f1454c5a --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py @@ -0,0 +1,306 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "inspect-ai>=0.3.0", +# "inspect-evals", +# "vllm>=0.4.0", +# "torch>=2.0.0", +# "transformers>=4.40.0", +# ] +# 
/// + +""" +Entry point script for running inspect-ai evaluations with vLLM or HuggingFace Transformers backend. + +This script runs evaluations on custom HuggingFace models using local GPU inference, +separate from inference provider scripts (which use external APIs). + +Usage (standalone): + uv run scripts/inspect_vllm_uv.py --model "meta-llama/Llama-3.2-1B" --task "mmlu" + +Model backends: + - vllm: Fast inference with vLLM (recommended for large models) + - hf: HuggingFace Transformers backend (broader model compatibility) +""" + +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from typing import Optional + + +def setup_environment() -> None: + """Configure environment variables for HuggingFace authentication.""" + hf_token = os.getenv("HF_TOKEN") + if hf_token: + os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token) + os.environ.setdefault("HF_HUB_TOKEN", hf_token) + + +def run_inspect_vllm( + model_id: str, + task: str, + limit: Optional[int] = None, + max_connections: int = 4, + temperature: float = 0.0, + tensor_parallel_size: int = 1, + gpu_memory_utilization: float = 0.8, + dtype: str = "auto", + trust_remote_code: bool = False, + log_level: str = "info", +) -> None: + """ + Run inspect-ai evaluation with vLLM backend. 
+ + Args: + model_id: HuggingFace model ID + task: inspect-ai task to execute (e.g., "mmlu", "gsm8k") + limit: Limit number of samples to evaluate + max_connections: Maximum concurrent connections + temperature: Sampling temperature + tensor_parallel_size: Number of GPUs for tensor parallelism + gpu_memory_utilization: GPU memory fraction + dtype: Data type (auto, float16, bfloat16) + trust_remote_code: Allow remote code execution + log_level: Logging level + """ + setup_environment() + + model_spec = f"vllm/{model_id}" + cmd = [ + "inspect", + "eval", + task, + "--model", + model_spec, + "--log-level", + log_level, + "--max-connections", + str(max_connections), + ] + + # vLLM supports temperature=0 unlike HF inference providers + cmd.extend(["--temperature", str(temperature)]) + + # Older inspect-ai CLI versions do not support --model-args; rely on defaults + # and let vLLM choose sensible settings for small models. + if tensor_parallel_size != 1: + cmd.extend(["--tensor-parallel-size", str(tensor_parallel_size)]) + if gpu_memory_utilization != 0.8: + cmd.extend(["--gpu-memory-utilization", str(gpu_memory_utilization)]) + if dtype != "auto": + cmd.extend(["--dtype", dtype]) + if trust_remote_code: + cmd.append("--trust-remote-code") + + if limit: + cmd.extend(["--limit", str(limit)]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def run_inspect_hf( + model_id: str, + task: str, + limit: Optional[int] = None, + max_connections: int = 1, + temperature: float = 0.001, + device: str = "auto", + dtype: str = "auto", + trust_remote_code: bool = False, + log_level: str = "info", +) -> None: + """ + Run inspect-ai evaluation with HuggingFace Transformers backend. + + Use this when vLLM doesn't support the model architecture. 
+ + Args: + model_id: HuggingFace model ID + task: inspect-ai task to execute + limit: Limit number of samples + max_connections: Maximum concurrent connections (keep low for memory) + temperature: Sampling temperature + device: Device to use (auto, cuda, cpu) + dtype: Data type + trust_remote_code: Allow remote code execution + log_level: Logging level + """ + setup_environment() + + model_spec = f"hf/{model_id}" + + cmd = [ + "inspect", + "eval", + task, + "--model", + model_spec, + "--log-level", + log_level, + "--max-connections", + str(max_connections), + "--temperature", + str(temperature), + ] + + if device != "auto": + cmd.extend(["--device", device]) + if dtype != "auto": + cmd.extend(["--dtype", dtype]) + if trust_remote_code: + cmd.append("--trust-remote-code") + + if limit: + cmd.extend(["--limit", str(limit)]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Run inspect-ai evaluations with vLLM or HuggingFace Transformers on custom models", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Run MMLU with vLLM backend + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu + + # Run with HuggingFace Transformers backend + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu --backend hf + + # Run with limited samples for testing + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu --limit 10 + + # Run on multiple GPUs with tensor parallelism + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-70B --task mmlu --tensor-parallel-size 4 + +Available tasks (from inspect-evals): + - mmlu: Massive Multitask Language Understanding + - gsm8k: Grade 
School Math + - hellaswag: Common sense reasoning + - arc_challenge: AI2 Reasoning Challenge + - truthfulqa: TruthfulQA benchmark + - winogrande: Winograd Schema Challenge + - humaneval: Code generation (HumanEval) + + """, + ) + + parser.add_argument( + "--model", + required=True, + help="HuggingFace model ID (e.g., meta-llama/Llama-3.2-1B)", + ) + parser.add_argument( + "--task", + required=True, + help="inspect-ai task to execute (e.g., mmlu, gsm8k)", + ) + parser.add_argument( + "--backend", + choices=["vllm", "hf"], + default="vllm", + help="Model backend (default: vllm)", + ) + parser.add_argument( + "--limit", + type=int, + default=None, + help="Limit number of samples to evaluate", + ) + parser.add_argument( + "--max-connections", + type=int, + default=None, + help="Maximum concurrent connections (default: 4 for vllm, 1 for hf)", + ) + parser.add_argument( + "--temperature", + type=float, + default=None, + help="Sampling temperature (default: 0.0 for vllm, 0.001 for hf)", + ) + parser.add_argument( + "--tensor-parallel-size", + type=int, + default=1, + help="Number of GPUs for tensor parallelism (vLLM only, default: 1)", + ) + parser.add_argument( + "--gpu-memory-utilization", + type=float, + default=0.8, + help="GPU memory fraction to use (vLLM only, default: 0.8)", + ) + parser.add_argument( + "--dtype", + default="auto", + choices=["auto", "float16", "bfloat16", "float32"], + help="Data type for model weights (default: auto)", + ) + parser.add_argument( + "--device", + default="auto", + help="Device for HF backend (auto, cuda, cpu)", + ) + parser.add_argument( + "--trust-remote-code", + action="store_true", + help="Allow executing remote code from model repository", + ) + parser.add_argument( + "--log-level", + default="info", + choices=["debug", "info", "warning", "error"], + help="Logging level (default: info)", + ) + + args = parser.parse_args() + + if args.backend == "vllm": + run_inspect_vllm( + model_id=args.model, + task=args.task, + 
limit=args.limit, + max_connections=args.max_connections or 4, + temperature=args.temperature if args.temperature is not None else 0.0, + tensor_parallel_size=args.tensor_parallel_size, + gpu_memory_utilization=args.gpu_memory_utilization, + dtype=args.dtype, + trust_remote_code=args.trust_remote_code, + log_level=args.log_level, + ) + else: + run_inspect_hf( + model_id=args.model, + task=args.task, + limit=args.limit, + max_connections=args.max_connections or 1, + temperature=args.temperature if args.temperature is not None else 0.001, + device=args.device, + dtype=args.dtype, + trust_remote_code=args.trust_remote_code, + log_level=args.log_level, + ) + + +if __name__ == "__main__": + main() diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py new file mode 100644 index 00000000..91ba83b3 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py @@ -0,0 +1,297 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "lighteval[accelerate,vllm]>=0.6.0", +# "torch>=2.0.0", +# "transformers>=4.40.0", +# "accelerate>=0.30.0", +# "vllm>=0.4.0", +# ] +# /// + +""" +Entry point script for running lighteval evaluations with local GPU backends. + +This script runs evaluations using vLLM or accelerate on custom HuggingFace models. +It is separate from inference provider scripts and evaluates models directly on local hardware. 
+ +Usage (standalone): + uv run scripts/lighteval_vllm_uv.py --model "meta-llama/Llama-3.2-1B" --tasks "leaderboard|mmlu|5" + +""" + +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from typing import Optional + + +def setup_environment() -> None: + """Configure environment variables for HuggingFace authentication.""" + hf_token = os.getenv("HF_TOKEN") + if hf_token: + os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token) + os.environ.setdefault("HF_HUB_TOKEN", hf_token) + + +def run_lighteval_vllm( + model_id: str, + tasks: str, + output_dir: Optional[str] = None, + max_samples: Optional[int] = None, + batch_size: int = 1, + tensor_parallel_size: int = 1, + gpu_memory_utilization: float = 0.8, + dtype: str = "auto", + trust_remote_code: bool = False, + use_chat_template: bool = False, + system_prompt: Optional[str] = None, +) -> None: + """ + Run lighteval with vLLM backend for efficient GPU inference. + + Args: + model_id: HuggingFace model ID (e.g., "meta-llama/Llama-3.2-1B") + tasks: Task specification (e.g., "leaderboard|mmlu|5" or "lighteval|hellaswag|0") + output_dir: Directory for evaluation results + max_samples: Limit number of samples per task + batch_size: Batch size for evaluation + tensor_parallel_size: Number of GPUs for tensor parallelism + gpu_memory_utilization: GPU memory fraction to use (0.0-1.0) + dtype: Data type for model weights (auto, float16, bfloat16) + trust_remote_code: Allow executing remote code from model repo + use_chat_template: Apply chat template for conversational models + system_prompt: System prompt for chat models + """ + setup_environment() + + # Build lighteval vllm command + cmd = [ + "lighteval", + "vllm", + model_id, + tasks, + "--batch-size", str(batch_size), + "--tensor-parallel-size", str(tensor_parallel_size), + "--gpu-memory-utilization", str(gpu_memory_utilization), + "--dtype", dtype, + ] + + if output_dir: + cmd.extend(["--output-dir", output_dir]) + + if 
max_samples: + cmd.extend(["--max-samples", str(max_samples)]) + + if trust_remote_code: + cmd.append("--trust-remote-code") + + if use_chat_template: + cmd.append("--use-chat-template") + + if system_prompt: + cmd.extend(["--system-prompt", system_prompt]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def run_lighteval_accelerate( + model_id: str, + tasks: str, + output_dir: Optional[str] = None, + max_samples: Optional[int] = None, + batch_size: int = 1, + dtype: str = "bfloat16", + trust_remote_code: bool = False, + use_chat_template: bool = False, + system_prompt: Optional[str] = None, +) -> None: + """ + Run lighteval with accelerate backend for multi-GPU distributed inference. + + Use this backend when vLLM is not available or for models not supported by vLLM. 
+ + Args: + model_id: HuggingFace model ID + tasks: Task specification + output_dir: Directory for evaluation results + max_samples: Limit number of samples per task + batch_size: Batch size for evaluation + dtype: Data type for model weights + trust_remote_code: Allow executing remote code + use_chat_template: Apply chat template + system_prompt: System prompt for chat models + """ + setup_environment() + + # Build lighteval accelerate command + cmd = [ + "lighteval", + "accelerate", + model_id, + tasks, + "--batch-size", str(batch_size), + "--dtype", dtype, + ] + + if output_dir: + cmd.extend(["--output-dir", output_dir]) + + if max_samples: + cmd.extend(["--max-samples", str(max_samples)]) + + if trust_remote_code: + cmd.append("--trust-remote-code") + + if use_chat_template: + cmd.append("--use-chat-template") + + if system_prompt: + cmd.extend(["--system-prompt", system_prompt]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Run lighteval evaluations with vLLM or accelerate backend on custom HuggingFace models", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Run MMLU evaluation with vLLM + uv run scripts/lighteval_vllm_uv.py --model meta-llama/Llama-3.2-1B --tasks "leaderboard|mmlu|5" + + # Run with accelerate backend instead of vLLM + uv run scripts/lighteval_vllm_uv.py --model meta-llama/Llama-3.2-1B --tasks "leaderboard|mmlu|5" --backend accelerate + + # Run with chat template for instruction-tuned models + uv run scripts/lighteval_vllm_uv.py --model meta-llama/Llama-3.2-1B-Instruct --tasks "leaderboard|mmlu|5" --use-chat-template + + # Run with limited samples for testing + uv run scripts/lighteval_vllm_uv.py --model 
meta-llama/Llama-3.2-1B --tasks "leaderboard|mmlu|5" --max-samples 10 + +Task format: + Tasks use the format: "suite|task|num_fewshot" + - leaderboard|mmlu|5 (MMLU with 5-shot) + - lighteval|hellaswag|0 (HellaSwag zero-shot) + - leaderboard|gsm8k|5 (GSM8K with 5-shot) + - Multiple tasks: "leaderboard|mmlu|5,leaderboard|gsm8k|5" + """, + ) + + parser.add_argument( + "--model", + required=True, + help="HuggingFace model ID (e.g., meta-llama/Llama-3.2-1B)", + ) + parser.add_argument( + "--tasks", + required=True, + help="Task specification (e.g., 'leaderboard|mmlu|5')", + ) + parser.add_argument( + "--backend", + choices=["vllm", "accelerate"], + default="vllm", + help="Inference backend to use (default: vllm)", + ) + parser.add_argument( + "--output-dir", + default=None, + help="Directory for evaluation results", + ) + parser.add_argument( + "--max-samples", + type=int, + default=None, + help="Limit number of samples per task (useful for testing)", + ) + parser.add_argument( + "--batch-size", + type=int, + default=1, + help="Batch size for evaluation (default: 1)", + ) + parser.add_argument( + "--tensor-parallel-size", + type=int, + default=1, + help="Number of GPUs for tensor parallelism (vLLM only, default: 1)", + ) + parser.add_argument( + "--gpu-memory-utilization", + type=float, + default=0.8, + help="GPU memory fraction to use (vLLM only, default: 0.8)", + ) + parser.add_argument( + "--dtype", + default="auto", + choices=["auto", "float16", "bfloat16", "float32"], + help="Data type for model weights (default: auto)", + ) + parser.add_argument( + "--trust-remote-code", + action="store_true", + help="Allow executing remote code from model repository", + ) + parser.add_argument( + "--use-chat-template", + action="store_true", + help="Apply chat template for instruction-tuned/chat models", + ) + parser.add_argument( + "--system-prompt", + default=None, + help="System prompt for chat models", + ) + + args = parser.parse_args() + + if args.backend == "vllm": + 
run_lighteval_vllm( + model_id=args.model, + tasks=args.tasks, + output_dir=args.output_dir, + max_samples=args.max_samples, + batch_size=args.batch_size, + tensor_parallel_size=args.tensor_parallel_size, + gpu_memory_utilization=args.gpu_memory_utilization, + dtype=args.dtype, + trust_remote_code=args.trust_remote_code, + use_chat_template=args.use_chat_template, + system_prompt=args.system_prompt, + ) + else: + run_lighteval_accelerate( + model_id=args.model, + tasks=args.tasks, + output_dir=args.output_dir, + max_samples=args.max_samples, + batch_size=args.batch_size, + dtype=args.dtype if args.dtype != "auto" else "bfloat16", + trust_remote_code=args.trust_remote_code, + use_chat_template=args.use_chat_template, + system_prompt=args.system_prompt, + ) + + +if __name__ == "__main__": + main() diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-dataset-viewer/SKILL.md b/plugins/antigravity-awesome-skills/skills/hugging-face-dataset-viewer/SKILL.md index 410eb832..624bc78d 100644 --- a/plugins/antigravity-awesome-skills/skills/hugging-face-dataset-viewer/SKILL.md +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-dataset-viewer/SKILL.md @@ -1,127 +1,127 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-datasets" name: hugging-face-dataset-viewer -description: Use this skill for Hugging Face Dataset Viewer API workflows that fetch subset/split metadata, paginate rows, search text, apply filters, download parquet URLs, and read size or statistics. +description: Query Hugging Face datasets through the Dataset Viewer API for splits, rows, search, filters, and parquet links. risk: unknown -source: community --- - + # Hugging Face Dataset Viewer -Use this skill to execute read-only Dataset Viewer API calls for dataset exploration and extraction. - -## Core workflow - -1. Optionally validate dataset availability with `/is-valid`. -2. Resolve `config` + `split` with `/splits`. -3. Preview with `/first-rows`. -4. 
Paginate content with `/rows` using `offset` and `length` (max 100). -5. Use `/search` for text matching and `/filter` for row predicates. -6. Retrieve parquet links via `/parquet` and totals/metadata via `/size` and `/statistics`. - -## Defaults - -- Base URL: `https://datasets-server.huggingface.co` -- Default API method: `GET` -- Query params should be URL-encoded. -- `offset` is 0-based. -- `length` max is usually `100` for row-like endpoints. -- Gated/private datasets require `Authorization: Bearer `. - -## Dataset Viewer - -- `Validate dataset`: `/is-valid?dataset=` -- `List subsets and splits`: `/splits?dataset=` -- `Preview first rows`: `/first-rows?dataset=&config=&split=` -- `Paginate rows`: `/rows?dataset=&config=&split=&offset=&length=` -- `Search text`: `/search?dataset=&config=&split=&query=&offset=&length=` -- `Filter with predicates`: `/filter?dataset=&config=&split=&where=&orderby=&offset=&length=` -- `List parquet shards`: `/parquet?dataset=` -- `Get size totals`: `/size?dataset=` -- `Get column statistics`: `/statistics?dataset=&config=&split=` -- `Get Croissant metadata (if available)`: `/croissant?dataset=` - -Pagination pattern: - -```bash -curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=0&length=100" -curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=100&length=100" -``` - -When pagination is partial, use response fields such as `num_rows_total`, `num_rows_per_page`, and `partial` to drive continuation logic. - -Search/filter notes: - -- `/search` matches string columns (full-text style behavior is internal to the API). -- `/filter` requires predicate syntax in `where` and optional sort in `orderby`. -- Keep filtering and searches read-only and side-effect free. - -## Querying Datasets - -Use `npx parquetlens` with Hub parquet alias paths for SQL querying. 
- -Parquet alias shape: - -```text -hf://datasets//@~parquet///.parquet -``` - -Derive ``, ``, and `` from Dataset Viewer `/parquet`: - -```bash -curl -s "https://datasets-server.huggingface.co/parquet?dataset=cfahlgren1/hub-stats" \ - | jq -r '.parquet_files[] | "hf://datasets/\(.dataset)@~parquet/\(.config)/\(.split)/\(.filename)"' -``` - -Run SQL query: - -```bash -npx -y -p parquetlens -p @parquetlens/sql parquetlens \ - "hf://datasets//@~parquet///.parquet" \ - --sql "SELECT * FROM data LIMIT 20" -``` - -### SQL export - -- CSV: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.csv' (FORMAT CSV, HEADER, DELIMITER ',')"` -- JSON: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.json' (FORMAT JSON)"` -- Parquet: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.parquet' (FORMAT PARQUET)"` - -## Creating and Uploading Datasets - -Use one of these flows depending on dependency constraints. - -Zero local dependencies (Hub UI): - -- Create dataset repo in browser: `https://huggingface.co/new-dataset` -- Upload parquet files in the repo "Files and versions" page. -- Verify shards appear in Dataset Viewer: - -```bash -curl -s "https://datasets-server.huggingface.co/parquet?dataset=/" -``` - -Low dependency CLI flow (`npx @huggingface/hub` / `hfjs`): - -- Set auth token: - -```bash -export HF_TOKEN= -``` - -- Upload parquet folder to a dataset repo (auto-creates repo if missing): - -```bash -npx -y @huggingface/hub upload datasets// ./local/parquet-folder data -``` - -- Upload as private repo on creation: - -```bash -npx -y @huggingface/hub upload datasets// ./local/parquet-folder data --private -``` - -After upload, call `/parquet` to discover `//` values for querying with `@~parquet`. - - ## When to Use -Use this skill when tackling tasks related to its primary domain or functionality as described above. + +Use this skill when you need read-only exploration of a Hugging Face dataset through the Dataset Viewer API. 
+ +Use this skill to execute read-only Dataset Viewer API calls for dataset exploration and extraction. + +## Core workflow + +1. Optionally validate dataset availability with `/is-valid`. +2. Resolve `config` + `split` with `/splits`. +3. Preview with `/first-rows`. +4. Paginate content with `/rows` using `offset` and `length` (max 100). +5. Use `/search` for text matching and `/filter` for row predicates. +6. Retrieve parquet links via `/parquet` and totals/metadata via `/size` and `/statistics`. + +## Defaults + +- Base URL: `https://datasets-server.huggingface.co` +- Default API method: `GET` +- Query params should be URL-encoded. +- `offset` is 0-based. +- `length` max is usually `100` for row-like endpoints. +- Gated/private datasets require `Authorization: Bearer `. + +## Dataset Viewer + +- `Validate dataset`: `/is-valid?dataset=` +- `List subsets and splits`: `/splits?dataset=` +- `Preview first rows`: `/first-rows?dataset=&config=&split=` +- `Paginate rows`: `/rows?dataset=&config=&split=&offset=&length=` +- `Search text`: `/search?dataset=&config=&split=&query=&offset=&length=` +- `Filter with predicates`: `/filter?dataset=&config=&split=&where=&orderby=&offset=&length=` +- `List parquet shards`: `/parquet?dataset=` +- `Get size totals`: `/size?dataset=` +- `Get column statistics`: `/statistics?dataset=&config=&split=` +- `Get Croissant metadata (if available)`: `/croissant?dataset=` + +Pagination pattern: + +```bash +curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=0&length=100" +curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=100&length=100" +``` + +When pagination is partial, use response fields such as `num_rows_total`, `num_rows_per_page`, and `partial` to drive continuation logic. + +Search/filter notes: + +- `/search` matches string columns (full-text style behavior is internal to the API). 
+- `/filter` requires predicate syntax in `where` and optional sort in `orderby`. +- Keep filtering and searches read-only and side-effect free. + +## Querying Datasets + +Use `npx parquetlens` with Hub parquet alias paths for SQL querying. + +Parquet alias shape: + +```text +hf://datasets//@~parquet///.parquet +``` + +Derive ``, ``, and `` from Dataset Viewer `/parquet`: + +```bash +curl -s "https://datasets-server.huggingface.co/parquet?dataset=cfahlgren1/hub-stats" \ + | jq -r '.parquet_files[] | "hf://datasets/\(.dataset)@~parquet/\(.config)/\(.split)/\(.filename)"' +``` + +Run SQL query: + +```bash +npx -y -p parquetlens -p @parquetlens/sql parquetlens \ + "hf://datasets//@~parquet///.parquet" \ + --sql "SELECT * FROM data LIMIT 20" +``` + +### SQL export + +- CSV: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.csv' (FORMAT CSV, HEADER, DELIMITER ',')"` +- JSON: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.json' (FORMAT JSON)"` +- Parquet: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.parquet' (FORMAT PARQUET)"` + +## Creating and Uploading Datasets + +Use one of these flows depending on dependency constraints. + +Zero local dependencies (Hub UI): + +- Create dataset repo in browser: `https://huggingface.co/new-dataset` +- Upload parquet files in the repo "Files and versions" page. +- Verify shards appear in Dataset Viewer: + +```bash +curl -s "https://datasets-server.huggingface.co/parquet?dataset=/" +``` + +Low dependency CLI flow (`npx @huggingface/hub` / `hfjs`): + +- Set auth token: + +```bash +export HF_TOKEN= +``` + +- Upload parquet folder to a dataset repo (auto-creates repo if missing): + +```bash +npx -y @huggingface/hub upload datasets// ./local/parquet-folder data +``` + +- Upload as private repo on creation: + +```bash +npx -y @huggingface/hub upload datasets// ./local/parquet-folder data --private +``` + +After upload, call `/parquet` to discover `//` values for querying with `@~parquet`. 
diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-gradio/SKILL.md b/plugins/antigravity-awesome-skills/skills/hugging-face-gradio/SKILL.md new file mode 100644 index 00000000..b15c3a39 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-gradio/SKILL.md @@ -0,0 +1,304 @@ +--- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-gradio" +name: hugging-face-gradio +description: Build or edit Gradio apps, layouts, components, and chat interfaces in Python. +risk: unknown +--- + +# Gradio + +## When to Use + +Use this skill when a user wants a Gradio demo, UI prototype, or Python-based ML interface. + +Gradio is a Python library for building interactive web UIs and ML demos. This skill covers the core API, patterns, and examples. + +## Guides + +Detailed guides on specific topics (read these when relevant): + +- [Quickstart](https://www.gradio.app/guides/quickstart) +- [The Interface Class](https://www.gradio.app/guides/the-interface-class) +- [Blocks and Event Listeners](https://www.gradio.app/guides/blocks-and-event-listeners) +- [Controlling Layout](https://www.gradio.app/guides/controlling-layout) +- [More Blocks Features](https://www.gradio.app/guides/more-blocks-features) +- [Custom CSS and JS](https://www.gradio.app/guides/custom-CSS-and-JS) +- [Streaming Outputs](https://www.gradio.app/guides/streaming-outputs) +- [Streaming Inputs](https://www.gradio.app/guides/streaming-inputs) +- [Sharing Your App](https://www.gradio.app/guides/sharing-your-app) +- [Custom HTML Components](https://www.gradio.app/guides/custom-HTML-components) +- [Getting Started with the Python Client](https://www.gradio.app/guides/getting-started-with-the-python-client) +- [Getting Started with the JS Client](https://www.gradio.app/guides/getting-started-with-the-js-client) + +## Core Patterns + +**Interface** (high-level): wraps a function with input/output components. 
+ +```python +import gradio as gr + +def greet(name): + return f"Hello {name}!" + +gr.Interface(fn=greet, inputs="text", outputs="text").launch() +``` + +**Blocks** (low-level): flexible layout with explicit event wiring. + +```python +import gradio as gr + +with gr.Blocks() as demo: + name = gr.Textbox(label="Name") + output = gr.Textbox(label="Greeting") + btn = gr.Button("Greet") + btn.click(fn=lambda n: f"Hello {n}!", inputs=name, outputs=output) + +demo.launch() +``` + +**ChatInterface**: high-level wrapper for chatbot UIs. + +```python +import gradio as gr + +def respond(message, history): + return f"You said: {message}" + +gr.ChatInterface(fn=respond).launch() +``` + +## Key Component Signatures + +### `Textbox(value: str | I18nData | Callable | None = None, type: Literal['text', 'password', 'email'] = "text", lines: int = 1, max_lines: int | None = None, placeholder: str | I18nData | None = None, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, autofocus: bool = False, autoscroll: bool = True, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", text_align: Literal['left', 'right'] | None = None, rtl: bool = False, buttons: list[Literal['copy'] | Button] | None = None, max_length: int | None = None, submit_btn: str | bool | None = False, stop_btn: str | bool | None = False, html_attributes: InputHTMLAttributes | None = None)` +Creates a textarea for user to enter string input or display string output.. 
+ +### `Number(value: float | Callable | None = None, label: str | I18nData | None = None, placeholder: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", buttons: list[Button] | None = None, precision: int | None = None, minimum: float | None = None, maximum: float | None = None, step: float = 1)` +Creates a numeric field for user to enter numbers as input or display numeric output.. + +### `Slider(minimum: float = 0, maximum: float = 100, value: float | Callable | None = None, step: float | None = None, precision: int | None = None, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", randomize: bool = False, buttons: list[Literal['reset']] | None = None)` +Creates a slider that ranges from {minimum} to {maximum} with a step size of {step}.. 
+ +### `Checkbox(value: bool | Callable = False, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", buttons: list[Button] | None = None)` +Creates a checkbox that can be set to `True` or `False`. + +### `Dropdown(choices: Sequence[str | int | float | tuple[str, str | int | float]] | None = None, value: str | int | float | Sequence[str | int | float] | Callable | DefaultValue | None = DefaultValue(), type: Literal['value', 'index'] = "value", multiselect: bool | None = None, allow_custom_value: bool = False, max_choices: int | None = None, filterable: bool = True, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", buttons: list[Button] | None = None)` +Creates a dropdown of choices from which a single entry or multiple entries can be selected (as an input component) or displayed (as an output component).. 
+ +### `Radio(choices: Sequence[str | int | float | tuple[str, str | int | float]] | None = None, value: str | int | float | Callable | None = None, type: Literal['value', 'index'] = "value", label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", rtl: bool = False, buttons: list[Button] | None = None)` +Creates a set of (string or numeric type) radio buttons of which only one can be selected.. + +### `Image(value: str | PIL.Image.Image | np.ndarray | Callable | None = None, format: str = "webp", height: int | str | None = None, width: int | str | None = None, image_mode: Literal['1', 'L', 'P', 'RGB', 'RGBA', 'CMYK', 'YCbCr', 'LAB', 'HSV', 'I', 'F'] | None = "RGB", sources: list[Literal['upload', 'webcam', 'clipboard']] | Literal['upload', 'webcam', 'clipboard'] | None = None, type: Literal['numpy', 'pil', 'filepath'] = "numpy", label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, buttons: list[Literal['download', 'share', 'fullscreen'] | Button] | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, streaming: bool = False, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] 
| None = None, preserved_by_key: list[str] | str | None = "value", webcam_options: WebcamOptions | None = None, placeholder: str | None = None, watermark: WatermarkOptions | None = None)` +Creates an image component that can be used to upload images (as an input) or display images (as an output).. + +### `Audio(value: str | Path | tuple[int, np.ndarray] | Callable | None = None, sources: list[Literal['upload', 'microphone']] | Literal['upload', 'microphone'] | None = None, type: Literal['numpy', 'filepath'] = "numpy", label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, streaming: bool = False, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", format: Literal['wav', 'mp3'] | None = None, autoplay: bool = False, editable: bool = True, buttons: list[Literal['download', 'share'] | Button] | None = None, waveform_options: WaveformOptions | dict | None = None, loop: bool = False, recording: bool = False, subtitles: str | Path | list[dict[str, Any]] | None = None, playback_position: float = 0)` +Creates an audio component that can be used to upload/record audio (as an input) or display audio (as an output).. 
+ +### `Video(value: str | Path | Callable | None = None, format: str | None = None, sources: list[Literal['upload', 'webcam']] | Literal['upload', 'webcam'] | None = None, height: int | str | None = None, width: int | str | None = None, label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", webcam_options: WebcamOptions | None = None, include_audio: bool | None = None, autoplay: bool = False, buttons: list[Literal['download', 'share'] | Button] | None = None, loop: bool = False, streaming: bool = False, watermark: WatermarkOptions | None = None, subtitles: str | Path | list[dict[str, Any]] | None = None, playback_position: float = 0)` +Creates a video component that can be used to upload/record videos (as an input) or display videos (as an output). + +### `File(value: str | list[str] | Callable | None = None, file_count: Literal['single', 'multiple', 'directory'] = "single", file_types: list[str] | None = None, type: Literal['filepath', 'binary'] = "filepath", label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, height: int | str | float | None = None, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] 
| None = None, preserved_by_key: list[str] | str | None = "value", allow_reordering: bool = False, buttons: list[Button] | None = None)` +Creates a file component that allows uploading one or more generic files (when used as an input) or displaying generic files or URLs for download (as output). + +### `Chatbot(value: list[MessageDict | Message] | Callable | None = None, label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, autoscroll: bool = True, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", height: int | str | None = 400, resizable: bool = False, max_height: int | str | None = None, min_height: int | str | None = None, editable: Literal['user', 'all'] | None = None, latex_delimiters: list[dict[str, str | bool]] | None = None, rtl: bool = False, buttons: list[Literal['share', 'copy', 'copy_all'] | Button] | None = None, watermark: str | None = None, avatar_images: tuple[str | Path | None, str | Path | None] | None = None, sanitize_html: bool = True, render_markdown: bool = True, feedback_options: list[str] | tuple[str, ...] | None = ('Like', 'Dislike'), feedback_value: Sequence[str | None] | None = None, line_breaks: bool = True, layout: Literal['panel', 'bubble'] | None = None, placeholder: str | None = None, examples: list[ExampleMessage] | None = None, allow_file_downloads: bool = True, group_consecutive_messages: bool = True, allow_tags: list[str] | bool = True, reasoning_tags: list[tuple[str, str]] | None = None, like_user_message: bool = False)` +Creates a chatbot that displays user-submitted messages and responses. 
+ +### `Button(value: str | I18nData | Callable = "Run", every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, variant: Literal['primary', 'secondary', 'stop', 'huggingface'] = "secondary", size: Literal['sm', 'md', 'lg'] = "lg", icon: str | Path | None = None, link: str | None = None, link_target: Literal['_self', '_blank', '_parent', '_top'] = "_self", visible: bool | Literal['hidden'] = True, interactive: bool = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", scale: int | None = None, min_width: int | None = None)` +Creates a button that can be assigned arbitrary .click() events. + +### `Markdown(value: str | I18nData | Callable | None = None, label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, rtl: bool = False, latex_delimiters: list[dict[str, str | bool]] | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", sanitize_html: bool = True, line_breaks: bool = False, header_links: bool = False, height: int | str | None = None, max_height: int | str | None = None, min_height: int | str | None = None, buttons: list[Literal['copy']] | None = None, container: bool = False, padding: bool = False)` +Used to render arbitrary Markdown output. 
+ +### `HTML(value: Any | Callable | None = None, label: str | I18nData | None = None, html_template: str = "${value}", css_template: str = "", js_on_load: str | None = "element.addEventListener('click', function() { trigger('click') });", apply_default_css: bool = True, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool = False, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", min_height: int | None = None, max_height: int | None = None, container: bool = False, padding: bool = False, autoscroll: bool = False, buttons: list[Button] | None = None, server_functions: list[Callable] | None = None, props: Any)` +Creates a component with arbitrary HTML. + + +## Custom HTML Components + +If a task requires significant customization of an existing component or a component that doesn't exist in Gradio, you can create one with `gr.HTML`. It supports `html_template` (with `${}` JS expressions and `{{}}` Handlebars syntax), `css_template` for scoped styles, and `js_on_load` for interactivity — where `props.value` updates the component value and `trigger('event_name')` fires Gradio events. For reuse, subclass `gr.HTML` and define `api_info()` for API/MCP support. See the [full guide](https://www.gradio.app/guides/custom-HTML-components). + +Here's an example that shows how to create and use these kinds of components: + +```python +import gradio as gr + +class StarRating(gr.HTML): + def __init__(self, label, value=0, **kwargs): + html_template = """ +

${label} rating:

+ ${Array.from({length: 5}, (_, i) => ``).join('')} + """ + css_template = """ + img { height: 50px; display: inline-block; cursor: pointer; } + .faded { filter: grayscale(100%); opacity: 0.3; } + """ + js_on_load = """ + const imgs = element.querySelectorAll('img'); + imgs.forEach((img, index) => { + img.addEventListener('click', () => { + props.value = index + 1; + }); + }); + """ + super().__init__(value=value, label=label, html_template=html_template, css_template=css_template, js_on_load=js_on_load, **kwargs) + + def api_info(self): + return {"type": "integer", "minimum": 0, "maximum": 5} + + +with gr.Blocks() as demo: + gr.Markdown("# Restaurant Review") + food_rating = StarRating(label="Food", value=3) + service_rating = StarRating(label="Service", value=3) + ambience_rating = StarRating(label="Ambience", value=3) + average_btn = gr.Button("Calculate Average Rating") + rating_output = StarRating(label="Average", value=3) + def calculate_average(food, service, ambience): + return round((food + service + ambience) / 3) + average_btn.click( + fn=calculate_average, + inputs=[food_rating, service_rating, ambience_rating], + outputs=rating_output + ) + +demo.launch() +``` + +## Event Listeners + +All event listeners share the same signature: + +```python +component.event_name( + fn: Callable | None | Literal["decorator"] = "decorator", + inputs: Component | Sequence[Component] | set[Component] | None = None, + outputs: Component | Sequence[Component] | set[Component] | None = None, + api_name: str | None = None, + api_description: str | None | Literal[False] = None, + scroll_to_output: bool = False, + show_progress: Literal["full", "minimal", "hidden"] = "full", + show_progress_on: Component | Sequence[Component] | None = None, + queue: bool = True, + batch: bool = False, + max_batch_size: int = 4, + preprocess: bool = True, + postprocess: bool = True, + cancels: dict[str, Any] | list[dict[str, Any]] | None = None, + trigger_mode: Literal["once", "multiple", 
"always_last"] | None = None, + js: str | Literal[True] | None = None, + concurrency_limit: int | None | Literal["default"] = "default", + concurrency_id: str | None = None, + api_visibility: Literal["public", "private", "undocumented"] = "public", + time_limit: int | None = None, + stream_every: float = 0.5, + key: int | str | tuple[int | str, ...] | None = None, + validator: Callable | None = None, +) -> Dependency +``` + +Supported events per component: + +- **AnnotatedImage**: select +- **Audio**: stream, change, clear, play, pause, stop, pause, start_recording, pause_recording, stop_recording, upload, input +- **BarPlot**: select, double_click +- **BrowserState**: change +- **Button**: click +- **Chatbot**: change, select, like, retry, undo, example_select, option_select, clear, copy, edit +- **Checkbox**: change, input, select +- **CheckboxGroup**: change, input, select +- **ClearButton**: click +- **Code**: change, input, focus, blur +- **ColorPicker**: change, input, submit, focus, blur +- **Dataframe**: change, input, select, edit +- **Dataset**: click, select +- **DateTime**: change, submit +- **DeepLinkButton**: click +- **Dialogue**: change, input, submit +- **DownloadButton**: click +- **Dropdown**: change, input, select, focus, blur, key_up +- **DuplicateButton**: click +- **File**: change, select, clear, upload, delete, download +- **FileExplorer**: change, input, select +- **Gallery**: select, upload, change, delete, preview_close, preview_open +- **HTML**: change, input, click, double_click, submit, stop, edit, clear, play, pause, end, start_recording, pause_recording, stop_recording, focus, blur, upload, release, select, stream, like, example_select, option_select, load, key_up, apply, delete, tick, undo, retry, expand, collapse, download, copy +- **HighlightedText**: change, select +- **Image**: clear, change, stream, select, upload, input +- **ImageEditor**: clear, change, input, select, upload, apply +- **ImageSlider**: clear, change, stream, 
select, upload, input +- **JSON**: change +- **Label**: change, select +- **LinePlot**: select, double_click +- **LoginButton**: click +- **Markdown**: change, copy +- **Model3D**: change, upload, edit, clear +- **MultimodalTextbox**: change, input, select, submit, focus, blur, stop +- **Navbar**: change +- **Number**: change, input, submit, focus, blur +- **ParamViewer**: change, upload +- **Plot**: change +- **Radio**: select, change, input +- **ScatterPlot**: select, double_click +- **SimpleImage**: clear, change, upload +- **Slider**: change, input, release +- **State**: change +- **Textbox**: change, input, select, submit, focus, blur, stop, copy +- **Timer**: tick +- **UploadButton**: click, upload +- **Video**: change, clear, start_recording, stop_recording, stop, play, pause, end, upload, input + +## Prediction CLI + +The `gradio` CLI includes `info` and `predict` commands for interacting with Gradio apps programmatically. These are especially useful for coding agents that need to use Spaces in their workflows. + +### `gradio info` — Discover endpoints and parameters + +```bash +gradio info +``` + +Returns a JSON payload describing all endpoints, their parameters (with types and defaults), and return values. + +```bash +gradio info gradio/calculator +# { +# "/predict": { +# "parameters": [ +# {"name": "num1", "required": true, "default": null, "type": {"type": "number"}}, +# {"name": "operation", "required": true, "default": null, "type": {"enum": ["add", "subtract", "multiply", "divide"], "type": "string"}}, +# {"name": "num2", "required": true, "default": null, "type": {"type": "number"}} +# ], +# "returns": [{"name": "output", "type": {"type": "number"}}], +# "description": "" +# } +# } +``` + +File-type parameters show `"type": "filepath"` with instructions to include `"meta": {"_type": "gradio.FileData"}` — this signals the file will be uploaded to the remote server. 
+ +### `gradio predict` — Send predictions + +```bash +gradio predict +``` + +Returns a JSON object with named output keys. + +```bash +# Simple numeric prediction +gradio predict gradio/calculator /predict '{"num1": 5, "operation": "multiply", "num2": 3}' +# {"output": 15} + +# Image generation +gradio predict black-forest-labs/FLUX.2-dev /infer '{"prompt": "A majestic dragon"}' +# {"Result": "/tmp/gradio/.../image.webp", "Seed": 1117868604} + +# File upload (must include meta key) +gradio predict gradio/image_mod /predict '{"image": {"path": "/path/to/image.png", "meta": {"_type": "gradio.FileData"}}}' +# {"output": "/tmp/gradio/.../output.png"} +``` + +Both commands accept `--token` for accessing private Spaces. + +## Additional Reference + +- [End-to-End Examples](examples.md) — complete working apps diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-gradio/examples.md b/plugins/antigravity-awesome-skills/skills/hugging-face-gradio/examples.md new file mode 100644 index 00000000..b48c4cdc --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-gradio/examples.md @@ -0,0 +1,613 @@ +# Gradio End-to-End Examples + +Complete working Gradio apps for reference. + +## Blocks Essay Simple + +```python +import gradio as gr + +def change_textbox(choice): + if choice == "short": + return gr.Textbox(lines=2, visible=True) + elif choice == "long": + return gr.Textbox(lines=8, visible=True, value="Lorem ipsum dolor sit amet") + else: + return gr.Textbox(visible=False) + +with gr.Blocks() as demo: + radio = gr.Radio( + ["short", "long", "none"], label="What kind of essay would you like to write?" 
+ ) + text = gr.Textbox(lines=2, interactive=True, buttons=["copy"]) + radio.change(fn=change_textbox, inputs=radio, outputs=text) + +demo.launch() +``` + +## Blocks Flipper + +```python +import numpy as np +import gradio as gr + +def flip_text(x): + return x[::-1] + +def flip_image(x): + return np.fliplr(x) + +with gr.Blocks() as demo: + gr.Markdown("Flip text or image files using this demo.") + with gr.Tab("Flip Text"): + text_input = gr.Textbox() + text_output = gr.Textbox() + text_button = gr.Button("Flip") + with gr.Tab("Flip Image"): + with gr.Row(): + image_input = gr.Image() + image_output = gr.Image() + image_button = gr.Button("Flip") + + with gr.Accordion("Open for More!", open=False): + gr.Markdown("Look at me...") + temp_slider = gr.Slider( + 0, 1, + value=0.1, + step=0.1, + interactive=True, + label="Slide me", + ) + + text_button.click(flip_text, inputs=text_input, outputs=text_output) + image_button.click(flip_image, inputs=image_input, outputs=image_output) + +demo.launch() +``` + +## Blocks Form + +```python +import gradio as gr + +with gr.Blocks() as demo: + name_box = gr.Textbox(label="Name") + age_box = gr.Number(label="Age", minimum=0, maximum=100) + symptoms_box = gr.CheckboxGroup(["Cough", "Fever", "Runny Nose"]) + submit_btn = gr.Button("Submit") + + with gr.Column(visible=False) as output_col: + diagnosis_box = gr.Textbox(label="Diagnosis") + patient_summary_box = gr.Textbox(label="Patient Summary") + + def submit(name, age, symptoms): + return { + submit_btn: gr.Button(visible=False), + output_col: gr.Column(visible=True), + diagnosis_box: "covid" if "Cough" in symptoms else "flu", + patient_summary_box: f"{name}, {age} y/o", + } + + submit_btn.click( + submit, + [name_box, age_box, symptoms_box], + [submit_btn, diagnosis_box, patient_summary_box, output_col], + ) + +demo.launch() +``` + +## Blocks Hello + +```python +import gradio as gr + +def welcome(name): + return f"Welcome to Gradio, {name}!" 
+ +with gr.Blocks() as demo: + gr.Markdown( + """ + # Hello World! + Start typing below to see the output. + """) + inp = gr.Textbox(placeholder="What is your name?") + out = gr.Textbox() + inp.change(welcome, inp, out) + +demo.launch() +``` + +## Blocks Layout + +```python +import gradio as gr + +demo = gr.Blocks() + +with demo: + with gr.Row(): + gr.Image(interactive=True, scale=2) + gr.Image() + with gr.Row(): + gr.Textbox(label="Text") + gr.Number(label="Count", scale=2) + gr.Radio(choices=["One", "Two"]) + with gr.Row(): + gr.Button("500", scale=0, min_width=500) + gr.Button("A", scale=0) + gr.Button("grow") + with gr.Row(): + gr.Textbox() + gr.Textbox() + gr.Button() + with gr.Row(): + with gr.Row(): + with gr.Column(): + gr.Textbox(label="Text") + gr.Number(label="Count") + gr.Radio(choices=["One", "Two"]) + gr.Image() + with gr.Column(): + gr.Image(interactive=True) + gr.Image() + gr.Image() + gr.Textbox(label="Text") + gr.Number(label="Count") + gr.Radio(choices=["One", "Two"]) + +demo.launch() +``` + +## Calculator + +```python +import gradio as gr + +def calculator(num1, operation, num2): + if operation == "add": + return num1 + num2 + elif operation == "subtract": + return num1 - num2 + elif operation == "multiply": + return num1 * num2 + elif operation == "divide": + if num2 == 0: + raise gr.Error("Cannot divide by zero!") + return num1 / num2 + +demo = gr.Interface( + calculator, + [ + "number", + gr.Radio(["add", "subtract", "multiply", "divide"]), + "number" + ], + "number", + examples=[ + [45, "add", 3], + [3.14, "divide", 2], + [144, "multiply", 2.5], + [0, "subtract", 1.2], + ], + title="Toy Calculator", + description="Here's a sample toy calculator.", + api_name="predict" +) + +demo.launch() +``` + +## Chatbot Simple + +```python +import gradio as gr +import random +import time + +with gr.Blocks() as demo: + chatbot = gr.Chatbot() + msg = gr.Textbox() + clear = gr.ClearButton([msg, chatbot]) + + def respond(message, chat_history): + bot_message 
= random.choice(["How are you?", "Today is a great day", "I'm very hungry"]) + chat_history.append({"role": "user", "content": message}) + chat_history.append({"role": "assistant", "content": bot_message}) + time.sleep(2) + return "", chat_history + + msg.submit(respond, [msg, chatbot], [msg, chatbot]) + +demo.launch() +``` + +## Chatbot Streaming + +```python +import gradio as gr +import random +import time + +with gr.Blocks() as demo: + chatbot = gr.Chatbot() + msg = gr.Textbox() + clear = gr.Button("Clear") + + def user(user_message, history: list): + return "", history + [{"role": "user", "content": user_message}] + + def bot(history: list): + bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"]) + history.append({"role": "assistant", "content": ""}) + for character in bot_message: + history[-1]['content'] += character + time.sleep(0.05) + yield history + + msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then( + bot, chatbot, chatbot + ) + clear.click(lambda: None, None, chatbot, queue=False) + +demo.launch() +``` + +## Custom Css + +```python +import gradio as gr + +with gr.Blocks() as demo: + with gr.Column(elem_classes="cool-col"): + gr.Markdown("### Gradio Demo with Custom CSS", elem_classes="darktest") + gr.Markdown( + elem_classes="markdown", + value="Resize the browser window to see the CSS media query in action.", + ) + +if __name__ == "__main__": + demo.launch(css_paths=["demo/custom_css/custom_css.css"]) +``` + +## Fake Diffusion + +```python +import gradio as gr +import numpy as np +import time + +def fake_diffusion(steps): + rng = np.random.default_rng() + for i in range(steps): + time.sleep(1) + image = rng.random(size=(600, 600, 3)) + yield image + image = np.ones((1000,1000,3), np.uint8) + image[:] = [255, 124, 0] + yield image + +demo = gr.Interface(fake_diffusion, + inputs=gr.Slider(1, 10, 3, step=1), + outputs="image", + api_name="predict") + +demo.launch() +``` + +## Hello World + +```python +import 
gradio as gr + + +def greet(name): + return "Hello " + name + "!" + + +demo = gr.Interface(fn=greet, inputs="textbox", outputs="textbox", api_name="predict") + +demo.launch() +``` + +## Image Editor + +```python +import gradio as gr +import time + + +def sleep(im): + time.sleep(5) + return [im["background"], im["layers"][0], im["layers"][1], im["composite"]] + + +def predict(im): + return im["composite"] + + +with gr.Blocks() as demo: + with gr.Row(): + im = gr.ImageEditor( + type="numpy", + ) + im_preview = gr.Image() + n_upload = gr.Number(0, label="Number of upload events", step=1) + n_change = gr.Number(0, label="Number of change events", step=1) + n_input = gr.Number(0, label="Number of input events", step=1) + + im.upload(lambda x: x + 1, outputs=n_upload, inputs=n_upload) + im.change(lambda x: x + 1, outputs=n_change, inputs=n_change) + im.input(lambda x: x + 1, outputs=n_input, inputs=n_input) + im.change(predict, outputs=im_preview, inputs=im, show_progress="hidden") + +demo.launch() +``` + +## On Listener Decorator + +```python +import gradio as gr + +with gr.Blocks() as demo: + name = gr.Textbox(label="Name") + output = gr.Textbox(label="Output Box") + greet_btn = gr.Button("Greet") + + @gr.on(triggers=[name.submit, greet_btn.click], inputs=name, outputs=output) + def greet(name): + return "Hello " + name + "!" 
+ +demo.launch() +``` + +## Render Merge + +```python +import gradio as gr +import time + +with gr.Blocks() as demo: + text_count = gr.Slider(1, 5, value=1, step=1, label="Textbox Count") + + @gr.render(inputs=text_count) + def render_count(count): + boxes = [] + for i in range(count): + box = gr.Textbox(label=f"Box {i}") + boxes.append(box) + + def merge(*args): + time.sleep(0.2) # simulate a delay + return " ".join(args) + + merge_btn.click(merge, boxes, output) + + def clear(): + time.sleep(0.2) # simulate a delay + return [" "] * count + + clear_btn.click(clear, None, boxes) + + def countup(): + time.sleep(0.2) # simulate a delay + return list(range(count)) + + count_btn.click(countup, None, boxes, queue=False) + + with gr.Row(): + merge_btn = gr.Button("Merge") + clear_btn = gr.Button("Clear") + count_btn = gr.Button("Count") + + output = gr.Textbox() + +demo.launch() +``` + +## Reverse Audio 2 + +```python +import gradio as gr +import numpy as np + +def reverse_audio(audio): + sr, data = audio + return (sr, np.flipud(data)) + +demo = gr.Interface(fn=reverse_audio, + inputs="microphone", + outputs="audio", api_name="predict") + +demo.launch() +``` + +## Sepia Filter + +```python +import numpy as np +import gradio as gr + +def sepia(input_img): + sepia_filter = np.array([ + [0.393, 0.769, 0.189], + [0.349, 0.686, 0.168], + [0.272, 0.534, 0.131] + ]) + sepia_img = input_img.dot(sepia_filter.T) + sepia_img /= sepia_img.max() + return sepia_img + +demo = gr.Interface(sepia, gr.Image(), "image", api_name="predict") +demo.launch() +``` + +## Sort Records + +```python +import gradio as gr + +def sort_records(records): + return records.sort("Quantity") + +demo = gr.Interface( + sort_records, + gr.Dataframe( + headers=["Item", "Quantity"], + datatype=["str", "number"], + row_count=3, + column_count=2, + column_limits=(2, 2), + type="polars" + ), + "dataframe", + description="Sort by Quantity" +) + +demo.launch() +``` + +## Streaming Simple + +```python +import gradio 
as gr + +with gr.Blocks() as demo: + with gr.Row(): + with gr.Column(): + input_img = gr.Image(label="Input", sources="webcam") + with gr.Column(): + output_img = gr.Image(label="Output") + input_img.stream(lambda s: s, input_img, output_img, time_limit=15, stream_every=0.1, concurrency_limit=30) + +if __name__ == "__main__": + + demo.launch() +``` + +## Tabbed Interface Lite + +```python +import gradio as gr + +hello_world = gr.Interface(lambda name: "Hello " + name, "text", "text", api_name="predict") +bye_world = gr.Interface(lambda name: "Bye " + name, "text", "text", api_name="predict") +chat = gr.ChatInterface(lambda *args: "Hello " + args[0], api_name="chat") + +demo = gr.TabbedInterface([hello_world, bye_world, chat], ["Hello World", "Bye World", "Chat"]) + +demo.launch() +``` + +## Tax Calculator + +```python +import gradio as gr + +def tax_calculator(income, marital_status, assets): + tax_brackets = [(10, 0), (25, 8), (60, 12), (120, 20), (250, 30)] + total_deductible = sum(cost for cost, deductible in zip(assets["Cost"], assets["Deductible"]) if deductible) + taxable_income = income - total_deductible + + total_tax = 0 + for bracket, rate in tax_brackets: + if taxable_income > bracket: + total_tax += (taxable_income - bracket) * rate / 100 + + if marital_status == "Married": + total_tax *= 0.75 + elif marital_status == "Divorced": + total_tax *= 0.8 + + return round(total_tax) + +demo = gr.Interface( + tax_calculator, + [ + "number", + gr.Radio(["Single", "Married", "Divorced"]), + gr.Dataframe( + headers=["Item", "Cost", "Deductible"], + datatype=["str", "number", "bool"], + label="Assets Purchased this Year", + ), + ], + gr.Number(label="Tax due"), + examples=[ + [10000, "Married", [["Suit", 5000, True], ["Laptop (for work)", 800, False], ["Car", 1800, True]]], + [80000, "Single", [["Suit", 800, True], ["Watch", 1800, True], ["Food", 800, True]]], + ], + live=True, + api_name="predict" +) + +demo.launch() +``` + +## Timer Simple + +```python +import 
gradio as gr +import random +import time + +with gr.Blocks() as demo: + timer = gr.Timer(1) + timestamp = gr.Number(label="Time") + timer.tick(lambda: round(time.time()), outputs=timestamp, api_name="timestamp") + + number = gr.Number(lambda: random.randint(1, 10), every=timer, label="Random Number") + with gr.Row(): + gr.Button("Start").click(lambda: gr.Timer(active=True), None, timer) + gr.Button("Stop").click(lambda: gr.Timer(active=False), None, timer) + gr.Button("Go Fast").click(lambda: 0.2, None, timer) + +if __name__ == "__main__": + demo.launch() +``` + +## Variable Outputs + +```python +import gradio as gr + +max_textboxes = 10 + +def variable_outputs(k): + k = int(k) + return [gr.Textbox(visible=True)]*k + [gr.Textbox(visible=False)]*(max_textboxes-k) + +with gr.Blocks() as demo: + s = gr.Slider(1, max_textboxes, value=max_textboxes, step=1, label="How many textboxes to show:") + textboxes = [] + for i in range(max_textboxes): + t = gr.Textbox(f"Textbox {i}") + textboxes.append(t) + + s.change(variable_outputs, s, textboxes) + +if __name__ == "__main__": + demo.launch() +``` + +## Video Identity + +```python +import gradio as gr +from gradio.media import get_video + +def video_identity(video): + return video + +# get_video() returns file paths to sample media included with Gradio +demo = gr.Interface(video_identity, + gr.Video(), + "playable_video", + examples=[ + get_video("world.mp4") + ], + cache_examples=True, + api_name="predict",) + +demo.launch() +``` diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/SKILL.md b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/SKILL.md index 9da285b8..ed9f90f0 100644 --- a/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/SKILL.md +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/SKILL.md @@ -1,9 +1,9 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-jobs" name: hugging-face-jobs -description: "Run any workload on fully 
managed Hugging Face infrastructure. No local setup required—jobs run on cloud CPUs, GPUs, or TPUs and can persist results to the Hugging Face Hub." -risk: safe -source: "https://github.com/huggingface/skills/tree/main/skills/hugging-face-jobs" -date_added: "2026-02-27" +description: Run workloads on Hugging Face Jobs with managed CPUs, GPUs, TPUs, secrets, and Hub persistence. +license: Complete terms in LICENSE.txt +risk: unknown --- # Running Workloads on Hugging Face Jobs @@ -66,12 +66,15 @@ Before starting any job, verify: **How to provide tokens:** ```python -{ - "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Recommended: automatic token -} +# hf_jobs MCP tool — $HF_TOKEN is auto-replaced with real token: +{"secrets": {"HF_TOKEN": "$HF_TOKEN"}} + +# HfApi().run_uv_job() — MUST pass actual token: +from huggingface_hub import get_token +secrets={"HF_TOKEN": get_token()} ``` -**⚠️ CRITICAL:** The `$HF_TOKEN` placeholder is automatically replaced with your logged-in token. Never hardcode tokens in scripts. +**⚠️ CRITICAL:** The `$HF_TOKEN` placeholder is ONLY auto-replaced by the `hf_jobs` MCP tool. When using `HfApi().run_uv_job()`, you MUST pass the real token via `get_token()`. Passing the literal string `"$HF_TOKEN"` results in a 9-character invalid token and 401 errors. ## Token Usage Guide @@ -539,9 +542,12 @@ requests.post("https://your-api.com/results", json=results) **In job submission:** ```python -{ - "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Enables authentication -} +# hf_jobs MCP tool: +{"secrets": {"HF_TOKEN": "$HF_TOKEN"}} # auto-replaced + +# HfApi().run_uv_job(): +from huggingface_hub import get_token +secrets={"HF_TOKEN": get_token()} # must pass real token ``` **In script:** @@ -560,7 +566,7 @@ api.upload_file(...) 
Before submitting: - [ ] Results persistence method chosen -- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` if using Hub +- [ ] Token in secrets if using Hub (MCP: `"$HF_TOKEN"`, Python API: `get_token()`) - [ ] Script handles missing token gracefully - [ ] Test persistence path works @@ -950,7 +956,7 @@ hf_jobs("uv", { ### Hub Push Failures **Fix:** -1. Add to job: `secrets={"HF_TOKEN": "$HF_TOKEN"}` +1. Add token to secrets: MCP uses `"$HF_TOKEN"` (auto-replaced), Python API uses `get_token()` (must pass real token) 2. Verify token in script: `assert "HF_TOKEN" in os.environ` 3. Check token permissions 4. Verify repo exists or can be created @@ -969,7 +975,7 @@ Add to PEP 723 header: **Fix:** 1. Check `hf_whoami()` works locally -2. Verify `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +2. Verify token in secrets — MCP: `"$HF_TOKEN"`, Python API: `get_token()` (NOT `"$HF_TOKEN"`) 3. Re-login: `hf auth login` 4. Check token has required permissions @@ -1017,7 +1023,7 @@ Add to PEP 723 header: 2. **Jobs are asynchronous** - Don't wait/poll; let user check when ready 3. **Always set timeout** - Default 30 min may be insufficient; set appropriate timeout 4. **Always persist results** - Environment is ephemeral; without persistence, all work is lost -5. **Use tokens securely** - Always use `secrets={"HF_TOKEN": "$HF_TOKEN"}` for Hub operations +5. **Use tokens securely** - MCP: `secrets={"HF_TOKEN": "$HF_TOKEN"}`, Python API: `secrets={"HF_TOKEN": get_token()}` — `"$HF_TOKEN"` only works with MCP tool 6. **Choose appropriate hardware** - Start small, scale up based on needs (see hardware guide) 7. **Use UV scripts** - Default to `hf_jobs("uv", {...})` with inline scripts for Python workloads 8. 
**Handle authentication** - Verify tokens are available before Hub operations @@ -1033,6 +1039,7 @@ Add to PEP 723 header: | List jobs | `hf_jobs("ps")` | `hf jobs ps` | `list_jobs()` | | View logs | `hf_jobs("logs", {...})` | `hf jobs logs ` | `fetch_job_logs(job_id)` | | Cancel job | `hf_jobs("cancel", {...})` | `hf jobs cancel ` | `cancel_job(job_id)` | -| Schedule UV | `hf_jobs("scheduled uv", {...})` | - | `create_scheduled_uv_job()` | -| Schedule Docker | `hf_jobs("scheduled run", {...})` | - | `create_scheduled_job()` | - +| Schedule UV | `hf_jobs("scheduled uv", {...})` | `hf jobs scheduled uv run SCHEDULE script.py` | `create_scheduled_uv_job()` | +| Schedule Docker | `hf_jobs("scheduled run", {...})` | `hf jobs scheduled run SCHEDULE image cmd` | `create_scheduled_job()` | +| List scheduled | `hf_jobs("scheduled ps")` | `hf jobs scheduled ps` | `list_scheduled_jobs()` | +| Delete scheduled | `hf_jobs("scheduled delete", {...})` | `hf jobs scheduled delete ` | `delete_scheduled_job()` | diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/index.html b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/index.html new file mode 100644 index 00000000..6db24014 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/index.html @@ -0,0 +1,216 @@ + + + + + + hf-jobs - Run Workloads on Hugging Face Jobs + + + +
+

Agent Skill : hf-jobs

+ +
+

Run any workload on Hugging Face Jobs.

+

Use this skill when you want to run GPU/CPU workloads (batch inference, synthetic data generation, dataset stats, experiments) on Hugging Face Jobs, with correct token handling and result persistence back to the Hub.

+
+ + + +
+

Overview

+

This skill focuses on running real workloads via Hugging Face Jobs. It includes ready-to-run UV scripts and guides for authentication (HF tokens), secrets vs env vars, timeouts, hardware selection, and pushing results to the Hub.

+
+ +
+

Core Documentation

+
    +
  • + SKILL.md +
    hf-jobs/SKILL.md
    +
    Complete skill documentation (how to submit jobs, tokens/secrets, timeouts, persistence, and how to use the bundled scripts)
    +
  • +
+
+ +
+

References

+
    +
  • + token_usage.md +
    hf-jobs/references/token_usage.md
    +
    Token best practices: secrets vs env, permissions, common errors (401/403), and secure patterns
    +
  • +
  • + hub_saving.md +
    hf-jobs/references/hub_saving.md
    +
    How to persist results: push datasets/models/files to the Hub (ephemeral job filesystem)
    +
  • +
  • + hardware_guide.md +
    hf-jobs/references/hardware_guide.md
    +
    Flavor selection guidance for CPU/GPU/TPU workloads
    +
  • +
  • + troubleshooting.md +
    hf-jobs/references/troubleshooting.md
    +
    Common failure modes (timeouts, missing deps, OOM, auth) and fixes
    +
  • +
+
+ +
+

Scripts

+
    +
  • + generate-responses.py +
    hf-jobs/scripts/generate-responses.py
    +
    vLLM batch generation: load prompts/messages from a dataset, generate responses, push dataset + card to Hub
    +
  • +
  • + cot-self-instruct.py +
    hf-jobs/scripts/cot-self-instruct.py
    +
    CoT Self-Instruct synthetic data generation (reasoning/instruction) + optional filtering, pushes dataset + card
    +
  • +
  • + finepdfs-stats.py +
    hf-jobs/scripts/finepdfs-stats.py
    +
    Polars streaming stats over Hub parquet (finepdfs-edu); optional upload of computed stats to a dataset repo
    +
  • +
+
+
+ + + + + + diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/hardware_guide.md b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/hardware_guide.md new file mode 100644 index 00000000..0a846338 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/hardware_guide.md @@ -0,0 +1,336 @@ +# Hardware Selection Guide + +Choosing the right hardware (flavor) is critical for cost-effective workloads. + +> **Reference:** [HF Jobs Hardware Documentation](https://huggingface.co/docs/hub/en/spaces-config-reference) (updated 07/2025) + +## Available Hardware + +### CPU Flavors +| Flavor | Description | Use Case | +|--------|-------------|----------| +| `cpu-basic` | Basic CPU instance | Testing, lightweight scripts | +| `cpu-upgrade` | Enhanced CPU instance | Data processing, parallel workloads | + +**Use cases:** Data processing, testing scripts, lightweight workloads +**Not recommended for:** Model training, GPU-accelerated workloads + +### GPU Flavors + +| Flavor | GPU | VRAM | Use Case | +|--------|-----|------|----------| +| `t4-small` | NVIDIA T4 | 16GB | <1B models, demos, quick tests | +| `t4-medium` | NVIDIA T4 | 16GB | 1-3B models, development | +| `l4x1` | NVIDIA L4 | 24GB | 3-7B models, efficient workloads | +| `l4x4` | 4x NVIDIA L4 | 96GB | Multi-GPU, parallel workloads | +| `a10g-small` | NVIDIA A10G | 24GB | 3-7B models, production | +| `a10g-large` | NVIDIA A10G | 24GB | 7-13B models, batch inference | +| `a10g-largex2` | 2x NVIDIA A10G | 48GB | Multi-GPU, large models | +| `a10g-largex4` | 4x NVIDIA A10G | 96GB | Multi-GPU, very large models | +| `a100-large` | NVIDIA A100 | 40GB | 13B+ models, fastest GPU option | + +### TPU Flavors + +| Flavor | Configuration | Use Case | +|--------|---------------|----------| +| `v5e-1x1` | TPU v5e (1x1) | Small TPU workloads | +| `v5e-2x2` | TPU v5e (2x2) | Medium TPU workloads | +| `v5e-2x4` | TPU v5e (2x4) | Large TPU workloads | + 
+**TPU Use Cases:** +- JAX/Flax model training +- Large-scale inference +- TPU-optimized workloads + +## Selection Guidelines + +### By Workload Type + +**Data Processing** +- **Recommended:** `cpu-upgrade` or `l4x1` +- **Use case:** Transform, filter, analyze datasets +- **Batch size:** Depends on data size +- **Time:** Varies by dataset size + +**Batch Inference** +- **Recommended:** `a10g-large` or `a100-large` +- **Use case:** Run inference on thousands of samples +- **Batch size:** 8-32 depending on model +- **Time:** Depends on number of samples + +**Experiments & Benchmarks** +- **Recommended:** `a10g-small` or `a10g-large` +- **Use case:** Reproducible ML experiments +- **Batch size:** Varies +- **Time:** Depends on experiment complexity + +**Model Training** (see `model-trainer` skill for details) +- **Recommended:** See model-trainer skill +- **Use case:** Fine-tuning models +- **Batch size:** Depends on model size +- **Time:** Hours to days + +**Synthetic Data Generation** +- **Recommended:** `a10g-large` or `a100-large` +- **Use case:** Generate datasets using LLMs +- **Batch size:** Depends on generation method +- **Time:** Hours for large datasets + +### By Budget + +**Minimal Budget (<$5 total)** +- Use `cpu-basic` or `t4-small` +- Process small datasets +- Quick tests and demos + +**Small Budget ($5-20)** +- Use `t4-medium` or `a10g-small` +- Process medium datasets +- Run experiments + +**Medium Budget ($20-50)** +- Use `a10g-small` or `a10g-large` +- Process large datasets +- Production workloads + +**Large Budget ($50-200)** +- Use `a10g-large` or `a100-large` +- Large-scale processing +- Multiple experiments + +### By Model Size (for inference/processing) + +**Tiny Models (<1B parameters)** +- **Recommended:** `t4-small` +- **Example:** Qwen2.5-0.5B, TinyLlama +- **Batch size:** 8-16 + +**Small Models (1-3B parameters)** +- **Recommended:** `t4-medium` or `a10g-small` +- **Example:** Qwen2.5-1.5B, Phi-2 +- **Batch size:** 4-8 + +**Medium Models 
(3-7B parameters)** +- **Recommended:** `a10g-small` or `a10g-large` +- **Example:** Qwen2.5-7B, Mistral-7B +- **Batch size:** 2-4 + +**Large Models (7-13B parameters)** +- **Recommended:** `a10g-large` or `a100-large` +- **Example:** Llama-3-8B +- **Batch size:** 1-2 + +**Very Large Models (13B+ parameters)** +- **Recommended:** `a100-large` +- **Example:** Llama-3-13B, Llama-3-70B +- **Batch size:** 1 + +## Memory Considerations + +### Estimating Memory Requirements + +**For inference:** +``` +Memory (GB) ≈ (Model params in billions) × 2-4 +``` + +**For training:** +``` +Memory (GB) ≈ (Model params in billions) × 20 (full) or × 4 (LoRA) +``` + +**Examples:** +- Qwen2.5-0.5B inference: ~1-2GB ✅ fits t4-small +- Qwen2.5-7B inference: ~14-28GB ✅ fits a10g-large +- Qwen2.5-7B training: ~140GB ❌ not feasible without LoRA + +### Memory Optimization + +If hitting memory limits: + +1. **Reduce batch size** + ```python + batch_size = 1 + ``` + +2. **Process in chunks** + ```python + for chunk in chunks: + process(chunk) + ``` + +3. **Use smaller models** + - Use quantized models + - Use LoRA adapters + +4. **Upgrade hardware** + - cpu → t4 → a10g → a100 + +## Cost Estimation + +### Formula + +``` +Total Cost = (Hours of runtime) × (Cost per hour) +``` + +### Example Calculations + +**Data processing:** +- Hardware: cpu-upgrade ($0.50/hour) +- Time: 1 hour +- Cost: $0.50 + +**Batch inference:** +- Hardware: a10g-large ($5/hour) +- Time: 2 hours +- Cost: $10.00 + +**Experiments:** +- Hardware: a10g-small ($3.50/hour) +- Time: 4 hours +- Cost: $14.00 + +### Cost Optimization Tips + +1. **Start small:** Test on cpu-basic or t4-small +2. **Monitor runtime:** Set appropriate timeouts +3. **Optimize code:** Reduce unnecessary compute +4. **Choose right hardware:** Don't over-provision +5. **Use checkpoints:** Resume if job fails +6. 
**Monitor costs:** Check running jobs regularly + +## Multi-GPU Workloads + +Multi-GPU flavors automatically distribute workloads: + +**Multi-GPU flavors:** +- `l4x4` - 4x L4 GPUs (96GB total VRAM) +- `a10g-largex2` - 2x A10G GPUs (48GB total VRAM) +- `a10g-largex4` - 4x A10G GPUs (96GB total VRAM) + +**When to use:** +- Large models (>13B parameters) +- Need faster processing (linear speedup) +- Large datasets (>100K samples) +- Parallel workloads +- Tensor parallelism for inference + +**MCP Tool Example:** +```python +hf_jobs("uv", { + "script": "process.py", + "flavor": "a10g-largex2", # 2 GPUs + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**CLI Equivalent:** +```bash +hf jobs uv run process.py --flavor a10g-largex2 --timeout 4h +``` + +## Choosing Between Options + +### CPU vs GPU + +**Choose CPU when:** +- No GPU acceleration needed +- Data processing only +- Budget constrained +- Simple workloads + +**Choose GPU when:** +- Model inference/training +- GPU-accelerated libraries +- Need faster processing +- Large models + +### a10g vs a100 + +**Choose a10g when:** +- Model <13B parameters +- Budget conscious +- Processing time not critical + +**Choose a100 when:** +- Model 13B+ parameters +- Need fastest processing +- Memory requirements high +- Budget allows + +### Single vs Multi-GPU + +**Choose single GPU when:** +- Model <7B parameters +- Budget constrained +- Simpler debugging + +**Choose multi-GPU when:** +- Model >13B parameters +- Need faster processing +- Large batch sizes required +- Cost-effective for large jobs + +## Quick Reference + +### All Available Flavors + +```python +# Official flavor list (updated 07/2025) +FLAVORS = { + # CPU + "cpu-basic", # Testing, lightweight + "cpu-upgrade", # Data processing + + # GPU - Single + "t4-small", # 16GB - <1B models + "t4-medium", # 16GB - 1-3B models + "l4x1", # 24GB - 3-7B models + "a10g-small", # 24GB - 3-7B production + "a10g-large", # 24GB - 7-13B models + "a100-large", # 40GB - 
13B+ models + + # GPU - Multi + "l4x4", # 4x L4 (96GB total) + "a10g-largex2", # 2x A10G (48GB total) + "a10g-largex4", # 4x A10G (96GB total) + + # TPU + "v5e-1x1", # TPU v5e 1x1 + "v5e-2x2", # TPU v5e 2x2 + "v5e-2x4", # TPU v5e 2x4 +} +``` + +### Workload → Hardware Mapping + +```python +HARDWARE_MAP = { + "data_processing": "cpu-upgrade", + "batch_inference_small": "t4-small", + "batch_inference_medium": "a10g-large", + "batch_inference_large": "a100-large", + "experiments": "a10g-small", + "tpu_workloads": "v5e-1x1", + "training": "see model-trainer skill" +} +``` + +### CLI Examples + +```bash +# CPU job +hf jobs run python:3.12 python script.py + +# GPU job +hf jobs run --flavor a10g-large pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel python script.py + +# TPU job +hf jobs run --flavor v5e-1x1 your-tpu-image python script.py + +# UV script with GPU +hf jobs uv run --flavor a10g-small my_script.py +``` + diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/hub_saving.md b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/hub_saving.md new file mode 100644 index 00000000..e2af0283 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/hub_saving.md @@ -0,0 +1,352 @@ +# Saving Results to Hugging Face Hub + +**⚠️ CRITICAL:** Job environments are ephemeral. ALL results are lost when a job completes unless persisted to the Hub or external storage. 
+ +## Why Persistence is Required + +When running on Hugging Face Jobs: +- Environment is temporary +- All files deleted on job completion +- No local disk persistence +- Cannot access results after job ends + +**Without persistence, all work is permanently lost.** + +## Persistence Options + +### Option 1: Push to Hugging Face Hub (Recommended) + +**For models:** +```python +from transformers import AutoModel +model.push_to_hub("username/model-name", token=os.environ.get("HF_TOKEN")) +``` + +**For datasets:** +```python +from datasets import Dataset +dataset.push_to_hub("username/dataset-name", token=os.environ.get("HF_TOKEN")) +``` + +**For files/artifacts:** +```python +from huggingface_hub import HfApi +api = HfApi(token=os.environ.get("HF_TOKEN")) +api.upload_file( + path_or_fileobj="results.json", + path_in_repo="results.json", + repo_id="username/results", + repo_type="dataset" +) +``` + +### Option 2: External Storage + +**S3:** +```python +import boto3 +s3 = boto3.client('s3') +s3.upload_file('results.json', 'my-bucket', 'results.json') +``` + +**Google Cloud Storage:** +```python +from google.cloud import storage +client = storage.Client() +bucket = client.bucket('my-bucket') +blob = bucket.blob('results.json') +blob.upload_from_filename('results.json') +``` + +### Option 3: API Endpoint + +```python +import requests +requests.post("https://your-api.com/results", json=results) +``` + +## Required Configuration for Hub Push + +### Job Configuration + +**Always include HF_TOKEN:** +```python +hf_jobs("uv", { + "script": "your_script.py", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Required for Hub operations +}) +``` + +### Script Configuration + +**Verify token exists:** +```python +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN required for Hub operations!" 
+``` + +**Use token for Hub operations:** +```python +from huggingface_hub import HfApi + +# Auto-detects HF_TOKEN from environment +api = HfApi() + +# Or explicitly pass token +api = HfApi(token=os.environ.get("HF_TOKEN")) +``` + +## Complete Examples + +### Example 1: Push Dataset + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["datasets", "huggingface-hub"] +# /// + +import os +from datasets import Dataset +from huggingface_hub import HfApi + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Process data +data = {"text": ["Sample 1", "Sample 2"]} +dataset = Dataset.from_dict(data) + +# Push to Hub +dataset.push_to_hub("username/my-dataset") +print("✅ Dataset pushed!") +""", + "flavor": "cpu-basic", + "timeout": "30m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +### Example 2: Push Model + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["transformers"] +# /// + +import os +from transformers import AutoModel, AutoTokenizer + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Load and process model +model = AutoModel.from_pretrained("base-model") +tokenizer = AutoTokenizer.from_pretrained("base-model") +# ... process model ... + +# Push to Hub +model.push_to_hub("username/my-model") +tokenizer.push_to_hub("username/my-model") +print("✅ Model pushed!") +""", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +### Example 3: Push Artifacts + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["huggingface-hub", "pandas"] +# /// + +import os +import json +import pandas as pd +from huggingface_hub import HfApi + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" 
+ +# Generate results +results = {"accuracy": 0.95, "loss": 0.05} +df = pd.DataFrame([results]) + +# Save files +with open("results.json", "w") as f: + json.dump(results, f) +df.to_csv("results.csv", index=False) + +# Push to Hub +api = HfApi() +api.upload_file("results.json", "results.json", "username/results", repo_type="dataset") +api.upload_file("results.csv", "results.csv", "username/results", repo_type="dataset") +print("✅ Results pushed!") +""", + "flavor": "cpu-basic", + "timeout": "30m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +## Authentication Methods + +### Method 1: Automatic Token (Recommended) + +```python +"secrets": {"HF_TOKEN": "$HF_TOKEN"} +``` + +Uses your logged-in Hugging Face token automatically. + +### Method 2: Explicit Token + +```python +"secrets": {"HF_TOKEN": "hf_abc123..."} +``` + +Provide token explicitly (not recommended for security). + +### Method 3: Environment Variable + +```python +"env": {"HF_TOKEN": "hf_abc123..."} +``` + +Pass as regular environment variable (less secure than secrets). + +**Always prefer Method 1** for security and convenience. + +## Verification Checklist + +Before submitting any job that saves to Hub, verify: + +- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +- [ ] Script checks for token: `assert "HF_TOKEN" in os.environ` +- [ ] Hub push code included in script +- [ ] Repository name doesn't conflict with existing repos +- [ ] You have write access to the target namespace + +## Repository Setup + +### Automatic Creation + +If repository doesn't exist, it's created automatically when first pushing (if token has write permissions). 
+ +### Manual Creation + +Create repository before pushing: + +```python +from huggingface_hub import HfApi + +api = HfApi() +api.create_repo( + repo_id="username/repo-name", + repo_type="model", # or "dataset" + private=False, # or True for private repo +) +``` + +### Repository Naming + +**Valid names:** +- `username/my-model` +- `username/model-name` +- `organization/model-name` + +**Invalid names:** +- `model-name` (missing username) +- `username/model name` (spaces not allowed) +- `username/MODEL` (uppercase discouraged) + +## Troubleshooting + +### Error: 401 Unauthorized + +**Cause:** HF_TOKEN not provided or invalid + +**Solutions:** +1. Verify `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +2. Check you're logged in: `hf_whoami()` +3. Re-login: `hf auth login` + +### Error: 403 Forbidden + +**Cause:** No write access to repository + +**Solutions:** +1. Check repository namespace matches your username +2. Verify you're a member of organization (if using org namespace) +3. Check token has write permissions + +### Error: Repository not found + +**Cause:** Repository doesn't exist and auto-creation failed + +**Solutions:** +1. Manually create repository first +2. Check repository name format +3. Verify namespace exists + +### Error: Push failed + +**Cause:** Network issues or Hub unavailable + +**Solutions:** +1. Check logs for specific error +2. Verify token is valid +3. Retry push operation + +## Best Practices + +1. **Always verify token exists** before Hub operations +2. **Use descriptive repo names** (e.g., `my-experiment-results` not `results`) +3. **Push incrementally** for large results (use checkpoints) +4. **Verify push success** in logs before job completes +5. **Use appropriate repo types** (model vs dataset) +6. **Add README** with result descriptions +7. 
**Tag repos** with relevant tags + +## Monitoring Push Progress + +Check logs for push progress: + +**MCP Tool:** +```python +hf_jobs("logs", {"job_id": "your-job-id"}) +``` + +**CLI:** +```bash +hf jobs logs +``` + +**Python API:** +```python +from huggingface_hub import fetch_job_logs +for log in fetch_job_logs(job_id="your-job-id"): + print(log) +``` + +**Look for:** +``` +Pushing to username/repo-name... +Upload file results.json: 100% +✅ Push successful +``` + +## Key Takeaway + +**Without `secrets={"HF_TOKEN": "$HF_TOKEN"}` and persistence code, all results are permanently lost.** + +Always verify both are configured before submitting any job that produces results. + diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/token_usage.md b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/token_usage.md new file mode 100644 index 00000000..89d675d4 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/token_usage.md @@ -0,0 +1,570 @@ +# Token Usage Guide for Hugging Face Jobs + +**⚠️ CRITICAL:** Proper token usage is essential for any job that interacts with the Hugging Face Hub. + +## Overview + +Hugging Face tokens are authentication credentials that allow your jobs to interact with the Hub. 
They're required for: +- Pushing models/datasets to Hub +- Accessing private repositories +- Creating new repositories +- Using Hub APIs programmatically +- Any authenticated Hub operations + +## Token Types + +### Read Token +- **Permissions:** Download models/datasets, read private repos +- **Use case:** Jobs that only need to download/read content +- **Creation:** https://huggingface.co/settings/tokens + +### Write Token +- **Permissions:** Push models/datasets, create repos, modify content +- **Use case:** Jobs that need to upload results (most common) +- **Creation:** https://huggingface.co/settings/tokens +- **⚠️ Required for:** Pushing models, datasets, or any uploads + +### Organization Token +- **Permissions:** Act on behalf of an organization +- **Use case:** Jobs running under organization namespace +- **Creation:** Organization settings → Tokens + +## Providing Tokens to Jobs + +### Method 1: `hf_jobs` MCP tool with `$HF_TOKEN` (Recommended) ⭐ + +```python +hf_jobs("uv", { + "script": "your_script.py", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Automatic replacement +}) +``` + +**How it works:** +1. `$HF_TOKEN` is a placeholder that gets replaced with your actual token +2. Uses the token from your logged-in session (`hf auth login`) +3. Token is encrypted server-side when passed as a secret +4. Most secure and convenient method + +**Benefits:** +- ✅ No token exposure in code +- ✅ Uses your current login session +- ✅ Automatically updated if you re-login +- ✅ Works seamlessly with MCP tools +- ✅ Token encrypted server-side + +**Requirements:** +- Must be logged in: `hf auth login` or `hf_whoami()` works +- Token must have required permissions + +**⚠️ CRITICAL:** `$HF_TOKEN` auto-replacement is an `hf_jobs` MCP tool feature ONLY. It does NOT work with `HfApi().run_uv_job()` — see Method 1b below. 
+ +### Method 1b: `HfApi().run_uv_job()` with `get_token()` (Required for Python API) + +```python +from huggingface_hub import HfApi, get_token +api = HfApi() +api.run_uv_job( + script="your_script.py", + secrets={"HF_TOKEN": get_token()}, # ✅ Passes actual token value +) +``` + +**How it works:** +1. `get_token()` retrieves the token from your logged-in session +2. The actual token value is passed to the `secrets` parameter +3. Token is encrypted server-side + +**Why `"$HF_TOKEN"` fails with `HfApi().run_uv_job()`:** +- The Python API passes the literal string `"$HF_TOKEN"` (9 characters) as the token +- The Jobs server receives this invalid string instead of a real token +- Result: `401 Unauthorized` errors when the script tries to authenticate +- You MUST use `get_token()` from `huggingface_hub` to get the real token + +### Method 2: Explicit Token (Not Recommended) + +```python +hf_jobs("uv", { + "script": "your_script.py", + "secrets": {"HF_TOKEN": "hf_abc123..."} # ⚠️ Hardcoded token +}) +``` + +**When to use:** +- Only if automatic token doesn't work +- Testing with a specific token +- Organization tokens (use with caution) + +**Security concerns:** +- ❌ Token visible in code/logs +- ❌ Must manually update if token rotates +- ❌ Risk of token exposure +- ❌ Not recommended for production + +### Method 3: Environment Variable (Less Secure) + +```python +hf_jobs("uv", { + "script": "your_script.py", + "env": {"HF_TOKEN": "hf_abc123..."} # ⚠️ Less secure than secrets +}) +``` + +**Difference from secrets:** +- `env` variables are visible in job logs +- `secrets` are encrypted server-side +- Always prefer `secrets` for tokens + +**When to use:** +- Only for non-sensitive configuration +- Never use for tokens (use `secrets` instead) + +## Using Tokens in Scripts + +### Accessing Tokens + +Tokens passed via `secrets` are available as environment variables in your script: + +```python +import os + +# Get token from environment +token = os.environ.get("HF_TOKEN") + 
+# Verify token exists +if not token: + raise ValueError("HF_TOKEN not found in environment!") +``` + +### Using with Hugging Face Hub + +**Option 1: Explicit token parameter** +```python +from huggingface_hub import HfApi + +api = HfApi(token=os.environ.get("HF_TOKEN")) +api.upload_file(...) +``` + +**Option 2: Auto-detection (Recommended)** +```python +from huggingface_hub import HfApi + +# Automatically uses HF_TOKEN env var +api = HfApi() # ✅ Simpler, uses token from environment +api.upload_file(...) +``` + +**Option 3: With transformers/datasets** +```python +from transformers import AutoModel +from datasets import load_dataset + +# Auto-detects HF_TOKEN from environment +model = AutoModel.from_pretrained("username/model") +dataset = load_dataset("username/dataset") + +# For push operations, token is auto-detected +model.push_to_hub("username/new-model") +dataset.push_to_hub("username/new-dataset") +``` + +### Complete Example + +```python +# /// script +# dependencies = ["huggingface-hub", "datasets"] +# /// + +import os +from huggingface_hub import HfApi +from datasets import Dataset + +# Verify token is available +assert "HF_TOKEN" in os.environ, "HF_TOKEN required for Hub operations!" 
+ +# Use token for Hub operations +api = HfApi() # Auto-detects HF_TOKEN + +# Create and push dataset +data = {"text": ["Hello", "World"]} +dataset = Dataset.from_dict(data) + +# Push to Hub (token auto-detected) +dataset.push_to_hub("username/my-dataset") + +print("✅ Dataset pushed successfully!") +``` + +## Token Verification + +### Check Authentication Locally + +```python +from huggingface_hub import whoami + +try: + user_info = whoami() + print(f"✅ Logged in as: {user_info['name']}") +except Exception as e: + print(f"❌ Not authenticated: {e}") +``` + +### Verify Token in Job + +```python +import os + +# Check token exists +if "HF_TOKEN" not in os.environ: + raise ValueError("HF_TOKEN not found in environment!") + +token = os.environ["HF_TOKEN"] + +# Verify token format (should start with "hf_") +if not token.startswith("hf_"): + raise ValueError(f"Invalid token format: {token[:10]}...") + +# Test token works +from huggingface_hub import whoami +try: + user_info = whoami(token=token) + print(f"✅ Token valid for user: {user_info['name']}") +except Exception as e: + raise ValueError(f"Token validation failed: {e}") +``` + +## Common Token Issues + +### Error: 401 Unauthorized + +**Symptoms:** +``` +401 Client Error: Unauthorized for url: https://huggingface.co/api/... +``` + +**Causes:** +1. Token missing from job +2. Token invalid or expired +3. Token not passed correctly + +**Solutions:** +1. Add `secrets={"HF_TOKEN": "$HF_TOKEN"}` to job config +2. Verify `hf_whoami()` works locally +3. Re-login: `hf auth login` +4. Check token hasn't expired + +**Verification:** +```python +# In your script +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN missing!" +``` + +### Error: 403 Forbidden + +**Symptoms:** +``` +403 Client Error: Forbidden for url: https://huggingface.co/api/... +``` + +**Causes:** +1. Token lacks required permissions (read-only token used for write) +2. No access to private repository +3. 
Organization permissions insufficient + +**Solutions:** +1. Ensure token has write permissions +2. Check token type at https://huggingface.co/settings/tokens +3. Verify access to target repository +4. Use organization token if needed + +**Check token permissions:** +```python +from huggingface_hub import whoami + +user_info = whoami() +print(f"User: {user_info['name']}") +print(f"Type: {user_info.get('type', 'user')}") +``` + +### Error: Token not found in environment + +**Symptoms:** +``` +KeyError: 'HF_TOKEN' +ValueError: HF_TOKEN not found +``` + +**Causes:** +1. `secrets` not passed in job config +2. Wrong key name (should be `HF_TOKEN`) +3. Using `env` instead of `secrets` + +**Solutions:** +1. Use `secrets={"HF_TOKEN": "$HF_TOKEN"}` (not `env`) +2. Verify key name is exactly `HF_TOKEN` +3. Check job config syntax + +**Correct configuration:** +```python +# ✅ Correct +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) + +# ❌ Wrong - using env instead of secrets +hf_jobs("uv", { + "script": "...", + "env": {"HF_TOKEN": "$HF_TOKEN"} # Less secure +}) + +# ❌ Wrong - wrong key name +hf_jobs("uv", { + "script": "...", + "secrets": {"TOKEN": "$HF_TOKEN"} # Wrong key +}) +``` + +### Error: Repository access denied + +**Symptoms:** +``` +403 Client Error: Forbidden +Repository not found or access denied +``` + +**Causes:** +1. Token doesn't have access to private repo +2. Repository doesn't exist and can't be created +3. Wrong namespace + +**Solutions:** +1. Use token from account with access +2. Verify repo visibility (public vs private) +3. Check namespace matches token owner +4. Create repo first if needed + +**Check repository access:** +```python +from huggingface_hub import HfApi + +api = HfApi() +try: + repo_info = api.repo_info("username/repo-name") + print(f"✅ Access granted: {repo_info.id}") +except Exception as e: + print(f"❌ Access denied: {e}") +``` + +## Token Security Best Practices + +### 1. 
Never Commit Tokens + +**❌ Bad:** +```python +# Never do this! +token = "hf_abc123xyz..." +api = HfApi(token=token) +``` + +**✅ Good:** +```python +# Use environment variable +token = os.environ.get("HF_TOKEN") +api = HfApi(token=token) +``` + +### 2. Use Secrets, Not Environment Variables + +**❌ Bad:** +```python +hf_jobs("uv", { + "script": "...", + "env": {"HF_TOKEN": "$HF_TOKEN"} # Visible in logs +}) +``` + +**✅ Good:** +```python +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Encrypted server-side +}) +``` + +### 3. Use Automatic Token Replacement + +**❌ Bad:** +```python +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "hf_abc123..."} # Hardcoded +}) +``` + +**✅ Good:** +```python +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Automatic +}) +``` + +### 4. Rotate Tokens Regularly + +- Generate new tokens periodically +- Revoke old tokens +- Update job configurations +- Monitor token usage + +### 5. Use Minimal Permissions + +- Create tokens with only needed permissions +- Use read tokens when write isn't needed +- Don't use admin tokens for regular jobs + +### 6. Don't Share Tokens + +- Each user should use their own token +- Don't commit tokens to repositories +- Don't share tokens in logs or messages + +### 7. Monitor Token Usage + +- Check token activity in Hub settings +- Review job logs for token issues +- Set up alerts for unauthorized access + +## Token Workflow Examples + +### Example 1: Push Model to Hub + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["transformers"] +# /// + +import os +from transformers import AutoModel, AutoTokenizer + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Load and process model +model = AutoModel.from_pretrained("base-model") +# ... process model ... 
+ +# Push to Hub (token auto-detected) +model.push_to_hub("username/my-model") +print("✅ Model pushed!") +""", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Token provided +}) +``` + +### Example 2: Access Private Dataset + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["datasets"] +# /// + +import os +from datasets import load_dataset + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Load private dataset (token auto-detected) +dataset = load_dataset("private-org/private-dataset") +print(f"✅ Loaded {len(dataset)} examples") +""", + "flavor": "cpu-basic", + "timeout": "30m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Token provided +}) +``` + +### Example 3: Create and Push Dataset + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["datasets", "huggingface-hub"] +# /// + +import os +from datasets import Dataset +from huggingface_hub import HfApi + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" 
+ +# Create dataset +data = {"text": ["Sample 1", "Sample 2"]} +dataset = Dataset.from_dict(data) + +# Push to Hub +api = HfApi() # Auto-detects HF_TOKEN +dataset.push_to_hub("username/my-dataset") +print("✅ Dataset pushed!") +""", + "flavor": "cpu-basic", + "timeout": "30m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Token provided +}) +``` + +## Quick Reference + +### Token Checklist + +Before submitting a job that uses Hub: + +- [ ] Job includes `secrets={"HF_TOKEN": "$HF_TOKEN"}` +- [ ] Script checks for token: `assert "HF_TOKEN" in os.environ` +- [ ] Token has required permissions (read/write) +- [ ] User is logged in: `hf_whoami()` works +- [ ] Token not hardcoded in script +- [ ] Using `secrets` not `env` for token + +### Common Patterns + +**Pattern 1: Auto-detect token** +```python +from huggingface_hub import HfApi +api = HfApi() # Uses HF_TOKEN from environment +``` + +**Pattern 2: Explicit token** +```python +import os +from huggingface_hub import HfApi +api = HfApi(token=os.environ.get("HF_TOKEN")) +``` + +**Pattern 3: Verify token** +```python +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" +``` + +## Key Takeaways + +1. **Always use `secrets={"HF_TOKEN": "$HF_TOKEN"}`** for Hub operations +2. **Never hardcode tokens** in scripts or job configs +3. **Verify token exists** in script before Hub operations +4. **Use auto-detection** when possible (`HfApi()` without token parameter) +5. **Check permissions** - ensure token has required access +6. **Monitor token usage** - review activity regularly +7. 
**Rotate tokens** - generate new tokens periodically + diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/troubleshooting.md b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/troubleshooting.md new file mode 100644 index 00000000..338b6894 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/references/troubleshooting.md @@ -0,0 +1,475 @@ +# Troubleshooting Guide + +Common issues and solutions for Hugging Face Jobs. + +## Authentication Issues + +### Error: 401 Unauthorized + +**Symptoms:** +``` +401 Client Error: Unauthorized for url: https://huggingface.co/api/... +``` + +**Causes:** +- Token missing from job +- Token invalid or expired +- Token not passed correctly + +**Solutions:** +1. Add token to secrets: `hf_jobs` MCP uses `"$HF_TOKEN"` (auto-replaced); `HfApi().run_uv_job()` MUST use `get_token()` from `huggingface_hub` (the literal string `"$HF_TOKEN"` will NOT work with the Python API) +2. Verify `hf_whoami()` works locally +3. Re-login: `hf auth login` +4. Check token hasn't expired + +**Verification:** +```python +# In your script +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN missing!" +``` + +### Error: 403 Forbidden + +**Symptoms:** +``` +403 Client Error: Forbidden for url: https://huggingface.co/api/... +``` + +**Causes:** +- Token lacks required permissions +- No access to private repository +- Organization permissions insufficient + +**Solutions:** +1. Ensure token has write permissions +2. Check token type at https://huggingface.co/settings/tokens +3. Verify access to target repository +4. Use organization token if needed + +### Error: Token not found in environment + +**Symptoms:** +``` +KeyError: 'HF_TOKEN' +ValueError: HF_TOKEN not found +``` + +**Causes:** +- `secrets` not passed in job config +- Wrong key name (should be `HF_TOKEN`) +- Using `env` instead of `secrets` + +**Solutions:** +1. 
Use `secrets` (not `env`) — with `hf_jobs` MCP: `"$HF_TOKEN"`; with `HfApi().run_uv_job()`: `get_token()` +2. Verify key name is exactly `HF_TOKEN` +3. Check job config syntax + +## Job Execution Issues + +### Error: Job Timeout + +**Symptoms:** +- Job stops unexpectedly +- Status shows "TIMEOUT" +- Partial results only + +**Causes:** +- Default 30min timeout exceeded +- Job takes longer than expected +- No timeout specified + +**Solutions:** +1. Check logs for actual runtime +2. Increase timeout with buffer: `"timeout": "3h"` +3. Optimize code for faster execution +4. Process data in chunks +5. Add 20-30% buffer to estimated time + +**MCP Tool Example:** +```python +hf_jobs("uv", { + "script": "...", + "timeout": "2h" # Set appropriate timeout +}) +``` + +**Python API Example:** +```python +from huggingface_hub import run_uv_job, inspect_job, fetch_job_logs + +job = run_uv_job("script.py", timeout="4h") + +# Check if job failed +job_info = inspect_job(job_id=job.id) +if job_info.status.stage == "ERROR": + print(f"Job failed: {job_info.status.message}") + # Check logs for details + for log in fetch_job_logs(job_id=job.id): + print(log) +``` + +### Error: Out of Memory (OOM) + +**Symptoms:** +``` +RuntimeError: CUDA out of memory +MemoryError: Unable to allocate array +``` + +**Causes:** +- Batch size too large +- Model too large for hardware +- Insufficient GPU memory + +**Solutions:** +1. Reduce batch size +2. Process data in smaller chunks +3. Upgrade hardware: cpu → t4 → a10g → a100 +4. Use smaller models or quantization +5. Enable gradient checkpointing (for training) + +**Example:** +```python +# Reduce batch size +batch_size = 1 + +# Process in chunks +for chunk in chunks: + process(chunk) +``` + +### Error: Missing Dependencies + +**Symptoms:** +``` +ModuleNotFoundError: No module named 'package_name' +ImportError: cannot import name 'X' +``` + +**Causes:** +- Package not in dependencies +- Wrong package name +- Version mismatch + +**Solutions:** +1. 
Add to PEP 723 header: + ```python + # /// script + # dependencies = ["package-name>=1.0.0"] + # /// + ``` +2. Check package name spelling +3. Specify version if needed +4. Check package availability + +### Error: Script Not Found + +**Symptoms:** +``` +FileNotFoundError: script.py not found +``` + +**Causes:** +- Local file path used (not supported) +- URL incorrect +- Script not accessible + +**Solutions:** +1. Use inline script (recommended) +2. Use publicly accessible URL +3. Upload script to Hub first +4. Check URL is correct + +**Correct approaches:** +```python +# ✅ Inline code +hf_jobs("uv", {"script": "# /// script\n# dependencies = [...]\n# ///\n\n"}) + +# ✅ From URL +hf_jobs("uv", {"script": "https://huggingface.co/user/repo/resolve/main/script.py"}) +``` + +## Hub Push Issues + +### Error: Push Failed + +**Symptoms:** +``` +Error pushing to Hub +Upload failed +``` + +**Causes:** +- Network issues +- Token missing or invalid +- Repository access denied +- File too large + +**Solutions:** +1. Check token: `assert "HF_TOKEN" in os.environ` +2. Verify repository exists or can be created +3. Check network connectivity in logs +4. Retry push operation +5. Split large files into chunks + +### Error: Repository Not Found + +**Symptoms:** +``` +404 Client Error: Not Found +Repository not found +``` + +**Causes:** +- Repository doesn't exist +- Wrong repository name +- No access to private repo + +**Solutions:** +1. Create repository first: + ```python + from huggingface_hub import HfApi + api = HfApi() + api.create_repo("username/repo-name", repo_type="dataset") + ``` +2. Check repository name format +3. Verify namespace exists +4. Check repository visibility + +### Error: Results Not Saved + +**Symptoms:** +- Job completes successfully +- No results visible on Hub +- Files not persisted + +**Causes:** +- No persistence code in script +- Push code not executed +- Push failed silently + +**Solutions:** +1. Add persistence code to script +2. 
Verify push executes successfully +3. Check logs for push errors +4. Add error handling around push + +**Example:** +```python +try: + dataset.push_to_hub("username/dataset") + print("✅ Push successful") +except Exception as e: + print(f"❌ Push failed: {e}") + raise +``` + +## Hardware Issues + +### Error: GPU Not Available + +**Symptoms:** +``` +CUDA not available +No GPU found +``` + +**Causes:** +- CPU flavor used instead of GPU +- GPU not requested +- CUDA not installed in image + +**Solutions:** +1. Use GPU flavor: `"flavor": "a10g-large"` +2. Check image has CUDA support +3. Verify GPU availability in logs + +### Error: Slow Performance + +**Symptoms:** +- Job takes longer than expected +- Low GPU utilization +- CPU bottleneck + +**Causes:** +- Wrong hardware selected +- Inefficient code +- Data loading bottleneck + +**Solutions:** +1. Upgrade hardware +2. Optimize code +3. Use batch processing +4. Profile code to find bottlenecks + +## General Issues + +### Error: Job Status Unknown + +**Symptoms:** +- Can't check job status +- Status API returns error + +**Solutions:** +1. Use job URL: `https://huggingface.co/jobs/username/job-id` +2. Check logs: `hf_jobs("logs", {"job_id": "..."})` +3. Inspect job: `hf_jobs("inspect", {"job_id": "..."})` + +### Error: Logs Not Available + +**Symptoms:** +- No logs visible +- Logs delayed + +**Causes:** +- Job just started (logs delayed 30-60s) +- Job failed before logging +- Logs not yet generated + +**Solutions:** +1. Wait 30-60 seconds after job start +2. Check job status first +3. Use job URL for web interface + +### Error: Cost Unexpectedly High + +**Symptoms:** +- Job costs more than expected +- Longer runtime than estimated + +**Causes:** +- Job ran longer than timeout +- Wrong hardware selected +- Inefficient code + +**Solutions:** +1. Monitor job runtime +2. Set appropriate timeout +3. Optimize code +4. Choose right hardware +5. Check cost estimates before running + +## Debugging Tips + +### 1. 
Add Logging + +```python +import logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +logger.info("Starting processing...") +logger.info(f"Processed {count} items") +``` + +### 2. Verify Environment + +```python +import os +import sys +import torch +print(f"Python version: {sys.version}") +print(f"CUDA available: {torch.cuda.is_available()}") +print(f"HF_TOKEN present: {'HF_TOKEN' in os.environ}") +``` + +### 3. Test Locally First + +Run script locally before submitting to catch errors early: +```bash +python script.py +# Or with uv +uv run script.py +``` + +### 4. Check Job Logs + +**MCP Tool:** +```python +# View logs +hf_jobs("logs", {"job_id": "your-job-id"}) +``` + +**CLI:** +```bash +hf jobs logs <job-id> +``` + +**Python API:** +```python +from huggingface_hub import fetch_job_logs +for log in fetch_job_logs(job_id="your-job-id"): + print(log) +``` + +**Or use job URL:** `https://huggingface.co/jobs/username/job-id` + +### 5. Add Error Handling + +```python +try: + # Your code + process_data() +except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() + raise +``` + +### 6. Check Job Status Programmatically + +```python +from huggingface_hub import inspect_job, fetch_job_logs + +job_info = inspect_job(job_id="your-job-id") +print(f"Status: {job_info.status.stage}") +print(f"Message: {job_info.status.message}") + +if job_info.status.stage == "ERROR": + print("Job failed!
Logs:") + for log in fetch_job_logs(job_id="your-job-id"): + print(log) +``` + +## Quick Reference + +### Common Error Codes + +| Code | Meaning | Solution | +|------|---------|----------| +| 401 | Unauthorized | Add token to secrets: MCP uses `"$HF_TOKEN"`, Python API uses `get_token()` | +| 403 | Forbidden | Check token permissions | +| 404 | Not Found | Verify repository exists | +| 500 | Server Error | Retry or contact support | + +### Checklist Before Submitting + +- [ ] Token configured: MCP uses `secrets={"HF_TOKEN": "$HF_TOKEN"}`, Python API uses `secrets={"HF_TOKEN": get_token()}` +- [ ] Script checks for token: `assert "HF_TOKEN" in os.environ` +- [ ] Timeout set appropriately +- [ ] Hardware selected correctly +- [ ] Dependencies listed in PEP 723 header +- [ ] Persistence code included +- [ ] Error handling added +- [ ] Logging added for debugging + +## Getting Help + +If issues persist: + +1. **Check logs** - Most errors include detailed messages +2. **Review documentation** - See main SKILL.md +3. **Check Hub status** - https://status.huggingface.co +4. **Community forums** - https://discuss.huggingface.co +5. **GitHub issues** - For bugs in huggingface_hub + +## Key Takeaways + +1. **Always include token** - MCP: `secrets={"HF_TOKEN": "$HF_TOKEN"}`, Python API: `secrets={"HF_TOKEN": get_token()}` +2. **Set appropriate timeout** - Default 30min may be insufficient +3. **Verify persistence** - Results won't persist without code +4. **Check logs** - Most issues visible in job logs +5. **Test locally** - Catch errors before submitting +6. **Add error handling** - Better debugging information +7. 
**Monitor costs** - Set timeouts to avoid unexpected charges + diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/cot-self-instruct.py b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/cot-self-instruct.py new file mode 100644 index 00000000..5388438b --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/cot-self-instruct.py @@ -0,0 +1,718 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "datasets", +# "transformers", +# "vllm>=0.6.5", +# "huggingface-hub[hf_transfer]", +# "torch", +# "numpy", +# "tqdm", +# "scikit-learn", +# ] +# /// +""" +Generate high-quality synthetic data using Chain-of-Thought Self-Instruct methodology. + +This script implements the CoT-Self-Instruct approach from the paper "CoT-Self-Instruct: +Building high-quality synthetic prompts for reasoning and non-reasoning tasks" (2025). + +It supports two modes: +1. Reasoning tasks: Generates both questions and answers with Chain-of-Thought +2. Instruction tasks: Generates diverse prompts for general instruction following + +Example usage: + # Reasoning tasks with Answer-Consistency filtering + uv run cot-self-instruct.py \\ + --seed-dataset davanstrien/s1k-reasoning \\ + --output-dataset username/synthetic-math \\ + --task-type reasoning \\ + --num-samples 5000 \\ + --filter-method answer-consistency + + # Instruction tasks with RIP filtering + uv run cot-self-instruct.py \\ + --seed-dataset wildchat-filtered \\ + --output-dataset username/synthetic-prompts \\ + --task-type instruction \\ + --filter-method rip \\ + --reward-model Nexusflow/Athene-RM-8B + + # HF Jobs execution + hf jobs uv run --flavor l4x4 \\ + --image vllm/vllm-openai \\ + -e HF_TOKEN=$(python3 -c "from huggingface_hub import get_token; print(get_token())") \\ + https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\ + [args...] 
+""" + +import argparse +import json +import logging +import os +import random +import re +import sys +from collections import Counter +from datetime import datetime +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import torch +from datasets import Dataset, load_dataset +from huggingface_hub import DatasetCard, login +from sklearn.cluster import KMeans +from tqdm.auto import tqdm +from transformers import AutoTokenizer +from vllm import LLM, SamplingParams + +# Enable HF Transfer for faster downloads +os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# Prompt templates from the paper +REASONING_PROMPT_TEMPLATE = """You are a reasoning question generator assistant. Your goal is to create a novel, and challenging reasoning question. You are provided the following seed questions: +Seed Question 1: {seed1} +Seed Question 2: {seed2} +Your task is to: +1. Write a brand-new, self-contained reasoning question that meets the following requirements: +(a) The question draws inspiration from the seed question without copying it verbatim, remaining novel and of comparable difficulty. +(b) The question's final answer should be a single, unambiguous scalar value (e.g., an integer, reduced fraction, exact radical), or another answer type that can be verified in one step (e.g., 'yes/no,' a choice from A to D). +2. Then reason step by step, solve the new question and format your output as follows: +[New Question Begin]{{your_generated_question}}[New Question End] +[Final Answer to New Question Begin]\\boxed{{your_final_answer}}[Final Answer to New Question End]""" + +INSTRUCTION_PROMPT_TEMPLATE = """You are a prompt generator assistant. Your goal is to create diverse and creative synthetic prompts. +Please follow the steps below to create synthetic prompts. +Step 1: Carefully read #Prompt 1# and #Prompt 2#. 
Identify and list all the common elements between these two prompts. If no common elements are found, list the main elements from each prompt. +Step 2: Develop a comprehensive plan based on the #Common Elements List# or #Main Elements List# from Step 1. This plan will guide the generation of new synthetic prompts that are similar to the original prompts. +Step 3: Execute the plan step by step and provide one #Synthetic Prompt#. +Please reply strictly in the following format: +- Step 1 #Common Elements List# or #Main Elements List#: +- Step 2 #Plan#: +- Step 3 #Synthetic Prompt#: +#Prompt 1#: +{prompt1} +#Prompt 2#: +{prompt2}""" + + +def check_gpu_availability() -> int: + """Check if CUDA is available and return the number of GPUs.""" + if not torch.cuda.is_available(): + logger.error("CUDA is not available. This script requires a GPU.") + logger.error( + "Please run on a machine with NVIDIA GPU or use HF Jobs with GPU flavor." + ) + sys.exit(1) + + num_gpus = torch.cuda.device_count() + for i in range(num_gpus): + gpu_name = torch.cuda.get_device_name(i) + gpu_memory = torch.cuda.get_device_properties(i).total_memory / 1024**3 + logger.info(f"GPU {i}: {gpu_name} with {gpu_memory:.1f} GB memory") + + return num_gpus + + +def parse_thinking_output(text: str) -> str: + """Remove thinking tokens from model output.""" + # Remove ... 
blocks + text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL) + return text.strip() + + +def extract_reasoning_output(text: str) -> Tuple[Optional[str], Optional[str]]: + """Extract question and answer from reasoning task output.""" + text = parse_thinking_output(text) + + # Extract question + question_match = re.search(r'\[New Question Begin\](.*?)\[New Question End\]', text, re.DOTALL) + if not question_match: + return None, None + question = question_match.group(1).strip() + + # Extract answer + answer_match = re.search(r'\[Final Answer to New Question Begin\]\\?boxed\{(.*?)\}\[Final Answer to New Question End\]', text, re.DOTALL) + if not answer_match: + # Try without \boxed + answer_match = re.search(r'\[Final Answer to New Question Begin\](.*?)\[Final Answer to New Question End\]', text, re.DOTALL) + + if not answer_match: + return question, None + + answer = answer_match.group(1).strip() + return question, answer + + +def extract_instruction_output(text: str) -> Optional[str]: + """Extract synthetic prompt from instruction task output.""" + text = parse_thinking_output(text) + + # Look for the synthetic prompt after "Step 3 #Synthetic Prompt#:" + match = re.search(r'Step 3 #Synthetic Prompt#:\s*(.+)', text, re.DOTALL) + if match: + return match.group(1).strip() + return None + + +def categorize_prompts(prompts: List[str], num_categories: int = 8) -> Dict[int, List[int]]: + """Categorize prompts using clustering for instruction tasks.""" + from transformers import AutoModel + + logger.info(f"Categorizing {len(prompts)} prompts into {num_categories} categories...") + + # Use a small model for embeddings + tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") + model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") + + # Get embeddings + embeddings = [] + for prompt in tqdm(prompts, desc="Computing embeddings"): + inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512) + with
torch.no_grad(): + outputs = model(**inputs) + embedding = outputs.last_hidden_state.mean(dim=1).numpy() + embeddings.append(embedding[0]) + + # Cluster + kmeans = KMeans(n_clusters=num_categories, random_state=42) + labels = kmeans.fit_predict(embeddings) + + # Group by category + categories = {} + for idx, label in enumerate(labels): + if label not in categories: + categories[label] = [] + categories[label].append(idx) + + return categories + + +def generate_synthetic_data( + llm: LLM, + seed_data: List[Dict], + task_type: str, + num_samples: int, + categories: Optional[Dict[int, List[int]]] = None, +) -> List[Dict]: + """Generate synthetic data using CoT-Self-Instruct.""" + synthetic_data = [] + + # Set up progress bar + pbar = tqdm(total=num_samples, desc="Generating synthetic data") + + while len(synthetic_data) < num_samples: + # Sample seed data + if task_type == "reasoning": + # Random sampling for reasoning tasks + seeds = random.sample(seed_data, min(2, len(seed_data))) + prompt = REASONING_PROMPT_TEMPLATE.format( + seed1=seeds[0].get("question", seeds[0].get("prompt", "")), + seed2=seeds[1].get("question", seeds[1].get("prompt", "")) if len(seeds) > 1 else seeds[0].get("question", seeds[0].get("prompt", "")) + ) + else: + # Category-aware sampling for instruction tasks + if categories: + # Pick a random category + category = random.choice(list(categories.keys())) + category_indices = categories[category] + indices = random.sample(category_indices, min(2, len(category_indices))) + seeds = [seed_data[i] for i in indices] + else: + seeds = random.sample(seed_data, min(2, len(seed_data))) + + prompt = INSTRUCTION_PROMPT_TEMPLATE.format( + prompt1=seeds[0].get("prompt", seeds[0].get("question", "")), + prompt2=seeds[1].get("prompt", seeds[1].get("question", "")) if len(seeds) > 1 else seeds[0].get("prompt", seeds[0].get("question", "")) + ) + + # Generate + sampling_params = SamplingParams( + temperature=0.7 if task_type == "reasoning" else 0.8, + top_p=0.95 
if task_type == "reasoning" else 0.9, + max_tokens=2048, + ) + + outputs = llm.generate([prompt], sampling_params) + output_text = outputs[0].outputs[0].text + + # Parse output + if task_type == "reasoning": + question, answer = extract_reasoning_output(output_text) + if question and answer: + synthetic_data.append({ + "question": question, + "answer": answer, + "seed_indices": [seed_data.index(s) for s in seeds], + }) + pbar.update(1) + else: + synthetic_prompt = extract_instruction_output(output_text) + if synthetic_prompt: + synthetic_data.append({ + "prompt": synthetic_prompt, + "seed_indices": [seed_data.index(s) for s in seeds], + }) + pbar.update(1) + + pbar.close() + return synthetic_data + + +def answer_consistency_filter( + llm: LLM, + synthetic_data: List[Dict], + k_responses: int = 16, + threshold: float = 0.5, +) -> List[Dict]: + """Filter reasoning tasks using Answer-Consistency.""" + logger.info(f"Applying Answer-Consistency filter with K={k_responses}") + + filtered_data = [] + + for item in tqdm(synthetic_data, desc="Answer-Consistency filtering"): + question = item["question"] + original_answer = item["answer"] + + # Generate K responses + prompts = [question] * k_responses + sampling_params = SamplingParams( + temperature=0.6, + top_p=0.95, + max_tokens=1024, + ) + + outputs = llm.generate(prompts, sampling_params) + + # Extract answers + answers = [] + for output in outputs: + text = output.outputs[0].text + # Try to extract boxed answer + match = re.search(r'\\boxed\{(.*?)\}', text) + if match: + answers.append(match.group(1).strip()) + + if not answers: + continue + + # Get majority answer + answer_counts = Counter(answers) + if answer_counts: + majority_answer, count = answer_counts.most_common(1)[0] + + # Check if majority answer matches original and meets threshold + if (majority_answer == original_answer and + count / len(answers) >= threshold): + item["consistency_score"] = count / len(answers) + filtered_data.append(item) + + 
logger.info(f"Answer-Consistency: kept {len(filtered_data)}/{len(synthetic_data)} examples") + return filtered_data + + +def rip_filter( + llm: LLM, + synthetic_data: List[Dict], + reward_model_id: str, + k_responses: int = 32, + threshold: float = 0.5, +) -> List[Dict]: + """Filter using Rejecting Instruction Preferences (RIP).""" + logger.info(f"Applying RIP filter with K={k_responses} and reward model {reward_model_id}") + + # Note: In a full implementation, you would load and use the actual reward model + # For this example, we'll use a placeholder scoring mechanism + logger.warning("RIP filtering requires a reward model implementation - using placeholder") + + filtered_data = [] + + for item in tqdm(synthetic_data, desc="RIP filtering"): + prompt = item.get("prompt", item.get("question", "")) + + # Generate K responses + prompts = [prompt] * k_responses + sampling_params = SamplingParams( + temperature=1.0, + top_p=1.0, + max_tokens=1024, + ) + + outputs = llm.generate(prompts, sampling_params) + + # In real implementation: score each response with reward model + # For now, use length as a proxy (longer responses often score higher) + scores = [len(output.outputs[0].text) for output in outputs] + + # Use minimum score as quality indicator + min_score = min(scores) if scores else 0 + normalized_score = min_score / 1000 # Normalize to 0-1 range + + if normalized_score >= threshold: + item["rip_score"] = normalized_score + filtered_data.append(item) + + logger.info(f"RIP filter: kept {len(filtered_data)}/{len(synthetic_data)} examples") + return filtered_data + + +def create_dataset_card( + task_type: str, + source_dataset: str, + generation_model: str, + filter_method: str, + num_generated: int, + num_filtered: int, + generation_time: str, + additional_info: Dict = None, +) -> str: + """Create a comprehensive dataset card.""" + filter_info = "" + if filter_method == "answer-consistency": + filter_info = """ +### Answer-Consistency Filtering + +This dataset was 
filtered using Answer-Consistency: +- Generated K responses for each synthetic question +- Kept only examples where majority answer matched the generated answer +- Ensures high-quality, correctly solved problems""" + elif filter_method == "rip": + filter_info = """ +### RIP (Rejecting Instruction Preferences) Filtering + +This dataset was filtered using RIP: +- Generated K responses for each synthetic prompt +- Scored responses using a reward model +- Kept only prompts with high minimum scores""" + + return f"""--- +tags: +- synthetic-data +- cot-self-instruct +- {task_type} +- uv-script +--- + +# CoT-Self-Instruct Synthetic Data + +This dataset contains synthetic {task_type} data generated using the Chain-of-Thought Self-Instruct methodology. + +## Generation Details + +- **Source Dataset**: [{source_dataset}](https://huggingface.co/datasets/{source_dataset}) +- **Generation Model**: [{generation_model}](https://huggingface.co/{generation_model}) +- **Task Type**: {task_type} +- **Filter Method**: {filter_method} +- **Generated Examples**: {num_generated:,} +- **After Filtering**: {num_filtered:,} ({(num_filtered/num_generated)*100:.1f}% acceptance rate) +- **Generation Date**: {generation_time} +{filter_info} + +## Methodology + +Generated using CoT-Self-Instruct, which: +1. Uses Chain-of-Thought reasoning to analyze seed examples +2. Generates new synthetic examples of similar quality and complexity +3. Applies quality filtering to ensure high-quality outputs + +Based on the paper: "CoT-Self-Instruct: Building high-quality synthetic prompts for reasoning and non-reasoning tasks" (2025) + +## Generation Script + +Generated using the CoT-Self-Instruct script from [uv-scripts/synthetic-data](https://huggingface.co/datasets/uv-scripts/synthetic-data). 
+ +To reproduce: +```bash +uv run https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\ + --seed-dataset {source_dataset} \\ + --output-dataset \\ + --task-type {task_type} \\ + --generation-model {generation_model} \\ + --filter-method {filter_method} +``` +""" + + +def main(): + parser = argparse.ArgumentParser( + description="Generate synthetic data using CoT-Self-Instruct", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + # Dataset arguments + parser.add_argument( + "--seed-dataset", + type=str, + required=True, + help="HuggingFace dataset ID containing seed examples", + ) + parser.add_argument( + "--output-dataset", + type=str, + required=True, + help="HuggingFace dataset ID for output", + ) + + # Task configuration + parser.add_argument( + "--task-type", + type=str, + choices=["reasoning", "instruction", "auto"], + default="auto", + help="Type of task (reasoning generates Q&A, instruction generates prompts)", + ) + parser.add_argument( + "--task-column", + type=str, + default=None, + help="Column name containing tasks (auto-detected if not specified)", + ) + + # Model configuration + parser.add_argument( + "--generation-model", + type=str, + default="Qwen/Qwen3-30B-A3B-Thinking-2507", + help="Model for synthetic data generation", + ) + parser.add_argument( + "--filter-model", + type=str, + default=None, + help="Model for filtering (defaults to generation model)", + ) + parser.add_argument( + "--reward-model", + type=str, + default="Nexusflow/Athene-RM-8B", + help="Reward model for RIP filtering", + ) + + # Generation parameters + parser.add_argument( + "--num-samples", + type=int, + default=5000, + help="Number of synthetic examples to generate", + ) + parser.add_argument( + "--batch-size", + type=int, + default=1, + help="Batch size for generation", + ) + + # Filtering parameters + parser.add_argument( + "--filter-method", + type=str, + choices=["answer-consistency", "rip", "both", 
"none"], + default="answer-consistency", + help="Quality filtering method", + ) + parser.add_argument( + "--k-responses", + type=int, + default=16, + help="Number of responses for filtering", + ) + parser.add_argument( + "--quality-threshold", + type=float, + default=0.5, + help="Minimum quality threshold for filtering", + ) + + # GPU configuration + parser.add_argument( + "--tensor-parallel-size", + type=int, + default=None, + help="Number of GPUs for tensor parallelism (auto-detected if not set)", + ) + parser.add_argument( + "--gpu-memory-utilization", + type=float, + default=0.9, + help="GPU memory utilization", + ) + + # Other arguments + parser.add_argument( + "--hf-token", + type=str, + default=None, + help="HuggingFace API token", + ) + parser.add_argument( + "--seed", + type=int, + default=42, + help="Random seed", + ) + + args = parser.parse_args() + + # Set random seeds + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + + # Check GPU + num_gpus = check_gpu_availability() + tensor_parallel_size = args.tensor_parallel_size or num_gpus + + # Authentication + hf_token = args.hf_token or os.environ.get("HF_TOKEN") + if hf_token: + login(token=hf_token) + + # Load seed dataset + logger.info(f"Loading seed dataset: {args.seed_dataset}") + seed_dataset = load_dataset(args.seed_dataset, split="train") + + # Auto-detect task type and column if needed + if args.task_type == "auto": + columns = seed_dataset.column_names + if "question" in columns and "answer" in columns: + args.task_type = "reasoning" + logger.info("Auto-detected task type: reasoning") + else: + args.task_type = "instruction" + logger.info("Auto-detected task type: instruction") + + if not args.task_column: + if args.task_type == "reasoning": + args.task_column = "question" + else: + # Try to find prompt column + for col in ["prompt", "instruction", "text", "input"]: + if col in seed_dataset.column_names: + args.task_column = col + break + + logger.info(f"Using 
task column: {args.task_column}") + + # Convert to list of dicts + seed_data = seed_dataset.to_list() + + # Categorize prompts for instruction tasks + categories = None + if args.task_type == "instruction" and len(seed_data) > 100: + prompts = [item.get(args.task_column, "") for item in seed_data] + categories = categorize_prompts(prompts) + + # Initialize generation model + logger.info(f"Loading generation model: {args.generation_model}") + generation_llm = LLM( + model=args.generation_model, + tensor_parallel_size=tensor_parallel_size, + gpu_memory_utilization=args.gpu_memory_utilization, + ) + + # Generate synthetic data + start_time = datetime.now() + synthetic_data = generate_synthetic_data( + generation_llm, + seed_data, + args.task_type, + args.num_samples, + categories, + ) + + # Apply filtering + filter_llm = generation_llm + if args.filter_model and args.filter_model != args.generation_model: + logger.info(f"Loading filter model: {args.filter_model}") + # Clean up generation model + del generation_llm + torch.cuda.empty_cache() + + filter_llm = LLM( + model=args.filter_model, + tensor_parallel_size=tensor_parallel_size, + gpu_memory_utilization=args.gpu_memory_utilization, + ) + + filtered_data = synthetic_data + if args.filter_method != "none": + if args.filter_method == "answer-consistency" and args.task_type == "reasoning": + filtered_data = answer_consistency_filter( + filter_llm, + synthetic_data, + args.k_responses, + args.quality_threshold, + ) + elif args.filter_method == "rip": + filtered_data = rip_filter( + filter_llm, + synthetic_data, + args.reward_model, + args.k_responses, + args.quality_threshold, + ) + elif args.filter_method == "both": + if args.task_type == "reasoning": + filtered_data = answer_consistency_filter( + filter_llm, + synthetic_data, + args.k_responses, + args.quality_threshold, + ) + filtered_data = rip_filter( + filter_llm, + filtered_data, + args.reward_model, + args.k_responses, + args.quality_threshold, + ) + + # Create 
HuggingFace dataset + logger.info(f"Creating dataset with {len(filtered_data)} examples") + dataset = Dataset.from_list(filtered_data) + + # Create dataset card + generation_time = start_time.strftime("%Y-%m-%d %H:%M:%S UTC") + dataset_card = create_dataset_card( + args.task_type, + args.seed_dataset, + args.generation_model, + args.filter_method, + len(synthetic_data), + len(filtered_data), + generation_time, + ) + + # Push to hub + logger.info(f"Pushing dataset to: {args.output_dataset}") + # Create dataset card + card = DatasetCard(dataset_card) + dataset.push_to_hub(args.output_dataset) + # Push card separately + card.push_to_hub(args.output_dataset) + + logger.info("Done! Dataset available at: https://huggingface.co/datasets/" + args.output_dataset) + + # Print example HF Jobs command if running locally + if len(sys.argv) > 1: + print("\nTo run on HF Jobs:") + print(f"""hf jobs uv run --flavor l4x4 \\ + --image vllm/vllm-openai \\ + -e HF_TOKEN=$(python3 -c "from huggingface_hub import get_token; print(get_token())") \\ + https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\ + --seed-dataset {args.seed_dataset} \\ + --output-dataset {args.output_dataset} \\ + --task-type {args.task_type} \\ + --generation-model {args.generation_model} \\ + --filter-method {args.filter_method} \\ + --num-samples {args.num_samples}""") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/finepdfs-stats.py b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/finepdfs-stats.py new file mode 100644 index 00000000..989732b6 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/finepdfs-stats.py @@ -0,0 +1,546 @@ +# /// script +# requires-python = ">=3.12" +# dependencies = [ +# "polars>=1.31.0", +# "huggingface-hub", +# "datasets", +# "ascii-graph", +# ] +# /// +""" +Analyze educational quality trends 
across CommonCrawl dumps using Polars streaming. + +Answers: "Is the web getting more educational over time?" + +Demonstrates Polars HF Hub integration - process 50M+ docs without downloading 300GB+. + +Example usage: + # Analyze English PDFs (default) + uv run finepdfs-stats.py + + # Analyze all 70+ languages + uv run finepdfs-stats.py --all-languages + + # Quick test + uv run finepdfs-stats.py --limit 10000 --show-plan + + # Save results to HF Hub + uv run finepdfs-stats.py --output-repo username/finepdfs-temporal-stats + + # Run on HF Jobs + hf jobs uv run \\ + -s HF_TOKEN \\ + -e HF_XET_HIGH_PERFORMANCE=1 \\ + https://huggingface.co/datasets/uv-scripts/dataset-stats/raw/main/finepdfs-stats.py \\ + -- --output-repo username/stats +""" + +import argparse +import logging +import os +import sys +import time +from pathlib import Path + +import polars as pl +from ascii_graph import Pyasciigraph +from datasets import Dataset +from huggingface_hub import HfApi, create_repo, list_repo_tree, login + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# Common language+script codes for finepdfs-edu +COMMON_LANGUAGES = { + "eng_Latn": "English (Latin script)", + "fra_Latn": "French (Latin script)", + "deu_Latn": "German (Latin script)", + "spa_Latn": "Spanish (Latin script)", + "por_Latn": "Portuguese (Latin script)", + "ita_Latn": "Italian (Latin script)", + "nld_Latn": "Dutch (Latin script)", + "pol_Latn": "Polish (Latin script)", + "rus_Cyrl": "Russian (Cyrillic script)", + "zho_Hans": "Chinese (Simplified)", + "zho_Hant": "Chinese (Traditional)", + "jpn_Jpan": "Japanese", + "kor_Hang": "Korean", + "ara_Arab": "Arabic", + "hin_Deva": "Hindi (Devanagari)", +} + + +def list_available_languages(dataset_id: str) -> list[str]: + """List available language subsets in the dataset.""" + try: + tree = list_repo_tree(dataset_id, path_in_repo="data", repo_type="dataset") + languages = [ + 
item.path.replace("data/", "") + for item in tree + if item.path.startswith("data/") + and "/" not in item.path.replace("data/", "") + ] + return sorted(languages) + except Exception as e: + logger.warning(f"Could not list languages: {e}") + return list(COMMON_LANGUAGES.keys()) + + +def compute_temporal_stats(df: pl.LazyFrame, output_path: Path) -> pl.DataFrame: + """Single scan: compute stats grouped by dump for temporal analysis.""" + query = df.group_by("dump").agg( + pl.len().alias("doc_count"), + pl.col("token_count").sum().alias("total_tokens"), + pl.col("fw_edu_scores").list.mean().mean().alias("avg_edu_score"), + (pl.col("fw_edu_scores").list.mean() >= 3).sum().alias("high_edu_count"), + ) + query.sink_parquet(output_path, engine="streaming") + return pl.read_parquet(output_path) + + +def compute_global_stats(temporal: pl.DataFrame) -> pl.DataFrame: + """Compute global stats from temporal breakdown.""" + total = temporal["doc_count"].sum() + return pl.DataFrame( + { + "total_docs": [total], + "total_tokens": [temporal["total_tokens"].sum()], + "avg_edu_score": [ + (temporal["avg_edu_score"] * temporal["doc_count"]).sum() / total + ], + "high_edu_rate": [temporal["high_edu_count"].sum() / total], + "num_dumps": [len(temporal)], + } + ) + + +def format_temporal_stats(temporal: pl.DataFrame) -> pl.DataFrame: + """Format temporal stats with high_edu_rate, sorted chronologically.""" + return ( + temporal.with_columns( + (pl.col("high_edu_count") / pl.col("doc_count")).alias("high_edu_rate") + ) + .select(["dump", "doc_count", "avg_edu_score", "high_edu_rate"]) + .sort( + "dump" + ) # Chronological order (CC-MAIN-2017-xx comes before CC-MAIN-2024-xx) + ) + + +def create_ascii_charts(temporal_stats: pl.DataFrame) -> str: + """Create ASCII bar charts showing temporal trends.""" + # Extract year from dump name (CC-MAIN-2024-42 -> 2024) + # Group by year and average the values for cleaner display + yearly = ( + temporal_stats.with_columns( + 
pl.col("dump").str.extract(r"CC-MAIN-(\d{4})", 1).alias("year") + ) + .group_by("year") + .agg( + pl.col("doc_count").sum(), + pl.col("avg_edu_score").mean(), + pl.col("high_edu_rate").mean(), + ) + .sort("year") + ) + + lines = [] + + # High edu rate chart (more dramatic differences) + data_rate = [ + (row["year"], row["high_edu_rate"] * 100) + for row in yearly.iter_rows(named=True) + ] + graph = Pyasciigraph(line_length=60, float_format="{0:.1f}%") + lines.extend(graph.graph("High Educational Content (edu >= 3)", data_rate)) + + lines.append("") + + # Avg edu score chart + data_score = [ + (row["year"], row["avg_edu_score"]) for row in yearly.iter_rows(named=True) + ] + graph2 = Pyasciigraph(line_length=60, float_format="{0:.2f}") + lines.extend(graph2.graph("Average Educational Score", data_score)) + + return "\n".join(lines) + + +def create_readme( + args, + global_stats: pl.DataFrame, + temporal_stats: pl.DataFrame, + scan_time: float, + ascii_charts: str, +) -> str: + """Create README content for the stats dataset.""" + stats = global_stats.to_dicts()[0] + total_docs = stats.get("total_docs", 0) + docs_per_sec = total_docs / scan_time if scan_time > 0 else 0 + + # Get first and last year averages for trend (more representative than single dumps) + yearly = ( + temporal_stats.with_columns( + pl.col("dump").str.extract(r"CC-MAIN-(\d{4})", 1).alias("year") + ) + .group_by("year") + .agg( + pl.col("doc_count").sum(), + pl.col("avg_edu_score").mean(), + pl.col("high_edu_rate").mean(), + ) + .sort("year") + ) + first_year = yearly.head(1).to_dicts()[0] + last_year = yearly.tail(1).to_dicts()[0] + + scope = ( + "all languages" + if args.all_languages + else COMMON_LANGUAGES.get(args.lang, args.lang) + ) + + return f"""--- +tags: + - uv-script + - statistics + - polars + - finepdfs-edu + - temporal-analysis +license: odc-by +configs: + - config_name: global_stats + data_files: global_stats/train-*.parquet + - config_name: temporal_stats + data_files: 
temporal_stats/train-*.parquet +default_viewer_config: temporal_stats +--- + +# Is the Web Getting More Educational? + +Temporal analysis of educational quality in **{scope}** across {stats.get("num_dumps", 0)} CommonCrawl dumps. + +## Trend + +``` +{ascii_charts} +``` + +## Key Finding + +| Year | Avg Edu Score | High Edu Rate | +|------|---------------|---------------| +| {first_year["year"]} | {first_year["avg_edu_score"]:.2f} | {first_year["high_edu_rate"] * 100:.1f}% | +| {last_year["year"]} | {last_year["avg_edu_score"]:.2f} | {last_year["high_edu_rate"] * 100:.1f}% | + +## Performance + +- **{total_docs:,} documents** processed in **{scan_time:.0f} seconds** +- **{docs_per_sec:,.0f} docs/sec** using Polars streaming +- Single scan, no full dataset download required + +## Summary + +| Metric | Value | +|--------|-------| +| Scope | {scope} | +| Total Documents | {total_docs:,} | +| Total Tokens | {stats.get("total_tokens", 0):,} | +| Avg Edu Score | {stats.get("avg_edu_score", 0):.3f} | +| High Edu Rate | {stats.get("high_edu_rate", 0) * 100:.1f}% | +| CommonCrawl Dumps | {stats.get("num_dumps", 0)} | + +## Files + +- `global_stats` - Overall summary +- `temporal_stats` - Per-dump breakdown (sorted chronologically) + +## Reproduce + +```bash +uv run https://huggingface.co/datasets/uv-scripts/dataset-stats/raw/main/finepdfs-stats.py \\ + {"--all-languages" if args.all_languages else f"--lang {args.lang}"} --output-repo your-username/stats +``` + +## Source + +- **Dataset**: [{args.source_dataset}](https://huggingface.co/datasets/{args.source_dataset}) +- **Script**: [uv-scripts/dataset-stats](https://huggingface.co/datasets/uv-scripts/dataset-stats) +""" + + +def main(): + parser = argparse.ArgumentParser( + description="Analyze educational quality trends across CommonCrawl dumps", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + parser.add_argument( + "--source-dataset", + type=str, + default="HuggingFaceFW/finepdfs-edu", + 
help="Source dataset (default: HuggingFaceFW/finepdfs-edu)", + ) + + parser.add_argument( + "--lang", + type=str, + default="eng_Latn", + help="Language+script code (default: eng_Latn)", + ) + + parser.add_argument( + "--all-languages", + action="store_true", + help="Analyze all languages (70+) instead of single language", + ) + + parser.add_argument( + "--show-plan", + action="store_true", + help="Show Polars query plan (demonstrates optimization)", + ) + + parser.add_argument( + "--list-languages", + action="store_true", + help="List available languages and exit", + ) + + parser.add_argument( + "--limit", + type=int, + help="Limit to first N rows (for testing)", + ) + + parser.add_argument( + "--output-repo", + type=str, + help="HuggingFace dataset repository to upload results", + ) + + parser.add_argument( + "--output-dir", + type=str, + default="./stats_output", + help="Local directory for output files", + ) + + parser.add_argument( + "--hf-token", + type=str, + help="HuggingFace API token (or set HF_TOKEN env var)", + ) + + parser.add_argument( + "--private", + action="store_true", + help="Make the output dataset private", + ) + + args = parser.parse_args() + + # Check for high-performance mode + if os.environ.get("HF_XET_HIGH_PERFORMANCE"): + logger.info("High-performance mode enabled (HF_XET_HIGH_PERFORMANCE=1)") + + # List languages mode + if args.list_languages: + print(f"Available language+script codes for {args.source_dataset}:\n") + print("Common languages:") + for code, name in COMMON_LANGUAGES.items(): + print(f" {code:12} - {name}") + print("\nFetching full list from HF Hub...") + all_langs = list_available_languages(args.source_dataset) + print(f"\nAll available ({len(all_langs)} total):") + for lang in all_langs[:30]: # Show first 30 + name = COMMON_LANGUAGES.get(lang, "") + print(f" {lang:12} {name}") + if len(all_langs) > 30: + print(f" ... 
and {len(all_langs) - 30} more") + sys.exit(0) + + # Build the parquet path + if args.all_languages: + source_path = f"hf://datasets/{args.source_dataset}/data/*/train/*.parquet" + scope_desc = "all languages" + else: + source_path = ( + f"hf://datasets/{args.source_dataset}/data/{args.lang}/train/*.parquet" + ) + scope_desc = f"{args.lang} ({COMMON_LANGUAGES.get(args.lang, 'unknown')})" + + logger.info(f"Scanning: {source_path}") + logger.info(f"Scope: {scope_desc}") + + # Create lazy frame - this doesn't load any data yet! + logger.info("Creating lazy query plan...") + df = pl.scan_parquet(source_path) + + # Apply limit if specified + if args.limit: + logger.info(f"Limiting to first {args.limit:,} rows") + df = df.head(args.limit) + + # Show query plan if requested + if args.show_plan: + # Build a sample query to show the plan + sample_query = df.select( + pl.len(), + pl.col("token_count").sum(), + pl.col("language").n_unique(), + ) + print("\nQuery Plan (showing Polars optimization):") + print("=" * 60) + print(sample_query.explain()) + print("=" * 60) + print("\nNote: Polars uses projection pushdown - only reads columns needed!") + print("The 'text' column is never loaded, making this very fast.\n") + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Single scan: compute temporal stats + logger.info("Computing temporal stats (single scan)...") + start = time.perf_counter() + temporal_path = output_dir / "temporal_stats.parquet" + temporal_raw = compute_temporal_stats(df, temporal_path) + scan_time = time.perf_counter() - start + logger.info(f"Scan complete in {scan_time:.2f}s - {len(temporal_raw)} dumps") + + # Compute stats + global_stats = compute_global_stats(temporal_raw) + temporal_stats = format_temporal_stats(temporal_raw) + + # Save + global_stats.write_parquet(output_dir / "global_stats.parquet") + temporal_stats.write_parquet(output_dir / "temporal_stats.parquet") + + # Print results 
+ total_docs = global_stats["total_docs"][0] + docs_per_sec = total_docs / scan_time if scan_time > 0 else 0 + + print("\n" + "=" * 70) + print("IS THE WEB GETTING MORE EDUCATIONAL?") + print("=" * 70) + + print(f"\nScope: {scope_desc}") + print(f"Dataset: {args.source_dataset}") + + print("\n" + "-" * 70) + print("GLOBAL STATS") + print("-" * 70) + print(global_stats) + + print("\n" + "-" * 70) + print(f"TEMPORAL TREND ({len(temporal_stats)} CommonCrawl dumps)") + print("-" * 70) + # Show first 5 and last 5 + if len(temporal_stats) > 10: + print("Earliest dumps:") + print(temporal_stats.head(5)) + print("\n...") + print("\nLatest dumps:") + print(temporal_stats.tail(5)) + else: + print(temporal_stats) + + # Create ASCII charts + ascii_charts = create_ascii_charts(temporal_stats) + print("\n" + "-" * 70) + print("TREND VISUALIZATION") + print("-" * 70) + print(ascii_charts) + + print("\n" + "-" * 70) + print("PERFORMANCE") + print("-" * 70) + print(f"Scan time: {scan_time:.2f}s") + print(f"Documents: {total_docs:,}") + print(f"Throughput: {docs_per_sec:,.0f} docs/sec") + + logger.info(f"Results saved to: {output_dir}") + + # Upload to HF Hub if requested + if args.output_repo: + hf_token = args.hf_token or os.environ.get("HF_TOKEN") + if hf_token: + login(token=hf_token) + + api = HfApi(token=hf_token) + + logger.info(f"Creating/updating dataset repository: {args.output_repo}") + create_repo( + args.output_repo, + repo_type="dataset", + private=args.private, + token=hf_token, + exist_ok=True, + ) + + # Upload each as a dataset config + configs = [ + ("global_stats", global_stats), + ("temporal_stats", temporal_stats), + ] + + for config_name, stats_df in configs: + logger.info(f"Uploading {config_name}...") + ds = Dataset.from_polars(stats_df) + ds.push_to_hub( + args.output_repo, + config_name=config_name, + token=hf_token, + private=args.private, + ) + time.sleep(1) # Avoid 409 conflicts + + # Upload README + readme_content = create_readme( + args, global_stats, 
temporal_stats, scan_time, ascii_charts + ) + api.upload_file( + path_or_fileobj=readme_content.encode(), + path_in_repo="README.md", + repo_id=args.output_repo, + repo_type="dataset", + token=hf_token, + ) + + dataset_url = f"https://huggingface.co/datasets/{args.output_repo}" + logger.info(f"Dataset uploaded: {dataset_url}") + print(f"\nResults uploaded to: {dataset_url}") + + +if __name__ == "__main__": + if len(sys.argv) == 1: + print("Is the Web Getting More Educational?") + print("=" * 40) + print("\nAnalyze educational quality trends across CommonCrawl dumps") + print("using Polars streaming - no download needed!\n") + print("Example commands:\n") + print("# Quick test:") + print("uv run finepdfs-stats.py --limit 10000\n") + print("# Analyze English PDFs:") + print("uv run finepdfs-stats.py\n") + print("# Analyze ALL 70+ languages:") + print("uv run finepdfs-stats.py --all-languages\n") + print("# Show query plan (see Polars optimization):") + print("uv run finepdfs-stats.py --show-plan --limit 1000\n") + print("# Save results to HF Hub:") + print("uv run finepdfs-stats.py --output-repo username/temporal-stats\n") + print("# Run on HF Jobs:") + print("hf jobs uv run \\") + print(" -s HF_TOKEN \\") + print(" -e HF_XET_HIGH_PERFORMANCE=1 \\") + print( + " https://huggingface.co/datasets/uv-scripts/dataset-stats/raw/main/finepdfs-stats.py \\" + ) + print(" -- --output-repo username/stats") + sys.exit(0) + + main() diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/generate-responses.py b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/generate-responses.py new file mode 100644 index 00000000..1496f449 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-jobs/scripts/generate-responses.py @@ -0,0 +1,587 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "datasets", +# "flashinfer-python", +# "huggingface-hub[hf_transfer]", +# "hf-xet>= 1.1.7", +# "torch", +# 
"transformers", +# "vllm>=0.8.5", +# ] +# +# /// +""" +Generate responses for prompts in a dataset using vLLM for efficient GPU inference. + +This script loads a dataset from Hugging Face Hub containing chat-formatted messages, +applies the model's chat template, generates responses using vLLM, and saves the +results back to the Hub with a comprehensive dataset card. + +Example usage: + # Local execution with auto GPU detection + uv run generate-responses.py \\ + username/input-dataset \\ + username/output-dataset \\ + --messages-column messages + + # With custom model and sampling parameters + uv run generate-responses.py \\ + username/input-dataset \\ + username/output-dataset \\ + --model-id meta-llama/Llama-3.1-8B-Instruct \\ + --temperature 0.9 \\ + --top-p 0.95 \\ + --max-tokens 2048 + + # HF Jobs execution (see script output for full command) + hf jobs uv run --flavor a100x4 ... +""" + +import argparse +import logging +import os +import sys +from datetime import datetime +from typing import Optional + +from datasets import load_dataset +from huggingface_hub import DatasetCard, get_token, login +from torch import cuda +from tqdm.auto import tqdm +from transformers import AutoTokenizer +from vllm import LLM, SamplingParams + +# Enable HF Transfer for faster downloads +os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + + +def check_gpu_availability() -> int: + """Check if CUDA is available and return the number of GPUs.""" + if not cuda.is_available(): + logger.error("CUDA is not available. This script requires a GPU.") + logger.error( + "Please run on a machine with NVIDIA GPU or use HF Jobs with GPU flavor." 
+ ) + sys.exit(1) + + num_gpus = cuda.device_count() + for i in range(num_gpus): + gpu_name = cuda.get_device_name(i) + gpu_memory = cuda.get_device_properties(i).total_memory / 1024**3 + logger.info(f"GPU {i}: {gpu_name} with {gpu_memory:.1f} GB memory") + + return num_gpus + + +def create_dataset_card( + source_dataset: str, + model_id: str, + messages_column: str, + prompt_column: Optional[str], + sampling_params: SamplingParams, + tensor_parallel_size: int, + num_examples: int, + generation_time: str, + num_skipped: int = 0, + max_model_len_used: Optional[int] = None, +) -> str: + """Create a comprehensive dataset card documenting the generation process.""" + filtering_section = "" + if num_skipped > 0: + skip_percentage = (num_skipped / num_examples) * 100 + processed = num_examples - num_skipped + filtering_section = f""" + +### Filtering Statistics + +- **Total Examples**: {num_examples:,} +- **Processed**: {processed:,} ({100 - skip_percentage:.1f}%) +- **Skipped (too long)**: {num_skipped:,} ({skip_percentage:.1f}%) +- **Max Model Length Used**: {max_model_len_used:,} tokens + +Note: Prompts exceeding the maximum model length were skipped and have empty responses.""" + + return f"""--- +tags: +- generated +- vllm +- uv-script +--- + +# Generated Responses Dataset + +This dataset contains generated responses for prompts from [{source_dataset}](https://huggingface.co/datasets/{source_dataset}). 
+ +## Generation Details + +- **Source Dataset**: [{source_dataset}](https://huggingface.co/datasets/{source_dataset}) +- **Input Column**: `{prompt_column if prompt_column else messages_column}` ({"plain text prompts" if prompt_column else "chat messages"}) +- **Model**: [{model_id}](https://huggingface.co/{model_id}) +- **Number of Examples**: {num_examples:,} +- **Generation Date**: {generation_time}{filtering_section} + +### Sampling Parameters + +- **Temperature**: {sampling_params.temperature} +- **Top P**: {sampling_params.top_p} +- **Top K**: {sampling_params.top_k} +- **Min P**: {sampling_params.min_p} +- **Max Tokens**: {sampling_params.max_tokens} +- **Repetition Penalty**: {sampling_params.repetition_penalty} + +### Hardware Configuration + +- **Tensor Parallel Size**: {tensor_parallel_size} +- **GPU Configuration**: {tensor_parallel_size} GPU(s) + +## Dataset Structure + +The dataset contains all columns from the source dataset plus: +- `response`: The generated response from the model + +## Generation Script + +Generated using the vLLM inference script from [uv-scripts/vllm](https://huggingface.co/datasets/uv-scripts/vllm). 
+ +To reproduce this generation: + +```bash +uv run https://huggingface.co/datasets/uv-scripts/vllm/raw/main/generate-responses.py \\ + {source_dataset} \\ + \\ + --model-id {model_id} \\ + {"--prompt-column " + prompt_column if prompt_column else "--messages-column " + messages_column} \\ + --temperature {sampling_params.temperature} \\ + --top-p {sampling_params.top_p} \\ + --top-k {sampling_params.top_k} \\ + --max-tokens {sampling_params.max_tokens}{f" \\\\\\n --max-model-len {max_model_len_used}" if max_model_len_used else ""} +``` +""" + + +def main( + src_dataset_hub_id: str, + output_dataset_hub_id: str, + model_id: str = "Qwen/Qwen3-30B-A3B-Instruct-2507", + messages_column: str = "messages", + prompt_column: Optional[str] = None, + output_column: str = "response", + temperature: float = 0.7, + top_p: float = 0.8, + top_k: int = 20, + min_p: float = 0.0, + max_tokens: int = 16384, + repetition_penalty: float = 1.0, + gpu_memory_utilization: float = 0.90, + max_model_len: Optional[int] = None, + tensor_parallel_size: Optional[int] = None, + skip_long_prompts: bool = True, + max_samples: Optional[int] = None, + hf_token: Optional[str] = None, +): + """ + Main generation pipeline. 
+ + Args: + src_dataset_hub_id: Input dataset on Hugging Face Hub + output_dataset_hub_id: Where to save results on Hugging Face Hub + model_id: Hugging Face model ID for generation + messages_column: Column name containing chat messages + prompt_column: Column name containing plain text prompts (alternative to messages_column) + output_column: Column name for generated responses + temperature: Sampling temperature + top_p: Top-p sampling parameter + top_k: Top-k sampling parameter + min_p: Minimum probability threshold + max_tokens: Maximum tokens to generate + repetition_penalty: Repetition penalty parameter + gpu_memory_utilization: GPU memory utilization factor + max_model_len: Maximum model context length (None uses model default) + tensor_parallel_size: Number of GPUs to use (auto-detect if None) + skip_long_prompts: Skip prompts exceeding max_model_len instead of failing + max_samples: Maximum number of samples to process (None for all) + hf_token: Hugging Face authentication token + """ + generation_start_time = datetime.now().isoformat() + + # GPU check and configuration + num_gpus = check_gpu_availability() + if tensor_parallel_size is None: + tensor_parallel_size = num_gpus + logger.info( + f"Auto-detected {num_gpus} GPU(s), using tensor_parallel_size={tensor_parallel_size}" + ) + else: + logger.info(f"Using specified tensor_parallel_size={tensor_parallel_size}") + if tensor_parallel_size > num_gpus: + logger.warning( + f"Requested {tensor_parallel_size} GPUs but only {num_gpus} available" + ) + + # Authentication - try multiple methods + HF_TOKEN = hf_token or os.environ.get("HF_TOKEN") or get_token() + + if not HF_TOKEN: + logger.error("No HuggingFace token found. Please provide token via:") + logger.error(" 1. --hf-token argument") + logger.error(" 2. HF_TOKEN environment variable") + logger.error(" 3. 
Run 'hf auth login' or use login() in Python") + sys.exit(1) + + logger.info("HuggingFace token found, authenticating...") + login(token=HF_TOKEN) + + # Initialize vLLM + logger.info(f"Loading model: {model_id}") + vllm_kwargs = { + "model": model_id, + "tensor_parallel_size": tensor_parallel_size, + "gpu_memory_utilization": gpu_memory_utilization, + } + if max_model_len is not None: + vllm_kwargs["max_model_len"] = max_model_len + logger.info(f"Using max_model_len={max_model_len}") + + llm = LLM(**vllm_kwargs) + + # Load tokenizer for chat template + logger.info("Loading tokenizer...") + tokenizer = AutoTokenizer.from_pretrained(model_id) + + # Create sampling parameters + sampling_params = SamplingParams( + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + max_tokens=max_tokens, + repetition_penalty=repetition_penalty, + ) + + # Load dataset + logger.info(f"Loading dataset: {src_dataset_hub_id}") + dataset = load_dataset(src_dataset_hub_id, split="train") + + # Apply max_samples if specified + if max_samples is not None and max_samples < len(dataset): + logger.info(f"Limiting dataset to {max_samples} samples") + dataset = dataset.select(range(max_samples)) + + total_examples = len(dataset) + logger.info(f"Dataset loaded with {total_examples:,} examples") + + # Determine which column to use and validate + if prompt_column: + # Use prompt column mode + if prompt_column not in dataset.column_names: + logger.error( + f"Column '{prompt_column}' not found. Available columns: {dataset.column_names}" + ) + sys.exit(1) + logger.info(f"Using prompt column mode with column: '{prompt_column}'") + use_messages = False + else: + # Use messages column mode + if messages_column not in dataset.column_names: + logger.error( + f"Column '{messages_column}' not found. 
Available columns: {dataset.column_names}" + ) + sys.exit(1) + logger.info(f"Using messages column mode with column: '{messages_column}'") + use_messages = True + + # Get effective max length for filtering + if max_model_len is not None: + effective_max_len = max_model_len + else: + # Get model's default max length + effective_max_len = llm.llm_engine.model_config.max_model_len + logger.info(f"Using effective max model length: {effective_max_len}") + + # Process messages and apply chat template + logger.info("Preparing prompts...") + all_prompts = [] + valid_prompts = [] + valid_indices = [] + skipped_info = [] + + for i, example in enumerate(tqdm(dataset, desc="Processing prompts")): + if use_messages: + # Messages mode: use existing chat messages + messages = example[messages_column] + # Apply chat template + prompt = tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + else: + # Prompt mode: convert plain text to messages format + user_prompt = example[prompt_column] + messages = [{"role": "user", "content": user_prompt}] + # Apply chat template + prompt = tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + + all_prompts.append(prompt) + + # Count tokens if filtering is enabled + if skip_long_prompts: + tokens = tokenizer.encode(prompt) + if len(tokens) <= effective_max_len: + valid_prompts.append(prompt) + valid_indices.append(i) + else: + skipped_info.append((i, len(tokens))) + else: + valid_prompts.append(prompt) + valid_indices.append(i) + + # Log filtering results + if skip_long_prompts and skipped_info: + logger.warning( + f"Skipped {len(skipped_info)} prompts that exceed max_model_len ({effective_max_len} tokens)" + ) + logger.info("Skipped prompt details (first 10):") + for idx, (prompt_idx, token_count) in enumerate(skipped_info[:10]): + logger.info( + f" - Example {prompt_idx}: {token_count} tokens (exceeds by {token_count - effective_max_len})" + ) + if len(skipped_info) > 
10: + logger.info(f" ... and {len(skipped_info) - 10} more") + + skip_percentage = (len(skipped_info) / total_examples) * 100 + if skip_percentage > 10: + logger.warning(f"WARNING: {skip_percentage:.1f}% of prompts were skipped!") + + if not valid_prompts: + logger.error("No valid prompts to process after filtering!") + sys.exit(1) + + # Generate responses - vLLM handles batching internally + logger.info(f"Starting generation for {len(valid_prompts):,} valid prompts...") + logger.info("vLLM will handle batching and scheduling automatically") + + outputs = llm.generate(valid_prompts, sampling_params) + + # Extract generated text and create full response list + logger.info("Extracting generated responses...") + responses = [""] * total_examples # Initialize with empty strings + + for idx, output in enumerate(outputs): + original_idx = valid_indices[idx] + response = output.outputs[0].text.strip() + responses[original_idx] = response + + # Add responses to dataset + logger.info("Adding responses to dataset...") + dataset = dataset.add_column(output_column, responses) + + # Create dataset card + logger.info("Creating dataset card...") + card_content = create_dataset_card( + source_dataset=src_dataset_hub_id, + model_id=model_id, + messages_column=messages_column, + prompt_column=prompt_column, + sampling_params=sampling_params, + tensor_parallel_size=tensor_parallel_size, + num_examples=total_examples, + generation_time=generation_start_time, + num_skipped=len(skipped_info) if skip_long_prompts else 0, + max_model_len_used=effective_max_len if skip_long_prompts else None, + ) + + # Push dataset to hub + logger.info(f"Pushing dataset to: {output_dataset_hub_id}") + dataset.push_to_hub(output_dataset_hub_id, token=HF_TOKEN) + + # Push dataset card + card = DatasetCard(card_content) + card.push_to_hub(output_dataset_hub_id, token=HF_TOKEN) + + logger.info("✅ Generation complete!") + logger.info( + f"Dataset available at: 
https://huggingface.co/datasets/{output_dataset_hub_id}" + ) + + +if __name__ == "__main__": + if len(sys.argv) > 1: + parser = argparse.ArgumentParser( + description="Generate responses for dataset prompts using vLLM", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Basic usage with default Qwen model + uv run generate-responses.py input-dataset output-dataset + + # With custom model and parameters + uv run generate-responses.py input-dataset output-dataset \\ + --model-id meta-llama/Llama-3.1-8B-Instruct \\ + --temperature 0.9 \\ + --max-tokens 2048 + + # Force specific GPU configuration + uv run generate-responses.py input-dataset output-dataset \\ + --tensor-parallel-size 2 \\ + --gpu-memory-utilization 0.95 + + # Using environment variable for token + HF_TOKEN=hf_xxx uv run generate-responses.py input-dataset output-dataset + """, + ) + + parser.add_argument( + "src_dataset_hub_id", + help="Input dataset on Hugging Face Hub (e.g., username/dataset-name)", + ) + parser.add_argument( + "output_dataset_hub_id", help="Output dataset name on Hugging Face Hub" + ) + parser.add_argument( + "--model-id", + type=str, + default="Qwen/Qwen3-30B-A3B-Instruct-2507", + help="Model to use for generation (default: Qwen3-30B-A3B-Instruct-2507)", + ) + parser.add_argument( + "--messages-column", + type=str, + default="messages", + help="Column containing chat messages (default: messages)", + ) + parser.add_argument( + "--prompt-column", + type=str, + help="Column containing plain text prompts (alternative to --messages-column)", + ) + parser.add_argument( + "--output-column", + type=str, + default="response", + help="Column name for generated responses (default: response)", + ) + parser.add_argument( + "--max-samples", + type=int, + help="Maximum number of samples to process (default: all)", + ) + parser.add_argument( + "--temperature", + type=float, + default=0.7, + help="Sampling temperature (default: 0.7)", + ) + parser.add_argument( + 
"--top-p", + type=float, + default=0.8, + help="Top-p sampling parameter (default: 0.8)", + ) + parser.add_argument( + "--top-k", + type=int, + default=20, + help="Top-k sampling parameter (default: 20)", + ) + parser.add_argument( + "--min-p", + type=float, + default=0.0, + help="Minimum probability threshold (default: 0.0)", + ) + parser.add_argument( + "--max-tokens", + type=int, + default=16384, + help="Maximum tokens to generate (default: 16384)", + ) + parser.add_argument( + "--repetition-penalty", + type=float, + default=1.0, + help="Repetition penalty (default: 1.0)", + ) + parser.add_argument( + "--gpu-memory-utilization", + type=float, + default=0.90, + help="GPU memory utilization factor (default: 0.90)", + ) + parser.add_argument( + "--max-model-len", + type=int, + help="Maximum model context length (default: model's default)", + ) + parser.add_argument( + "--tensor-parallel-size", + type=int, + help="Number of GPUs to use (default: auto-detect)", + ) + parser.add_argument( + "--hf-token", + type=str, + help="Hugging Face token (can also use HF_TOKEN env var)", + ) + parser.add_argument( + "--skip-long-prompts", + action="store_true", + default=True, + help="Skip prompts that exceed max_model_len instead of failing (default: True)", + ) + parser.add_argument( + "--no-skip-long-prompts", + dest="skip_long_prompts", + action="store_false", + help="Fail on prompts that exceed max_model_len", + ) + + args = parser.parse_args() + + main( + src_dataset_hub_id=args.src_dataset_hub_id, + output_dataset_hub_id=args.output_dataset_hub_id, + model_id=args.model_id, + messages_column=args.messages_column, + prompt_column=args.prompt_column, + output_column=args.output_column, + temperature=args.temperature, + top_p=args.top_p, + top_k=args.top_k, + min_p=args.min_p, + max_tokens=args.max_tokens, + repetition_penalty=args.repetition_penalty, + gpu_memory_utilization=args.gpu_memory_utilization, + max_model_len=args.max_model_len, + 
tensor_parallel_size=args.tensor_parallel_size, + skip_long_prompts=args.skip_long_prompts, + max_samples=args.max_samples, + hf_token=args.hf_token, + ) + else: + # Show HF Jobs example when run without arguments + print(""" +vLLM Response Generation Script +============================== + +This script requires arguments. For usage information: + uv run generate-responses.py --help + +Example HF Jobs command with multi-GPU: + # If you're logged in with hf auth, token will be auto-detected + hf jobs uv run \\ + --flavor l4x4 \\ + https://huggingface.co/datasets/uv-scripts/vllm/raw/main/generate-responses.py \\ + username/input-dataset \\ + username/output-dataset \\ + --messages-column messages \\ + --model-id Qwen/Qwen3-30B-A3B-Instruct-2507 \\ + --temperature 0.7 \\ + --max-tokens 16384 + """) diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/SKILL.md b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/SKILL.md index 95994b17..d6d5f742 100644 --- a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/SKILL.md +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/SKILL.md @@ -1,9 +1,9 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-llm-trainer" name: hugging-face-model-trainer -description: "Train language models using TRL (Transformer Reinforcement Learning) on fully managed Hugging Face infrastructure. No local GPU setup required—models train on cloud GPUs and results are automatically saved to the Hugging Face Hub." +description: Train or fine-tune TRL language models on Hugging Face Jobs, including SFT, DPO, GRPO, and GGUF export. license: Complete terms in LICENSE.txt risk: unknown -source: community --- # TRL Training on Hugging Face Jobs @@ -60,11 +60,12 @@ When assisting with training jobs: 4. **Use example scripts as templates** - Reference `scripts/train_sft_example.py`, `scripts/train_dpo_example.py`, etc. as starting points. 
-## Local Script Dependencies +## Local Script Execution -To run scripts locally (like `estimate_cost.py`), install dependencies: +Repository scripts use PEP 723 inline dependencies. Run them with `uv run`: ```bash -pip install -r requirements.txt +uv run scripts/estimate_cost.py --help +uv run scripts/dataset_inspector.py --help ``` ## Prerequisites Checklist @@ -240,8 +241,8 @@ hf_jobs("uv", {"script": "https://gist.githubusercontent.com/user/id/raw/train.p **To use local scripts:** Upload to HF Hub first: ```bash -huggingface-cli repo create my-training-scripts --type model -huggingface-cli upload my-training-scripts ./train.py train.py +hf repos create my-training-scripts --type model +hf upload my-training-scripts ./train.py train.py # Use: https://huggingface.co/USERNAME/my-training-scripts/resolve/main/train.py ``` @@ -331,13 +332,10 @@ hf jobs cancel # Cancel a job The `trl-jobs` package provides optimized defaults and one-liner training. ```bash -# Install -pip install trl-jobs - -# Train with SFT (simplest possible) -trl-jobs sft \ +uvx trl-jobs sft \ --model_name Qwen/Qwen2.5-0.5B \ --dataset_name trl-lib/Capybara + ``` **Benefits:** Pre-configured settings, automatic Trackio integration, automatic Hub push, one-line commands @@ -685,6 +683,7 @@ Add to PEP 723 header: - `references/hardware_guide.md` - Hardware specs and selection - `references/hub_saving.md` - Hub authentication troubleshooting - `references/troubleshooting.md` - Common issues and solutions +- `references/local_training_macos.md` - Local training on macOS ### Scripts (In This Skill) - `scripts/train_sft_example.py` - Production SFT template diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/gguf_conversion.md b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/gguf_conversion.md new file mode 100644 index 00000000..a99ea0e8 --- /dev/null +++ 
b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/gguf_conversion.md @@ -0,0 +1,296 @@ +# GGUF Conversion Guide + +After training models with TRL on Hugging Face Jobs, convert them to **GGUF format** for use with llama.cpp, Ollama, LM Studio, and other local inference tools. + +**This guide provides production-ready, tested code based on successful conversions.** All critical dependencies and build steps are included. + +## What is GGUF? + +**GGUF** (GPT-Generated Unified Format): +- Optimized format for CPU/GPU inference with llama.cpp +- Supports quantization (4-bit, 5-bit, 8-bit) to reduce model size +- Compatible with: Ollama, LM Studio, Jan, GPT4All, llama.cpp +- Typically 2-8GB for 7B models (vs 14GB unquantized) + +## When to Convert to GGUF + +**Convert when:** +- Running models locally with Ollama or LM Studio +- Using CPU-optimized inference +- Reducing model size with quantization +- Deploying to edge devices +- Sharing models for local-first use + +## Critical Success Factors + +Based on production testing, these are **essential** for reliable conversion: + +### 1. ✅ Install Build Tools FIRST +**Before cloning llama.cpp**, install build dependencies: +```python +subprocess.run(["apt-get", "update", "-qq"], check=True, capture_output=True) +subprocess.run(["apt-get", "install", "-y", "-qq", "build-essential", "cmake"], check=True, capture_output=True) +``` + +**Why:** The quantization tool requires gcc and cmake. Installing after cloning doesn't help. + +### 2. 
✅ Use CMake (Not Make) +**Build the quantize tool with CMake:** +```python +# Create build directory +os.makedirs("/tmp/llama.cpp/build", exist_ok=True) + +# Configure +subprocess.run([ + "cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp", + "-DGGML_CUDA=OFF" # Faster build, CUDA not needed for quantization +], check=True, capture_output=True, text=True) + +# Build +subprocess.run([ + "cmake", "--build", "/tmp/llama.cpp/build", + "--target", "llama-quantize", "-j", "4" +], check=True, capture_output=True, text=True) + +# Binary path +quantize_bin = "/tmp/llama.cpp/build/bin/llama-quantize" +``` + +**Why:** CMake is more reliable than `make` and produces consistent binary paths. + +### 3. ✅ Include All Dependencies +**PEP 723 header must include:** +```python +# /// script +# dependencies = [ +# "transformers>=4.36.0", +# "peft>=0.7.0", +# "torch>=2.0.0", +# "accelerate>=0.24.0", +# "huggingface_hub>=0.20.0", +# "sentencepiece>=0.1.99", # Required for tokenizer +# "protobuf>=3.20.0", # Required for tokenizer +# "numpy", +# "gguf", +# ] +# /// +``` + +**Why:** `sentencepiece` and `protobuf` are critical for tokenizer conversion. Missing them causes silent failures. + +### 4. ✅ Verify Names Before Use +**Always verify repos exist:** +```python +# Before submitting job, verify: +hub_repo_details([ADAPTER_MODEL], repo_type="model") +hub_repo_details([BASE_MODEL], repo_type="model") +``` + +**Why:** Non-existent dataset/model names cause job failures that could be caught in seconds. + +## Complete Conversion Script + +See `scripts/convert_to_gguf.py` for the complete, production-ready script. 
+ +**Key features:** +- ✅ All dependencies in PEP 723 header +- ✅ Build tools installed automatically +- ✅ CMake build process (reliable) +- ✅ Comprehensive error handling +- ✅ Environment variable configuration +- ✅ Automatic README generation + +## Quick Conversion Job + +```python +# Before submitting: VERIFY MODELS EXIST +hub_repo_details(["username/my-finetuned-model"], repo_type="model") +hub_repo_details(["Qwen/Qwen2.5-0.5B"], repo_type="model") + +# Submit conversion job +hf_jobs("uv", { + "script": open("trl/scripts/convert_to_gguf.py").read(), # Or inline the script + "flavor": "a10g-large", + "timeout": "45m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"}, + "env": { + "ADAPTER_MODEL": "username/my-finetuned-model", + "BASE_MODEL": "Qwen/Qwen2.5-0.5B", + "OUTPUT_REPO": "username/my-model-gguf", + "HF_USERNAME": "username" # Optional, for README + } +}) +``` + +## Conversion Process + +The script performs these steps: + +1. **Load and Merge** - Load base model and LoRA adapter, merge them +2. **Install Build Tools** - Install gcc, cmake (CRITICAL: before cloning llama.cpp) +3. **Setup llama.cpp** - Clone repo, install Python dependencies +4. **Convert to GGUF** - Create FP16 GGUF using llama.cpp converter +5. **Build Quantize Tool** - Use CMake to build `llama-quantize` +6. **Quantize** - Create Q4_K_M, Q5_K_M, Q8_0 versions +7. **Upload** - Upload all versions + README to Hub + +## Quantization Options + +Common quantization formats (from smallest to largest): + +| Format | Size | Quality | Use Case | +|--------|------|---------|----------| +| **Q4_K_M** | ~300MB | Good | **Recommended** - best balance of size/quality | +| **Q5_K_M** | ~350MB | Better | Higher quality, slightly larger | +| **Q8_0** | ~500MB | Very High | Near-original quality | +| **F16** | ~1GB | Original | Full precision, largest file | + +**Recommendation:** Create Q4_K_M, Q5_K_M, and Q8_0 versions to give users options. 
+ +## Hardware Requirements + +**For conversion:** +- Small models (<1B): CPU-basic works, but slow +- Medium models (1-7B): a10g-large recommended +- Large models (7B+): a10g-large or a100-large + +**Time estimates:** +- 0.5B model: ~15-25 minutes on A10G +- 3B model: ~30-45 minutes on A10G +- 7B model: ~45-60 minutes on A10G + +## Using GGUF Models + +**GGUF models work on both CPU and GPU.** They're optimized for CPU inference but can also leverage GPU acceleration when available. + +### With Ollama (auto-detects GPU) +```bash +# Download GGUF +hf download username/my-model-gguf model-q4_k_m.gguf + +# Create Modelfile +echo "FROM ./model-q4_k_m.gguf" > Modelfile + +# Create and run (uses GPU automatically if available) +ollama create my-model -f Modelfile +ollama run my-model +``` + +### With llama.cpp +```bash +# CPU only +./llama-cli -m model-q4_k_m.gguf -p "Your prompt" + +# With GPU acceleration (offload 32 layers to GPU) +./llama-cli -m model-q4_k_m.gguf -ngl 32 -p "Your prompt" +``` + +### With LM Studio +1. Download the `.gguf` file +2. Import into LM Studio +3. Start chatting + +## Best Practices + +### ✅ DO: +1. **Verify repos exist** before submitting jobs (use `hub_repo_details`) +2. **Install build tools FIRST** before cloning llama.cpp +3. **Use CMake** for building quantize tool (not make) +4. **Include all dependencies** in PEP 723 header (especially sentencepiece, protobuf) +5. **Create multiple quantizations** - Give users choice +6. **Test on known models** before production use +7. **Use A10G GPU** for faster conversion + +### ❌ DON'T: +1. **Assume repos exist** - Always verify with hub tools +2. **Use make** instead of CMake - Less reliable +3. **Remove dependencies** to "simplify" - They're all needed +4. **Skip build tools** - Quantization will fail silently +5. 
**Use default paths** - CMake puts binaries in build/bin/ + +## Common Issues + +### Out of memory during merge +**Fix:** +- Use larger GPU (a10g-large or a100-large) +- Ensure `device_map="auto"` for automatic placement +- Use `dtype=torch.float16` or `torch.bfloat16` + +### Conversion fails with architecture error +**Fix:** +- Ensure llama.cpp supports the model architecture +- Check for standard architecture (Qwen, Llama, Mistral, etc.) +- Update llama.cpp to latest: `git clone --depth 1 https://github.com/ggerganov/llama.cpp.git` +- Check llama.cpp documentation for model support + +### Quantization fails +**Fix:** +- Verify build tools installed: `apt-get install build-essential cmake` +- Use CMake (not make) to build quantize tool +- Check binary path: `/tmp/llama.cpp/build/bin/llama-quantize` +- Verify FP16 GGUF exists before quantizing + +### Missing sentencepiece error +**Fix:** +- Add to PEP 723 header: `"sentencepiece>=0.1.99", "protobuf>=3.20.0"` +- Don't remove dependencies to "simplify" - all are required + +### Upload fails or times out +**Fix:** +- Large models (>2GB) need longer timeout: `"timeout": "1h"` +- Upload quantized versions separately if needed +- Check network/Hub status + +## Lessons Learned + +These are from production testing and real failures: + +### 1. Always Verify Before Use +**Lesson:** Don't assume repos/datasets exist. Check first. +```python +# BEFORE submitting job +hub_repo_details(["trl-lib/argilla-dpo-mix-7k"], repo_type="dataset") # Would catch error +``` +**Prevented failures:** Non-existent dataset names, typos in model names + +### 2. Prioritize Reliability Over Performance +**Lesson:** Default to what's most likely to succeed. +- Use CMake (not make) - more reliable +- Disable CUDA in build - faster, not needed +- Include all dependencies - don't "simplify" + +**Prevented failures:** Build failures, missing binaries + +### 3. Create Atomic, Self-Contained Scripts +**Lesson:** Don't remove dependencies or steps. 
Scripts should work as a unit. +- All dependencies in PEP 723 header +- All build steps included +- Clear error messages + +**Prevented failures:** Missing tokenizer libraries, build tool failures + +## References + +**In this skill:** +- `scripts/convert_to_gguf.py` - Complete, production-ready script + +**External:** +- [llama.cpp Repository](https://github.com/ggerganov/llama.cpp) +- [GGUF Specification](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md) +- [Ollama Documentation](https://ollama.ai) +- [LM Studio](https://lmstudio.ai) + +## Summary + +**Critical checklist for GGUF conversion:** +- [ ] Verify adapter and base models exist on Hub +- [ ] Use production script from `scripts/convert_to_gguf.py` +- [ ] All dependencies in PEP 723 header (including sentencepiece, protobuf) +- [ ] Build tools installed before cloning llama.cpp +- [ ] CMake used for building quantize tool (not make) +- [ ] Correct binary path: `/tmp/llama.cpp/build/bin/llama-quantize` +- [ ] A10G GPU selected for reasonable conversion time +- [ ] Timeout set to 45m minimum +- [ ] HF_TOKEN in secrets for Hub upload + +**The script in `scripts/convert_to_gguf.py` incorporates all these lessons and has been tested successfully in production.** diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/hardware_guide.md b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/hardware_guide.md new file mode 100644 index 00000000..22eba945 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/hardware_guide.md @@ -0,0 +1,283 @@ +# Hardware Selection Guide + +Choosing the right hardware (flavor) is critical for cost-effective training. 
+ +## Available Hardware + +### CPU +- `cpu-basic` - Basic CPU, testing only +- `cpu-upgrade` - Enhanced CPU + +**Use cases:** Dataset validation, preprocessing, testing scripts +**Not recommended for training:** Too slow for any meaningful training + +### GPU Options + +| Flavor | GPU | Memory | Use Case | Cost/hour | +|--------|-----|--------|----------|-----------| +| `t4-small` | NVIDIA T4 | 16GB | <1B models, demos | ~$0.50-1 | +| `t4-medium` | NVIDIA T4 | 16GB | 1-3B models, development | ~$1-2 | +| `l4x1` | NVIDIA L4 | 24GB | 3-7B models, efficient training | ~$2-3 | +| `l4x4` | 4x NVIDIA L4 | 96GB | Multi-GPU training | ~$8-12 | +| `a10g-small` | NVIDIA A10G | 24GB | 3-7B models, production | ~$3-4 | +| `a10g-large` | NVIDIA A10G | 24GB | 7-13B models | ~$4-6 | +| `a10g-largex2` | 2x NVIDIA A10G | 48GB | Multi-GPU, large models | ~$8-12 | +| `a10g-largex4` | 4x NVIDIA A10G | 96GB | Multi-GPU, very large models | ~$16-24 | +| `a100-large` | NVIDIA A100 | 40GB | 13B+ models, fast training | ~$8-12 | + +### TPU Options + +| Flavor | Type | Use Case | +|--------|------|----------| +| `v5e-1x1` | TPU v5e | Small TPU workloads | +| `v5e-2x2` | 4x TPU v5e | Medium TPU workloads | +| `v5e-2x4` | 8x TPU v5e | Large TPU workloads | + +**Note:** TPUs require TPU-optimized code. Most TRL training uses GPUs. 
+ +## Selection Guidelines + +### By Model Size + +**Tiny Models (<1B parameters)** +- **Recommended:** `t4-small` +- **Example:** Qwen2.5-0.5B, TinyLlama +- **Batch size:** 4-8 +- **Training time:** 1-2 hours for 1K examples + +**Small Models (1-3B parameters)** +- **Recommended:** `t4-medium` or `a10g-small` +- **Example:** Qwen2.5-1.5B, Phi-2 +- **Batch size:** 2-4 +- **Training time:** 2-4 hours for 10K examples + +**Medium Models (3-7B parameters)** +- **Recommended:** `a10g-small` or `a10g-large` +- **Example:** Qwen2.5-7B, Mistral-7B +- **Batch size:** 1-2 (or LoRA with 4-8) +- **Training time:** 4-8 hours for 10K examples + +**Large Models (7-13B parameters)** +- **Recommended:** `a10g-large` or `a100-large` +- **Example:** Llama-3-8B, Mixtral-8x7B (with LoRA) +- **Batch size:** 1 (full fine-tuning) or 2-4 (LoRA) +- **Training time:** 6-12 hours for 10K examples +- **Note:** Always use LoRA/PEFT + +**Very Large Models (13B+ parameters)** +- **Recommended:** `a100-large` with LoRA +- **Example:** Llama-3-13B, Llama-3-70B (LoRA only) +- **Batch size:** 1-2 with LoRA +- **Training time:** 8-24 hours for 10K examples +- **Note:** Full fine-tuning not feasible, use LoRA/PEFT + +### By Budget + +**Minimal Budget (<$5 total)** +- Use `t4-small` +- Train on subset of data (100-500 examples) +- Limit to 1-2 epochs +- Use small model (<1B) + +**Small Budget ($5-20)** +- Use `t4-medium` or `a10g-small` +- Train on 1K-5K examples +- 2-3 epochs +- Model up to 3B parameters + +**Medium Budget ($20-50)** +- Use `a10g-small` or `a10g-large` +- Train on 5K-20K examples +- 3-5 epochs +- Model up to 7B parameters + +**Large Budget ($50-200)** +- Use `a10g-large` or `a100-large` +- Full dataset training +- Multiple epochs +- Model up to 13B parameters with LoRA + +### By Training Type + +**Quick Demo/Experiment** +- `t4-small` +- 50-100 examples +- 5-10 steps +- ~10-15 minutes + +**Development/Iteration** +- `t4-medium` or `a10g-small` +- 1K examples +- 1 epoch +- ~30-60 
minutes + +**Production Training** +- `a10g-large` or `a100-large` +- Full dataset +- 3-5 epochs +- 4-12 hours + +**Research/Experimentation** +- `a100-large` +- Multiple runs +- Various hyperparameters +- Budget for 20-50 hours + +## Memory Considerations + +### Estimating Memory Requirements + +**Full fine-tuning:** +``` +Memory (GB) ≈ (Model params in billions) × 20 +``` + +**LoRA fine-tuning:** +``` +Memory (GB) ≈ (Model params in billions) × 4 +``` + +**Examples:** +- Qwen2.5-0.5B full: ~10GB ✅ fits t4-small +- Qwen2.5-1.5B full: ~30GB ❌ exceeds most GPUs +- Qwen2.5-1.5B LoRA: ~6GB ✅ fits t4-small +- Qwen2.5-7B full: ~140GB ❌ not feasible +- Qwen2.5-7B LoRA: ~28GB ✅ fits a10g-large + +### Memory Optimization + +If hitting memory limits: + +1. **Use LoRA/PEFT** + ```python + peft_config=LoraConfig(r=16, lora_alpha=32) + ``` + +2. **Reduce batch size** + ```python + per_device_train_batch_size=1 + ``` + +3. **Increase gradient accumulation** + ```python + gradient_accumulation_steps=8 # Effective batch size = 1×8 + ``` + +4. **Enable gradient checkpointing** + ```python + gradient_checkpointing=True + ``` + +5. **Use mixed precision** + ```python + bf16=True # or fp16=True + ``` + +6. **Upgrade to larger GPU** + - t4 → a10g → a100 + +## Cost Estimation + +### Formula + +``` +Total Cost = (Hours of training) × (Cost per hour) +``` + +### Example Calculations + +**Quick demo:** +- Hardware: t4-small ($0.75/hour) +- Time: 15 minutes (0.25 hours) +- Cost: $0.19 + +**Development training:** +- Hardware: a10g-small ($3.50/hour) +- Time: 2 hours +- Cost: $7.00 + +**Production training:** +- Hardware: a10g-large ($5/hour) +- Time: 6 hours +- Cost: $30.00 + +**Large model with LoRA:** +- Hardware: a100-large ($10/hour) +- Time: 8 hours +- Cost: $80.00 + +### Cost Optimization Tips + +1. **Start small:** Test on t4-small with subset +2. **Use LoRA:** 4-5x cheaper than full fine-tuning +3. **Optimize hyperparameters:** Fewer epochs if possible +4. 
**Set appropriate timeout:** Don't waste compute on stalled jobs +5. **Use checkpointing:** Resume if job fails +6. **Monitor costs:** Check running jobs regularly + +## Multi-GPU Training + +TRL automatically handles multi-GPU training with Accelerate when using multi-GPU flavors. + +**Multi-GPU flavors:** +- `l4x4` - 4x L4 GPUs +- `a10g-largex2` - 2x A10G GPUs +- `a10g-largex4` - 4x A10G GPUs + +**When to use:** +- Models >13B parameters +- Need faster training (linear speedup) +- Large datasets (>50K examples) + +**Example:** +```python +hf_jobs("uv", { + "script": "train.py", + "flavor": "a10g-largex2", # 2 GPUs + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +No code changes needed—TRL/Accelerate handles distribution automatically. + +## Choosing Between Options + +### a10g vs a100 + +**Choose a10g when:** +- Model <13B parameters +- Budget conscious +- Training time not critical + +**Choose a100 when:** +- Model 13B+ parameters +- Need fastest training +- Memory requirements high +- Budget allows + +### Single vs Multi-GPU + +**Choose single GPU when:** +- Model <7B parameters +- Budget constrained +- Simpler debugging + +**Choose multi-GPU when:** +- Model >13B parameters +- Need faster training +- Large batch sizes required +- Cost-effective for large jobs + +## Quick Reference + +```python +# Model size → Hardware selection +HARDWARE_MAP = { + "<1B": "t4-small", + "1-3B": "a10g-small", + "3-7B": "a10g-large", + "7-13B": "a10g-large (LoRA) or a100-large", + ">13B": "a100-large (LoRA required)" +} +``` diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/hub_saving.md b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/hub_saving.md new file mode 100644 index 00000000..734e49b5 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/hub_saving.md @@ -0,0 +1,364 @@ +# Saving Training Results to Hugging Face Hub + +**⚠️ 
CRITICAL:** Training environments are ephemeral. ALL results are lost when a job completes unless pushed to the Hub. + +## Why Hub Push is Required + +When running on Hugging Face Jobs: +- Environment is temporary +- All files deleted on job completion +- No local disk persistence +- Cannot access results after job ends + +**Without Hub push, training is completely wasted.** + +## Required Configuration + +### 1. Training Configuration + +In your SFTConfig or trainer config: + +```python +SFTConfig( + push_to_hub=True, # Enable Hub push + hub_model_id="username/model-name", # Target repository +) +``` + +### 2. Job Configuration + +When submitting the job: + +```python +hf_jobs("uv", { + "script": "train.py", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Provide authentication +}) +``` + +**The `$HF_TOKEN` placeholder is automatically replaced with your Hugging Face token.** + +## Complete Example + +```python +# train.py +# /// script +# dependencies = ["trl"] +# /// + +from trl import SFTTrainer, SFTConfig +from datasets import load_dataset + +dataset = load_dataset("trl-lib/Capybara", split="train") + +# Configure with Hub push +config = SFTConfig( + output_dir="my-model", + num_train_epochs=3, + + # ✅ CRITICAL: Hub push configuration + push_to_hub=True, + hub_model_id="myusername/my-trained-model", + + # Optional: Push strategy + hub_strategy="every_save", # When to push: "end", "every_save", "checkpoint" + hub_private_repo=False, # Set True for a private repo + hub_token=None, # Uses HF_TOKEN from environment +) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + args=config, +) + +trainer.train() + +# ✅ Push final model +trainer.push_to_hub() + +print("✅ Model saved to: https://huggingface.co/myusername/my-trained-model") +``` + +**Submit with authentication:** + +```python +hf_jobs("uv", { + "script": "train.py", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Required! +}) +``` + +## What Gets Saved + +When `push_to_hub=True`: + +1. 
**Model weights** - Final trained parameters +2. **Tokenizer** - Associated tokenizer +3. **Configuration** - Model config (config.json) +4. **Training arguments** - Hyperparameters used +5. **Model card** - Auto-generated documentation +6. **Checkpoints** - If `save_strategy="steps"` enabled + +## Checkpoint Saving + +Save intermediate checkpoints during training: + +```python +SFTConfig( + output_dir="my-model", + push_to_hub=True, + hub_model_id="username/my-model", + + # Checkpoint configuration + save_strategy="steps", + save_steps=100, # Save every 100 steps + save_total_limit=3, # Keep only last 3 checkpoints +) +``` + +**Benefits:** +- Resume training if job fails +- Compare checkpoint performance +- Use intermediate models + +**Checkpoints are pushed to:** `username/my-model` (same repo) + +## Authentication Methods + +### Method 1: Automatic Token (Recommended) + +```python +"secrets": {"HF_TOKEN": "$HF_TOKEN"} +``` + +Uses your logged-in Hugging Face token automatically. + +### Method 2: Explicit Token + +```python +"secrets": {"HF_TOKEN": "hf_abc123..."} +``` + +Provide token explicitly (not recommended for security). + +### Method 3: Environment Variable + +```python +"env": {"HF_TOKEN": "hf_abc123..."} +``` + +Pass as regular environment variable (less secure than secrets). + +**Always prefer Method 1** for security and convenience. + +## Verification Checklist + +Before submitting any training job, verify: + +- [ ] `push_to_hub=True` in training config +- [ ] `hub_model_id` is specified (format: `username/model-name`) +- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +- [ ] Repository name doesn't conflict with existing repos +- [ ] You have write access to the target namespace + +## Repository Setup + +### Automatic Creation + +If repository doesn't exist, it's created automatically when first pushing. 
+ +### Manual Creation + +Create repository before training: + +```python +from huggingface_hub import HfApi + +api = HfApi() +api.create_repo( + repo_id="username/model-name", + repo_type="model", + private=False, # or True for private repo +) +``` + +### Repository Naming + +**Valid names:** +- `username/my-model` +- `username/model-name` +- `organization/model-name` + +**Invalid names:** +- `model-name` (missing username) +- `username/model name` (spaces not allowed) +- `username/MODEL` (uppercase discouraged) + +## Troubleshooting + +### Error: 401 Unauthorized + +**Cause:** HF_TOKEN not provided or invalid + +**Solutions:** +1. Verify `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +2. Check you're logged in: `hf auth whoami` +3. Re-login: `hf auth login` + +### Error: 403 Forbidden + +**Cause:** No write access to repository + +**Solutions:** +1. Check repository namespace matches your username +2. Verify you're a member of organization (if using org namespace) +3. Check repository isn't private (if accessing org repo) + +### Error: Repository not found + +**Cause:** Repository doesn't exist and auto-creation failed + +**Solutions:** +1. Manually create repository first +2. Check repository name format +3. Verify namespace exists + +### Error: Push failed during training + +**Cause:** Network issues or Hub unavailable + +**Solutions:** +1. Training continues but final push fails +2. Checkpoints may be saved +3. Re-run push manually after job completes + +### Issue: Model saved but not visible + +**Possible causes:** +1. Repository is private—check https://huggingface.co/username +2. Wrong namespace—verify `hub_model_id` matches login +3. 
Push still in progress—wait a few minutes + +## Manual Push After Training + +If training completes but push fails, push manually: + +```python +from transformers import AutoModel, AutoTokenizer + +# Load from local checkpoint +model = AutoModel.from_pretrained("./output_dir") +tokenizer = AutoTokenizer.from_pretrained("./output_dir") + +# Push to Hub +model.push_to_hub("username/model-name", token="hf_abc123...") +tokenizer.push_to_hub("username/model-name", token="hf_abc123...") +``` + +**Note:** This only works while the checkpoint files still exist—run it inside the job before it exits, since the job's ephemeral storage is deleted on completion. + +## Best Practices + +1. **Always enable `push_to_hub=True`** +2. **Use checkpoint saving** for long training runs +3. **Verify Hub push** in logs before job completes +4. **Set appropriate `save_total_limit`** to avoid excessive checkpoints +5. **Use descriptive repo names** (e.g., `qwen-capybara-sft` not `model1`) +6. **Add model card** with training details +7. **Tag models** with relevant tags (e.g., `text-generation`, `fine-tuned`) + +## Monitoring Push Progress + +Check logs for push progress: + +```python +hf_jobs("logs", {"job_id": "your-job-id"}) +``` + +**Look for:** +``` +Pushing model to username/model-name... +Upload file pytorch_model.bin: 100% +✅ Model pushed successfully +``` + +## Example: Full Production Setup + +```python +# production_train.py +# /// script +# dependencies = ["trl>=0.12.0", "peft>=0.7.0"] +# /// + +from datasets import load_dataset +from peft import LoraConfig +from trl import SFTTrainer, SFTConfig +import os + +# Verify token is available +assert "HF_TOKEN" in os.environ, "HF_TOKEN not found in environment!"
+ +# Load dataset +dataset = load_dataset("trl-lib/Capybara", split="train") +print(f"✅ Dataset loaded: {len(dataset)} examples") + +# Configure with comprehensive Hub settings +config = SFTConfig( + output_dir="qwen-capybara-sft", + + # Hub configuration + push_to_hub=True, + hub_model_id="myusername/qwen-capybara-sft", + hub_strategy="checkpoint", # Push checkpoints + + # Checkpoint configuration + save_strategy="steps", + save_steps=100, + save_total_limit=3, + + # Training settings + num_train_epochs=3, + per_device_train_batch_size=4, + + # Logging + logging_steps=10, + logging_first_step=True, +) + +# Train with LoRA +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + args=config, + peft_config=LoraConfig(r=16, lora_alpha=32), +) + +print("🚀 Starting training...") +trainer.train() + +print("💾 Pushing final model to Hub...") +trainer.push_to_hub() + +print("✅ Training complete!") +print(f"Model available at: https://huggingface.co/myusername/qwen-capybara-sft") +``` + +**Submit:** + +```python +hf_jobs("uv", { + "script": "production_train.py", + "flavor": "a10g-large", + "timeout": "6h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +## Key Takeaway + +**Without `push_to_hub=True` and `secrets={"HF_TOKEN": "$HF_TOKEN"}`, all training results are permanently lost.** + +Always verify both are configured before submitting any training job. diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/local_training_macos.md b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/local_training_macos.md new file mode 100644 index 00000000..fdf5dede --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/local_training_macos.md @@ -0,0 +1,231 @@ +# Local Training on macOS (Apple Silicon) + +Run small LoRA fine-tuning jobs locally on Mac for smoke tests and quick iteration before submitting to HF Jobs. 
+ +## When to Use Local Mac vs HF Jobs + +| Local Mac | HF Jobs / Cloud GPU | +|-----------|-------------------| +| Model ≤3B, text-only | Model 7B+ | +| LoRA/PEFT only | QLoRA 4-bit (CUDA/bitsandbytes) | +| Short context (≤1024) | Long context / full fine-tuning | +| Smoke tests, dataset validation | Production runs, VLMs | + +**Typical workflow:** local smoke test → HF Jobs with same config → export/quantize ([gguf_conversion.md](gguf_conversion.md)) + +## Recommended Defaults + +| Setting | Value | Notes | +|---------|-------|-------| +| Model size | 0.5B–1.5B first run | Scale up after verifying | +| Max seq length | 512–1024 | Lower = less memory | +| Batch size | 1 | Scale via gradient accumulation | +| Gradient accumulation | 8–16 | Effective batch = 8–16 | +| LoRA rank (r) | 8–16 | alpha = 2×r | +| Dtype | float32 | fp16 causes NaN on MPS; bf16 only on M1 Pro+ and M2/M3/M4 | + +### Memory by hardware + +| Unified RAM | Max Model Size | +|-------------|---------------| +| 16 GB | ~0.5B–1.5B | +| 32 GB | ~1.5B–3B | +| 64 GB | ~3B (short context) | + +## Setup + +```bash +xcode-select --install +python3 -m venv .venv && source .venv/bin/activate +pip install -U "torch>=2.2" "transformers>=4.40" "trl>=0.12" "peft>=0.10" \ + datasets accelerate safetensors huggingface_hub +``` + +Verify MPS: +```bash +python -c "import torch; print(torch.__version__, '| MPS:', torch.backends.mps.is_available())" +``` + +Optional — configure Accelerate for local Mac (no distributed, no mixed precision, MPS device): +```bash +accelerate config +``` + +## Training Script + +
+train_lora_sft.py + +```python +import os +from dataclasses import dataclass +from typing import Optional +import torch +from datasets import load_dataset +from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed +from peft import LoraConfig +from trl import SFTTrainer, SFTConfig + +set_seed(42) + +@dataclass +class Cfg: + model_id: str = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-0.5B-Instruct") + dataset_id: str = os.environ.get("DATASET_ID", "HuggingFaceH4/ultrachat_200k") + dataset_split: str = os.environ.get("DATASET_SPLIT", "train_sft[:500]") + data_files: Optional[str] = os.environ.get("DATA_FILES", None) + text_field: str = os.environ.get("TEXT_FIELD", "") + messages_field: str = os.environ.get("MESSAGES_FIELD", "messages") + out_dir: str = os.environ.get("OUT_DIR", "outputs/local-lora") + max_seq_length: int = int(os.environ.get("MAX_SEQ_LENGTH", "512")) + max_steps: int = int(os.environ.get("MAX_STEPS", "-1")) + +cfg = Cfg() +device = "mps" if torch.backends.mps.is_available() else "cpu" + +tokenizer = AutoTokenizer.from_pretrained(cfg.model_id, use_fast=True) +if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token +tokenizer.padding_side = "right" + +model = AutoModelForCausalLM.from_pretrained(cfg.model_id, torch_dtype=torch.float32) +model.to(device) +model.config.use_cache = False + +if cfg.data_files: + ds = load_dataset("json", data_files=cfg.data_files, split="train") +else: + ds = load_dataset(cfg.dataset_id, split=cfg.dataset_split) + +def format_example(ex): + if cfg.text_field and isinstance(ex.get(cfg.text_field), str): + ex["text"] = ex[cfg.text_field] + return ex + msgs = ex.get(cfg.messages_field) + if isinstance(msgs, list): + if hasattr(tokenizer, "apply_chat_template"): + try: + ex["text"] = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=False) + return ex + except Exception: + pass + ex["text"] = "\n".join([str(m) for m in msgs]) + return ex + ex["text"] = str(ex) + return ex 
+ +ds = ds.map(format_example) +ds = ds.remove_columns([c for c in ds.column_names if c != "text"]) + +lora = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, bias="none", + task_type="CAUSAL_LM", target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]) + +sft_kwargs = dict( + output_dir=cfg.out_dir, per_device_train_batch_size=1, gradient_accumulation_steps=8, + learning_rate=2e-4, logging_steps=10, save_steps=200, save_total_limit=2, + gradient_checkpointing=True, report_to="none", fp16=False, bf16=False, + max_seq_length=cfg.max_seq_length, dataset_text_field="text", +) +if cfg.max_steps > 0: + sft_kwargs["max_steps"] = cfg.max_steps +else: + sft_kwargs["num_train_epochs"] = 1 + +trainer = SFTTrainer(model=model, train_dataset=ds, peft_config=lora, + args=SFTConfig(**sft_kwargs), processing_class=tokenizer) +trainer.train() +trainer.save_model(cfg.out_dir) +print(f"✅ Saved to: {cfg.out_dir}") +``` + +
+ +### Run + +```bash +python train_lora_sft.py +``` + +**Env overrides:** + +```bash +MODEL_ID="Qwen/Qwen2.5-1.5B-Instruct" python train_lora_sft.py # different model +MAX_STEPS=50 python train_lora_sft.py # quick 50-step test +DATA_FILES="my_data.jsonl" python train_lora_sft.py # local JSONL file +PYTORCH_ENABLE_MPS_FALLBACK=1 python train_lora_sft.py # MPS op fallback to CPU +PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 python train_lora_sft.py # disable MPS memory limit (use with caution) +``` + +**Local JSONL format** — chat messages or plain text: +```jsonl +{"messages": [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi!"}]} +``` +```jsonl +{"text": "User: Hello\nAssistant: Hi!"} +``` +For plain text: `DATA_FILES="file.jsonl" TEXT_FIELD="text" MESSAGES_FIELD="" python train_lora_sft.py` + +### Verify Success + +- Loss decreases over steps +- `outputs/local-lora/` contains `adapter_config.json` + `*.safetensors` + +## Quick Evaluation + +
+eval_generate.py + +```python +import os, torch +from transformers import AutoTokenizer, AutoModelForCausalLM +from peft import PeftModel + +BASE = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-0.5B-Instruct") +ADAPTER = os.environ.get("ADAPTER_DIR", "outputs/local-lora") +device = "mps" if torch.backends.mps.is_available() else "cpu" + +tokenizer = AutoTokenizer.from_pretrained(BASE, use_fast=True) +model = AutoModelForCausalLM.from_pretrained(BASE, torch_dtype=torch.float32) +model.to(device) +model = PeftModel.from_pretrained(model, ADAPTER) + +prompt = os.environ.get("PROMPT", "Explain gradient accumulation in 3 bullet points.") +inputs = tokenizer(prompt, return_tensors="pt").to(model.device) +with torch.no_grad(): + out = model.generate(**inputs, max_new_tokens=120, do_sample=True, temperature=0.7, top_p=0.9) +print(tokenizer.decode(out[0], skip_special_tokens=True)) +``` + +
+ +## Troubleshooting (macOS-Specific) + +For general training issues, see [troubleshooting.md](troubleshooting.md). + +| Problem | Fix | +|---------|-----| +| MPS unsupported op / crash | `PYTORCH_ENABLE_MPS_FALLBACK=1` | +| OOM / system instability | Reduce `MAX_SEQ_LENGTH`, use smaller model, set `PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0` (caution) | +| fp16 NaN / loss explosion | Keep `fp16=False` (default), lower learning rate | +| LoRA "module not found" | Print `model.named_modules()` to find correct target names | +| TRL TypeError on args | Check TRL version; script uses `SFTConfig` + `processing_class` (TRL ≥0.12) | +| Intel Mac | No MPS — use HF Jobs instead | + +**Common LoRA target modules by architecture:** + +| Architecture | target_modules | +|-------------|---------------| +| Llama/Qwen/Mistral | `q_proj`, `k_proj`, `v_proj`, `o_proj` | +| GPT-2/GPT-J | `c_attn`, `c_proj` | +| BLOOM | `query_key_value`, `dense` | + +## MLX Alternative + +[MLX](https://github.com/ml-explore/mlx) offers tighter Apple Silicon integration but has a smaller ecosystem and less mature training APIs. For this skill's workflow (local validation → HF Jobs), PyTorch + MPS is recommended for consistency. See [mlx-lm](https://github.com/ml-explore/mlx-lm) for MLX-based fine-tuning. 
+ +## See Also + +- [troubleshooting.md](troubleshooting.md) — General TRL troubleshooting +- [hardware_guide.md](hardware_guide.md) — GPU selection for HF Jobs +- [gguf_conversion.md](gguf_conversion.md) — Export for on-device inference +- [training_methods.md](training_methods.md) — SFT, DPO, GRPO overview diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/reliability_principles.md b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/reliability_principles.md new file mode 100644 index 00000000..bf2f7458 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/reliability_principles.md @@ -0,0 +1,371 @@ +# Reliability Principles for Training Jobs + +These principles are derived from real production failures and successful fixes. Following them prevents common failure modes and ensures reliable job execution. + +## Principle 1: Always Verify Before Use + +**Rule:** Never assume repos, datasets, or resources exist. Verify with tools first. 
+ +### What It Prevents + +- **Non-existent datasets** - Jobs fail immediately when dataset doesn't exist +- **Typos in names** - Simple mistakes like "argilla-dpo-mix-7k" vs "ultrafeedback_binarized" +- **Incorrect paths** - Old or moved repos, renamed files +- **Missing dependencies** - Undocumented requirements + +### How to Apply + +**Before submitting ANY job:** + +```python +# Verify dataset exists +dataset_search({"query": "dataset-name", "author": "author-name", "limit": 5}) +hub_repo_details(["author/dataset-name"], repo_type="dataset") + +# Verify model exists +hub_repo_details(["org/model-name"], repo_type="model") + +# Check script/file paths (for URL-based scripts) +# Verify before using: https://github.com/user/repo/blob/main/script.py +``` + +**Examples that would have caught errors:** + +```python +# ❌ WRONG: Assumed dataset exists +hf_jobs("uv", { + "script": """...""", + "env": {"DATASET": "trl-lib/argilla-dpo-mix-7k"} # Doesn't exist! +}) + +# ✅ CORRECT: Verify first +dataset_search({"query": "argilla dpo", "author": "trl-lib"}) +# Would show: "trl-lib/ultrafeedback_binarized" is the correct name + +hub_repo_details(["trl-lib/ultrafeedback_binarized"], repo_type="dataset") +# Confirms it exists before using +``` + +### Implementation Checklist + +- [ ] Check dataset exists before training +- [ ] Verify base model exists before fine-tuning +- [ ] Confirm adapter model exists before GGUF conversion +- [ ] Test script URLs are valid before submitting +- [ ] Validate file paths in repositories +- [ ] Check for recent updates/renames of resources + +**Time cost:** 5-10 seconds +**Time saved:** Hours of failed job time + debugging + +--- + +## Principle 2: Prioritize Reliability Over Performance + +**Rule:** Default to what is most likely to succeed, not what is theoretically fastest. 
+ +### What It Prevents + +- **Hardware incompatibilities** - Features that fail on certain GPUs +- **Unstable optimizations** - Speed-ups that cause crashes +- **Complex configurations** - More failure points +- **Build system issues** - Unreliable compilation methods + +### How to Apply + +**Choose reliability:** + +```python +# ❌ RISKY: Aggressive optimization that may fail +SFTConfig( + torch_compile=True, # Can fail on T4, A10G GPUs + optim="adamw_bnb_8bit", # Requires specific setup + fp16=False, # May cause training instability + ... +) + +# ✅ SAFE: Proven defaults +SFTConfig( + # torch_compile=True, # Commented with note: "Enable on H100 for 20% speedup" + optim="adamw_torch", # Standard, always works + fp16=True, # Stable and fast + ... +) +``` + +**For build processes:** + +```python +# ❌ UNRELIABLE: Uses make (platform-dependent) +subprocess.run(["make", "-C", "/tmp/llama.cpp", "llama-quantize"], check=True) + +# ✅ RELIABLE: Uses CMake (consistent, documented) +subprocess.run([ + "cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp", + "-DGGML_CUDA=OFF" # Disable CUDA for faster, more reliable build +], check=True) + +subprocess.run([ + "cmake", "--build", "/tmp/llama.cpp/build", + "--target", "llama-quantize", "-j", "4" +], check=True) +``` + +### Real-World Example + +**The `torch.compile` failure:** +- Added for "20% speedup" on H100 +- **Failed fatally on T4-medium** with cryptic error +- Misdiagnosed as dataset issue (cost hours) +- **Fix:** Disable by default, add as optional comment + +**Result:** Reliability > 20% performance gain + +### Implementation Checklist + +- [ ] Use proven, standard configurations by default +- [ ] Comment out performance optimizations with hardware notes +- [ ] Use stable build systems (CMake > make) +- [ ] Test on target hardware before production +- [ ] Document known incompatibilities +- [ ] Provide "safe" and "fast" variants when needed + +**Performance loss:** 10-20% in best case +**Reliability gain:** 95%+ 
success rate vs 60-70% + +--- + +## Principle 3: Create Atomic, Self-Contained Scripts + +**Rule:** Scripts should work as complete, independent units. Don't remove parts to "simplify." + +### What It Prevents + +- **Missing dependencies** - Removed "unnecessary" packages that are actually required +- **Incomplete processes** - Skipped steps that seem redundant +- **Environment assumptions** - Scripts that need pre-setup +- **Partial failures** - Some parts work, others fail silently + +### How to Apply + +**Complete dependency specifications:** + +```python +# ❌ INCOMPLETE: "Simplified" by removing dependencies +# /// script +# dependencies = [ +# "transformers", +# "peft", +# "torch", +# ] +# /// + +# ✅ COMPLETE: All dependencies explicit +# /// script +# dependencies = [ +# "transformers>=4.36.0", +# "peft>=0.7.0", +# "torch>=2.0.0", +# "accelerate>=0.24.0", +# "huggingface_hub>=0.20.0", +# "sentencepiece>=0.1.99", # Required for tokenizers +# "protobuf>=3.20.0", # Required for tokenizers +# "numpy", +# "gguf", +# ] +# /// +``` + +**Complete build processes:** + +```python +# ❌ INCOMPLETE: Assumes build tools exist +subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", "/tmp/llama.cpp"]) +subprocess.run(["make", "-C", "/tmp/llama.cpp", "llama-quantize"]) # FAILS: no gcc/make + +# ✅ COMPLETE: Installs all requirements +subprocess.run(["apt-get", "update", "-qq"], check=True) +subprocess.run(["apt-get", "install", "-y", "-qq", "build-essential", "cmake"], check=True) +subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", "/tmp/llama.cpp"]) +# ... 
then build +``` + +### Real-World Example + +**The `sentencepiece` failure:** +- Original script had it: worked fine +- "Simplified" version removed it: "doesn't look necessary" +- **GGUF conversion failed silently** - tokenizer couldn't convert +- Hard to debug: no obvious error message +- **Fix:** Restore all original dependencies + +**Result:** Don't remove dependencies without thorough testing + +### Implementation Checklist + +- [ ] All dependencies in PEP 723 header with version pins +- [ ] All system packages installed by script +- [ ] No assumptions about pre-existing environment +- [ ] No "optional" steps that are actually required +- [ ] Test scripts in clean environment +- [ ] Document why each dependency is needed + +**Complexity:** Slightly longer scripts +**Reliability:** Scripts "just work" every time + +--- + +## Principle 4: Provide Clear Error Context + +**Rule:** When things fail, make it obvious what went wrong and how to fix it. + +### How to Apply + +**Wrap subprocess calls:** + +```python +# ❌ UNCLEAR: Silent failure +subprocess.run([...], check=True, capture_output=True) + +# ✅ CLEAR: Shows what failed +try: + result = subprocess.run( + [...], + check=True, + capture_output=True, + text=True + ) + print(result.stdout) + if result.stderr: + print("Warnings:", result.stderr) +except subprocess.CalledProcessError as e: + print(f"❌ Command failed!") + print("STDOUT:", e.stdout) + print("STDERR:", e.stderr) + raise +``` + +**Validate inputs:** + +```python +# ❌ UNCLEAR: Fails later with cryptic error +model = load_model(MODEL_NAME) + +# ✅ CLEAR: Fails fast with clear message +if not MODEL_NAME: + raise ValueError("MODEL_NAME environment variable not set!") + +print(f"Loading model: {MODEL_NAME}") +try: + model = load_model(MODEL_NAME) + print(f"✅ Model loaded successfully") +except Exception as e: + print(f"❌ Failed to load model: {MODEL_NAME}") + print(f"Error: {e}") + print("Hint: Check that model exists on Hub") + raise +``` + +### 
Implementation Checklist + +- [ ] Wrap external calls with try/except +- [ ] Print stdout/stderr on failure +- [ ] Validate environment variables early +- [ ] Add progress indicators (✅, ❌, 🔄) +- [ ] Include hints for common failures +- [ ] Log configuration at start + +--- + +## Principle 5: Test the Happy Path on Known-Good Inputs + +**Rule:** Before using new code in production, test with inputs you know work. + +### How to Apply + +**Known-good test inputs:** + +```python +# For training +TEST_DATASET = "trl-lib/Capybara" # Small, well-formatted, widely used +TEST_MODEL = "Qwen/Qwen2.5-0.5B" # Small, fast, reliable + +# For GGUF conversion +TEST_ADAPTER = "evalstate/qwen-capybara-medium" # Known working model +TEST_BASE = "Qwen/Qwen2.5-0.5B" # Compatible base +``` + +**Testing workflow:** + +1. Test with known-good inputs first +2. If that works, try production inputs +3. If production fails, you know it's the inputs (not code) +4. Isolate the difference + +### Implementation Checklist + +- [ ] Maintain list of known-good test models/datasets +- [ ] Test new scripts with test inputs first +- [ ] Document what makes inputs "good" +- [ ] Keep test jobs cheap (small models, short timeouts) +- [ ] Only move to production after test succeeds + +**Time cost:** 5-10 minutes for test run +**Debugging time saved:** Hours + +--- + +## Summary: The Reliability Checklist + +Before submitting ANY job: + +### Pre-Flight Checks +- [ ] **Verified** all repos/datasets exist (hub_repo_details) +- [ ] **Tested** with known-good inputs if new code +- [ ] **Using** proven hardware/configuration +- [ ] **Included** all dependencies in PEP 723 header +- [ ] **Installed** system requirements (build tools, etc.) 
+- [ ] **Set** appropriate timeout (not default 30m) +- [ ] **Configured** Hub push with HF_TOKEN +- [ ] **Added** clear error handling + +### Script Quality +- [ ] Self-contained (no external setup needed) +- [ ] Complete dependencies listed +- [ ] Build tools installed by script +- [ ] Progress indicators included +- [ ] Error messages are clear +- [ ] Configuration logged at start + +### Job Configuration +- [ ] Timeout > expected runtime + 30% buffer +- [ ] Hardware appropriate for model size +- [ ] Secrets include HF_TOKEN +- [ ] Environment variables set correctly +- [ ] Cost estimated and acceptable + +**Following these principles transforms job success rate from ~60-70% to ~95%+** + +--- + +## When Principles Conflict + +Sometimes reliability and performance conflict. Here's how to choose: + +| Scenario | Choose | Rationale | +|----------|--------|-----------| +| Demo/test | Reliability | Fast failure is worse than slow success | +| Production (first run) | Reliability | Prove it works before optimizing | +| Production (proven) | Performance | Safe to optimize after validation | +| Time-critical | Reliability | Failures cause more delay than slow runs | +| Cost-critical | Balanced | Test with small model, then optimize | + +**General rule:** Reliability first, optimize second. 
+ +--- + +## Further Reading + +- `troubleshooting.md` - Common issues and fixes +- `training_patterns.md` - Proven training configurations +- `gguf_conversion.md` - Production GGUF workflow diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/trackio_guide.md b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/trackio_guide.md new file mode 100644 index 00000000..342045ee --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/trackio_guide.md @@ -0,0 +1,189 @@ +# Trackio Integration for TRL Training + +**Trackio** is an experiment tracking library that provides real-time metrics visualization for remote training on Hugging Face Jobs infrastructure. + +⚠️ **IMPORTANT**: For Jobs training (remote cloud GPUs): +- Training happens on ephemeral cloud runners (not your local machine) +- Trackio syncs metrics to a Hugging Face Space for real-time monitoring +- Without a Space, metrics are lost when the job completes +- The Space dashboard persists your training metrics permanently + +## Setting Up Trackio for Jobs + +**Step 1: Add trackio dependency** +```python +# /// script +# dependencies = [ +# "trl>=0.12.0", +# "trackio", # Required! +# ] +# /// +``` + +**Step 2: Create a Trackio Space (one-time setup)** + +**Option A: Let Trackio auto-create (Recommended)** +Pass a `space_id` to `trackio.init()` and Trackio will automatically create the Space if it doesn't exist. + +**Option B: Create manually** +- Create Space via Hub UI at https://huggingface.co/new-space +- Select Gradio SDK +- OR use command: `hf repos create my-trackio-dashboard --type space --space-sdk gradio` + +**Step 3: Initialize Trackio with space_id** +```python +import trackio + +trackio.init( + project="my-training", + space_id="username/trackio", # CRITICAL for Jobs! 
Replace 'username' with your HF username + config={ + "model": "Qwen/Qwen2.5-0.5B", + "dataset": "trl-lib/Capybara", + "learning_rate": 2e-5, + } +) +``` + +**Step 4: Configure TRL to use Trackio** +```python +SFTConfig( + report_to="trackio", + # ... other config +) +``` + +**Step 5: Finish tracking** +```python +trainer.train() +trackio.finish() # Ensures final metrics are synced +``` + +## What Trackio Tracks + +Trackio automatically logs: +- ✅ Training loss +- ✅ Learning rate +- ✅ GPU utilization +- ✅ Memory usage +- ✅ Training throughput +- ✅ Custom metrics + +## How It Works with Jobs + +1. **Training runs** → Metrics logged to local SQLite DB +2. **Every 5 minutes** → Trackio syncs DB to HF Dataset (Parquet) +3. **Space dashboard** → Reads from Dataset, displays metrics in real-time +4. **Job completes** → Final sync ensures all metrics persisted + +## Default Configuration Pattern + +**Use sensible defaults for trackio configuration unless user requests otherwise.** + +### Recommended Defaults + +```python +import trackio + +trackio.init( + project="qwen-capybara-sft", + name="baseline-run", # Descriptive name user will recognize + space_id="username/trackio", # Default space: {username}/trackio + config={ + # Keep config minimal - hyperparameters and model/dataset info only + "model": "Qwen/Qwen2.5-0.5B", + "dataset": "trl-lib/Capybara", + "learning_rate": 2e-5, + "num_epochs": 3, + } +) +``` + +**Key principles:** +- **Space ID**: Use `{username}/trackio` with "trackio" as default space name +- **Run naming**: Unless otherwise specified, name the run in a way the user will recognize +- **Config**: Keep minimal - don't automatically capture job metadata unless requested +- **Grouping**: Optional - only use if user requests organizing related experiments + +## Grouping Runs (Optional) + +The `group` parameter helps organize related runs together in the dashboard sidebar. 
This is useful when a user is running multiple experiments with different configurations but wants to compare them together: + +```python +# Example: Group runs by experiment type +trackio.init(project="my-project", name="baseline-run-1", group="baseline") +trackio.init(project="my-project", name="augmented-run-1", group="augmented") +trackio.init(project="my-project", name="tuned-run-1", group="tuned") +``` + +Runs with the same group name can be grouped together in the sidebar, making it easier to compare related experiments. You can group by any configuration parameter: + +```python +# Hyperparameter sweep - group by learning rate +trackio.init(project="hyperparam-sweep", name="lr-0.001-run", group="lr_0.001") +trackio.init(project="hyperparam-sweep", name="lr-0.01-run", group="lr_0.01") +``` + +## Environment Variables for Jobs + +You can configure trackio using environment variables instead of passing parameters to `trackio.init()`. This is useful for managing configuration across multiple jobs. + + + +**`HF_TOKEN`** +Required for creating Spaces and writing to datasets (passed via `secrets`): +```python +hf_jobs("uv", { + "script": "...", + "secrets": { + "HF_TOKEN": "$HF_TOKEN" # Enables Space creation and Hub push + } +}) +``` + +### Example with Environment Variables + +```python +hf_jobs("uv", { + "script": """ +# Training script - trackio config from environment +import trackio +from datetime import datetime + +# Auto-generate run name +timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M") +run_name = f"sft_qwen25_{timestamp}" + +# Project and space_id can come from environment variables +trackio.init(name=run_name, group="SFT") + +# ... training code ...
+trackio.finish() +""", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**When to use environment variables:** +- Managing multiple jobs with same configuration +- Keeping training scripts portable across projects +- Separating configuration from code + +**When to use direct parameters:** +- Single job with specific configuration +- When clarity in code is preferred +- When each job has different project/space + +## Viewing the Dashboard + +After starting training: +1. Navigate to the Space: `https://huggingface.co/spaces/username/trackio` +2. The Gradio dashboard shows all tracked experiments +3. Filter by project, compare runs, view charts with smoothing + +## Recommendation + +- **Trackio**: Best for real-time monitoring during long training runs +- **Weights & Biases**: Best for team collaboration, requires account diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/training_methods.md b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/training_methods.md new file mode 100644 index 00000000..2393d773 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/training_methods.md @@ -0,0 +1,150 @@ +# TRL Training Methods Overview + +TRL (Transformer Reinforcement Learning) provides multiple training methods for fine-tuning and aligning language models. This reference provides a brief overview of each method. + +## Supervised Fine-Tuning (SFT) + +**What it is:** Standard instruction tuning with supervised learning on demonstration data. 
+ +**When to use:** +- Initial fine-tuning of base models on task-specific data +- Teaching new capabilities or domains +- Most common starting point for fine-tuning + +**Dataset format:** Conversational format with "messages" field, OR text field, OR prompt/completion pairs + +**Example:** +```python +from trl import SFTTrainer, SFTConfig + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + args=SFTConfig( + output_dir="my-model", + push_to_hub=True, + hub_model_id="username/my-model", + eval_strategy="no", # Disable eval for simple example + # max_length=1024 is the default - only set if you need different length + ) +) +trainer.train() +``` + +**Note:** For production training with evaluation monitoring, see `scripts/train_sft_example.py` + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/sft_trainer")` + +## Direct Preference Optimization (DPO) + +**What it is:** Alignment method that trains directly on preference pairs (chosen vs rejected responses) without requiring a reward model. 
+ +**When to use:** +- Aligning models to human preferences +- Improving response quality after SFT +- Have paired preference data (chosen/rejected responses) + +**Dataset format:** Preference pairs with "chosen" and "rejected" fields + +**Example:** +```python +from trl import DPOTrainer, DPOConfig + +trainer = DPOTrainer( + model="Qwen/Qwen2.5-0.5B-Instruct", # Use instruct model + train_dataset=dataset, + args=DPOConfig( + output_dir="dpo-model", + beta=0.1, # KL penalty coefficient + eval_strategy="no", # Disable eval for simple example + # max_length=1024 is the default - only set if you need different length + ) +) +trainer.train() +``` + +**Note:** For production training with evaluation monitoring, see `scripts/train_dpo_example.py` + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/dpo_trainer")` + +## Group Relative Policy Optimization (GRPO) + +**What it is:** Online RL method that optimizes relative to group performance, useful for tasks with verifiable rewards. + +**When to use:** +- Tasks with automatic reward signals (code execution, math verification) +- Online learning scenarios +- When DPO offline data is insufficient + +**Dataset format:** Prompt-only format (model generates responses, reward computed online) + +**Example:** +```python +# Use TRL maintained script +hf_jobs("uv", { + "script": "https://raw.githubusercontent.com/huggingface/trl/main/examples/scripts/grpo.py", + "script_args": [ + "--model_name_or_path", "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset_name", "trl-lib/math_shepherd", + "--output_dir", "grpo-model" + ], + "flavor": "a10g-large", + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/grpo_trainer")` + +## Reward Modeling + +**What it is:** Train a reward model to score responses, used as a component in RLHF pipelines. 
+ +**When to use:** +- Building RLHF pipeline +- Need automatic quality scoring +- Creating reward signals for PPO training + +**Dataset format:** Preference pairs with "chosen" and "rejected" responses + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/reward_trainer")` + +## Method Selection Guide + +| Method | Complexity | Data Required | Use Case | +|--------|-----------|---------------|----------| +| **SFT** | Low | Demonstrations | Initial fine-tuning | +| **DPO** | Medium | Paired preferences | Post-SFT alignment | +| **GRPO** | Medium | Prompts + reward fn | Online RL with automatic rewards | +| **Reward** | Medium | Paired preferences | Building RLHF pipeline | + +## Recommended Pipeline + +**For most use cases:** +1. **Start with SFT** - Fine-tune base model on task data +2. **Follow with DPO** - Align to preferences using paired data +3. **Optional: GGUF conversion** - Deploy for local inference + +**For advanced RL scenarios:** +1. **Start with SFT** - Fine-tune base model +2. 
**Train reward model** - On preference data + +## Dataset Format Reference + +For complete dataset format specifications, use: +```python +hf_doc_fetch("https://huggingface.co/docs/trl/dataset_formats") +``` + +Or validate your dataset: +```bash +uv run https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py \ + --dataset your/dataset --split train +``` + +## See Also + +- `references/training_patterns.md` - Common training patterns and examples +- `scripts/train_sft_example.py` - Complete SFT template +- `scripts/train_dpo_example.py` - Complete DPO template +- [Dataset Inspector](https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py) - Dataset format validation tool diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/training_patterns.md b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/training_patterns.md new file mode 100644 index 00000000..2101e12a --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/training_patterns.md @@ -0,0 +1,203 @@ +# Common Training Patterns + +This guide provides common training patterns and use cases for TRL on Hugging Face Jobs. + +## Multi-GPU Training + +Automatic distributed training across multiple GPUs. 
TRL/Accelerate handles distribution automatically: + +```python +hf_jobs("uv", { + "script": """ +# Your training script here (same as single GPU) +# No changes needed - Accelerate detects multiple GPUs +""", + "flavor": "a10g-largex2", # 2x A10G GPUs + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**Tips for multi-GPU:** +- No code changes needed +- Use `per_device_train_batch_size` (per GPU, not total) +- Effective batch size = `per_device_train_batch_size` × `num_gpus` × `gradient_accumulation_steps` +- Monitor GPU utilization to ensure both GPUs are being used + +## DPO Training (Preference Learning) + +Train with preference data for alignment: + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["trl>=0.12.0", "trackio"] +# /// + +from datasets import load_dataset +from trl import DPOTrainer, DPOConfig +import trackio + +dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train") + +# Create train/eval split +dataset_split = dataset.train_test_split(test_size=0.1, seed=42) + +config = DPOConfig( + output_dir="dpo-model", + push_to_hub=True, + hub_model_id="username/dpo-model", + num_train_epochs=1, + beta=0.1, # KL penalty coefficient + eval_strategy="steps", + eval_steps=50, + report_to="trackio", + run_name="baseline_run", # use a meaningful run name + # max_length=1024, # Default - only set if you need different sequence length +) + +trainer = DPOTrainer( + model="Qwen/Qwen2.5-0.5B-Instruct", # Use instruct model as base + train_dataset=dataset_split["train"], + eval_dataset=dataset_split["test"], # IMPORTANT: Provide eval_dataset when eval_strategy is enabled + args=config, +) + +trainer.train() +trainer.push_to_hub() +trackio.finish() +""", + "flavor": "a10g-large", + "timeout": "3h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**For DPO documentation:** Use `hf_doc_fetch("https://huggingface.co/docs/trl/dpo_trainer")` + +## GRPO Training (Online RL) + +Group Relative Policy Optimization 
for online reinforcement learning: + +```python +hf_jobs("uv", { + "script": "https://raw.githubusercontent.com/huggingface/trl/main/examples/scripts/grpo.py", + "script_args": [ + "--model_name_or_path", "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset_name", "trl-lib/math_shepherd", + "--output_dir", "grpo-model", + "--push_to_hub", + "--hub_model_id", "username/grpo-model" + ], + "flavor": "a10g-large", + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**For GRPO documentation:** Use `hf_doc_fetch("https://huggingface.co/docs/trl/grpo_trainer")` + +## Trackio Configuration + +**Use sensible defaults for trackio setup.** See `references/trackio_guide.md` for complete documentation including grouping runs for experiments. + +### Basic Pattern + +```python +import trackio + +trackio.init( + project="my-training", + run_name="baseline-run", # Descriptive name user will recognize + space_id="username/trackio", # Default space: {username}/trackio + config={ + # Keep config minimal - hyperparameters and model/dataset info only + "model": "Qwen/Qwen2.5-0.5B", + "dataset": "trl-lib/Capybara", + "learning_rate": 2e-5, + } +) + +# Your training code... 
+ +trackio.finish() +``` + +### Grouping for Experiments (Optional) + +When user wants to compare related runs, use the `group` parameter: + +```python +# Hyperparameter sweep +trackio.init(project="hyperparam-sweep", run_name="lr-0.001", group="lr_0.001") +trackio.init(project="hyperparam-sweep", run_name="lr-0.01", group="lr_0.01") +``` + +## Pattern Selection Guide + +| Use Case | Pattern | Hardware | Time | +|----------|---------|----------|------| +| SFT training | `scripts/train_sft_example.py` | a10g-large | 2-6 hours | +| Large dataset (>10K) | Multi-GPU | a10g-largex2 | 4-12 hours | +| Preference learning | DPO Training | a10g-large | 2-4 hours | +| Online RL | GRPO Training | a10g-large | 3-6 hours | + +## Critical: Evaluation Dataset Requirements + +**⚠️ IMPORTANT**: If you set `eval_strategy="steps"` or `eval_strategy="epoch"`, you **MUST** provide an `eval_dataset` to the trainer, or the training will hang. + +### ✅ CORRECT - With eval dataset: +```python +dataset_split = dataset.train_test_split(test_size=0.1, seed=42) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset_split["train"], + eval_dataset=dataset_split["test"], # ← MUST provide when eval_strategy is enabled + args=SFTConfig(eval_strategy="steps", ...), +) +``` + +### ❌ WRONG - Will hang: +```python +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + # NO eval_dataset but eval_strategy="steps" ← WILL HANG + args=SFTConfig(eval_strategy="steps", ...), +) +``` + +### Option: Disable evaluation if no eval dataset +```python +config = SFTConfig( + eval_strategy="no", # ← Explicitly disable evaluation + # ... other config +) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + # No eval_dataset needed + args=config, +) +``` + +## Best Practices + +1. **Use train/eval splits** - Create evaluation split for monitoring progress +2. **Enable Trackio** - Monitor progress in real-time +3. 
**Add 20-30% buffer to timeout** - Account for loading/saving overhead +4. **Test with TRL official scripts first** - Use maintained examples before custom code +5. **Always provide eval_dataset** - When using eval_strategy, or set to "no" +6. **Use multi-GPU for large models** - 7B+ models benefit significantly + +## See Also + +- `scripts/train_sft_example.py` - Complete SFT template with Trackio and eval split +- `scripts/train_dpo_example.py` - Complete DPO template +- `scripts/train_grpo_example.py` - Complete GRPO template +- `references/hardware_guide.md` - Detailed hardware specifications +- `references/training_methods.md` - Overview of all TRL training methods +- `references/troubleshooting.md` - Common issues and solutions diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/troubleshooting.md b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/troubleshooting.md new file mode 100644 index 00000000..430816ce --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/troubleshooting.md @@ -0,0 +1,282 @@ +# Troubleshooting TRL Training Jobs + +Common issues and solutions when training with TRL on Hugging Face Jobs. + +## Training Hangs at "Starting training..." Step + +**Problem:** Job starts but hangs at the training step - never progresses, never times out, just sits there. + +**Root Cause:** Using `eval_strategy="steps"` or `eval_strategy="epoch"` without providing an `eval_dataset` to the trainer. + +**Solution:** + +**Option A: Provide eval_dataset (recommended)** +```python +# Create train/eval split +dataset_split = dataset.train_test_split(test_size=0.1, seed=42) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset_split["train"], + eval_dataset=dataset_split["test"], # ← MUST provide when eval_strategy is enabled + args=SFTConfig( + eval_strategy="steps", + eval_steps=50, + ... 
+ ), +) +``` + +**Option B: Disable evaluation** +```python +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + # No eval_dataset + args=SFTConfig( + eval_strategy="no", # ← Explicitly disable + ... + ), +) +``` + +**Prevention:** +- Always create train/eval split for better monitoring +- Use `dataset.train_test_split(test_size=0.1, seed=42)` +- Check example scripts: `scripts/train_sft_example.py` includes proper eval setup + +## Job Times Out + +**Problem:** Job terminates before training completes, all progress lost. + +**Solutions:** +- Increase timeout parameter (e.g., `"timeout": "4h"`) +- Reduce `num_train_epochs` or use smaller dataset slice +- Use smaller model or enable LoRA/PEFT to speed up training +- Add 20-30% buffer to estimated time for loading/saving overhead + +**Prevention:** +- Always start with a quick demo run to estimate timing +- Use `scripts/estimate_cost.py` to get time estimates +- Monitor first runs closely via Trackio or logs + +## Model Not Saved to Hub + +**Problem:** Training completes but model doesn't appear on Hub - all work lost. + +**Check:** +- [ ] `push_to_hub=True` in training config +- [ ] `hub_model_id` specified with username (e.g., `"username/model-name"`) +- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job submission +- [ ] User has write access to target repo +- [ ] Token has write permissions (check at https://huggingface.co/settings/tokens) +- [ ] Training script calls `trainer.push_to_hub()` at the end + +**See:** `references/hub_saving.md` for detailed Hub authentication troubleshooting + +## Out of Memory (OOM) + +**Problem:** Job fails with CUDA out of memory error. + +**Solutions (in order of preference):** +1. **Reduce batch size:** Lower `per_device_train_batch_size` (try 4 → 2 → 1) +2. **Increase gradient accumulation:** Raise `gradient_accumulation_steps` to maintain effective batch size +3. 
**Disable evaluation:** Remove `eval_dataset` and `eval_strategy` (saves ~40% memory, good for demos)
+4. **Enable LoRA/PEFT:** Use `peft_config=LoraConfig(r=8, lora_alpha=16)` to train adapters only (smaller rank = less memory)
+5. **Use larger GPU:** Switch from `t4-small` → `l4x1` → `a10g-large` → `a100-large`
+6. **Enable gradient checkpointing:** Set `gradient_checkpointing=True` in config (slower but saves memory)
+7. **Use smaller model:** Try a smaller variant (e.g., 0.5B instead of 3B)
+
+**Memory guidelines:**
+- T4 (16GB): <1B models with LoRA
+- A10G (24GB): 1-3B models with LoRA, <1B full fine-tune
+- A100 (40GB/80GB): 7B+ models with LoRA, 3B full fine-tune
+
+## Parameter Naming Issues
+
+**Problem:** `TypeError: SFTConfig.__init__() got an unexpected keyword argument 'max_seq_length'`
+
+**Cause:** TRL config classes use `max_length`, not `max_seq_length`.
+
+**Solution:**
+```python
+# ✅ CORRECT - TRL uses max_length
+SFTConfig(max_length=512)
+DPOConfig(max_length=512)
+
+# ❌ WRONG - This will fail
+SFTConfig(max_seq_length=512)
+```
+
+**Note:** Most TRL configs don't require explicit max_length - the default (1024) works well. Only set if you need a specific value.
+
+## Dataset Format Error
+
+**Problem:** Training fails with dataset format errors or missing fields.
+
+**Solutions:**
+1. **Check format documentation:**
+   ```python
+   hf_doc_fetch("https://huggingface.co/docs/trl/dataset_formats")
+   ```
+
+2. **Validate dataset before training:**
+   ```bash
+   uv run https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py \
+     --dataset your/dataset --split train
+   ```
+   Or via hf_jobs:
+   ```python
+   hf_jobs("uv", {
+     "script": "https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py",
+     "script_args": ["--dataset", "dataset-name", "--split", "train"]
+   })
+   ```
+
+3.
**Verify field names:** + - **SFT:** Needs "messages" field (conversational), OR "text" field, OR "prompt"/"completion" + - **DPO:** Needs "chosen" and "rejected" fields + - **GRPO:** Needs prompt-only format + +4. **Check dataset split:** + - Ensure split exists (e.g., `split="train"`) + - Preview dataset: `load_dataset("name", split="train[:5]")` + +## Import/Module Errors + +**Problem:** Job fails with "ModuleNotFoundError" or import errors. + +**Solutions:** +1. **Add PEP 723 header with dependencies:** + ```python + # /// script + # dependencies = [ + # "trl>=0.12.0", + # "peft>=0.7.0", + # "transformers>=4.36.0", + # ] + # /// + ``` + +2. **Verify exact format:** + - Must have `# ///` delimiters (with space after `#`) + - Dependencies must be valid PyPI package names + - Check spelling and version constraints + +3. **Test locally first:** + ```bash + uv run train.py # Tests if dependencies are correct + ``` + +## Authentication Errors + +**Problem:** Job fails with authentication or permission errors when pushing to Hub. + +**Solutions:** +1. **Verify authentication:** + ```python + mcp__huggingface__hf_whoami() # Check who's authenticated + ``` + +2. **Check token permissions:** + - Go to https://huggingface.co/settings/tokens + - Ensure token has "write" permission + - Token must not be "read-only" + +3. **Verify token in job:** + ```python + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Must be in job config + ``` + +4. **Check repo permissions:** + - User must have write access to target repo + - If org repo, user must be member with write access + - Repo must exist or user must have permission to create + +## Job Stuck or Not Starting + +**Problem:** Job shows "pending" or "starting" for extended period. 
+ +**Solutions:** +- Check Jobs dashboard for status: https://huggingface.co/jobs +- Verify hardware availability (some GPU types may have queues) +- Try different hardware flavor if one is heavily utilized +- Check for account billing issues (Jobs requires paid plan) + +**Typical startup times:** +- CPU jobs: 10-30 seconds +- GPU jobs: 30-90 seconds +- If >3 minutes: likely queued or stuck + +## Training Loss Not Decreasing + +**Problem:** Training runs but loss stays flat or doesn't improve. + +**Solutions:** +1. **Check learning rate:** May be too low (try 2e-5 to 5e-5) or too high (try 1e-6) +2. **Verify dataset quality:** Inspect examples to ensure they're reasonable +3. **Check model size:** Very small models may not have capacity for task +4. **Increase training steps:** May need more epochs or larger dataset +5. **Verify dataset format:** Wrong format may cause degraded training + +## Logs Not Appearing + +**Problem:** Cannot see training logs or progress. + +**Solutions:** +1. **Wait 30-60 seconds:** Initial logs can be delayed +2. **Check logs via MCP tool:** + ```python + hf_jobs("logs", {"job_id": "your-job-id"}) + ``` +3. **Use Trackio for real-time monitoring:** See `references/trackio_guide.md` +4. **Verify job is actually running:** + ```python + hf_jobs("inspect", {"job_id": "your-job-id"}) + ``` + +## Checkpoint/Resume Issues + +**Problem:** Cannot resume from checkpoint or checkpoint not saved. + +**Solutions:** +1. **Enable checkpoint saving:** + ```python + SFTConfig( + save_strategy="steps", + save_steps=100, + hub_strategy="every_save", # Push each checkpoint + ) + ``` + +2. **Verify checkpoints pushed to Hub:** Check model repo for checkpoint folders + +3. **Resume from checkpoint:** + ```python + trainer = SFTTrainer( + model="username/model-name", # Can be checkpoint path + resume_from_checkpoint="username/model-name/checkpoint-1000", + ) + ``` + +## Getting Help + +If issues persist: + +1. 
**Check TRL documentation:** + ```python + hf_doc_search("your issue", product="trl") + ``` + +2. **Check Jobs documentation:** + ```python + hf_doc_fetch("https://huggingface.co/docs/huggingface_hub/guides/jobs") + ``` + +3. **Review related guides:** + - `references/hub_saving.md` - Hub authentication issues + - `references/hardware_guide.md` - Hardware selection and specs + - `references/training_patterns.md` - Eval dataset requirements + - SKILL.md "Working with Scripts" section - Script format and URL issues + +4. **Ask in HF forums:** https://discuss.huggingface.co/ diff --git a/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/unsloth.md b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/unsloth.md new file mode 100644 index 00000000..83e1e116 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/hugging-face-model-trainer/references/unsloth.md @@ -0,0 +1,313 @@ +# Unsloth: Fast Fine-Tuning with Memory Optimization + +**Unsloth** is a fine-tuning library that provides ~2x faster training and ~60% less VRAM usage for LLM training. It's particularly useful when working with limited GPU memory or when speed is critical. 
+ +- **GitHub**: [unslothai/unsloth](https://github.com/unslothai/unsloth) +- **Docs**: [unsloth.ai/docs](https://unsloth.ai/docs) + +## When to Use Unsloth + +Use Unsloth if instructed to do so, or one of the following use cases applies: + +| Use Case | Recommendation | +|----------|----------------| +| Standard text LLM fine-tuning | TRL is sufficient, but Unsloth is faster | +| Limited GPU memory | **Use Unsloth** - 60% less VRAM | +| Need maximum speed | **Use Unsloth** - 2x faster | +| Large models (>13B) | **Use Unsloth** - memory efficiency critical | + +## Supported Models + +Unsloth supports many popular models including: +- **Text LLMs**: Llama 3/3.1/3.2/3.3, Qwen 2.5/3, Mistral, Phi-4, Gemma 2/3, LFM2/2.5 +- **Vision LLMs**: Qwen3-VL, Gemma 3, Llama 3.2 Vision, Pixtral + +Use Unsloth's pre-optimized model variants when available: +```python +# Unsloth-optimized models load faster and use less memory +model_id = "unsloth/LFM2.5-1.2B-Instruct" # 4-bit quantized +model_id = "unsloth/gemma-3-4b-pt" # Vision model +model_id = "unsloth/Qwen3-VL-8B-Instruct" # Vision model +``` + +## Installation + +```python +# /// script +# dependencies = [ +# "unsloth", +# "trl", +# "datasets", +# "trackio", +# ] +# /// +``` + +## Basic Usage: Text LLM + +```python +from unsloth import FastLanguageModel +from trl import SFTTrainer, SFTConfig +from datasets import load_dataset + +# Load model with Unsloth optimizations +model, tokenizer = FastLanguageModel.from_pretrained( + model_name="LiquidAI/LFM2.5-1.2B-Instruct", + max_seq_length=4096, +) + +# Add LoRA adapters +model = FastLanguageModel.get_peft_model( + model, + r=16, + lora_alpha=16, + target_modules=["q_proj", "k_proj", "v_proj", "out_proj", "in_proj", "w1", "w2", "w3"], + lora_dropout=0, + bias="none", + use_gradient_checkpointing="unsloth", + random_state=3407, +) + +# Load dataset +dataset = load_dataset("trl-lib/Capybara", split="train") + +# Train with TRL +trainer = SFTTrainer( + model=model, + 
tokenizer=tokenizer, + train_dataset=dataset, + args=SFTConfig( + output_dir="./output", + per_device_train_batch_size=2, + gradient_accumulation_steps=4, + max_steps=500, + learning_rate=2e-4, + report_to="trackio", + ), +) + +trainer.train() +``` + +## LFM2.5 Specific Settings + +For LFM2.5 inference, use these recommended generation parameters: + +**Instruct models:** +```python +temperature = 0.1 +top_k = 50 +top_p = 0.1 +repetition_penalty = 1.05 +``` + +**Thinking models:** +```python +temperature = 0.05 +top_k = 50 +repetition_penalty = 1.05 +``` + +## Vision-Language Models (VLMs) + +Unsloth provides specialized support for VLMs with `FastVisionModel`: + +```python +from unsloth import FastVisionModel, get_chat_template +from unsloth.trainer import UnslothVisionDataCollator +from trl import SFTTrainer, SFTConfig +from datasets import load_dataset + +# Load VLM with Unsloth +model, processor = FastVisionModel.from_pretrained( + "unsloth/gemma-3-4b-pt", # or "unsloth/Qwen3-VL-8B-Instruct" + load_in_4bit=True, + use_gradient_checkpointing="unsloth", +) + +# Add LoRA for all modalities +model = FastVisionModel.get_peft_model( + model, + finetune_vision_layers=True, # Train vision encoder + finetune_language_layers=True, # Train language model + finetune_attention_modules=True, # Train attention + finetune_mlp_modules=True, # Train MLPs + r=16, + lora_alpha=32, + target_modules="all-linear", +) + +# Apply chat template (required for base models) +processor = get_chat_template(processor, "gemma-3") + +# Load VLM dataset (with images and messages) +dataset = load_dataset("your-vlm-dataset", split="train", streaming=True) + +# Enable training mode +FastVisionModel.for_training(model) + +# Train with VLM-specific collator +trainer = SFTTrainer( + model=model, + train_dataset=dataset, + processing_class=processor.tokenizer, + data_collator=UnslothVisionDataCollator(model, processor), + args=SFTConfig( + output_dir="./vlm-output", + per_device_train_batch_size=2, + 
gradient_accumulation_steps=4,
+        max_steps=500,
+        learning_rate=2e-4,
+        # VLM-specific settings
+        remove_unused_columns=False,
+        dataset_text_field="",
+        dataset_kwargs={"skip_prepare_dataset": True},
+        report_to="trackio",
+    ),
+)
+
+trainer.train()
+```
+
+## Key Differences from Standard TRL
+
+| Aspect | Standard TRL | Unsloth |
+|--------|--------------|---------|
+| Model loading | `AutoModelForCausalLM.from_pretrained()` | `FastLanguageModel.from_pretrained()` |
+| LoRA setup | `PeftModel` / `LoraConfig` | `FastLanguageModel.get_peft_model()` |
+| VLM loading | Limited support | `FastVisionModel.from_pretrained()` |
+| VLM collator | Manual | `UnslothVisionDataCollator` |
+| Memory usage | Standard | ~60% less |
+| Training speed | Standard | ~2x faster |
+
+## VLM Dataset Format
+
+VLM datasets should have:
+- `images`: List of PIL images or image paths
+- `messages`: Conversation format with image references
+
+```python
+{
+    "images": [<PIL.Image>, ...],
+    "messages": [
+        {"role": "user", "content": [
+            {"type": "image"},
+            {"type": "text", "text": "Describe this image"}
+        ]},
+        {"role": "assistant", "content": "This image shows..."}
+    ]
+}
+```
+
+## Streaming Datasets
+
+For large VLM datasets, use streaming to avoid disk space issues:
+
+```python
+dataset = load_dataset(
+    "your-vlm-dataset",
+    split="train",
+    streaming=True,  # Stream from Hub
+)
+
+# Must use max_steps with streaming (no epoch-based training)
+SFTConfig(max_steps=500, ...)
+``` + +## Saving Models + +### Save LoRA Adapter + +```python +model.save_pretrained("./adapter") +processor.save_pretrained("./adapter") + +# Push to Hub +model.push_to_hub("username/my-vlm-adapter") +processor.push_to_hub("username/my-vlm-adapter") +``` + +### Merge and Save Full Model + +```python +# Merge LoRA weights into base model +model = model.merge_and_unload() + +# Save merged model +model.save_pretrained("./merged") +tokenizer.save_pretrained("./merged") +``` + +### Convert to GGUF + +Unsloth models can be converted to GGUF for llama.cpp/Ollama: + +```python +# Save in 16-bit for GGUF conversion +model.save_pretrained_gguf("./gguf", tokenizer, quantization_method="f16") + +# Or directly quantize +model.save_pretrained_gguf("./gguf", tokenizer, quantization_method="q4_k_m") +``` + +## Qwen3-VL Specific Settings + +For Qwen3-VL models, use these recommended settings: + +**Instruct models:** +```python +temperature = 0.7 +top_p = 0.8 +presence_penalty = 1.5 +``` + +**Thinking models:** +```python +temperature = 1.0 +top_p = 0.95 +presence_penalty = 0.0 +``` + +## Hardware Requirements + +| Model | Min VRAM (Unsloth 4-bit) | Recommended GPU | +|-------|--------------------------|-----------------| +| 2B-4B | 8GB | T4, L4 | +| 7B-8B | 16GB | A10G, L4x4 | +| 13B | 24GB | A10G-large | +| 30B+ | 48GB+ | A100 | + +## Example: Full VLM Training Script + +See `scripts/unsloth_sft_example.py` for a complete production-ready example that includes: +- Unsloth VLM setup +- Streaming dataset support +- Trackio monitoring +- Hub push +- CLI arguments + +Run locally: +```bash +uv run scripts/unsloth_sft_example.py \ + --dataset trl-lib/Capybara \ + --max-steps 500 \ + --output-repo username/my-model +``` + +Run on HF Jobs: +```python +hf_jobs("uv", { + "script": " +``` + +## Core Concepts + +### 1. Pipeline API +The pipeline API is the easiest way to use models. 
It groups together preprocessing, model inference, and postprocessing:
+
+```javascript
+import { pipeline } from '@huggingface/transformers';
+
+// Create a pipeline for a specific task
+const pipe = await pipeline('sentiment-analysis');
+
+// Use the pipeline
+const result = await pipe('I love transformers!');
+// Output: [{ label: 'POSITIVE', score: 0.999817686 }]
+
+// IMPORTANT: Always dispose when done to free memory
+await pipe.dispose();
+```
+
+**⚠️ Memory Management:** All pipelines must be disposed with `pipe.dispose()` when finished to prevent memory leaks. See examples in [Code Examples](./references/EXAMPLES.md) for cleanup patterns across different environments.
+
+### 2. Model Selection
+You can specify a custom model as the second argument:
+
+```javascript
+const pipe = await pipeline(
+  'sentiment-analysis',
+  'Xenova/bert-base-multilingual-uncased-sentiment'
+);
+```
+
+**Finding Models:**
+
+Browse available Transformers.js models on Hugging Face Hub:
+- **All models**: https://huggingface.co/models?library=transformers.js&sort=trending
+- **By task**: Add `pipeline_tag` parameter
+  - Text generation: https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending
+  - Image classification: https://huggingface.co/models?pipeline_tag=image-classification&library=transformers.js&sort=trending
+  - Speech recognition: https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers.js&sort=trending
+
+**Tip:** Filter by task type, sort by trending/downloads, and check model cards for performance metrics and usage examples.
+
+### 3. Device Selection
+Choose where to run the model:
+
+```javascript
+// Run on CPU (default for WASM)
+const pipe = await pipeline('sentiment-analysis', 'model-id');
+
+// Run on GPU (WebGPU - experimental)
+const pipe = await pipeline('sentiment-analysis', 'model-id', {
+  device: 'webgpu',
+});
+```
+
+### 4.
Quantization Options +Control model precision vs. performance: + +```javascript +// Use quantized model (faster, smaller) +const pipe = await pipeline('sentiment-analysis', 'model-id', { + dtype: 'q4', // Options: 'fp32', 'fp16', 'q8', 'q4' +}); +``` + +## Supported Tasks + +**Note:** All examples below show basic usage. + +### Natural Language Processing + +#### Text Classification +```javascript +const classifier = await pipeline('text-classification'); +const result = await classifier('This movie was amazing!'); +``` + +#### Named Entity Recognition (NER) +```javascript +const ner = await pipeline('token-classification'); +const entities = await ner('My name is John and I live in New York.'); +``` + +#### Question Answering +```javascript +const qa = await pipeline('question-answering'); +const answer = await qa({ + question: 'What is the capital of France?', + context: 'Paris is the capital and largest city of France.' +}); +``` + +#### Text Generation +```javascript +const generator = await pipeline('text-generation', 'onnx-community/gemma-3-270m-it-ONNX'); +const text = await generator('Once upon a time', { + max_new_tokens: 100, + temperature: 0.7 +}); +``` + +**For streaming and chat:** See **[Text Generation Guide](./references/TEXT_GENERATION.md)** for: +- Streaming token-by-token output with `TextStreamer` +- Chat/conversation format with system/user/assistant roles +- Generation parameters (temperature, top_k, top_p) +- Browser and Node.js examples +- React components and API endpoints + +#### Translation +```javascript +const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M'); +const output = await translator('Hello, how are you?', { + src_lang: 'eng_Latn', + tgt_lang: 'fra_Latn' +}); +``` + +#### Summarization +```javascript +const summarizer = await pipeline('summarization'); +const summary = await summarizer(longText, { + max_length: 100, + min_length: 30 +}); +``` + +#### Zero-Shot Classification +```javascript +const 
classifier = await pipeline('zero-shot-classification'); +const result = await classifier('This is a story about sports.', ['politics', 'sports', 'technology']); +``` + +### Computer Vision + +#### Image Classification +```javascript +const classifier = await pipeline('image-classification'); +const result = await classifier('https://example.com/image.jpg'); +// Or with local file +const result = await classifier(imageUrl); +``` + +#### Object Detection +```javascript +const detector = await pipeline('object-detection'); +const objects = await detector('https://example.com/image.jpg'); +// Returns: [{ label: 'person', score: 0.95, box: { xmin, ymin, xmax, ymax } }, ...] +``` + +#### Image Segmentation +```javascript +const segmenter = await pipeline('image-segmentation'); +const segments = await segmenter('https://example.com/image.jpg'); +``` + +#### Depth Estimation +```javascript +const depthEstimator = await pipeline('depth-estimation'); +const depth = await depthEstimator('https://example.com/image.jpg'); +``` + +#### Zero-Shot Image Classification +```javascript +const classifier = await pipeline('zero-shot-image-classification'); +const result = await classifier('image.jpg', ['cat', 'dog', 'bird']); +``` + +### Audio Processing + +#### Automatic Speech Recognition +```javascript +const transcriber = await pipeline('automatic-speech-recognition'); +const result = await transcriber('audio.wav'); +// Returns: { text: 'transcribed text here' } +``` + +#### Audio Classification +```javascript +const classifier = await pipeline('audio-classification'); +const result = await classifier('audio.wav'); +``` + +#### Text-to-Speech +```javascript +const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts'); +const audio = await synthesizer('Hello, this is a test.', { + speaker_embeddings: speakerEmbeddings +}); +``` + +### Multimodal + +#### Image-to-Text (Image Captioning) +```javascript +const captioner = await pipeline('image-to-text'); +const caption 
= await captioner('image.jpg'); +``` + +#### Document Question Answering +```javascript +const docQA = await pipeline('document-question-answering'); +const answer = await docQA('document-image.jpg', 'What is the total amount?'); +``` + +#### Zero-Shot Object Detection +```javascript +const detector = await pipeline('zero-shot-object-detection'); +const objects = await detector('image.jpg', ['person', 'car', 'tree']); +``` + +### Feature Extraction (Embeddings) + +```javascript +const extractor = await pipeline('feature-extraction'); +const embeddings = await extractor('This is a sentence to embed.'); +// Returns: tensor of shape [1, sequence_length, hidden_size] + +// For sentence embeddings (mean pooling) +const extractor = await pipeline('feature-extraction', 'onnx-community/all-MiniLM-L6-v2-ONNX'); +const embeddings = await extractor('Text to embed', { pooling: 'mean', normalize: true }); +``` + +## Finding and Choosing Models + +### Browsing the Hugging Face Hub + +Discover compatible Transformers.js models on Hugging Face Hub: + +**Base URL (all models):** +``` +https://huggingface.co/models?library=transformers.js&sort=trending +``` + +**Filter by task** using the `pipeline_tag` parameter: + +| Task | URL | +|------|-----| +| **Text Generation** | https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending | +| **Text Classification** | https://huggingface.co/models?pipeline_tag=text-classification&library=transformers.js&sort=trending | +| **Translation** | https://huggingface.co/models?pipeline_tag=translation&library=transformers.js&sort=trending | +| **Summarization** | https://huggingface.co/models?pipeline_tag=summarization&library=transformers.js&sort=trending | +| **Question Answering** | https://huggingface.co/models?pipeline_tag=question-answering&library=transformers.js&sort=trending | +| **Image Classification** | 
https://huggingface.co/models?pipeline_tag=image-classification&library=transformers.js&sort=trending | +| **Object Detection** | https://huggingface.co/models?pipeline_tag=object-detection&library=transformers.js&sort=trending | +| **Image Segmentation** | https://huggingface.co/models?pipeline_tag=image-segmentation&library=transformers.js&sort=trending | +| **Speech Recognition** | https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers.js&sort=trending | +| **Audio Classification** | https://huggingface.co/models?pipeline_tag=audio-classification&library=transformers.js&sort=trending | +| **Image-to-Text** | https://huggingface.co/models?pipeline_tag=image-to-text&library=transformers.js&sort=trending | +| **Feature Extraction** | https://huggingface.co/models?pipeline_tag=feature-extraction&library=transformers.js&sort=trending | +| **Zero-Shot Classification** | https://huggingface.co/models?pipeline_tag=zero-shot-classification&library=transformers.js&sort=trending | + +**Sort options:** +- `&sort=trending` - Most popular recently +- `&sort=downloads` - Most downloaded overall +- `&sort=likes` - Most liked by community +- `&sort=modified` - Recently updated + +### Choosing the Right Model + +Consider these factors when selecting a model: + +**1. Model Size** +- **Small (< 100MB)**: Fast, suitable for browsers, limited accuracy +- **Medium (100MB - 500MB)**: Balanced performance, good for most use cases +- **Large (> 500MB)**: High accuracy, slower, better for Node.js or powerful devices + +**2. Quantization** +Models are often available in different quantization levels: +- `fp32` - Full precision (largest, most accurate) +- `fp16` - Half precision (smaller, still accurate) +- `q8` - 8-bit quantized (much smaller, slight accuracy loss) +- `q4` - 4-bit quantized (smallest, noticeable accuracy loss) + +**3. 
Task Compatibility** +Check the model card for: +- Supported tasks (some models support multiple tasks) +- Input/output formats +- Language support (multilingual vs. English-only) +- License restrictions + +**4. Performance Metrics** +Model cards typically show: +- Accuracy scores +- Benchmark results +- Inference speed +- Memory requirements + +### Example: Finding a Text Generation Model + +```javascript +// 1. Visit: https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending + +// 2. Browse and select a model (e.g., onnx-community/gemma-3-270m-it-ONNX) + +// 3. Check model card for: +// - Model size: ~270M parameters +// - Quantization: q4 available +// - Language: English +// - Use case: Instruction-following chat + +// 4. Use the model: +import { pipeline } from '@huggingface/transformers'; + +const generator = await pipeline( + 'text-generation', + 'onnx-community/gemma-3-270m-it-ONNX', + { dtype: 'q4' } // Use quantized version for faster inference +); + +const output = await generator('Explain quantum computing in simple terms.', { + max_new_tokens: 100 +}); + +await generator.dispose(); +``` + +### Tips for Model Selection + +1. **Start Small**: Test with a smaller model first, then upgrade if needed +2. **Check ONNX Support**: Ensure the model has ONNX files (look for `onnx` folder in model repo) +3. **Read Model Cards**: Model cards contain usage examples, limitations, and benchmarks +4. **Test Locally**: Benchmark inference speed and memory usage in your environment +5. **Community Models**: Look for models by `Xenova` (Transformers.js maintainer) or `onnx-community` +6. **Version Pin**: Use specific git commits in production for stability: + ```javascript + const pipe = await pipeline('task', 'model-id', { revision: 'abc123' }); + ``` + +## Advanced Configuration + +### Environment Configuration (`env`) + +The `env` object provides comprehensive control over Transformers.js execution, caching, and model loading. 
+ +**Quick Overview:** + +```javascript +import { env } from '@huggingface/transformers'; + +// View version +console.log(env.version); // e.g., '3.8.1' + +// Common settings +env.allowRemoteModels = true; // Load from Hugging Face Hub +env.allowLocalModels = false; // Load from file system +env.localModelPath = '/models/'; // Local model directory +env.useFSCache = true; // Cache models on disk (Node.js) +env.useBrowserCache = true; // Cache models in browser +env.cacheDir = './.cache'; // Cache directory location +``` + +**Configuration Patterns:** + +```javascript +// Development: Fast iteration with remote models +env.allowRemoteModels = true; +env.useFSCache = true; + +// Production: Local models only +env.allowRemoteModels = false; +env.allowLocalModels = true; +env.localModelPath = '/app/models/'; + +// Custom CDN +env.remoteHost = 'https://cdn.example.com/models'; + +// Disable caching (testing) +env.useFSCache = false; +env.useBrowserCache = false; +``` + +For complete documentation on all configuration options, caching strategies, cache management, pre-downloading models, and more, see: + +**→ [Configuration Reference](./references/CONFIGURATION.md)** + +### Working with Tensors + +```javascript +import { AutoTokenizer, AutoModel } from '@huggingface/transformers'; + +// Load tokenizer and model separately for more control +const tokenizer = await AutoTokenizer.from_pretrained('bert-base-uncased'); +const model = await AutoModel.from_pretrained('bert-base-uncased'); + +// Tokenize input +const inputs = await tokenizer('Hello world!'); + +// Run model +const outputs = await model(inputs); +``` + +### Batch Processing + +```javascript +const classifier = await pipeline('sentiment-analysis'); + +// Process multiple texts +const results = await classifier([ + 'I love this!', + 'This is terrible.', + 'It was okay.' 
+]); +``` + +## Browser-Specific Considerations + +### WebGPU Usage +WebGPU provides GPU acceleration in browsers: + +```javascript +const pipe = await pipeline('text-generation', 'onnx-community/gemma-3-270m-it-ONNX', { + device: 'webgpu', + dtype: 'fp32' +}); +``` + +**Note**: WebGPU is experimental. Check browser compatibility and file issues if problems occur. + +### WASM Performance +Default browser execution uses WASM: + +```javascript +// Optimized for browsers with quantization +const pipe = await pipeline('sentiment-analysis', 'model-id', { + dtype: 'q8' // or 'q4' for even smaller size +}); +``` + +### Progress Tracking & Loading Indicators + +Models can be large (ranging from a few MB to several GB) and consist of multiple files. Track download progress by passing a callback to the `pipeline()` function: + +```javascript +import { pipeline } from '@huggingface/transformers'; + +// Track progress for each file +const fileProgress = {}; + +function onProgress(info) { + console.log(`${info.status}: ${info.file}`); + + if (info.status === 'progress') { + fileProgress[info.file] = info.progress; + console.log(`${info.file}: ${info.progress.toFixed(1)}%`); + } + + if (info.status === 'done') { + console.log(`✓ ${info.file} complete`); + } +} + +// Pass callback to pipeline +const classifier = await pipeline('sentiment-analysis', null, { + progress_callback: onProgress +}); +``` + +**Progress Info Properties:** + +```typescript +interface ProgressInfo { + status: 'initiate' | 'download' | 'progress' | 'done' | 'ready'; + name: string; // Model id or path + file: string; // File being processed + progress?: number; // Percentage (0-100, only for 'progress' status) + loaded?: number; // Bytes downloaded (only for 'progress' status) + total?: number; // Total bytes (only for 'progress' status) +} +``` + +For complete examples including browser UIs, React components, CLI progress bars, and retry logic, see: + +**→ [Pipeline Options - Progress 
Callback](./references/PIPELINE_OPTIONS.md#progress-callback)** + +## Error Handling + +```javascript +try { + const pipe = await pipeline('sentiment-analysis', 'model-id'); + const result = await pipe('text to analyze'); +} catch (error) { + if (error.message.includes('fetch')) { + console.error('Model download failed. Check internet connection.'); + } else if (error.message.includes('ONNX')) { + console.error('Model execution failed. Check model compatibility.'); + } else { + console.error('Unknown error:', error); + } +} +``` + +## Performance Tips + +1. **Reuse Pipelines**: Create pipeline once, reuse for multiple inferences +2. **Use Quantization**: Start with `q8` or `q4` for faster inference +3. **Batch Processing**: Process multiple inputs together when possible +4. **Cache Models**: Models are cached automatically (see **[Caching Reference](./references/CACHE.md)** for details on browser Cache API, Node.js filesystem cache, and custom implementations) +5. **WebGPU for Large Models**: Use WebGPU for models that benefit from GPU acceleration +6. **Prune Context**: For text generation, limit `max_new_tokens` to avoid memory issues +7. **Clean Up Resources**: Call `pipe.dispose()` when done to free memory + +## Memory Management + +**IMPORTANT:** Always call `pipe.dispose()` when finished to prevent memory leaks. + +```javascript +const pipe = await pipeline('sentiment-analysis'); +const result = await pipe('Great product!'); +await pipe.dispose(); // ✓ Free memory (100MB - several GB per model) +``` + +**When to dispose:** +- Application shutdown or component unmount +- Before loading a different model +- After batch processing in long-running apps + +Models consume significant memory and hold GPU/CPU resources. Disposal is critical for browser memory limits and server stability. 
+ +For detailed patterns (React cleanup, servers, browser), see **[Code Examples](./references/EXAMPLES.md)** + +## Troubleshooting + +### Model Not Found +- Verify model exists on Hugging Face Hub +- Check model name spelling +- Ensure model has ONNX files (look for `onnx` folder in model repo) + +### Memory Issues +- Use smaller models or quantized versions (`dtype: 'q4'`) +- Reduce batch size +- Limit sequence length with `max_length` + +### WebGPU Errors +- Check browser compatibility (Chrome 113+, Edge 113+) +- Try `dtype: 'fp16'` if `fp32` fails +- Fall back to WASM if WebGPU unavailable + +## Reference Documentation + +### This Skill +- **[Pipeline Options](./references/PIPELINE_OPTIONS.md)** - Configure `pipeline()` with `progress_callback`, `device`, `dtype`, etc. +- **[Configuration Reference](./references/CONFIGURATION.md)** - Global `env` configuration for caching and model loading +- **[Caching Reference](./references/CACHE.md)** - Browser Cache API, Node.js filesystem cache, and custom cache implementations +- **[Text Generation Guide](./references/TEXT_GENERATION.md)** - Streaming, chat format, and generation parameters +- **[Model Architectures](./references/MODEL_ARCHITECTURES.md)** - Supported models and selection tips +- **[Code Examples](./references/EXAMPLES.md)** - Real-world implementations for different runtimes + +### Official Transformers.js +- Official docs: https://huggingface.co/docs/transformers.js +- API reference: https://huggingface.co/docs/transformers.js/api/pipelines +- Model hub: https://huggingface.co/models?library=transformers.js +- GitHub: https://github.com/huggingface/transformers.js +- Examples: https://github.com/huggingface/transformers.js/tree/main/examples + +## Best Practices + +1. **Always Dispose Pipelines**: Call `pipe.dispose()` when done - critical for preventing memory leaks +2. **Start with Pipelines**: Use the pipeline API unless you need fine-grained control +3. 
**Test Locally First**: Test models with small inputs before deploying +4. **Monitor Model Sizes**: Be aware of model download sizes for web applications +5. **Handle Loading States**: Show progress indicators for better UX +6. **Version Pin**: Pin specific model versions for production stability +7. **Error Boundaries**: Always wrap pipeline calls in try-catch blocks +8. **Progressive Enhancement**: Provide fallbacks for unsupported browsers +9. **Reuse Models**: Load once, use many times - don't recreate pipelines unnecessarily +10. **Graceful Shutdown**: Dispose models on SIGTERM/SIGINT in servers + +## Quick Reference: Task IDs + +| Task | Task ID | +|------|---------| +| Text classification | `text-classification` or `sentiment-analysis` | +| Token classification | `token-classification` or `ner` | +| Question answering | `question-answering` | +| Fill mask | `fill-mask` | +| Summarization | `summarization` | +| Translation | `translation` | +| Text generation | `text-generation` | +| Text-to-text generation | `text2text-generation` | +| Zero-shot classification | `zero-shot-classification` | +| Image classification | `image-classification` | +| Image segmentation | `image-segmentation` | +| Object detection | `object-detection` | +| Depth estimation | `depth-estimation` | +| Image-to-image | `image-to-image` | +| Zero-shot image classification | `zero-shot-image-classification` | +| Zero-shot object detection | `zero-shot-object-detection` | +| Automatic speech recognition | `automatic-speech-recognition` | +| Audio classification | `audio-classification` | +| Text-to-speech | `text-to-speech` or `text-to-audio` | +| Image-to-text | `image-to-text` | +| Document question answering | `document-question-answering` | +| Feature extraction | `feature-extraction` | +| Sentence similarity | `sentence-similarity` | + +--- + +This skill enables you to integrate state-of-the-art machine learning capabilities directly into JavaScript applications without requiring 
separate ML servers or Python environments. diff --git a/plugins/antigravity-awesome-skills/skills/transformers-js/references/CACHE.md b/plugins/antigravity-awesome-skills/skills/transformers-js/references/CACHE.md new file mode 100644 index 00000000..6f97b2cd --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/transformers-js/references/CACHE.md @@ -0,0 +1,339 @@ +# Caching Reference + +Complete guide to caching strategies for Transformers.js models across different environments. + +## Table of Contents + +1. [Overview](#overview) +2. [Browser Caching](#browser-caching) +3. [Node.js Caching](#nodejs-caching) +4. [Custom Cache Implementation](#custom-cache-implementation) +5. [Cache Configuration](#cache-configuration) + +## Overview + +Transformers.js models can be large (from a few MB to several GB), so caching is critical for performance. The caching strategy differs based on the environment: + +- **Browser**: Uses the Cache API (browser cache storage) +- **Node.js**: Uses filesystem cache in `~/.cache/huggingface/` +- **Custom**: Implement your own cache (database, cloud storage, etc.) + +### Default Behavior + +```javascript +import { pipeline } from '@huggingface/transformers'; + +// First load: downloads model +const pipe = await pipeline('sentiment-analysis'); + +// Subsequent loads: uses cached model +const pipe2 = await pipeline('sentiment-analysis'); // Fast! +``` + +Caching is **automatic** and enabled by default. Models are cached after the first download. + +## Browser Caching + +### Using the Cache API + +In browser environments, Transformers.js uses the [Cache API](https://developer.mozilla.org/en-US/docs/Web/API/Cache) to store models: + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Browser cache is enabled by default +console.log(env.useBrowserCache); // true + +// Load model (cached automatically) +const classifier = await pipeline('sentiment-analysis'); +``` + +**How it works:** + +1. 
Model files are downloaded from Hugging Face Hub +2. Files are stored in the browser's Cache Storage +3. Subsequent loads retrieve from cache (no network request) +4. Cache persists across page reloads and browser sessions + +### Cache Location + +Browser caches are stored in: +- **Chrome/Edge**: `Cache Storage` in DevTools → Application tab → Cache storage +- **Firefox**: `about:cache` → Storage +- **Safari**: Web Inspector → Storage tab + +### Disable Browser Cache + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable browser caching (not recommended) +env.useBrowserCache = false; + +// Models will be re-downloaded on every page load +``` + +**Use case:** Testing, development, or debugging cache issues. + +### Browser Storage Limits + +Browsers impose storage quotas: + +- **Chrome**: ~60% of available disk space (but can evict data) +- **Firefox**: ~50% of available disk space +- **Safari**: ~1GB per origin (prompt for more) + +**Tip:** Monitor storage usage with the [Storage API](https://developer.mozilla.org/en-US/docs/Web/API/Storage_API): + +```javascript +if ('storage' in navigator && 'estimate' in navigator.storage) { + const estimate = await navigator.storage.estimate(); + const percentUsed = (estimate.usage / estimate.quota) * 100; + console.log(`Storage: ${percentUsed.toFixed(2)}% used`); + console.log(`Available: ${((estimate.quota - estimate.usage) / 1024 / 1024).toFixed(2)} MB`); +} +``` + +## Node.js Caching + +### Filesystem Cache + +In Node.js, models are cached to the filesystem: + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Default cache directory (Node.js) +console.log(env.cacheDir); // './.cache' (relative to current directory) + +// Filesystem cache is enabled by default +console.log(env.useFSCache); // true + +// Load model (cached to disk) +const classifier = await pipeline('sentiment-analysis'); +``` + +### Default Cache Location + +**Default behavior:** +- Cache directory: 
`./.cache` (relative to where Node.js process runs) +- Full default path: `~/.cache/huggingface/` when using Hugging Face tools + +**Note:** The statement "Models are cached automatically in `~/.cache/huggingface/`" from performance tips is specific to Hugging Face's Python tooling convention. In Transformers.js for Node.js, the default is `./.cache` unless configured otherwise. + +### Custom Cache Directory + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Set custom cache directory +env.cacheDir = '/var/cache/transformers'; + +// Or use environment variable (Node.js convention) +env.cacheDir = process.env.HF_HOME || '~/.cache/huggingface'; + +// Now load model +const classifier = await pipeline('sentiment-analysis'); +// Cached to: /var/cache/transformers/models--Xenova--distilbert-base-uncased-finetuned-sst-2-english/ +``` + +**Pattern:** `models--{organization}--{model-name}/` + +### Disable Filesystem Cache + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable filesystem caching (not recommended) +env.useFSCache = false; + +// Models will be re-downloaded on every load +``` + +**Use case:** Testing, CI/CD environments, or containers with ephemeral storage. + +## Custom Cache Implementation + +Implement your own cache for specialized storage backends. 
+ +### Custom Cache Interface + +```typescript +interface CacheInterface { + /** + * Check if a URL is cached + */ + match(url: string): Promise<Response | undefined>; + + /** + * Store a URL and its response + */ + put(url: string, response: Response): Promise<void>; +} +``` + +### Example: Cloud Storage Cache (S3) + +```javascript +import { env, pipeline } from '@huggingface/transformers'; +import { S3Client, GetObjectCommand, PutObjectCommand } from '@aws-sdk/client-s3'; +import { Readable } from 'stream'; + +class S3Cache { + constructor(bucket, region = 'us-east-1') { + this.bucket = bucket; + this.s3 = new S3Client({ region }); + } + + async match(url) { + const key = this.urlToKey(url); + + try { + const command = new GetObjectCommand({ + Bucket: this.bucket, + Key: key + }); + const response = await this.s3.send(command); + + // Convert stream to buffer + const chunks = []; + for await (const chunk of response.Body) { + chunks.push(chunk); + } + const body = Buffer.concat(chunks); + + return new Response(body, { + status: 200, + headers: JSON.parse(response.Metadata.headers || '{}') + }); + } catch (error) { + if (error.name === 'NoSuchKey') return undefined; + throw error; + } + } + + async put(url, response) { + const key = this.urlToKey(url); + const clonedResponse = response.clone(); + const body = Buffer.from(await clonedResponse.arrayBuffer()); + const headers = JSON.stringify(Object.fromEntries(response.headers.entries())); + + const command = new PutObjectCommand({ + Bucket: this.bucket, + Key: key, + Body: body, + Metadata: { headers } + }); + + await this.s3.send(command); + } + + urlToKey(url) { + // Convert URL to S3 key (remove protocol, replace slashes) + return url.replace(/^https?:\/\//, '').replace(/\//g, '_'); + } +} + +// Configure S3 cache +env.useCustomCache = true; +env.customCache = new S3Cache('my-transformers-cache', 'us-east-1'); +env.useFSCache = false; + +// Use S3 cache +const classifier = await pipeline('sentiment-analysis'); +``` + +## Cache 
Configuration + +### Environment Variables + +Use environment variables to configure caching: + +```javascript +import { env } from '@huggingface/transformers'; + +// Configure cache directory from environment +env.cacheDir = process.env.TRANSFORMERS_CACHE || './.cache'; + +// Disable caching in CI/CD +if (process.env.CI === 'true') { + env.useFSCache = false; + env.useBrowserCache = false; +} + +// Production: use pre-cached models +if (process.env.NODE_ENV === 'production') { + env.allowRemoteModels = false; + env.allowLocalModels = true; + env.localModelPath = process.env.MODEL_PATH || '/app/models'; +} +``` + +### Configuration Patterns + +#### Development: Enable All Caching + +```javascript +import { env } from '@huggingface/transformers'; + +env.allowRemoteModels = true; +env.useFSCache = true; // Node.js +env.useBrowserCache = true; // Browser +env.cacheDir = './.cache'; +``` + +#### Production: Local Models Only + +```javascript +import { env } from '@huggingface/transformers'; + +env.allowRemoteModels = false; +env.allowLocalModels = true; +env.localModelPath = '/app/models'; +env.useFSCache = true; +``` + +#### Testing: Disable Caching + +```javascript +import { env } from '@huggingface/transformers'; + +env.useFSCache = false; +env.useBrowserCache = false; +env.allowRemoteModels = true; // Download every time +``` + +#### Hybrid: Cache + Remote Fallback + +```javascript +import { env } from '@huggingface/transformers'; + +// Try local cache first, fall back to remote +env.allowRemoteModels = true; +env.allowLocalModels = true; +env.useFSCache = true; +env.localModelPath = './models'; +``` + +--- + +## Summary + +Transformers.js provides flexible caching options: + +- **Browser**: Cache API (automatic, persistent) +- **Node.js**: Filesystem cache (default `./.cache`, configurable) +- **Custom**: Implement your own (database, cloud storage, etc.) + +**Key takeaways:** + +1. Caching is enabled by default and automatic +2. 
Configure cache **before** loading models +3. Browser uses Cache API, Node.js uses filesystem +4. Custom caches enable advanced storage backends +5. Monitor cache size and implement cleanup strategies +6. Pre-download models for production deployments + +For more configuration options, see: +- [Configuration Reference](./CONFIGURATION.md) +- [Pipeline Options](./PIPELINE_OPTIONS.md) diff --git a/plugins/antigravity-awesome-skills/skills/transformers-js/references/CONFIGURATION.md b/plugins/antigravity-awesome-skills/skills/transformers-js/references/CONFIGURATION.md new file mode 100644 index 00000000..52e18d96 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/transformers-js/references/CONFIGURATION.md @@ -0,0 +1,390 @@ +# Environment Configuration Reference + +Complete guide to configuring Transformers.js behavior using the `env` object. + +## Table of Contents + +1. [Overview](#overview) +2. [Remote Model Configuration](#remote-model-configuration) +3. [Local Model Configuration](#local-model-configuration) +4. [Cache Configuration](#cache-configuration) +5. [WASM Configuration](#wasm-configuration) +6. [Common Configuration Patterns](#common-configuration-patterns) +7. 
[Environment Best Practices](#environment-best-practices) + +## Overview + +The `env` object provides comprehensive control over Transformers.js execution, caching, and model loading: + +```javascript +import { env } from '@huggingface/transformers'; + +// View current version +console.log(env.version); // e.g., '3.8.1' +``` + +### Available Properties + +```typescript +interface TransformersEnvironment { + // Version info + version: string; + + // Backend configuration + backends: { + onnx: Partial; + }; + + // Remote model settings + allowRemoteModels: boolean; + remoteHost: string; + remotePathTemplate: string; + + // Local model settings + allowLocalModels: boolean; + localModelPath: string; + useFS: boolean; + + // Cache settings + useBrowserCache: boolean; + useFSCache: boolean; + cacheDir: string | null; + useCustomCache: boolean; + customCache: CacheInterface | null; + useWasmCache: boolean; + cacheKey: string; +} +``` + +## Remote Model Configuration + +Control how models are loaded from remote sources (default: Hugging Face Hub). + +### Disable Remote Loading + +```javascript +import { env } from '@huggingface/transformers'; + +// Force local-only mode (no network requests) +env.allowRemoteModels = false; +``` + +**Use case:** Offline applications, security requirements, or air-gapped environments. + +### Custom Model Host + +```javascript +import { env } from '@huggingface/transformers'; + +// Use your own CDN or model server +env.remoteHost = 'https://cdn.example.com/models'; + +// Customize the URL pattern +// Default: '{model}/resolve/{revision}/{file}' +env.remotePathTemplate = 'custom/{model}/{file}'; +``` + +**Use case:** Self-hosting models, using a CDN for faster downloads, or corporate proxies. 
+ +### Example: Private Model Server + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Configure custom model host +env.remoteHost = 'https://models.mycompany.com'; +env.remotePathTemplate = '{model}/{file}'; + +// Models will be loaded from: +// https://models.mycompany.com/my-model/model.onnx +const pipe = await pipeline('sentiment-analysis', 'my-model'); +``` + +## Local Model Configuration + +Control loading models from the local file system. + +### Enable Local Models + +```javascript +import { env } from '@huggingface/transformers'; + +// Enable local file system loading +env.allowLocalModels = true; + +// Set the base path for local models +env.localModelPath = '/path/to/models/'; +``` + +**Default values:** +- Browser: `allowLocalModels = false`, `localModelPath = '/models/'` +- Node.js: `allowLocalModels = true`, `localModelPath = '/models/'` + +### File System Control + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable file system usage entirely (Node.js only) +env.useFS = false; +``` + +### Example: Local Model Directory Structure + +``` +/app/models/ +├── onnx-community/ +│ ├── Supertonic-TTS-ONNX/ +│ │ ├── config.json +│ │ ├── tokenizer.json +│ │ ├── model.onnx +│ │ └── ... +│ └── yolo26l-pose-ONNX/ +│ ├── config.json +│ ├── preprocessor_config.json +│ ├── model.onnx +│ └── ... +``` + +```javascript +env.allowLocalModels = true; +env.localModelPath = '/app/models/'; +env.allowRemoteModels = false; // Offline mode + +const classifier = await pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english'); +``` + +## Cache Configuration + +Transformers.js supports multiple caching strategies to improve performance and reduce network usage. 
+ +### Quick Configuration + +```javascript +import { env } from '@huggingface/transformers'; + +// Browser cache (Cache API) +env.useBrowserCache = true; // default: true +env.cacheKey = 'my-app-transformers-cache'; // default: 'transformers-cache' + +// Node.js filesystem cache +env.useFSCache = true; // default: true +env.cacheDir = './custom-cache-dir'; // default: './.cache' + +// Custom cache implementation +env.useCustomCache = true; +env.customCache = new CustomCache(); // Implement Cache API interface + +// WASM binary caching +env.useWasmCache = true; // default: true +``` + +### Disable Caching + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable all caching (re-download on every load) +env.useFSCache = false; +env.useBrowserCache = false; +env.useWasmCache = false; +env.cacheDir = null; +``` + +For comprehensive caching documentation including: +- Browser Cache API details and storage limits +- Node.js filesystem cache structure and management +- Custom cache implementations (Redis, database, S3) +- Cache clearing and monitoring strategies +- Best practices and troubleshooting + +See **[Caching Reference](./CACHE.md)** + +## WASM Configuration + +Configure ONNX Runtime Web Assembly backend settings. 
+ +### Basic WASM Settings + +```javascript +import { env } from '@huggingface/transformers'; + +// Set custom WASM paths +env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/'; + +// Configure number of threads (Node.js only) +env.backends.onnx.wasm.numThreads = 4; + +// Enable/disable SIMD (single instruction, multiple data) +env.backends.onnx.wasm.simd = true; +``` + +### Proxy Configuration + +```javascript +import { env } from '@huggingface/transformers'; + +// Configure proxy for WASM downloads +env.backends.onnx.wasm.proxy = true; +``` + +### Self-Hosted WASM Files + +```javascript +import { env } from '@huggingface/transformers'; + +// Host WASM files on your own server +env.backends.onnx.wasm.wasmPaths = '/static/wasm/'; +``` + +**Required files:** +- `ort-wasm.wasm` - Main WASM binary +- `ort-wasm-simd.wasm` - SIMD-enabled WASM binary +- `ort-wasm-threaded.wasm` - Multi-threaded WASM binary +- `ort-wasm-simd-threaded.wasm` - SIMD + multi-threaded WASM binary + +## Common Configuration Patterns + +### Development Setup + +```javascript +import { env } from '@huggingface/transformers'; + +// Fast iteration with caching +env.allowRemoteModels = true; +env.useBrowserCache = true; // Browser +env.useFSCache = true; // Node.js +env.cacheDir = './.cache'; +``` + +### Production (Local Models) + +```javascript +import { env } from '@huggingface/transformers'; + +// Secure, offline-capable setup +env.allowRemoteModels = false; +env.allowLocalModels = true; +env.localModelPath = '/app/models/'; +env.useFSCache = false; // Models already local +``` + +### Offline-First Application + +```javascript +import { env } from '@huggingface/transformers'; + +// Try local first, fall back to remote +env.allowLocalModels = true; +env.localModelPath = './models/'; +env.allowRemoteModels = true; +env.useFSCache = true; +env.cacheDir = './cache'; +``` + +### Custom CDN + +```javascript +import { env } from '@huggingface/transformers'; + +// Use 
your own model hosting +env.remoteHost = 'https://cdn.example.com/ml-models'; +env.remotePathTemplate = '{model}/{file}'; +env.useBrowserCache = true; +``` + +### Memory-Constrained Environment + +```javascript +import { env } from '@huggingface/transformers'; + +// Minimize disk/memory usage +env.useFSCache = false; +env.useBrowserCache = false; +env.useWasmCache = false; +env.cacheDir = null; +``` + +### Testing/CI Environment + +```javascript +import { env } from '@huggingface/transformers'; + +// Predictable, isolated testing +env.allowRemoteModels = false; +env.allowLocalModels = true; +env.localModelPath = './test-fixtures/models/'; +env.useFSCache = false; +``` + + + +## Environment Best Practices + +### 1. Configure Early + +Set `env` properties before loading any models: + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// ✓ Good: Configure before loading +env.allowRemoteModels = false; +env.localModelPath = '/app/models/'; +const pipe = await pipeline('sentiment-analysis'); + +// ✗ Bad: Configuring after loading may not take effect +const pipe = await pipeline('sentiment-analysis'); +env.allowRemoteModels = false; // Too late! +``` + +### 2. Use Environment Variables + +```javascript +import { env } from '@huggingface/transformers'; + +// Configure based on environment +env.allowRemoteModels = process.env.NODE_ENV === 'development'; +env.cacheDir = process.env.MODEL_CACHE_DIR || './.cache'; +env.localModelPath = process.env.LOCAL_MODELS_PATH || '/app/models/'; +``` + +### 3. Handle Errors Gracefully + +```javascript +import { pipeline, env } from '@huggingface/transformers'; + +try { + env.allowRemoteModels = false; + const pipe = await pipeline('sentiment-analysis', 'my-model'); +} catch (error) { + if (error.message.includes('not found')) { + console.error('Model not found locally. Enable remote models or download the model.'); + } + throw error; +} +``` + +### 4. 
Log Configuration + +```javascript +import { env } from '@huggingface/transformers'; + +console.log('Transformers.js Configuration:', { + version: env.version, + allowRemoteModels: env.allowRemoteModels, + allowLocalModels: env.allowLocalModels, + localModelPath: env.localModelPath, + cacheDir: env.cacheDir, + useFSCache: env.useFSCache, + useBrowserCache: env.useBrowserCache +}); +``` + +## Related Documentation + +- **[Caching Reference](./CACHE.md)** - Comprehensive caching guide (browser, Node.js, custom implementations) +- [Pipeline Options](./PIPELINE_OPTIONS.md) - Configure pipeline loading with `progress_callback`, `device`, `dtype`, etc. +- [Model Architectures](./MODEL_ARCHITECTURES.md) - Supported models and architectures +- [Examples](./EXAMPLES.md) - Code examples for different runtimes +- [Main Skill Guide](../SKILL.md) - Getting started and common usage diff --git a/plugins/antigravity-awesome-skills/skills/transformers-js/references/EXAMPLES.md b/plugins/antigravity-awesome-skills/skills/transformers-js/references/EXAMPLES.md new file mode 100644 index 00000000..6a6e9b74 --- /dev/null +++ b/plugins/antigravity-awesome-skills/skills/transformers-js/references/EXAMPLES.md @@ -0,0 +1,605 @@ +# Transformers.js Code Examples + +Working examples showing how to use Transformers.js across different runtimes and frameworks. + +All examples use the same task and model for consistency: +- **Task**: `feature-extraction` +- **Model**: `onnx-community/all-MiniLM-L6-v2-ONNX` + +## Table of Contents +1. [Browser (Vanilla JS)](#browser-vanilla-js) +2. [Node.js](#nodejs) +3. [React](#react) +4. [Express API](#express-api) + +## Browser (Vanilla JS) + +### Basic Usage + +```html + + + + Feature Extraction + + +

Text Embedding Generator

+ + +
+ + + + + +``` + +### With Progress Tracking + +```html + + + + Feature Extraction with Progress + + + +

Text Embedding Generator

+
+

Loading model...

+
+
+ + + + + +``` + +## Node.js + +### Basic Script + +```javascript +// embed.js +import { pipeline } from '@huggingface/transformers'; + +async function generateEmbedding(text) { + const extractor = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX' + ); + + const output = await extractor(text, { pooling: 'mean', normalize: true }); + + console.log('Text:', text); + console.log('Embedding dimensions:', output.data.length); + console.log('First 5 values:', Array.from(output.data).slice(0, 5)); + + await extractor.dispose(); +} + +generateEmbedding('Hello, world!'); +``` + +### Batch Processing + +```javascript +// batch-embed.js +import { pipeline } from '@huggingface/transformers'; +import fs from 'fs/promises'; + +async function embedDocuments(documents) { + const extractor = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX' + ); + + console.log(`Processing ${documents.length} documents...`); + + const embeddings = []; + + for (let i = 0; i < documents.length; i++) { + const output = await extractor(documents[i], { + pooling: 'mean', + normalize: true + }); + + embeddings.push({ + text: documents[i], + embedding: Array.from(output.data) + }); + + console.log(`Processed ${i + 1}/${documents.length}`); + } + + await fs.writeFile( + 'embeddings.json', + JSON.stringify(embeddings, null, 2) + ); + + console.log('Saved to embeddings.json'); + + await extractor.dispose(); +} + +const documents = [ + 'The cat sat on the mat', + 'A dog played in the park', + 'Machine learning is fascinating' +]; + +embedDocuments(documents); +``` + +### CLI with Progress + +```javascript +// cli-embed.js +import { pipeline } from '@huggingface/transformers'; + +async function main() { + const text = process.argv[2] || 'Hello, world!'; + + console.log('Loading model...'); + + const fileProgress = {}; + + const extractor = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX', + { + progress_callback: 
(info) => { + if (info.status === 'progress') { + fileProgress[info.file] = info.progress; + + // Show all files progress + const progressLines = Object.entries(fileProgress) + .map(([file, progress]) => ` ${file}: ${progress.toFixed(1)}%`) + .join('\n'); + + process.stdout.write(`\r\x1b[K${progressLines}`); + } + + if (info.status === 'done') { + console.log(`\n✓ ${info.file} complete`); + } + + if (info.status === 'ready') { + console.log('\nModel ready!'); + } + } + } + ); + + console.log('Generating embedding...'); + const output = await extractor(text, { pooling: 'mean', normalize: true }); + + console.log(`\nText: "${text}"`); + console.log(`Dimensions: ${output.data.length}`); + console.log(`First 5 values: ${Array.from(output.data).slice(0, 5).join(', ')}`); + + await extractor.dispose(); +} + +main(); +``` + +## React + +### Basic Component + +```jsx +// EmbeddingGenerator.jsx +import { useState, useRef, useEffect } from 'react'; +import { pipeline } from '@huggingface/transformers'; + +export function EmbeddingGenerator() { + const extractorRef = useRef(null); + const [text, setText] = useState(''); + const [embedding, setEmbedding] = useState(null); + const [loading, setLoading] = useState(false); + + const generate = async () => { + if (!text) return; + + setLoading(true); + + // Load model on first generate + if (!extractorRef.current) { + extractorRef.current = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX' + ); + } + + const output = await extractorRef.current(text, { + pooling: 'mean', + normalize: true + }); + setEmbedding(Array.from(output.data)); + setLoading(false); + }; + + // Cleanup on unmount + useEffect(() => { + return () => { + if (extractorRef.current) { + extractorRef.current.dispose(); + } + }; + }, []); + + return ( +
+

Text Embedding Generator

+ + + +
+ + + + +``` + +### React + +```jsx +import { useState, useRef, useEffect } from 'react'; +import { pipeline, TextStreamer } from '@huggingface/transformers'; + +function StreamingGenerator() { + const generatorRef = useRef(null); + const [output, setOutput] = useState(''); + const [loading, setLoading] = useState(false); + + const handleGenerate = async (prompt) => { + if (!prompt) return; + + setLoading(true); + setOutput(''); + + // Load model on first generate + if (!generatorRef.current) { + generatorRef.current = await pipeline( + 'text-generation', + 'onnx-community/Qwen2.5-0.5B-Instruct', + { dtype: 'q4' } + ); + } + + const streamer = new TextStreamer(generatorRef.current.tokenizer, { + skip_prompt: true, + skip_special_tokens: true, + callback_function: (token) => { + setOutput((prev) => prev + token); + }, + }); + + await generatorRef.current(prompt, { + max_new_tokens: 200, + temperature: 0.7, + streamer, + }); + + setLoading(false); + }; + + // Cleanup on unmount + useEffect(() => { + return () => { + if (generatorRef.current) { + generatorRef.current.dispose(); + } + }; + }, []); + + return ( +
+ +
{output}
+
+ ); +} +``` + +## Chat Format + +Use structured messages for conversations. Works with both basic generation and streaming (just add `streamer` parameter). + +### Single Turn + +```javascript +import { pipeline } from '@huggingface/transformers'; + +const generator = await pipeline( + 'text-generation', + 'onnx-community/Qwen2.5-0.5B-Instruct', + { dtype: 'q4' } +); + +const messages = [ + { role: 'system', content: 'You are a helpful assistant.' }, + { role: 'user', content: 'How do I create an async function?' } +]; + +const result = await generator(messages, { + max_new_tokens: 256, + temperature: 0.7, +}); + +console.log(result[0].generated_text); +``` + +### Multi-turn Conversation + +```javascript +const conversation = [ + { role: 'system', content: 'You are a helpful assistant.' }, + { role: 'user', content: 'What is JavaScript?' }, + { role: 'assistant', content: 'JavaScript is a programming language...' }, + { role: 'user', content: 'Can you show an example?' } +]; + +const result = await generator(conversation, { + max_new_tokens: 200, + temperature: 0.7, +}); + +// To add streaming, just pass a streamer: +// streamer: new TextStreamer(generator.tokenizer, {...}) +``` + +## Generation Parameters + +### Common Parameters + +```javascript +await generator(prompt, { + // Token limits + max_new_tokens: 512, // Maximum tokens to generate + min_new_tokens: 0, // Minimum tokens to generate + + // Sampling + temperature: 0.7, // Randomness (0.0-2.0) + top_k: 50, // Consider top K tokens + top_p: 0.95, // Nucleus sampling + do_sample: true, // Use random sampling (false = always pick most likely token) + + // Repetition control + repetition_penalty: 1.0, // Penalty for repeating (1.0 = no penalty) + no_repeat_ngram_size: 0, // Prevent repeating n-grams + + // Streaming + streamer: streamer, // TextStreamer instance +}); +``` + +### Parameter Effects + +**Temperature:** +- Low (0.1-0.5): More focused and deterministic +- Medium (0.6-0.9): Balanced creativity and 
coherence +- High (1.0-2.0): More creative and random + +```javascript +// Focused output +await generator(prompt, { temperature: 0.3, max_new_tokens: 100 }); + +// Creative output +await generator(prompt, { temperature: 1.2, max_new_tokens: 100 }); +``` + +**Sampling Methods:** + +```javascript +// Greedy (deterministic) +await generator(prompt, { + do_sample: false, + max_new_tokens: 100 +}); + +// Top-k sampling +await generator(prompt, { + top_k: 50, + temperature: 0.7, + max_new_tokens: 100 +}); + +// Top-p (nucleus) sampling +await generator(prompt, { + top_p: 0.95, + temperature: 0.7, + max_new_tokens: 100 +}); +``` + +## Model Selection + +Browse available text generation models on Hugging Face Hub: + +**https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending** + +### Selection Tips + +- **Small models (< 1B params)**: Fast, browser-friendly, use `dtype: 'q4'` +- **Medium models (1-3B params)**: Balanced quality/speed, use `dtype: 'q4'` or `fp16` +- **Large models (> 3B params)**: High quality, slower, best for Node.js with `dtype: 'fp16'` + +Check model cards for: +- Parameter count and model size +- Supported languages +- Benchmark scores +- License restrictions + +## Best Practices + +1. **Model Size**: Use quantized models (`q4`) for browsers, larger models (`fp16`) for servers +2. **Streaming**: Use streaming for better UX - shows progress and feels responsive +3. **Token Limits**: Set `max_new_tokens` to prevent runaway generation +4. **Temperature**: Tune based on use case (creative: 0.8-1.2, factual: 0.3-0.7) +5. **Memory**: Always call `dispose()` when done +6. 
**Caching**: Load model once, reuse for multiple requests + +## Related Documentation + +- [Pipeline Options](./PIPELINE_OPTIONS.md) - Configure pipeline loading +- [Configuration Reference](./CONFIGURATION.md) - Environment settings +- [Code Examples](./EXAMPLES.md) - More examples for different runtimes +- [Main Skill Guide](../SKILL.md) - Getting started guide diff --git a/skills/hugging-face-cli/SKILL.md b/skills/hugging-face-cli/SKILL.md index eb68c478..11665159 100644 --- a/skills/hugging-face-cli/SKILL.md +++ b/skills/hugging-face-cli/SKILL.md @@ -1,199 +1,194 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/hf-cli" name: hugging-face-cli -description: "The hf CLI provides direct terminal access to the Hugging Face Hub for downloading, uploading, and managing repositories, cache, and compute resources." -risk: safe -source: "https://github.com/huggingface/skills/tree/main/skills/hugging-face-cli" -date_added: "2026-02-27" +description: "Use the Hugging Face Hub CLI (`hf`) to download, upload, and manage models, datasets, and Spaces." +risk: unknown --- -# Hugging Face CLI +Install: `curl -LsSf https://hf.co/cli/install.sh | bash -s`. -The `hf` CLI provides direct terminal access to the Hugging Face Hub for downloading, uploading, and managing repositories, cache, and compute resources. +## When to Use -## When to Use This Skill +Use this skill when you need the `hf` CLI for Hub authentication, downloads, uploads, repo management, or basic compute operations. -Use this skill when: -- User needs to download models, datasets, or spaces -- Uploading files to Hub repositories -- Creating Hugging Face repositories -- Managing local cache -- Running compute jobs on HF infrastructure -- Working with Hugging Face Hub authentication +The Hugging Face Hub CLI tool `hf` is available. IMPORTANT: The `hf` command replaces the deprecated `huggingface-cli` command. -## Quick Command Reference +Use `hf --help` to view available functions. 
Note that auth commands are now all under `hf auth` e.g. `hf auth whoami`. -| Task | Command | -|------|---------| -| Login | `hf auth login` | -| Download model | `hf download ` | -| Download to folder | `hf download --local-dir ./path` | -| Upload folder | `hf upload . .` | -| Create repo | `hf repo create ` | -| Create tag | `hf repo tag create ` | -| Delete files | `hf repo-files delete ` | -| List cache | `hf cache ls` | -| Remove from cache | `hf cache rm ` | -| List models | `hf models ls` | -| Get model info | `hf models info ` | -| List datasets | `hf datasets ls` | -| Get dataset info | `hf datasets info ` | -| List spaces | `hf spaces ls` | -| Get space info | `hf spaces info ` | -| List endpoints | `hf endpoints ls` | -| Run GPU job | `hf jobs run --flavor a10g-small ` | -| Environment info | `hf env` | +Generated with `huggingface_hub v1.8.0`. Run `hf skills add --force` to regenerate. -## Core Commands +## Commands -### Authentication -```bash -hf auth login # Interactive login -hf auth login --token $HF_TOKEN # Non-interactive -hf auth whoami # Check current user -hf auth list # List stored tokens -hf auth switch # Switch between tokens -hf auth logout # Log out -``` +- `hf download REPO_ID` — Download files from the Hub. `[--type CHOICE --revision TEXT --include TEXT --exclude TEXT --cache-dir TEXT --local-dir TEXT --force-download --dry-run --quiet --max-workers INTEGER]` +- `hf env` — Print information about the environment. +- `hf sync` — Sync files between local directory and a bucket. `[--delete --ignore-times --ignore-sizes --plan TEXT --apply TEXT --dry-run --include TEXT --exclude TEXT --filter-from TEXT --existing --ignore-existing --verbose --quiet]` +- `hf upload REPO_ID` — Upload a file or a folder to the Hub. Recommended for single-commit uploads. 
`[--type CHOICE --revision TEXT --private --include TEXT --exclude TEXT --delete TEXT --commit-message TEXT --commit-description TEXT --create-pr --every FLOAT --quiet]` +- `hf upload-large-folder REPO_ID LOCAL_PATH` — Upload a large folder to the Hub. Recommended for resumable uploads. `[--type CHOICE --revision TEXT --private --include TEXT --exclude TEXT --num-workers INTEGER --no-report --no-bars]` +- `hf version` — Print information about the hf version. -### Download -```bash -hf download # Full repo to cache -hf download file.safetensors # Specific file -hf download --local-dir ./models # To local directory -hf download --include "*.safetensors" # Filter by pattern -hf download --repo-type dataset # Dataset -hf download --revision v1.0 # Specific version -``` +### `hf auth` — Manage authentication (login, logout, etc.). -### Upload -```bash -hf upload . . # Current dir to root -hf upload ./models /weights # Folder to path -hf upload model.safetensors # Single file -hf upload . . --repo-type dataset # Dataset -hf upload . . --create-pr # Create PR -hf upload . . --commit-message="msg" # Custom message -``` +- `hf auth list` — List all stored access tokens. +- `hf auth login` — Login using a token from huggingface.co/settings/tokens. `[--add-to-git-credential --force]` +- `hf auth logout` — Logout from a specific token. `[--token-name TEXT]` +- `hf auth switch` — Switch between access tokens. `[--token-name TEXT --add-to-git-credential]` +- `hf auth whoami` — Find out which huggingface.co account you are logged in as. 
`[--format CHOICE]` -### Repository Management -```bash -hf repo create # Create model repo -hf repo create --repo-type dataset # Create dataset -hf repo create --private # Private repo -hf repo create --repo-type space --space_sdk gradio # Gradio space -hf repo delete # Delete repo -hf repo move # Move repo to new namespace -hf repo settings --private true # Update repo settings -hf repo list --repo-type model # List repos -hf repo branch create release-v1 # Create branch -hf repo branch delete release-v1 # Delete branch -hf repo tag create v1.0 # Create tag -hf repo tag list # List tags -hf repo tag delete v1.0 # Delete tag -``` +### `hf buckets` — Commands to interact with buckets. -### Delete Files from Repo -```bash -hf repo-files delete folder/ # Delete folder -hf repo-files delete "*.txt" # Delete with pattern -``` +- `hf buckets cp SRC` — Copy a single file to or from a bucket. `[--quiet]` +- `hf buckets create BUCKET_ID` — Create a new bucket. `[--private --exist-ok --quiet]` +- `hf buckets delete BUCKET_ID` — Delete a bucket. `[--yes --missing-ok --quiet]` +- `hf buckets info BUCKET_ID` — Get info about a bucket. `[--quiet]` +- `hf buckets list` — List buckets or files in a bucket. `[--human-readable --tree --recursive --format CHOICE --quiet]` +- `hf buckets move FROM_ID TO_ID` — Move (rename) a bucket to a new name or namespace. +- `hf buckets remove ARGUMENT` — Remove files from a bucket. `[--recursive --yes --dry-run --include TEXT --exclude TEXT --quiet]` +- `hf buckets sync` — Sync files between local directory and a bucket. 
`[--delete --ignore-times --ignore-sizes --plan TEXT --apply TEXT --dry-run --include TEXT --exclude TEXT --filter-from TEXT --existing --ignore-existing --verbose --quiet]` -### Cache Management -```bash -hf cache ls # List cached repos -hf cache ls --revisions # Include individual revisions -hf cache rm model/gpt2 # Remove cached repo -hf cache rm # Remove cached revision -hf cache prune # Remove detached revisions -hf cache verify gpt2 # Verify checksums from cache -``` +### `hf cache` — Manage local cache directory. -### Browse Hub -```bash -# Models -hf models ls # List top trending models -hf models ls --search "MiniMax" --author MiniMaxAI # Search models -hf models ls --filter "text-generation" --limit 20 # Filter by task -hf models info MiniMaxAI/MiniMax-M2.1 # Get model info +- `hf cache list` — List cached repositories or revisions. `[--cache-dir TEXT --revisions --filter TEXT --format CHOICE --quiet --sort CHOICE --limit INTEGER]` +- `hf cache prune` — Remove detached revisions from the cache. `[--cache-dir TEXT --yes --dry-run]` +- `hf cache rm TARGETS` — Remove cached repositories or revisions. `[--cache-dir TEXT --yes --dry-run]` +- `hf cache verify REPO_ID` — Verify checksums for a single repo revision from cache or a local directory. `[--type CHOICE --revision TEXT --cache-dir TEXT --local-dir TEXT --fail-on-missing-files --fail-on-extra-files]` -# Datasets -hf datasets ls # List top trending datasets -hf datasets ls --search "finepdfs" --sort downloads # Search datasets -hf datasets info HuggingFaceFW/finepdfs # Get dataset info +### `hf collections` — Interact with collections on the Hub. -# Spaces -hf spaces ls # List top trending spaces -hf spaces ls --filter "3d" --limit 10 # Filter by 3D modeling spaces -hf spaces info enzostvs/deepsite # Get space info -``` +- `hf collections add-item COLLECTION_SLUG ITEM_ID ITEM_TYPE` — Add an item to a collection. 
`[--note TEXT --exists-ok]` +- `hf collections create TITLE` — Create a new collection on the Hub. `[--namespace TEXT --description TEXT --private --exists-ok]` +- `hf collections delete COLLECTION_SLUG` — Delete a collection from the Hub. `[--missing-ok]` +- `hf collections delete-item COLLECTION_SLUG ITEM_OBJECT_ID` — Delete an item from a collection. `[--missing-ok]` +- `hf collections info COLLECTION_SLUG` — Get info about a collection on the Hub. Output is in JSON format. +- `hf collections list` — List collections on the Hub. `[--owner TEXT --item TEXT --sort CHOICE --limit INTEGER --format CHOICE --quiet]` +- `hf collections update COLLECTION_SLUG` — Update a collection's metadata on the Hub. `[--title TEXT --description TEXT --position INTEGER --private --theme TEXT]` +- `hf collections update-item COLLECTION_SLUG ITEM_OBJECT_ID` — Update an item in a collection. `[--note TEXT --position INTEGER]` -### Jobs (Cloud Compute) -```bash -hf jobs run python:3.12 python script.py # Run on CPU -hf jobs run --flavor a10g-small # Run on GPU -hf jobs run --secrets HF_TOKEN # With HF token -hf jobs ps # List jobs -hf jobs logs # View logs -hf jobs cancel # Cancel job -``` +### `hf datasets` — Interact with datasets on the Hub. 
-### Inference Endpoints -```bash -hf endpoints ls # List endpoints -hf endpoints deploy my-endpoint \ - --repo openai/gpt-oss-120b \ - --framework vllm \ - --accelerator gpu \ - --instance-size x4 \ - --instance-type nvidia-a10g \ - --region us-east-1 \ - --vendor aws -hf endpoints describe my-endpoint # Show endpoint details -hf endpoints pause my-endpoint # Pause endpoint -hf endpoints resume my-endpoint # Resume endpoint -hf endpoints scale-to-zero my-endpoint # Scale to zero -hf endpoints delete my-endpoint --yes # Delete endpoint -``` -**GPU Flavors:** `cpu-basic`, `cpu-upgrade`, `cpu-xl`, `t4-small`, `t4-medium`, `l4x1`, `l4x4`, `l40sx1`, `l40sx4`, `l40sx8`, `a10g-small`, `a10g-large`, `a10g-largex2`, `a10g-largex4`, `a100-large`, `h100`, `h100x8` +- `hf datasets info DATASET_ID` — Get info about a dataset on the Hub. Output is in JSON format. `[--revision TEXT --expand TEXT]` +- `hf datasets list` — List datasets on the Hub. `[--search TEXT --author TEXT --filter TEXT --sort CHOICE --limit INTEGER --expand TEXT --format CHOICE --quiet]` +- `hf datasets parquet DATASET_ID` — List parquet file URLs available for a dataset. `[--subset TEXT --split TEXT --format CHOICE --quiet]` +- `hf datasets sql SQL` — Execute a raw SQL query with DuckDB against dataset parquet URLs. `[--format CHOICE]` -## Common Patterns +### `hf discussions` — Manage discussions and pull requests on the Hub. -### Download and Use Model Locally -```bash -# Download to local directory for deployment -hf download meta-llama/Llama-3.2-1B-Instruct --local-dir ./model +- `hf discussions close REPO_ID NUM` — Close a discussion or pull request. `[--comment TEXT --yes --type CHOICE]` +- `hf discussions comment REPO_ID NUM` — Comment on a discussion or pull request. `[--body TEXT --body-file PATH --type CHOICE]` +- `hf discussions create REPO_ID --title TEXT` — Create a new discussion or pull request on a repo. 
`[--body TEXT --body-file PATH --pull-request --type CHOICE]` +- `hf discussions diff REPO_ID NUM` — Show the diff of a pull request. `[--type CHOICE]` +- `hf discussions info REPO_ID NUM` — Get info about a discussion or pull request. `[--comments --diff --no-color --type CHOICE --format CHOICE]` +- `hf discussions list REPO_ID` — List discussions and pull requests on a repo. `[--status CHOICE --kind CHOICE --author TEXT --limit INTEGER --type CHOICE --format CHOICE --quiet]` +- `hf discussions merge REPO_ID NUM` — Merge a pull request. `[--comment TEXT --yes --type CHOICE]` +- `hf discussions rename REPO_ID NUM NEW_TITLE` — Rename a discussion or pull request. `[--type CHOICE]` +- `hf discussions reopen REPO_ID NUM` — Reopen a closed discussion or pull request. `[--comment TEXT --yes --type CHOICE]` -# Or use cache and get path -MODEL_PATH=$(hf download meta-llama/Llama-3.2-1B-Instruct --quiet) -``` +### `hf endpoints` — Manage Hugging Face Inference Endpoints. -### Publish Model/Dataset -```bash -hf repo create my-username/my-model --private -hf upload my-username/my-model ./output . --commit-message="Initial release" -hf repo tag create my-username/my-model v1.0 -``` +- `hf endpoints catalog deploy --repo TEXT` — Deploy an Inference Endpoint from the Model Catalog. `[--name TEXT --accelerator TEXT --namespace TEXT]` +- `hf endpoints catalog list` — List available Catalog models. +- `hf endpoints delete NAME` — Delete an Inference Endpoint permanently. `[--namespace TEXT --yes]` +- `hf endpoints deploy NAME --repo TEXT --framework TEXT --accelerator TEXT --instance-size TEXT --instance-type TEXT --region TEXT --vendor TEXT` — Deploy an Inference Endpoint from a Hub repository. `[--namespace TEXT --task TEXT --min-replica INTEGER --max-replica INTEGER --scale-to-zero-timeout INTEGER --scaling-metric CHOICE --scaling-threshold FLOAT]` +- `hf endpoints describe NAME` — Get information about an existing endpoint. 
`[--namespace TEXT]` +- `hf endpoints list` — Lists all Inference Endpoints for the given namespace. `[--namespace TEXT --format CHOICE --quiet]` +- `hf endpoints pause NAME` — Pause an Inference Endpoint. `[--namespace TEXT]` +- `hf endpoints resume NAME` — Resume an Inference Endpoint. `[--namespace TEXT --fail-if-already-running]` +- `hf endpoints scale-to-zero NAME` — Scale an Inference Endpoint to zero. `[--namespace TEXT]` +- `hf endpoints update NAME` — Update an existing endpoint. `[--namespace TEXT --repo TEXT --accelerator TEXT --instance-size TEXT --instance-type TEXT --framework TEXT --revision TEXT --task TEXT --min-replica INTEGER --max-replica INTEGER --scale-to-zero-timeout INTEGER --scaling-metric CHOICE --scaling-threshold FLOAT]` -### Sync Space with Local -```bash -hf upload my-username/my-space . . --repo-type space \ - --exclude="logs/*" --delete="*" --commit-message="Sync" -``` +### `hf extensions` — Manage hf CLI extensions. -### Check Cache Usage -```bash -hf cache ls # See all cached repos and sizes -hf cache rm model/gpt2 # Remove a repo from cache -``` +- `hf extensions exec NAME` — Execute an installed extension. +- `hf extensions install REPO_ID` — Install an extension from a public GitHub repository. `[--force]` +- `hf extensions list` — List installed extension commands. `[--format CHOICE --quiet]` +- `hf extensions remove NAME` — Remove an installed extension. +- `hf extensions search` — Search extensions available on GitHub (tagged with 'hf-extension' topic). `[--format CHOICE --quiet]` -## Key Options +### `hf jobs` — Run and manage Jobs on the Hub. 
-- `--repo-type`: `model` (default), `dataset`, `space` -- `--revision`: Branch, tag, or commit hash -- `--token`: Override authentication -- `--quiet`: Output only essential info (paths/URLs) +- `hf jobs cancel JOB_ID` — Cancel a Job `[--namespace TEXT]` +- `hf jobs hardware` — List available hardware options for Jobs +- `hf jobs inspect JOB_IDS` — Display detailed information on one or more Jobs `[--namespace TEXT]` +- `hf jobs logs JOB_ID` — Fetch the logs of a Job. `[--follow --tail INTEGER --namespace TEXT]` +- `hf jobs ps` — List Jobs. `[--all --namespace TEXT --filter TEXT --format TEXT --quiet]` +- `hf jobs run IMAGE COMMAND` — Run a Job. `[--env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --flavor CHOICE --timeout TEXT --detach --namespace TEXT]` +- `hf jobs scheduled delete SCHEDULED_JOB_ID` — Delete a scheduled Job. `[--namespace TEXT]` +- `hf jobs scheduled inspect SCHEDULED_JOB_IDS` — Display detailed information on one or more scheduled Jobs `[--namespace TEXT]` +- `hf jobs scheduled ps` — List scheduled Jobs `[--all --namespace TEXT --filter TEXT --format TEXT --quiet]` +- `hf jobs scheduled resume SCHEDULED_JOB_ID` — Resume (unpause) a scheduled Job. `[--namespace TEXT]` +- `hf jobs scheduled run SCHEDULE IMAGE COMMAND` — Schedule a Job. `[--suspend --concurrency --env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --flavor CHOICE --timeout TEXT --namespace TEXT]` +- `hf jobs scheduled suspend SCHEDULED_JOB_ID` — Suspend (pause) a scheduled Job. 
`[--namespace TEXT]` +- `hf jobs scheduled uv run SCHEDULE SCRIPT` — Run a UV script (local file or URL) on HF infrastructure `[--suspend --concurrency --image TEXT --flavor CHOICE --env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --timeout TEXT --namespace TEXT --with TEXT --python TEXT]` +- `hf jobs stats` — Fetch the resource usage statistics and metrics of Jobs `[--namespace TEXT]` +- `hf jobs uv run SCRIPT` — Run a UV script (local file or URL) on HF infrastructure `[--image TEXT --flavor CHOICE --env TEXT --secrets TEXT --label TEXT --volume TEXT --env-file TEXT --secrets-file TEXT --timeout TEXT --detach --namespace TEXT --with TEXT --python TEXT]` -## References +### `hf models` — Interact with models on the Hub. -- **Complete command reference**: See references/commands.md -- **Workflow examples**: See references/examples.md +- `hf models info MODEL_ID` — Get info about a model on the Hub. Output is in JSON format. `[--revision TEXT --expand TEXT]` +- `hf models list` — List models on the Hub. `[--search TEXT --author TEXT --filter TEXT --num-parameters TEXT --sort CHOICE --limit INTEGER --expand TEXT --format CHOICE --quiet]` + +### `hf papers` — Interact with papers on the Hub. + +- `hf papers info PAPER_ID` — Get info about a paper on the Hub. Output is in JSON format. +- `hf papers list` — List daily papers on the Hub. `[--date TEXT --week TEXT --month TEXT --submitter TEXT --sort CHOICE --limit INTEGER --format CHOICE --quiet]` +- `hf papers read PAPER_ID` — Read a paper as markdown. +- `hf papers search QUERY` — Search papers on the Hub. `[--limit INTEGER --format CHOICE --quiet]` + +### `hf repos` — Manage repos on the Hub. + +- `hf repos branch create REPO_ID BRANCH` — Create a new branch for a repo on the Hub. `[--revision TEXT --type CHOICE --exist-ok]` +- `hf repos branch delete REPO_ID BRANCH` — Delete a branch from a repo on the Hub. 
`[--type CHOICE]` +- `hf repos create REPO_ID` — Create a new repo on the Hub. `[--type CHOICE --space-sdk TEXT --private --public --protected --exist-ok --resource-group-id TEXT --flavor TEXT --storage TEXT --sleep-time INTEGER --secrets TEXT --secrets-file TEXT --env TEXT --env-file TEXT]` +- `hf repos delete REPO_ID` — Delete a repo from the Hub. This is an irreversible operation. `[--type CHOICE --missing-ok]` +- `hf repos delete-files REPO_ID PATTERNS` — Delete files from a repo on the Hub. `[--type CHOICE --revision TEXT --commit-message TEXT --commit-description TEXT --create-pr]` +- `hf repos duplicate FROM_ID` — Duplicate a repo on the Hub (model, dataset, or Space). `[--type CHOICE --private --public --protected --exist-ok --flavor TEXT --storage TEXT --sleep-time INTEGER --secrets TEXT --secrets-file TEXT --env TEXT --env-file TEXT]` +- `hf repos move FROM_ID TO_ID` — Move a repository from a namespace to another namespace. `[--type CHOICE]` +- `hf repos settings REPO_ID` — Update the settings of a repository. `[--gated CHOICE --private --public --protected --type CHOICE]` +- `hf repos tag create REPO_ID TAG` — Create a tag for a repo. `[--message TEXT --revision TEXT --type CHOICE]` +- `hf repos tag delete REPO_ID TAG` — Delete a tag for a repo. `[--yes --type CHOICE]` +- `hf repos tag list REPO_ID` — List tags for a repo. `[--type CHOICE]` + +### `hf skills` — Manage skills for AI assistants. + +- `hf skills add` — Download a skill and install it for an AI assistant. `[--claude --codex --cursor --opencode --global --dest PATH --force]` +- `hf skills preview` — Print the generated SKILL.md to stdout. + +### `hf spaces` — Interact with spaces on the Hub. + +- `hf spaces dev-mode SPACE_ID` — Enable or disable dev mode on a Space. `[--stop]` +- `hf spaces hot-reload SPACE_ID` — Hot-reload any Python file of a Space without a full rebuild + restart. 
`[--local-file TEXT --skip-checks --skip-summary]` +- `hf spaces info SPACE_ID` — Get info about a space on the Hub. Output is in JSON format. `[--revision TEXT --expand TEXT]` +- `hf spaces list` — List spaces on the Hub. `[--search TEXT --author TEXT --filter TEXT --sort CHOICE --limit INTEGER --expand TEXT --format CHOICE --quiet]` + +### `hf webhooks` — Manage webhooks on the Hub. + +- `hf webhooks create --watch TEXT` — Create a new webhook. `[--url TEXT --job-id TEXT --domain CHOICE --secret TEXT]` +- `hf webhooks delete WEBHOOK_ID` — Delete a webhook permanently. `[--yes]` +- `hf webhooks disable WEBHOOK_ID` — Disable an active webhook. +- `hf webhooks enable WEBHOOK_ID` — Enable a disabled webhook. +- `hf webhooks info WEBHOOK_ID` — Show full details for a single webhook as JSON. +- `hf webhooks list` — List all webhooks for the current user. `[--format CHOICE --quiet]` +- `hf webhooks update WEBHOOK_ID` — Update an existing webhook. Only provided options are changed. `[--url TEXT --watch TEXT --domain CHOICE --secret TEXT]` + +## Common options + +- `--format` — Output format: `--format json` (or `--json`) or `--format table` (default). +- `-q / --quiet` — Minimal output. +- `--revision` — Git revision id which can be a branch name, a tag, or a commit hash. +- `--token` — Use a User Access Token. Prefer setting `HF_TOKEN` env var instead of passing `--token`. +- `--type` — The type of repository (model, dataset, or space). + +## Mounting repos as local filesystems + +To mount Hub repositories or buckets as local filesystems — no download, no copy, no waiting — use `hf-mount`. Files are fetched on demand. 
GitHub: https://github.com/huggingface/hf-mount + +Install: `curl -fsSL https://raw.githubusercontent.com/huggingface/hf-mount/main/install.sh | sh` + +Some command examples: +- `hf-mount start repo openai-community/gpt2 /tmp/gpt2` — mount a repo (read-only) +- `hf-mount start --hf-token $HF_TOKEN bucket myuser/my-bucket /tmp/data` — mount a bucket (read-write) +- `hf-mount status` / `hf-mount stop /tmp/data` — list or unmount + +## Tips + +- Use `hf --help` for full options, descriptions, usage, and real-world examples +- Authenticate with `HF_TOKEN` env var (recommended) or with `--token` diff --git a/skills/hugging-face-community-evals/SKILL.md b/skills/hugging-face-community-evals/SKILL.md new file mode 100644 index 00000000..05bc57ab --- /dev/null +++ b/skills/hugging-face-community-evals/SKILL.md @@ -0,0 +1,213 @@ +--- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-community-evals" +name: hugging-face-community-evals +description: Run local evaluations for Hugging Face Hub models with inspect-ai or lighteval. +risk: unknown +--- + +# Overview + +## When to Use + +Use this skill for local model evaluation, backend selection, and GPU smoke tests outside the Hugging Face Jobs workflow. + +This skill is for **running evaluations against models on the Hugging Face Hub on local hardware**. + +It covers: +- `inspect-ai` with local inference +- `lighteval` with local inference +- choosing between `vllm`, Hugging Face Transformers, and `accelerate` +- smoke tests, task selection, and backend fallback strategy + +It does **not** cover: +- Hugging Face Jobs orchestration +- model-card or `model-index` edits +- README table extraction +- Artificial Analysis imports +- `.eval_results` generation or publishing +- PR creation or community-evals automation + +If the user wants to **run the same eval remotely on Hugging Face Jobs**, hand off to the `hugging-face-jobs` skill and pass it one of the local scripts in this skill. 
+ +If the user wants to **publish results into the community evals workflow**, stop after generating the evaluation run and hand off that publishing step to `~/code/community-evals`. + +> All paths below are relative to the directory containing this `SKILL.md`. + +# When To Use Which Script + +| Use case | Script | +|---|---| +| Local `inspect-ai` eval on a Hub model via inference providers | `scripts/inspect_eval_uv.py` | +| Local GPU eval with `inspect-ai` using `vllm` or Transformers | `scripts/inspect_vllm_uv.py` | +| Local GPU eval with `lighteval` using `vllm` or `accelerate` | `scripts/lighteval_vllm_uv.py` | +| Extra command patterns | `examples/USAGE_EXAMPLES.md` | + +# Prerequisites + +- Prefer `uv run` for local execution. +- Set `HF_TOKEN` for gated/private models. +- For local GPU runs, verify GPU access before starting: + +```bash +uv --version +printenv HF_TOKEN >/dev/null +nvidia-smi +``` + +If `nvidia-smi` is unavailable, either: +- use `scripts/inspect_eval_uv.py` for lighter provider-backed evaluation, or +- hand off to the `hugging-face-jobs` skill if the user wants remote compute. + +# Core Workflow + +1. Choose the evaluation framework. + - Use `inspect-ai` when you want explicit task control and inspect-native flows. + - Use `lighteval` when the benchmark is naturally expressed as a lighteval task string, especially leaderboard-style tasks. +2. Choose the inference backend. + - Prefer `vllm` for throughput on supported architectures. + - Use Hugging Face Transformers (`--backend hf`) or `accelerate` as compatibility fallbacks. +3. Start with a smoke test. + - `inspect-ai`: add `--limit 10` or similar. + - `lighteval`: add `--max-samples 10`. +4. Scale up only after the smoke test passes. +5. If the user wants remote execution, hand off to `hugging-face-jobs` with the same script + args. 
+ +# Quick Start + +## Option A: inspect-ai with local inference providers path + +Best when the model is already supported by Hugging Face Inference Providers and you want the lowest local setup overhead. + +```bash +uv run scripts/inspect_eval_uv.py \ + --model meta-llama/Llama-3.2-1B \ + --task mmlu \ + --limit 20 +``` + +Use this path when: +- you want a quick local smoke test +- you do not need direct GPU control +- the task already exists in `inspect-evals` + +## Option B: inspect-ai on Local GPU + +Best when you need to load the Hub model directly, use `vllm`, or fall back to Transformers for unsupported architectures. + +Local GPU: + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model meta-llama/Llama-3.2-1B \ + --task gsm8k \ + --limit 20 +``` + +Transformers fallback: + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model microsoft/phi-2 \ + --task mmlu \ + --backend hf \ + --trust-remote-code \ + --limit 20 +``` + +## Option C: lighteval on Local GPU + +Best when the task is naturally expressed as a `lighteval` task string, especially Open LLM Leaderboard style benchmarks. + +Local GPU: + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model meta-llama/Llama-3.2-3B-Instruct \ + --tasks "leaderboard|mmlu|5,leaderboard|gsm8k|5" \ + --max-samples 20 \ + --use-chat-template +``` + +`accelerate` fallback: + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model microsoft/phi-2 \ + --tasks "leaderboard|mmlu|5" \ + --backend accelerate \ + --trust-remote-code \ + --max-samples 20 +``` + +# Remote Execution Boundary + +This skill intentionally stops at **local execution and backend selection**. + +If the user wants to: +- run these scripts on Hugging Face Jobs +- pick remote hardware +- pass secrets to remote jobs +- schedule recurring runs +- inspect / cancel / monitor jobs + +then switch to the **`hugging-face-jobs`** skill and pass it one of these scripts plus the chosen arguments. 
+ +# Task Selection + +`inspect-ai` examples: +- `mmlu` +- `gsm8k` +- `hellaswag` +- `arc_challenge` +- `truthfulqa` +- `winogrande` +- `humaneval` + +`lighteval` task strings use `suite|task|num_fewshot`: +- `leaderboard|mmlu|5` +- `leaderboard|gsm8k|5` +- `leaderboard|arc_challenge|25` +- `lighteval|hellaswag|0` + +Multiple `lighteval` tasks can be comma-separated in `--tasks`. + +# Backend Selection + +- Prefer `inspect_vllm_uv.py --backend vllm` for fast GPU inference on supported architectures. +- Use `inspect_vllm_uv.py --backend hf` when `vllm` does not support the model. +- Prefer `lighteval_vllm_uv.py --backend vllm` for throughput on supported models. +- Use `lighteval_vllm_uv.py --backend accelerate` as the compatibility fallback. +- Use `inspect_eval_uv.py` when Inference Providers already cover the model and you do not need direct GPU control. + +# Hardware Guidance + +| Model size | Suggested local hardware | +|---|---| +| `< 3B` | consumer GPU / Apple Silicon / small dev GPU | +| `3B - 13B` | stronger local GPU | +| `13B+` | high-memory local GPU or hand off to `hugging-face-jobs` | + +For smoke tests, prefer cheaper local runs plus `--limit` or `--max-samples`. 
+ +# Troubleshooting + +- CUDA or vLLM OOM: + - reduce `--batch-size` + - reduce `--gpu-memory-utilization` + - switch to a smaller model for the smoke test + - if necessary, hand off to `hugging-face-jobs` +- Model unsupported by `vllm`: + - switch to `--backend hf` for `inspect-ai` + - switch to `--backend accelerate` for `lighteval` +- Gated/private repo access fails: + - verify `HF_TOKEN` +- Custom model code required: + - add `--trust-remote-code` + +# Examples + +See: +- `examples/USAGE_EXAMPLES.md` for local command patterns +- `scripts/inspect_eval_uv.py` +- `scripts/inspect_vllm_uv.py` +- `scripts/lighteval_vllm_uv.py` diff --git a/skills/hugging-face-community-evals/examples/.env.example b/skills/hugging-face-community-evals/examples/.env.example new file mode 100644 index 00000000..26d9b9b4 --- /dev/null +++ b/skills/hugging-face-community-evals/examples/.env.example @@ -0,0 +1,3 @@ +# Hugging Face Token (required for gated/private models) +# Get your token at: https://huggingface.co/settings/tokens +HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxx diff --git a/skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md b/skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md new file mode 100644 index 00000000..64c24334 --- /dev/null +++ b/skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md @@ -0,0 +1,101 @@ +# Usage Examples + +This document provides practical examples for **running evaluations locally** against Hugging Face Hub models. 
+ +## What this skill covers + +- `inspect-ai` local runs +- `inspect-ai` with `vllm` or Transformers backends +- `lighteval` local runs with `vllm` or `accelerate` +- smoke tests and backend fallback patterns + +## What this skill does NOT cover + +- `model-index` +- `.eval_results` +- community eval publication workflows +- model-card PR creation +- Hugging Face Jobs orchestration + +If you want to run these same scripts remotely, use the `hugging-face-jobs` skill and pass one of the scripts in `scripts/`. + +## Setup + +```bash +cd skills/hugging-face-community-evals +export HF_TOKEN=hf_xxx +uv --version +``` + +For local GPU runs: + +```bash +nvidia-smi +``` + +## inspect-ai examples + +### Quick smoke test + +```bash +uv run scripts/inspect_eval_uv.py \ + --model meta-llama/Llama-3.2-1B \ + --task mmlu \ + --limit 10 +``` + +### Local GPU with vLLM + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model meta-llama/Llama-3.1-8B-Instruct \ + --task gsm8k \ + --limit 20 +``` + +### Transformers fallback + +```bash +uv run scripts/inspect_vllm_uv.py \ + --model microsoft/phi-2 \ + --task mmlu \ + --backend hf \ + --trust-remote-code \ + --limit 20 +``` + +## lighteval examples + +### Single task + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model meta-llama/Llama-3.2-3B-Instruct \ + --tasks "leaderboard|mmlu|5" \ + --max-samples 20 +``` + +### Multiple tasks + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model meta-llama/Llama-3.2-3B-Instruct \ + --tasks "leaderboard|mmlu|5,leaderboard|gsm8k|5" \ + --max-samples 20 \ + --use-chat-template +``` + +### accelerate fallback + +```bash +uv run scripts/lighteval_vllm_uv.py \ + --model microsoft/phi-2 \ + --tasks "leaderboard|mmlu|5" \ + --backend accelerate \ + --trust-remote-code \ + --max-samples 20 +``` + +## Hand-off to Hugging Face Jobs + +When local hardware is not enough, switch to the `hugging-face-jobs` skill and run one of these scripts remotely.
Keep the script path and args; move the orchestration there. diff --git a/skills/hugging-face-community-evals/scripts/inspect_eval_uv.py b/skills/hugging-face-community-evals/scripts/inspect_eval_uv.py new file mode 100644 index 00000000..d398bc60 --- /dev/null +++ b/skills/hugging-face-community-evals/scripts/inspect_eval_uv.py @@ -0,0 +1,104 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "inspect-ai>=0.3.0", +# "inspect-evals", +# "openai", +# ] +# /// + +""" +Entry point script for running inspect-ai evaluations against Hugging Face inference providers. +""" + +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from pathlib import Path +from typing import Optional + + +def _inspect_evals_tasks_root() -> Optional[Path]: + """Return the installed inspect_evals package path if available.""" + try: + import inspect_evals + + return Path(inspect_evals.__file__).parent + except Exception: + return None + + +def _normalize_task(task: str) -> str: + """Allow lighteval-style `suite|task|shots` strings by keeping the task name.""" + if "|" in task: + parts = task.split("|") + if len(parts) >= 2 and parts[1]: + return parts[1] + return task + + +def main() -> None: + parser = argparse.ArgumentParser(description="Inspect-ai job runner") + parser.add_argument("--model", required=True, help="Model ID on Hugging Face Hub") + parser.add_argument("--task", required=True, help="inspect-ai task to execute") + parser.add_argument("--limit", type=int, default=None, help="Limit number of samples to evaluate") + parser.add_argument( + "--tasks-root", + default=None, + help="Optional path to inspect task files. 
Defaults to the installed inspect_evals package.", + ) + parser.add_argument( + "--sandbox", + default="local", + help="Sandbox backend to use (default: local for HF jobs without Docker).", + ) + args = parser.parse_args() + + # Ensure downstream libraries can read the token passed as a secret + hf_token = os.getenv("HF_TOKEN") + if hf_token: + os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token) + os.environ.setdefault("HF_HUB_TOKEN", hf_token) + + task = _normalize_task(args.task) + tasks_root = Path(args.tasks_root) if args.tasks_root else _inspect_evals_tasks_root() + if tasks_root and not tasks_root.exists(): + tasks_root = None + + cmd = [ + "inspect", + "eval", + task, + "--model", + f"hf-inference-providers/{args.model}", + "--log-level", + "info", + # Reduce batch size to avoid OOM errors (default is 32) + "--max-connections", + "1", + # Set a small positive temperature (HF doesn't allow temperature=0) + "--temperature", + "0.001", + ] + + if args.sandbox: + cmd.extend(["--sandbox", args.sandbox]) + + if args.limit: + cmd.extend(["--limit", str(args.limit)]) + + try: + subprocess.run(cmd, check=True, cwd=tasks_root) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + location = f" (cwd={tasks_root})" if tasks_root else "" + print(f"Evaluation failed with exit code {exc.returncode}{location}", file=sys.stderr) + raise + + +if __name__ == "__main__": + main() + diff --git a/skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py b/skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py new file mode 100644 index 00000000..f1454c5a --- /dev/null +++ b/skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py @@ -0,0 +1,306 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "inspect-ai>=0.3.0", +# "inspect-evals", +# "vllm>=0.4.0", +# "torch>=2.0.0", +# "transformers>=4.40.0", +# ] +# /// + +""" +Entry point script for running inspect-ai evaluations with vLLM or HuggingFace Transformers 
backend. + +This script runs evaluations on custom HuggingFace models using local GPU inference, +separate from inference provider scripts (which use external APIs). + +Usage (standalone): + uv run scripts/inspect_vllm_uv.py --model "meta-llama/Llama-3.2-1B" --task "mmlu" + +Model backends: + - vllm: Fast inference with vLLM (recommended for large models) + - hf: HuggingFace Transformers backend (broader model compatibility) +""" + +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from typing import Optional + + +def setup_environment() -> None: + """Configure environment variables for HuggingFace authentication.""" + hf_token = os.getenv("HF_TOKEN") + if hf_token: + os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token) + os.environ.setdefault("HF_HUB_TOKEN", hf_token) + + +def run_inspect_vllm( + model_id: str, + task: str, + limit: Optional[int] = None, + max_connections: int = 4, + temperature: float = 0.0, + tensor_parallel_size: int = 1, + gpu_memory_utilization: float = 0.8, + dtype: str = "auto", + trust_remote_code: bool = False, + log_level: str = "info", +) -> None: + """ + Run inspect-ai evaluation with vLLM backend. 
+ + Args: + model_id: HuggingFace model ID + task: inspect-ai task to execute (e.g., "mmlu", "gsm8k") + limit: Limit number of samples to evaluate + max_connections: Maximum concurrent connections + temperature: Sampling temperature + tensor_parallel_size: Number of GPUs for tensor parallelism + gpu_memory_utilization: GPU memory fraction + dtype: Data type (auto, float16, bfloat16) + trust_remote_code: Allow remote code execution + log_level: Logging level + """ + setup_environment() + + model_spec = f"vllm/{model_id}" + cmd = [ + "inspect", + "eval", + task, + "--model", + model_spec, + "--log-level", + log_level, + "--max-connections", + str(max_connections), + ] + + # vLLM supports temperature=0 unlike HF inference providers + cmd.extend(["--temperature", str(temperature)]) + + # Older inspect-ai CLI versions do not support --model-args; rely on defaults + # and let vLLM choose sensible settings for small models. + if tensor_parallel_size != 1: + cmd.extend(["--tensor-parallel-size", str(tensor_parallel_size)]) + if gpu_memory_utilization != 0.8: + cmd.extend(["--gpu-memory-utilization", str(gpu_memory_utilization)]) + if dtype != "auto": + cmd.extend(["--dtype", dtype]) + if trust_remote_code: + cmd.append("--trust-remote-code") + + if limit: + cmd.extend(["--limit", str(limit)]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def run_inspect_hf( + model_id: str, + task: str, + limit: Optional[int] = None, + max_connections: int = 1, + temperature: float = 0.001, + device: str = "auto", + dtype: str = "auto", + trust_remote_code: bool = False, + log_level: str = "info", +) -> None: + """ + Run inspect-ai evaluation with HuggingFace Transformers backend. + + Use this when vLLM doesn't support the model architecture. 
+ + Args: + model_id: HuggingFace model ID + task: inspect-ai task to execute + limit: Limit number of samples + max_connections: Maximum concurrent connections (keep low for memory) + temperature: Sampling temperature + device: Device to use (auto, cuda, cpu) + dtype: Data type + trust_remote_code: Allow remote code execution + log_level: Logging level + """ + setup_environment() + + model_spec = f"hf/{model_id}" + + cmd = [ + "inspect", + "eval", + task, + "--model", + model_spec, + "--log-level", + log_level, + "--max-connections", + str(max_connections), + "--temperature", + str(temperature), + ] + + if device != "auto": + cmd.extend(["--device", device]) + if dtype != "auto": + cmd.extend(["--dtype", dtype]) + if trust_remote_code: + cmd.append("--trust-remote-code") + + if limit: + cmd.extend(["--limit", str(limit)]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Run inspect-ai evaluations with vLLM or HuggingFace Transformers on custom models", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Run MMLU with vLLM backend + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu + + # Run with HuggingFace Transformers backend + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu --backend hf + + # Run with limited samples for testing + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu --limit 10 + + # Run on multiple GPUs with tensor parallelism + uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-70B --task mmlu --tensor-parallel-size 4 + +Available tasks (from inspect-evals): + - mmlu: Massive Multitask Language Understanding + - gsm8k: Grade 
School Math + - hellaswag: Common sense reasoning + - arc_challenge: AI2 Reasoning Challenge + - truthfulqa: TruthfulQA benchmark + - winogrande: Winograd Schema Challenge + - humaneval: Code generation (HumanEval) + + """, + ) + + parser.add_argument( + "--model", + required=True, + help="HuggingFace model ID (e.g., meta-llama/Llama-3.2-1B)", + ) + parser.add_argument( + "--task", + required=True, + help="inspect-ai task to execute (e.g., mmlu, gsm8k)", + ) + parser.add_argument( + "--backend", + choices=["vllm", "hf"], + default="vllm", + help="Model backend (default: vllm)", + ) + parser.add_argument( + "--limit", + type=int, + default=None, + help="Limit number of samples to evaluate", + ) + parser.add_argument( + "--max-connections", + type=int, + default=None, + help="Maximum concurrent connections (default: 4 for vllm, 1 for hf)", + ) + parser.add_argument( + "--temperature", + type=float, + default=None, + help="Sampling temperature (default: 0.0 for vllm, 0.001 for hf)", + ) + parser.add_argument( + "--tensor-parallel-size", + type=int, + default=1, + help="Number of GPUs for tensor parallelism (vLLM only, default: 1)", + ) + parser.add_argument( + "--gpu-memory-utilization", + type=float, + default=0.8, + help="GPU memory fraction to use (vLLM only, default: 0.8)", + ) + parser.add_argument( + "--dtype", + default="auto", + choices=["auto", "float16", "bfloat16", "float32"], + help="Data type for model weights (default: auto)", + ) + parser.add_argument( + "--device", + default="auto", + help="Device for HF backend (auto, cuda, cpu)", + ) + parser.add_argument( + "--trust-remote-code", + action="store_true", + help="Allow executing remote code from model repository", + ) + parser.add_argument( + "--log-level", + default="info", + choices=["debug", "info", "warning", "error"], + help="Logging level (default: info)", + ) + + args = parser.parse_args() + + if args.backend == "vllm": + run_inspect_vllm( + model_id=args.model, + task=args.task, + 
limit=args.limit, + max_connections=args.max_connections or 4, + temperature=args.temperature if args.temperature is not None else 0.0, + tensor_parallel_size=args.tensor_parallel_size, + gpu_memory_utilization=args.gpu_memory_utilization, + dtype=args.dtype, + trust_remote_code=args.trust_remote_code, + log_level=args.log_level, + ) + else: + run_inspect_hf( + model_id=args.model, + task=args.task, + limit=args.limit, + max_connections=args.max_connections or 1, + temperature=args.temperature if args.temperature is not None else 0.001, + device=args.device, + dtype=args.dtype, + trust_remote_code=args.trust_remote_code, + log_level=args.log_level, + ) + + +if __name__ == "__main__": + main() diff --git a/skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py b/skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py new file mode 100644 index 00000000..91ba83b3 --- /dev/null +++ b/skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py @@ -0,0 +1,297 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "lighteval[accelerate,vllm]>=0.6.0", +# "torch>=2.0.0", +# "transformers>=4.40.0", +# "accelerate>=0.30.0", +# "vllm>=0.4.0", +# ] +# /// + +""" +Entry point script for running lighteval evaluations with local GPU backends. + +This script runs evaluations using vLLM or accelerate on custom HuggingFace models. +It is separate from inference provider scripts and evaluates models directly on local hardware. 
+ +Usage (standalone): + uv run scripts/lighteval_vllm_uv.py --model "meta-llama/Llama-3.2-1B" --tasks "leaderboard|mmlu|5" + +""" + +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from typing import Optional + + +def setup_environment() -> None: + """Configure environment variables for HuggingFace authentication.""" + hf_token = os.getenv("HF_TOKEN") + if hf_token: + os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token) + os.environ.setdefault("HF_HUB_TOKEN", hf_token) + + +def run_lighteval_vllm( + model_id: str, + tasks: str, + output_dir: Optional[str] = None, + max_samples: Optional[int] = None, + batch_size: int = 1, + tensor_parallel_size: int = 1, + gpu_memory_utilization: float = 0.8, + dtype: str = "auto", + trust_remote_code: bool = False, + use_chat_template: bool = False, + system_prompt: Optional[str] = None, +) -> None: + """ + Run lighteval with vLLM backend for efficient GPU inference. + + Args: + model_id: HuggingFace model ID (e.g., "meta-llama/Llama-3.2-1B") + tasks: Task specification (e.g., "leaderboard|mmlu|5" or "lighteval|hellaswag|0") + output_dir: Directory for evaluation results + max_samples: Limit number of samples per task + batch_size: Batch size for evaluation + tensor_parallel_size: Number of GPUs for tensor parallelism + gpu_memory_utilization: GPU memory fraction to use (0.0-1.0) + dtype: Data type for model weights (auto, float16, bfloat16) + trust_remote_code: Allow executing remote code from model repo + use_chat_template: Apply chat template for conversational models + system_prompt: System prompt for chat models + """ + setup_environment() + + # Build lighteval vllm command + cmd = [ + "lighteval", + "vllm", + model_id, + tasks, + "--batch-size", str(batch_size), + "--tensor-parallel-size", str(tensor_parallel_size), + "--gpu-memory-utilization", str(gpu_memory_utilization), + "--dtype", dtype, + ] + + if output_dir: + cmd.extend(["--output-dir", output_dir]) + + if 
max_samples: + cmd.extend(["--max-samples", str(max_samples)]) + + if trust_remote_code: + cmd.append("--trust-remote-code") + + if use_chat_template: + cmd.append("--use-chat-template") + + if system_prompt: + cmd.extend(["--system-prompt", system_prompt]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def run_lighteval_accelerate( + model_id: str, + tasks: str, + output_dir: Optional[str] = None, + max_samples: Optional[int] = None, + batch_size: int = 1, + dtype: str = "bfloat16", + trust_remote_code: bool = False, + use_chat_template: bool = False, + system_prompt: Optional[str] = None, +) -> None: + """ + Run lighteval with accelerate backend for multi-GPU distributed inference. + + Use this backend when vLLM is not available or for models not supported by vLLM. 
+ + Args: + model_id: HuggingFace model ID + tasks: Task specification + output_dir: Directory for evaluation results + max_samples: Limit number of samples per task + batch_size: Batch size for evaluation + dtype: Data type for model weights + trust_remote_code: Allow executing remote code + use_chat_template: Apply chat template + system_prompt: System prompt for chat models + """ + setup_environment() + + # Build lighteval accelerate command + cmd = [ + "lighteval", + "accelerate", + model_id, + tasks, + "--batch-size", str(batch_size), + "--dtype", dtype, + ] + + if output_dir: + cmd.extend(["--output-dir", output_dir]) + + if max_samples: + cmd.extend(["--max-samples", str(max_samples)]) + + if trust_remote_code: + cmd.append("--trust-remote-code") + + if use_chat_template: + cmd.append("--use-chat-template") + + if system_prompt: + cmd.extend(["--system-prompt", system_prompt]) + + print(f"Running: {' '.join(cmd)}") + + try: + subprocess.run(cmd, check=True) + print("Evaluation complete.") + except subprocess.CalledProcessError as exc: + print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr) + sys.exit(exc.returncode) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Run lighteval evaluations with vLLM or accelerate backend on custom HuggingFace models", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Run MMLU evaluation with vLLM + uv run scripts/lighteval_vllm_uv.py --model meta-llama/Llama-3.2-1B --tasks "leaderboard|mmlu|5" + + # Run with accelerate backend instead of vLLM + uv run scripts/lighteval_vllm_uv.py --model meta-llama/Llama-3.2-1B --tasks "leaderboard|mmlu|5" --backend accelerate + + # Run with chat template for instruction-tuned models + uv run scripts/lighteval_vllm_uv.py --model meta-llama/Llama-3.2-1B-Instruct --tasks "leaderboard|mmlu|5" --use-chat-template + + # Run with limited samples for testing + uv run scripts/lighteval_vllm_uv.py --model 
meta-llama/Llama-3.2-1B --tasks "leaderboard|mmlu|5" --max-samples 10 + +Task format: + Tasks use the format: "suite|task|num_fewshot" + - leaderboard|mmlu|5 (MMLU with 5-shot) + - lighteval|hellaswag|0 (HellaSwag zero-shot) + - leaderboard|gsm8k|5 (GSM8K with 5-shot) + - Multiple tasks: "leaderboard|mmlu|5,leaderboard|gsm8k|5" + """, + ) + + parser.add_argument( + "--model", + required=True, + help="HuggingFace model ID (e.g., meta-llama/Llama-3.2-1B)", + ) + parser.add_argument( + "--tasks", + required=True, + help="Task specification (e.g., 'leaderboard|mmlu|5')", + ) + parser.add_argument( + "--backend", + choices=["vllm", "accelerate"], + default="vllm", + help="Inference backend to use (default: vllm)", + ) + parser.add_argument( + "--output-dir", + default=None, + help="Directory for evaluation results", + ) + parser.add_argument( + "--max-samples", + type=int, + default=None, + help="Limit number of samples per task (useful for testing)", + ) + parser.add_argument( + "--batch-size", + type=int, + default=1, + help="Batch size for evaluation (default: 1)", + ) + parser.add_argument( + "--tensor-parallel-size", + type=int, + default=1, + help="Number of GPUs for tensor parallelism (vLLM only, default: 1)", + ) + parser.add_argument( + "--gpu-memory-utilization", + type=float, + default=0.8, + help="GPU memory fraction to use (vLLM only, default: 0.8)", + ) + parser.add_argument( + "--dtype", + default="auto", + choices=["auto", "float16", "bfloat16", "float32"], + help="Data type for model weights (default: auto)", + ) + parser.add_argument( + "--trust-remote-code", + action="store_true", + help="Allow executing remote code from model repository", + ) + parser.add_argument( + "--use-chat-template", + action="store_true", + help="Apply chat template for instruction-tuned/chat models", + ) + parser.add_argument( + "--system-prompt", + default=None, + help="System prompt for chat models", + ) + + args = parser.parse_args() + + if args.backend == "vllm": + 
run_lighteval_vllm( + model_id=args.model, + tasks=args.tasks, + output_dir=args.output_dir, + max_samples=args.max_samples, + batch_size=args.batch_size, + tensor_parallel_size=args.tensor_parallel_size, + gpu_memory_utilization=args.gpu_memory_utilization, + dtype=args.dtype, + trust_remote_code=args.trust_remote_code, + use_chat_template=args.use_chat_template, + system_prompt=args.system_prompt, + ) + else: + run_lighteval_accelerate( + model_id=args.model, + tasks=args.tasks, + output_dir=args.output_dir, + max_samples=args.max_samples, + batch_size=args.batch_size, + dtype=args.dtype if args.dtype != "auto" else "bfloat16", + trust_remote_code=args.trust_remote_code, + use_chat_template=args.use_chat_template, + system_prompt=args.system_prompt, + ) + + +if __name__ == "__main__": + main() diff --git a/skills/hugging-face-dataset-viewer/SKILL.md b/skills/hugging-face-dataset-viewer/SKILL.md index 410eb832..624bc78d 100644 --- a/skills/hugging-face-dataset-viewer/SKILL.md +++ b/skills/hugging-face-dataset-viewer/SKILL.md @@ -1,127 +1,127 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-datasets" name: hugging-face-dataset-viewer -description: Use this skill for Hugging Face Dataset Viewer API workflows that fetch subset/split metadata, paginate rows, search text, apply filters, download parquet URLs, and read size or statistics. +description: Query Hugging Face datasets through the Dataset Viewer API for splits, rows, search, filters, and parquet links. risk: unknown -source: community --- - + # Hugging Face Dataset Viewer -Use this skill to execute read-only Dataset Viewer API calls for dataset exploration and extraction. - -## Core workflow - -1. Optionally validate dataset availability with `/is-valid`. -2. Resolve `config` + `split` with `/splits`. -3. Preview with `/first-rows`. -4. Paginate content with `/rows` using `offset` and `length` (max 100). -5. Use `/search` for text matching and `/filter` for row predicates. 
-6. Retrieve parquet links via `/parquet` and totals/metadata via `/size` and `/statistics`. - -## Defaults - -- Base URL: `https://datasets-server.huggingface.co` -- Default API method: `GET` -- Query params should be URL-encoded. -- `offset` is 0-based. -- `length` max is usually `100` for row-like endpoints. -- Gated/private datasets require `Authorization: Bearer `. - -## Dataset Viewer - -- `Validate dataset`: `/is-valid?dataset=` -- `List subsets and splits`: `/splits?dataset=` -- `Preview first rows`: `/first-rows?dataset=&config=&split=` -- `Paginate rows`: `/rows?dataset=&config=&split=&offset=&length=` -- `Search text`: `/search?dataset=&config=&split=&query=&offset=&length=` -- `Filter with predicates`: `/filter?dataset=&config=&split=&where=&orderby=&offset=&length=` -- `List parquet shards`: `/parquet?dataset=` -- `Get size totals`: `/size?dataset=` -- `Get column statistics`: `/statistics?dataset=&config=&split=` -- `Get Croissant metadata (if available)`: `/croissant?dataset=` - -Pagination pattern: - -```bash -curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=0&length=100" -curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=100&length=100" -``` - -When pagination is partial, use response fields such as `num_rows_total`, `num_rows_per_page`, and `partial` to drive continuation logic. - -Search/filter notes: - -- `/search` matches string columns (full-text style behavior is internal to the API). -- `/filter` requires predicate syntax in `where` and optional sort in `orderby`. -- Keep filtering and searches read-only and side-effect free. - -## Querying Datasets - -Use `npx parquetlens` with Hub parquet alias paths for SQL querying. 
- -Parquet alias shape: - -```text -hf://datasets//@~parquet///.parquet -``` - -Derive ``, ``, and `` from Dataset Viewer `/parquet`: - -```bash -curl -s "https://datasets-server.huggingface.co/parquet?dataset=cfahlgren1/hub-stats" \ - | jq -r '.parquet_files[] | "hf://datasets/\(.dataset)@~parquet/\(.config)/\(.split)/\(.filename)"' -``` - -Run SQL query: - -```bash -npx -y -p parquetlens -p @parquetlens/sql parquetlens \ - "hf://datasets//@~parquet///.parquet" \ - --sql "SELECT * FROM data LIMIT 20" -``` - -### SQL export - -- CSV: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.csv' (FORMAT CSV, HEADER, DELIMITER ',')"` -- JSON: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.json' (FORMAT JSON)"` -- Parquet: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.parquet' (FORMAT PARQUET)"` - -## Creating and Uploading Datasets - -Use one of these flows depending on dependency constraints. - -Zero local dependencies (Hub UI): - -- Create dataset repo in browser: `https://huggingface.co/new-dataset` -- Upload parquet files in the repo "Files and versions" page. -- Verify shards appear in Dataset Viewer: - -```bash -curl -s "https://datasets-server.huggingface.co/parquet?dataset=/" -``` - -Low dependency CLI flow (`npx @huggingface/hub` / `hfjs`): - -- Set auth token: - -```bash -export HF_TOKEN= -``` - -- Upload parquet folder to a dataset repo (auto-creates repo if missing): - -```bash -npx -y @huggingface/hub upload datasets// ./local/parquet-folder data -``` - -- Upload as private repo on creation: - -```bash -npx -y @huggingface/hub upload datasets// ./local/parquet-folder data --private -``` - -After upload, call `/parquet` to discover `//` values for querying with `@~parquet`. - - ## When to Use -Use this skill when tackling tasks related to its primary domain or functionality as described above. + +Use this skill when you need read-only exploration of a Hugging Face dataset through the Dataset Viewer API. 
+ +Use this skill to execute read-only Dataset Viewer API calls for dataset exploration and extraction. + +## Core workflow + +1. Optionally validate dataset availability with `/is-valid`. +2. Resolve `config` + `split` with `/splits`. +3. Preview with `/first-rows`. +4. Paginate content with `/rows` using `offset` and `length` (max 100). +5. Use `/search` for text matching and `/filter` for row predicates. +6. Retrieve parquet links via `/parquet` and totals/metadata via `/size` and `/statistics`. + +## Defaults + +- Base URL: `https://datasets-server.huggingface.co` +- Default API method: `GET` +- Query params should be URL-encoded. +- `offset` is 0-based. +- `length` max is usually `100` for row-like endpoints. +- Gated/private datasets require `Authorization: Bearer `. + +## Dataset Viewer + +- `Validate dataset`: `/is-valid?dataset=` +- `List subsets and splits`: `/splits?dataset=` +- `Preview first rows`: `/first-rows?dataset=&config=&split=` +- `Paginate rows`: `/rows?dataset=&config=&split=&offset=&length=` +- `Search text`: `/search?dataset=&config=&split=&query=&offset=&length=` +- `Filter with predicates`: `/filter?dataset=&config=&split=&where=&orderby=&offset=&length=` +- `List parquet shards`: `/parquet?dataset=` +- `Get size totals`: `/size?dataset=` +- `Get column statistics`: `/statistics?dataset=&config=&split=` +- `Get Croissant metadata (if available)`: `/croissant?dataset=` + +Pagination pattern: + +```bash +curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=0&length=100" +curl "https://datasets-server.huggingface.co/rows?dataset=stanfordnlp/imdb&config=plain_text&split=train&offset=100&length=100" +``` + +When pagination is partial, use response fields such as `num_rows_total`, `num_rows_per_page`, and `partial` to drive continuation logic. + +Search/filter notes: + +- `/search` matches string columns (full-text style behavior is internal to the API). 
+- `/filter` requires predicate syntax in `where` and optional sort in `orderby`. +- Keep filtering and searches read-only and side-effect free. + +## Querying Datasets + +Use `npx parquetlens` with Hub parquet alias paths for SQL querying. + +Parquet alias shape: + +```text +hf://datasets//@~parquet///.parquet +``` + +Derive ``, ``, and `` from Dataset Viewer `/parquet`: + +```bash +curl -s "https://datasets-server.huggingface.co/parquet?dataset=cfahlgren1/hub-stats" \ + | jq -r '.parquet_files[] | "hf://datasets/\(.dataset)@~parquet/\(.config)/\(.split)/\(.filename)"' +``` + +Run SQL query: + +```bash +npx -y -p parquetlens -p @parquetlens/sql parquetlens \ + "hf://datasets//@~parquet///.parquet" \ + --sql "SELECT * FROM data LIMIT 20" +``` + +### SQL export + +- CSV: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.csv' (FORMAT CSV, HEADER, DELIMITER ',')"` +- JSON: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.json' (FORMAT JSON)"` +- Parquet: `--sql "COPY (SELECT * FROM data LIMIT 1000) TO 'export.parquet' (FORMAT PARQUET)"` + +## Creating and Uploading Datasets + +Use one of these flows depending on dependency constraints. + +Zero local dependencies (Hub UI): + +- Create dataset repo in browser: `https://huggingface.co/new-dataset` +- Upload parquet files in the repo "Files and versions" page. +- Verify shards appear in Dataset Viewer: + +```bash +curl -s "https://datasets-server.huggingface.co/parquet?dataset=/" +``` + +Low dependency CLI flow (`npx @huggingface/hub` / `hfjs`): + +- Set auth token: + +```bash +export HF_TOKEN= +``` + +- Upload parquet folder to a dataset repo (auto-creates repo if missing): + +```bash +npx -y @huggingface/hub upload datasets// ./local/parquet-folder data +``` + +- Upload as private repo on creation: + +```bash +npx -y @huggingface/hub upload datasets// ./local/parquet-folder data --private +``` + +After upload, call `/parquet` to discover `//` values for querying with `@~parquet`. 
diff --git a/skills/hugging-face-gradio/SKILL.md b/skills/hugging-face-gradio/SKILL.md new file mode 100644 index 00000000..b15c3a39 --- /dev/null +++ b/skills/hugging-face-gradio/SKILL.md @@ -0,0 +1,304 @@ +--- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-gradio" +name: hugging-face-gradio +description: Build or edit Gradio apps, layouts, components, and chat interfaces in Python. +risk: unknown +--- + +# Gradio + +## When to Use + +Use this skill when a user wants a Gradio demo, UI prototype, or Python-based ML interface. + +Gradio is a Python library for building interactive web UIs and ML demos. This skill covers the core API, patterns, and examples. + +## Guides + +Detailed guides on specific topics (read these when relevant): + +- [Quickstart](https://www.gradio.app/guides/quickstart) +- [The Interface Class](https://www.gradio.app/guides/the-interface-class) +- [Blocks and Event Listeners](https://www.gradio.app/guides/blocks-and-event-listeners) +- [Controlling Layout](https://www.gradio.app/guides/controlling-layout) +- [More Blocks Features](https://www.gradio.app/guides/more-blocks-features) +- [Custom CSS and JS](https://www.gradio.app/guides/custom-CSS-and-JS) +- [Streaming Outputs](https://www.gradio.app/guides/streaming-outputs) +- [Streaming Inputs](https://www.gradio.app/guides/streaming-inputs) +- [Sharing Your App](https://www.gradio.app/guides/sharing-your-app) +- [Custom HTML Components](https://www.gradio.app/guides/custom-HTML-components) +- [Getting Started with the Python Client](https://www.gradio.app/guides/getting-started-with-the-python-client) +- [Getting Started with the JS Client](https://www.gradio.app/guides/getting-started-with-the-js-client) + +## Core Patterns + +**Interface** (high-level): wraps a function with input/output components. + +```python +import gradio as gr + +def greet(name): + return f"Hello {name}!" 
+ +gr.Interface(fn=greet, inputs="text", outputs="text").launch() +``` + +**Blocks** (low-level): flexible layout with explicit event wiring. + +```python +import gradio as gr + +with gr.Blocks() as demo: + name = gr.Textbox(label="Name") + output = gr.Textbox(label="Greeting") + btn = gr.Button("Greet") + btn.click(fn=lambda n: f"Hello {n}!", inputs=name, outputs=output) + +demo.launch() +``` + +**ChatInterface**: high-level wrapper for chatbot UIs. + +```python +import gradio as gr + +def respond(message, history): + return f"You said: {message}" + +gr.ChatInterface(fn=respond).launch() +``` + +## Key Component Signatures + +### `Textbox(value: str | I18nData | Callable | None = None, type: Literal['text', 'password', 'email'] = "text", lines: int = 1, max_lines: int | None = None, placeholder: str | I18nData | None = None, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, autofocus: bool = False, autoscroll: bool = True, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", text_align: Literal['left', 'right'] | None = None, rtl: bool = False, buttons: list[Literal['copy'] | Button] | None = None, max_length: int | None = None, submit_btn: str | bool | None = False, stop_btn: str | bool | None = False, html_attributes: InputHTMLAttributes | None = None)` +Creates a textarea for user to enter string input or display string output.. 
+
+### `Number(value: float | Callable | None = None, label: str | I18nData | None = None, placeholder: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", buttons: list[Button] | None = None, precision: int | None = None, minimum: float | None = None, maximum: float | None = None, step: float = 1)`
+Creates a numeric field for user to enter numbers as input or display numeric output.
+
+### `Slider(minimum: float = 0, maximum: float = 100, value: float | Callable | None = None, step: float | None = None, precision: int | None = None, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", randomize: bool = False, buttons: list[Literal['reset']] | None = None)`
+Creates a slider that ranges from {minimum} to {maximum} with a step size of {step}.
+
+### `Checkbox(value: bool | Callable = False, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", buttons: list[Button] | None = None)`
+Creates a checkbox that can be set to `True` or `False`.
+
+### `Dropdown(choices: Sequence[str | int | float | tuple[str, str | int | float]] | None = None, value: str | int | float | Sequence[str | int | float] | Callable | DefaultValue | None = DefaultValue(), type: Literal['value', 'index'] = "value", multiselect: bool | None = None, allow_custom_value: bool = False, max_choices: int | None = None, filterable: bool = True, label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", buttons: list[Button] | None = None)`
+Creates a dropdown of choices from which a single entry or multiple entries can be selected (as an input component) or displayed (as an output component).
+ +### `Radio(choices: Sequence[str | int | float | tuple[str, str | int | float]] | None = None, value: str | int | float | Callable | None = None, type: Literal['value', 'index'] = "value", label: str | I18nData | None = None, info: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", rtl: bool = False, buttons: list[Button] | None = None)` +Creates a set of (string or numeric type) radio buttons of which only one can be selected.. + +### `Image(value: str | PIL.Image.Image | np.ndarray | Callable | None = None, format: str = "webp", height: int | str | None = None, width: int | str | None = None, image_mode: Literal['1', 'L', 'P', 'RGB', 'RGBA', 'CMYK', 'YCbCr', 'LAB', 'HSV', 'I', 'F'] | None = "RGB", sources: list[Literal['upload', 'webcam', 'clipboard']] | Literal['upload', 'webcam', 'clipboard'] | None = None, type: Literal['numpy', 'pil', 'filepath'] = "numpy", label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, buttons: list[Literal['download', 'share', 'fullscreen'] | Button] | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, streaming: bool = False, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] 
| None = None, preserved_by_key: list[str] | str | None = "value", webcam_options: WebcamOptions | None = None, placeholder: str | None = None, watermark: WatermarkOptions | None = None)` +Creates an image component that can be used to upload images (as an input) or display images (as an output).. + +### `Audio(value: str | Path | tuple[int, np.ndarray] | Callable | None = None, sources: list[Literal['upload', 'microphone']] | Literal['upload', 'microphone'] | None = None, type: Literal['numpy', 'filepath'] = "numpy", label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, streaming: bool = False, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", format: Literal['wav', 'mp3'] | None = None, autoplay: bool = False, editable: bool = True, buttons: list[Literal['download', 'share'] | Button] | None = None, waveform_options: WaveformOptions | dict | None = None, loop: bool = False, recording: bool = False, subtitles: str | Path | list[dict[str, Any]] | None = None, playback_position: float = 0)` +Creates an audio component that can be used to upload/record audio (as an input) or display audio (as an output).. 
+ +### `Video(value: str | Path | Callable | None = None, format: str | None = None, sources: list[Literal['upload', 'webcam']] | Literal['upload', 'webcam'] | None = None, height: int | str | None = None, width: int | str | None = None, label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", webcam_options: WebcamOptions | None = None, include_audio: bool | None = None, autoplay: bool = False, buttons: list[Literal['download', 'share'] | Button] | None = None, loop: bool = False, streaming: bool = False, watermark: WatermarkOptions | None = None, subtitles: str | Path | list[dict[str, Any]] | None = None, playback_position: float = 0)` +Creates a video component that can be used to upload/record videos (as an input) or display videos (as an output). + +### `File(value: str | list[str] | Callable | None = None, file_count: Literal['single', 'multiple', 'directory'] = "single", file_types: list[str] | None = None, type: Literal['filepath', 'binary'] = "filepath", label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, height: int | str | float | None = None, interactive: bool | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] 
| None = None, preserved_by_key: list[str] | str | None = "value", allow_reordering: bool = False, buttons: list[Button] | None = None)` +Creates a file component that allows uploading one or more generic files (when used as an input) or displaying generic files or URLs for download (as output). + +### `Chatbot(value: list[MessageDict | Message] | Callable | None = None, label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, container: bool = True, scale: int | None = None, min_width: int = 160, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, autoscroll: bool = True, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", height: int | str | None = 400, resizable: bool = False, max_height: int | str | None = None, min_height: int | str | None = None, editable: Literal['user', 'all'] | None = None, latex_delimiters: list[dict[str, str | bool]] | None = None, rtl: bool = False, buttons: list[Literal['share', 'copy', 'copy_all'] | Button] | None = None, watermark: str | None = None, avatar_images: tuple[str | Path | None, str | Path | None] | None = None, sanitize_html: bool = True, render_markdown: bool = True, feedback_options: list[str] | tuple[str, ...] | None = ('Like', 'Dislike'), feedback_value: Sequence[str | None] | None = None, line_breaks: bool = True, layout: Literal['panel', 'bubble'] | None = None, placeholder: str | None = None, examples: list[ExampleMessage] | None = None, allow_file_downloads: = True, group_consecutive_messages: bool = True, allow_tags: list[str] | bool = True, reasoning_tags: list[tuple[str, str]] | None = None, like_user_message: bool = False)` +Creates a chatbot that displays user-submitted messages and responses. 
+ +### `Button(value: str | I18nData | Callable = "Run", every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, variant: Literal['primary', 'secondary', 'stop', 'huggingface'] = "secondary", size: Literal['sm', 'md', 'lg'] = "lg", icon: str | Path | None = None, link: str | None = None, link_target: Literal['_self', '_blank', '_parent', '_top'] = "_self", visible: bool | Literal['hidden'] = True, interactive: bool = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", scale: int | None = None, min_width: int | None = None)` +Creates a button that can be assigned arbitrary .click() events. + +### `Markdown(value: str | I18nData | Callable | None = None, label: str | I18nData | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool | None = None, rtl: bool = False, latex_delimiters: list[dict[str, str | bool]] | None = None, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", sanitize_html: bool = True, line_breaks: bool = False, header_links: bool = False, height: int | str | None = None, max_height: int | str | None = None, min_height: int | str | None = None, buttons: list[Literal['copy']] | None = None, container: bool = False, padding: bool = False)` +Used to render arbitrary Markdown output. 
+ +### `HTML(value: Any | Callable | None = None, label: str | I18nData | None = None, html_template: str = "${value}", css_template: str = "", js_on_load: str | None = "element.addEventListener('click', function() { trigger('click') });", apply_default_css: bool = True, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, show_label: bool = False, visible: bool | Literal['hidden'] = True, elem_id: str | None = None, elem_classes: list[str] | str | None = None, render: bool = True, key: int | str | tuple[int | str, ...] | None = None, preserved_by_key: list[str] | str | None = "value", min_height: int | None = None, max_height: int | None = None, container: bool = False, padding: bool = False, autoscroll: bool = False, buttons: list[Button] | None = None, server_functions: list[Callable] | None = None, props: Any)` +Creates a component with arbitrary HTML. + + +## Custom HTML Components + +If a task requires significant customization of an existing component or a component that doesn't exist in Gradio, you can create one with `gr.HTML`. It supports `html_template` (with `${}` JS expressions and `{{}}` Handlebars syntax), `css_template` for scoped styles, and `js_on_load` for interactivity — where `props.value` updates the component value and `trigger('event_name')` fires Gradio events. For reuse, subclass `gr.HTML` and define `api_info()` for API/MCP support. See the [full guide](https://www.gradio.app/guides/custom-HTML-components). + +Here's an example that shows how to create and use these kinds of components: + +```python +import gradio as gr + +class StarRating(gr.HTML): + def __init__(self, label, value=0, **kwargs): + html_template = """ +

${label} rating:

+ ${Array.from({length: 5}, (_, i) => ``).join('')} + """ + css_template = """ + img { height: 50px; display: inline-block; cursor: pointer; } + .faded { filter: grayscale(100%); opacity: 0.3; } + """ + js_on_load = """ + const imgs = element.querySelectorAll('img'); + imgs.forEach((img, index) => { + img.addEventListener('click', () => { + props.value = index + 1; + }); + }); + """ + super().__init__(value=value, label=label, html_template=html_template, css_template=css_template, js_on_load=js_on_load, **kwargs) + + def api_info(self): + return {"type": "integer", "minimum": 0, "maximum": 5} + + +with gr.Blocks() as demo: + gr.Markdown("# Restaurant Review") + food_rating = StarRating(label="Food", value=3) + service_rating = StarRating(label="Service", value=3) + ambience_rating = StarRating(label="Ambience", value=3) + average_btn = gr.Button("Calculate Average Rating") + rating_output = StarRating(label="Average", value=3) + def calculate_average(food, service, ambience): + return round((food + service + ambience) / 3) + average_btn.click( + fn=calculate_average, + inputs=[food_rating, service_rating, ambience_rating], + outputs=rating_output + ) + +demo.launch() +``` + +## Event Listeners + +All event listeners share the same signature: + +```python +component.event_name( + fn: Callable | None | Literal["decorator"] = "decorator", + inputs: Component | Sequence[Component] | set[Component] | None = None, + outputs: Component | Sequence[Component] | set[Component] | None = None, + api_name: str | None = None, + api_description: str | None | Literal[False] = None, + scroll_to_output: bool = False, + show_progress: Literal["full", "minimal", "hidden"] = "full", + show_progress_on: Component | Sequence[Component] | None = None, + queue: bool = True, + batch: bool = False, + max_batch_size: int = 4, + preprocess: bool = True, + postprocess: bool = True, + cancels: dict[str, Any] | list[dict[str, Any]] | None = None, + trigger_mode: Literal["once", "multiple", 
"always_last"] | None = None, + js: str | Literal[True] | None = None, + concurrency_limit: int | None | Literal["default"] = "default", + concurrency_id: str | None = None, + api_visibility: Literal["public", "private", "undocumented"] = "public", + time_limit: int | None = None, + stream_every: float = 0.5, + key: int | str | tuple[int | str, ...] | None = None, + validator: Callable | None = None, +) -> Dependency +``` + +Supported events per component: + +- **AnnotatedImage**: select +- **Audio**: stream, change, clear, play, pause, stop, pause, start_recording, pause_recording, stop_recording, upload, input +- **BarPlot**: select, double_click +- **BrowserState**: change +- **Button**: click +- **Chatbot**: change, select, like, retry, undo, example_select, option_select, clear, copy, edit +- **Checkbox**: change, input, select +- **CheckboxGroup**: change, input, select +- **ClearButton**: click +- **Code**: change, input, focus, blur +- **ColorPicker**: change, input, submit, focus, blur +- **Dataframe**: change, input, select, edit +- **Dataset**: click, select +- **DateTime**: change, submit +- **DeepLinkButton**: click +- **Dialogue**: change, input, submit +- **DownloadButton**: click +- **Dropdown**: change, input, select, focus, blur, key_up +- **DuplicateButton**: click +- **File**: change, select, clear, upload, delete, download +- **FileExplorer**: change, input, select +- **Gallery**: select, upload, change, delete, preview_close, preview_open +- **HTML**: change, input, click, double_click, submit, stop, edit, clear, play, pause, end, start_recording, pause_recording, stop_recording, focus, blur, upload, release, select, stream, like, example_select, option_select, load, key_up, apply, delete, tick, undo, retry, expand, collapse, download, copy +- **HighlightedText**: change, select +- **Image**: clear, change, stream, select, upload, input +- **ImageEditor**: clear, change, input, select, upload, apply +- **ImageSlider**: clear, change, stream, 
select, upload, input +- **JSON**: change +- **Label**: change, select +- **LinePlot**: select, double_click +- **LoginButton**: click +- **Markdown**: change, copy +- **Model3D**: change, upload, edit, clear +- **MultimodalTextbox**: change, input, select, submit, focus, blur, stop +- **Navbar**: change +- **Number**: change, input, submit, focus, blur +- **ParamViewer**: change, upload +- **Plot**: change +- **Radio**: select, change, input +- **ScatterPlot**: select, double_click +- **SimpleImage**: clear, change, upload +- **Slider**: change, input, release +- **State**: change +- **Textbox**: change, input, select, submit, focus, blur, stop, copy +- **Timer**: tick +- **UploadButton**: click, upload +- **Video**: change, clear, start_recording, stop_recording, stop, play, pause, end, upload, input + +## Prediction CLI + +The `gradio` CLI includes `info` and `predict` commands for interacting with Gradio apps programmatically. These are especially useful for coding agents that need to use Spaces in their workflows. + +### `gradio info` — Discover endpoints and parameters + +```bash +gradio info +``` + +Returns a JSON payload describing all endpoints, their parameters (with types and defaults), and return values. + +```bash +gradio info gradio/calculator +# { +# "/predict": { +# "parameters": [ +# {"name": "num1", "required": true, "default": null, "type": {"type": "number"}}, +# {"name": "operation", "required": true, "default": null, "type": {"enum": ["add", "subtract", "multiply", "divide"], "type": "string"}}, +# {"name": "num2", "required": true, "default": null, "type": {"type": "number"}} +# ], +# "returns": [{"name": "output", "type": {"type": "number"}}], +# "description": "" +# } +# } +``` + +File-type parameters show `"type": "filepath"` with instructions to include `"meta": {"_type": "gradio.FileData"}` — this signals the file will be uploaded to the remote server. 
+ +### `gradio predict` — Send predictions + +```bash +gradio predict +``` + +Returns a JSON object with named output keys. + +```bash +# Simple numeric prediction +gradio predict gradio/calculator /predict '{"num1": 5, "operation": "multiply", "num2": 3}' +# {"output": 15} + +# Image generation +gradio predict black-forest-labs/FLUX.2-dev /infer '{"prompt": "A majestic dragon"}' +# {"Result": "/tmp/gradio/.../image.webp", "Seed": 1117868604} + +# File upload (must include meta key) +gradio predict gradio/image_mod /predict '{"image": {"path": "/path/to/image.png", "meta": {"_type": "gradio.FileData"}}}' +# {"output": "/tmp/gradio/.../output.png"} +``` + +Both commands accept `--token` for accessing private Spaces. + +## Additional Reference + +- [End-to-End Examples](examples.md) — complete working apps diff --git a/skills/hugging-face-gradio/examples.md b/skills/hugging-face-gradio/examples.md new file mode 100644 index 00000000..b48c4cdc --- /dev/null +++ b/skills/hugging-face-gradio/examples.md @@ -0,0 +1,613 @@ +# Gradio End-to-End Examples + +Complete working Gradio apps for reference. + +## Blocks Essay Simple + +```python +import gradio as gr + +def change_textbox(choice): + if choice == "short": + return gr.Textbox(lines=2, visible=True) + elif choice == "long": + return gr.Textbox(lines=8, visible=True, value="Lorem ipsum dolor sit amet") + else: + return gr.Textbox(visible=False) + +with gr.Blocks() as demo: + radio = gr.Radio( + ["short", "long", "none"], label="What kind of essay would you like to write?" 
+ ) + text = gr.Textbox(lines=2, interactive=True, buttons=["copy"]) + radio.change(fn=change_textbox, inputs=radio, outputs=text) + +demo.launch() +``` + +## Blocks Flipper + +```python +import numpy as np +import gradio as gr + +def flip_text(x): + return x[::-1] + +def flip_image(x): + return np.fliplr(x) + +with gr.Blocks() as demo: + gr.Markdown("Flip text or image files using this demo.") + with gr.Tab("Flip Text"): + text_input = gr.Textbox() + text_output = gr.Textbox() + text_button = gr.Button("Flip") + with gr.Tab("Flip Image"): + with gr.Row(): + image_input = gr.Image() + image_output = gr.Image() + image_button = gr.Button("Flip") + + with gr.Accordion("Open for More!", open=False): + gr.Markdown("Look at me...") + temp_slider = gr.Slider( + 0, 1, + value=0.1, + step=0.1, + interactive=True, + label="Slide me", + ) + + text_button.click(flip_text, inputs=text_input, outputs=text_output) + image_button.click(flip_image, inputs=image_input, outputs=image_output) + +demo.launch() +``` + +## Blocks Form + +```python +import gradio as gr + +with gr.Blocks() as demo: + name_box = gr.Textbox(label="Name") + age_box = gr.Number(label="Age", minimum=0, maximum=100) + symptoms_box = gr.CheckboxGroup(["Cough", "Fever", "Runny Nose"]) + submit_btn = gr.Button("Submit") + + with gr.Column(visible=False) as output_col: + diagnosis_box = gr.Textbox(label="Diagnosis") + patient_summary_box = gr.Textbox(label="Patient Summary") + + def submit(name, age, symptoms): + return { + submit_btn: gr.Button(visible=False), + output_col: gr.Column(visible=True), + diagnosis_box: "covid" if "Cough" in symptoms else "flu", + patient_summary_box: f"{name}, {age} y/o", + } + + submit_btn.click( + submit, + [name_box, age_box, symptoms_box], + [submit_btn, diagnosis_box, patient_summary_box, output_col], + ) + +demo.launch() +``` + +## Blocks Hello + +```python +import gradio as gr + +def welcome(name): + return f"Welcome to Gradio, {name}!" 
+ +with gr.Blocks() as demo: + gr.Markdown( + """ + # Hello World! + Start typing below to see the output. + """) + inp = gr.Textbox(placeholder="What is your name?") + out = gr.Textbox() + inp.change(welcome, inp, out) + +demo.launch() +``` + +## Blocks Layout + +```python +import gradio as gr + +demo = gr.Blocks() + +with demo: + with gr.Row(): + gr.Image(interactive=True, scale=2) + gr.Image() + with gr.Row(): + gr.Textbox(label="Text") + gr.Number(label="Count", scale=2) + gr.Radio(choices=["One", "Two"]) + with gr.Row(): + gr.Button("500", scale=0, min_width=500) + gr.Button("A", scale=0) + gr.Button("grow") + with gr.Row(): + gr.Textbox() + gr.Textbox() + gr.Button() + with gr.Row(): + with gr.Row(): + with gr.Column(): + gr.Textbox(label="Text") + gr.Number(label="Count") + gr.Radio(choices=["One", "Two"]) + gr.Image() + with gr.Column(): + gr.Image(interactive=True) + gr.Image() + gr.Image() + gr.Textbox(label="Text") + gr.Number(label="Count") + gr.Radio(choices=["One", "Two"]) + +demo.launch() +``` + +## Calculator + +```python +import gradio as gr + +def calculator(num1, operation, num2): + if operation == "add": + return num1 + num2 + elif operation == "subtract": + return num1 - num2 + elif operation == "multiply": + return num1 * num2 + elif operation == "divide": + if num2 == 0: + raise gr.Error("Cannot divide by zero!") + return num1 / num2 + +demo = gr.Interface( + calculator, + [ + "number", + gr.Radio(["add", "subtract", "multiply", "divide"]), + "number" + ], + "number", + examples=[ + [45, "add", 3], + [3.14, "divide", 2], + [144, "multiply", 2.5], + [0, "subtract", 1.2], + ], + title="Toy Calculator", + description="Here's a sample toy calculator.", + api_name="predict" +) + +demo.launch() +``` + +## Chatbot Simple + +```python +import gradio as gr +import random +import time + +with gr.Blocks() as demo: + chatbot = gr.Chatbot() + msg = gr.Textbox() + clear = gr.ClearButton([msg, chatbot]) + + def respond(message, chat_history): + bot_message 
= random.choice(["How are you?", "Today is a great day", "I'm very hungry"]) + chat_history.append({"role": "user", "content": message}) + chat_history.append({"role": "assistant", "content": bot_message}) + time.sleep(2) + return "", chat_history + + msg.submit(respond, [msg, chatbot], [msg, chatbot]) + +demo.launch() +``` + +## Chatbot Streaming + +```python +import gradio as gr +import random +import time + +with gr.Blocks() as demo: + chatbot = gr.Chatbot() + msg = gr.Textbox() + clear = gr.Button("Clear") + + def user(user_message, history: list): + return "", history + [{"role": "user", "content": user_message}] + + def bot(history: list): + bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"]) + history.append({"role": "assistant", "content": ""}) + for character in bot_message: + history[-1]['content'] += character + time.sleep(0.05) + yield history + + msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then( + bot, chatbot, chatbot + ) + clear.click(lambda: None, None, chatbot, queue=False) + +demo.launch() +``` + +## Custom Css + +```python +import gradio as gr + +with gr.Blocks() as demo: + with gr.Column(elem_classes="cool-col"): + gr.Markdown("### Gradio Demo with Custom CSS", elem_classes="darktest") + gr.Markdown( + elem_classes="markdown", + value="Resize the browser window to see the CSS media query in action.", + ) + +if __name__ == "__main__": + demo.launch(css_paths=["demo/custom_css/custom_css.css"]) +``` + +## Fake Diffusion + +```python +import gradio as gr +import numpy as np +import time + +def fake_diffusion(steps): + rng = np.random.default_rng() + for i in range(steps): + time.sleep(1) + image = rng.random(size=(600, 600, 3)) + yield image + image = np.ones((1000,1000,3), np.uint8) + image[:] = [255, 124, 0] + yield image + +demo = gr.Interface(fake_diffusion, + inputs=gr.Slider(1, 10, 3, step=1), + outputs="image", + api_name="predict") + +demo.launch() +``` + +## Hello World + +```python +import 
gradio as gr + + +def greet(name): + return "Hello " + name + "!" + + +demo = gr.Interface(fn=greet, inputs="textbox", outputs="textbox", api_name="predict") + +demo.launch() +``` + +## Image Editor + +```python +import gradio as gr +import time + + +def sleep(im): + time.sleep(5) + return [im["background"], im["layers"][0], im["layers"][1], im["composite"]] + + +def predict(im): + return im["composite"] + + +with gr.Blocks() as demo: + with gr.Row(): + im = gr.ImageEditor( + type="numpy", + ) + im_preview = gr.Image() + n_upload = gr.Number(0, label="Number of upload events", step=1) + n_change = gr.Number(0, label="Number of change events", step=1) + n_input = gr.Number(0, label="Number of input events", step=1) + + im.upload(lambda x: x + 1, outputs=n_upload, inputs=n_upload) + im.change(lambda x: x + 1, outputs=n_change, inputs=n_change) + im.input(lambda x: x + 1, outputs=n_input, inputs=n_input) + im.change(predict, outputs=im_preview, inputs=im, show_progress="hidden") + +demo.launch() +``` + +## On Listener Decorator + +```python +import gradio as gr + +with gr.Blocks() as demo: + name = gr.Textbox(label="Name") + output = gr.Textbox(label="Output Box") + greet_btn = gr.Button("Greet") + + @gr.on(triggers=[name.submit, greet_btn.click], inputs=name, outputs=output) + def greet(name): + return "Hello " + name + "!" 
+ +demo.launch() +``` + +## Render Merge + +```python +import gradio as gr +import time + +with gr.Blocks() as demo: + text_count = gr.Slider(1, 5, value=1, step=1, label="Textbox Count") + + @gr.render(inputs=text_count) + def render_count(count): + boxes = [] + for i in range(count): + box = gr.Textbox(label=f"Box {i}") + boxes.append(box) + + def merge(*args): + time.sleep(0.2) # simulate a delay + return " ".join(args) + + merge_btn.click(merge, boxes, output) + + def clear(): + time.sleep(0.2) # simulate a delay + return [" "] * count + + clear_btn.click(clear, None, boxes) + + def countup(): + time.sleep(0.2) # simulate a delay + return list(range(count)) + + count_btn.click(countup, None, boxes, queue=False) + + with gr.Row(): + merge_btn = gr.Button("Merge") + clear_btn = gr.Button("Clear") + count_btn = gr.Button("Count") + + output = gr.Textbox() + +demo.launch() +``` + +## Reverse Audio 2 + +```python +import gradio as gr +import numpy as np + +def reverse_audio(audio): + sr, data = audio + return (sr, np.flipud(data)) + +demo = gr.Interface(fn=reverse_audio, + inputs="microphone", + outputs="audio", api_name="predict") + +demo.launch() +``` + +## Sepia Filter + +```python +import numpy as np +import gradio as gr + +def sepia(input_img): + sepia_filter = np.array([ + [0.393, 0.769, 0.189], + [0.349, 0.686, 0.168], + [0.272, 0.534, 0.131] + ]) + sepia_img = input_img.dot(sepia_filter.T) + sepia_img /= sepia_img.max() + return sepia_img + +demo = gr.Interface(sepia, gr.Image(), "image", api_name="predict") +demo.launch() +``` + +## Sort Records + +```python +import gradio as gr + +def sort_records(records): + return records.sort("Quantity") + +demo = gr.Interface( + sort_records, + gr.Dataframe( + headers=["Item", "Quantity"], + datatype=["str", "number"], + row_count=3, + column_count=2, + column_limits=(2, 2), + type="polars" + ), + "dataframe", + description="Sort by Quantity" +) + +demo.launch() +``` + +## Streaming Simple + +```python +import gradio 
as gr + +with gr.Blocks() as demo: + with gr.Row(): + with gr.Column(): + input_img = gr.Image(label="Input", sources="webcam") + with gr.Column(): + output_img = gr.Image(label="Output") + input_img.stream(lambda s: s, input_img, output_img, time_limit=15, stream_every=0.1, concurrency_limit=30) + +if __name__ == "__main__": + + demo.launch() +``` + +## Tabbed Interface Lite + +```python +import gradio as gr + +hello_world = gr.Interface(lambda name: "Hello " + name, "text", "text", api_name="predict") +bye_world = gr.Interface(lambda name: "Bye " + name, "text", "text", api_name="predict") +chat = gr.ChatInterface(lambda *args: "Hello " + args[0], api_name="chat") + +demo = gr.TabbedInterface([hello_world, bye_world, chat], ["Hello World", "Bye World", "Chat"]) + +demo.launch() +``` + +## Tax Calculator + +```python +import gradio as gr + +def tax_calculator(income, marital_status, assets): + tax_brackets = [(10, 0), (25, 8), (60, 12), (120, 20), (250, 30)] + total_deductible = sum(cost for cost, deductible in zip(assets["Cost"], assets["Deductible"]) if deductible) + taxable_income = income - total_deductible + + total_tax = 0 + for bracket, rate in tax_brackets: + if taxable_income > bracket: + total_tax += (taxable_income - bracket) * rate / 100 + + if marital_status == "Married": + total_tax *= 0.75 + elif marital_status == "Divorced": + total_tax *= 0.8 + + return round(total_tax) + +demo = gr.Interface( + tax_calculator, + [ + "number", + gr.Radio(["Single", "Married", "Divorced"]), + gr.Dataframe( + headers=["Item", "Cost", "Deductible"], + datatype=["str", "number", "bool"], + label="Assets Purchased this Year", + ), + ], + gr.Number(label="Tax due"), + examples=[ + [10000, "Married", [["Suit", 5000, True], ["Laptop (for work)", 800, False], ["Car", 1800, True]]], + [80000, "Single", [["Suit", 800, True], ["Watch", 1800, True], ["Food", 800, True]]], + ], + live=True, + api_name="predict" +) + +demo.launch() +``` + +## Timer Simple + +```python +import 
gradio as gr +import random +import time + +with gr.Blocks() as demo: + timer = gr.Timer(1) + timestamp = gr.Number(label="Time") + timer.tick(lambda: round(time.time()), outputs=timestamp, api_name="timestamp") + + number = gr.Number(lambda: random.randint(1, 10), every=timer, label="Random Number") + with gr.Row(): + gr.Button("Start").click(lambda: gr.Timer(active=True), None, timer) + gr.Button("Stop").click(lambda: gr.Timer(active=False), None, timer) + gr.Button("Go Fast").click(lambda: 0.2, None, timer) + +if __name__ == "__main__": + demo.launch() +``` + +## Variable Outputs + +```python +import gradio as gr + +max_textboxes = 10 + +def variable_outputs(k): + k = int(k) + return [gr.Textbox(visible=True)]*k + [gr.Textbox(visible=False)]*(max_textboxes-k) + +with gr.Blocks() as demo: + s = gr.Slider(1, max_textboxes, value=max_textboxes, step=1, label="How many textboxes to show:") + textboxes = [] + for i in range(max_textboxes): + t = gr.Textbox(f"Textbox {i}") + textboxes.append(t) + + s.change(variable_outputs, s, textboxes) + +if __name__ == "__main__": + demo.launch() +``` + +## Video Identity + +```python +import gradio as gr +from gradio.media import get_video + +def video_identity(video): + return video + +# get_video() returns file paths to sample media included with Gradio +demo = gr.Interface(video_identity, + gr.Video(), + "playable_video", + examples=[ + get_video("world.mp4") + ], + cache_examples=True, + api_name="predict",) + +demo.launch() +``` diff --git a/skills/hugging-face-jobs/SKILL.md b/skills/hugging-face-jobs/SKILL.md index 9da285b8..ed9f90f0 100644 --- a/skills/hugging-face-jobs/SKILL.md +++ b/skills/hugging-face-jobs/SKILL.md @@ -1,9 +1,9 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-jobs" name: hugging-face-jobs -description: "Run any workload on fully managed Hugging Face infrastructure. 
No local setup required—jobs run on cloud CPUs, GPUs, or TPUs and can persist results to the Hugging Face Hub." -risk: safe -source: "https://github.com/huggingface/skills/tree/main/skills/hugging-face-jobs" -date_added: "2026-02-27" +description: Run workloads on Hugging Face Jobs with managed CPUs, GPUs, TPUs, secrets, and Hub persistence. +license: Complete terms in LICENSE.txt +risk: unknown --- # Running Workloads on Hugging Face Jobs @@ -66,12 +66,15 @@ Before starting any job, verify: **How to provide tokens:** ```python -{ - "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Recommended: automatic token -} +# hf_jobs MCP tool — $HF_TOKEN is auto-replaced with real token: +{"secrets": {"HF_TOKEN": "$HF_TOKEN"}} + +# HfApi().run_uv_job() — MUST pass actual token: +from huggingface_hub import get_token +secrets={"HF_TOKEN": get_token()} ``` -**⚠️ CRITICAL:** The `$HF_TOKEN` placeholder is automatically replaced with your logged-in token. Never hardcode tokens in scripts. +**⚠️ CRITICAL:** The `$HF_TOKEN` placeholder is ONLY auto-replaced by the `hf_jobs` MCP tool. When using `HfApi().run_uv_job()`, you MUST pass the real token via `get_token()`. Passing the literal string `"$HF_TOKEN"` results in a 9-character invalid token and 401 errors. ## Token Usage Guide @@ -539,9 +542,12 @@ requests.post("https://your-api.com/results", json=results) **In job submission:** ```python -{ - "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Enables authentication -} +# hf_jobs MCP tool: +{"secrets": {"HF_TOKEN": "$HF_TOKEN"}} # auto-replaced + +# HfApi().run_uv_job(): +from huggingface_hub import get_token +secrets={"HF_TOKEN": get_token()} # must pass real token ``` **In script:** @@ -560,7 +566,7 @@ api.upload_file(...) 
Before submitting: - [ ] Results persistence method chosen -- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` if using Hub +- [ ] Token in secrets if using Hub (MCP: `"$HF_TOKEN"`, Python API: `get_token()`) - [ ] Script handles missing token gracefully - [ ] Test persistence path works @@ -950,7 +956,7 @@ hf_jobs("uv", { ### Hub Push Failures **Fix:** -1. Add to job: `secrets={"HF_TOKEN": "$HF_TOKEN"}` +1. Add token to secrets: MCP uses `"$HF_TOKEN"` (auto-replaced), Python API uses `get_token()` (must pass real token) 2. Verify token in script: `assert "HF_TOKEN" in os.environ` 3. Check token permissions 4. Verify repo exists or can be created @@ -969,7 +975,7 @@ Add to PEP 723 header: **Fix:** 1. Check `hf_whoami()` works locally -2. Verify `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +2. Verify token in secrets — MCP: `"$HF_TOKEN"`, Python API: `get_token()` (NOT `"$HF_TOKEN"`) 3. Re-login: `hf auth login` 4. Check token has required permissions @@ -1017,7 +1023,7 @@ Add to PEP 723 header: 2. **Jobs are asynchronous** - Don't wait/poll; let user check when ready 3. **Always set timeout** - Default 30 min may be insufficient; set appropriate timeout 4. **Always persist results** - Environment is ephemeral; without persistence, all work is lost -5. **Use tokens securely** - Always use `secrets={"HF_TOKEN": "$HF_TOKEN"}` for Hub operations +5. **Use tokens securely** - MCP: `secrets={"HF_TOKEN": "$HF_TOKEN"}`, Python API: `secrets={"HF_TOKEN": get_token()}` — `"$HF_TOKEN"` only works with MCP tool 6. **Choose appropriate hardware** - Start small, scale up based on needs (see hardware guide) 7. **Use UV scripts** - Default to `hf_jobs("uv", {...})` with inline scripts for Python workloads 8. 
**Handle authentication** - Verify tokens are available before Hub operations @@ -1033,6 +1039,7 @@ Add to PEP 723 header: | List jobs | `hf_jobs("ps")` | `hf jobs ps` | `list_jobs()` | | View logs | `hf_jobs("logs", {...})` | `hf jobs logs ` | `fetch_job_logs(job_id)` | | Cancel job | `hf_jobs("cancel", {...})` | `hf jobs cancel ` | `cancel_job(job_id)` | -| Schedule UV | `hf_jobs("scheduled uv", {...})` | - | `create_scheduled_uv_job()` | -| Schedule Docker | `hf_jobs("scheduled run", {...})` | - | `create_scheduled_job()` | - +| Schedule UV | `hf_jobs("scheduled uv", {...})` | `hf jobs scheduled uv run SCHEDULE script.py` | `create_scheduled_uv_job()` | +| Schedule Docker | `hf_jobs("scheduled run", {...})` | `hf jobs scheduled run SCHEDULE image cmd` | `create_scheduled_job()` | +| List scheduled | `hf_jobs("scheduled ps")` | `hf jobs scheduled ps` | `list_scheduled_jobs()` | +| Delete scheduled | `hf_jobs("scheduled delete", {...})` | `hf jobs scheduled delete ` | `delete_scheduled_job()` | diff --git a/skills/hugging-face-jobs/index.html b/skills/hugging-face-jobs/index.html new file mode 100644 index 00000000..6db24014 --- /dev/null +++ b/skills/hugging-face-jobs/index.html @@ -0,0 +1,216 @@ + + + + + + hf-jobs - Run Workloads on Hugging Face Jobs + + + +
+

Agent Skill : hf-jobs

+ +
+

Run any workload on Hugging Face Jobs.

+

Use this skill when you want to run GPU/CPU workloads (batch inference, synthetic data generation, dataset stats, experiments) on Hugging Face Jobs, with correct token handling and result persistence back to the Hub.

+
+ + + +
+

Overview

+

This skill focuses on running real workloads via Hugging Face Jobs. It includes ready-to-run UV scripts and guides for authentication (HF tokens), secrets vs env vars, timeouts, hardware selection, and pushing results to the Hub.

+
+ +
+

Core Documentation

+
    +
  • + SKILL.md +
    hf-jobs/SKILL.md
    +
    Complete skill documentation (how to submit jobs, tokens/secrets, timeouts, persistence, and how to use the bundled scripts)
    +
  • +
+
+ +
+

References

+
    +
  • + token_usage.md +
    hf-jobs/references/token_usage.md
    +
    Token best practices: secrets vs env, permissions, common errors (401/403), and secure patterns
    +
  • +
  • + hub_saving.md +
    hf-jobs/references/hub_saving.md
    +
    How to persist results: push datasets/models/files to the Hub (ephemeral job filesystem)
    +
  • +
  • + hardware_guide.md +
    hf-jobs/references/hardware_guide.md
    +
    Flavor selection guidance for CPU/GPU/TPU workloads
    +
  • +
  • + troubleshooting.md +
    hf-jobs/references/troubleshooting.md
    +
    Common failure modes (timeouts, missing deps, OOM, auth) and fixes
    +
  • +
+
+ +
+

Scripts

+
    +
  • + generate-responses.py +
    hf-jobs/scripts/generate-responses.py
    +
    vLLM batch generation: load prompts/messages from a dataset, generate responses, push dataset + card to Hub
    +
  • +
  • + cot-self-instruct.py +
    hf-jobs/scripts/cot-self-instruct.py
    +
    CoT Self-Instruct synthetic data generation (reasoning/instruction) + optional filtering, pushes dataset + card
    +
  • +
  • + finepdfs-stats.py +
    hf-jobs/scripts/finepdfs-stats.py
    +
    Polars streaming stats over Hub parquet (finepdfs-edu); optional upload of computed stats to a dataset repo
    +
  • +
+
+
+ + + + + + diff --git a/skills/hugging-face-jobs/references/hardware_guide.md b/skills/hugging-face-jobs/references/hardware_guide.md new file mode 100644 index 00000000..0a846338 --- /dev/null +++ b/skills/hugging-face-jobs/references/hardware_guide.md @@ -0,0 +1,336 @@ +# Hardware Selection Guide + +Choosing the right hardware (flavor) is critical for cost-effective workloads. + +> **Reference:** [HF Jobs Hardware Documentation](https://huggingface.co/docs/hub/en/spaces-config-reference) (updated 07/2025) + +## Available Hardware + +### CPU Flavors +| Flavor | Description | Use Case | +|--------|-------------|----------| +| `cpu-basic` | Basic CPU instance | Testing, lightweight scripts | +| `cpu-upgrade` | Enhanced CPU instance | Data processing, parallel workloads | + +**Use cases:** Data processing, testing scripts, lightweight workloads +**Not recommended for:** Model training, GPU-accelerated workloads + +### GPU Flavors + +| Flavor | GPU | VRAM | Use Case | +|--------|-----|------|----------| +| `t4-small` | NVIDIA T4 | 16GB | <1B models, demos, quick tests | +| `t4-medium` | NVIDIA T4 | 16GB | 1-3B models, development | +| `l4x1` | NVIDIA L4 | 24GB | 3-7B models, efficient workloads | +| `l4x4` | 4x NVIDIA L4 | 96GB | Multi-GPU, parallel workloads | +| `a10g-small` | NVIDIA A10G | 24GB | 3-7B models, production | +| `a10g-large` | NVIDIA A10G | 24GB | 7-13B models, batch inference | +| `a10g-largex2` | 2x NVIDIA A10G | 48GB | Multi-GPU, large models | +| `a10g-largex4` | 4x NVIDIA A10G | 96GB | Multi-GPU, very large models | +| `a100-large` | NVIDIA A100 | 40GB | 13B+ models, fastest GPU option | + +### TPU Flavors + +| Flavor | Configuration | Use Case | +|--------|---------------|----------| +| `v5e-1x1` | TPU v5e (1x1) | Small TPU workloads | +| `v5e-2x2` | TPU v5e (2x2) | Medium TPU workloads | +| `v5e-2x4` | TPU v5e (2x4) | Large TPU workloads | + +**TPU Use Cases:** +- JAX/Flax model training +- Large-scale inference +- TPU-optimized workloads + +## 
Selection Guidelines + +### By Workload Type + +**Data Processing** +- **Recommended:** `cpu-upgrade` or `l4x1` +- **Use case:** Transform, filter, analyze datasets +- **Batch size:** Depends on data size +- **Time:** Varies by dataset size + +**Batch Inference** +- **Recommended:** `a10g-large` or `a100-large` +- **Use case:** Run inference on thousands of samples +- **Batch size:** 8-32 depending on model +- **Time:** Depends on number of samples + +**Experiments & Benchmarks** +- **Recommended:** `a10g-small` or `a10g-large` +- **Use case:** Reproducible ML experiments +- **Batch size:** Varies +- **Time:** Depends on experiment complexity + +**Model Training** (see `model-trainer` skill for details) +- **Recommended:** See model-trainer skill +- **Use case:** Fine-tuning models +- **Batch size:** Depends on model size +- **Time:** Hours to days + +**Synthetic Data Generation** +- **Recommended:** `a10g-large` or `a100-large` +- **Use case:** Generate datasets using LLMs +- **Batch size:** Depends on generation method +- **Time:** Hours for large datasets + +### By Budget + +**Minimal Budget (<$5 total)** +- Use `cpu-basic` or `t4-small` +- Process small datasets +- Quick tests and demos + +**Small Budget ($5-20)** +- Use `t4-medium` or `a10g-small` +- Process medium datasets +- Run experiments + +**Medium Budget ($20-50)** +- Use `a10g-small` or `a10g-large` +- Process large datasets +- Production workloads + +**Large Budget ($50-200)** +- Use `a10g-large` or `a100-large` +- Large-scale processing +- Multiple experiments + +### By Model Size (for inference/processing) + +**Tiny Models (<1B parameters)** +- **Recommended:** `t4-small` +- **Example:** Qwen2.5-0.5B, TinyLlama +- **Batch size:** 8-16 + +**Small Models (1-3B parameters)** +- **Recommended:** `t4-medium` or `a10g-small` +- **Example:** Qwen2.5-1.5B, Phi-2 +- **Batch size:** 4-8 + +**Medium Models (3-7B parameters)** +- **Recommended:** `a10g-small` or `a10g-large` +- **Example:** Qwen2.5-7B, 
Mistral-7B +- **Batch size:** 2-4 + +**Large Models (7-13B parameters)** +- **Recommended:** `a10g-large` or `a100-large` +- **Example:** Llama-3-8B +- **Batch size:** 1-2 + +**Very Large Models (13B+ parameters)** +- **Recommended:** `a100-large` +- **Example:** Llama-3-13B, Llama-3-70B +- **Batch size:** 1 + +## Memory Considerations + +### Estimating Memory Requirements + +**For inference:** +``` +Memory (GB) ≈ (Model params in billions) × 2-4 +``` + +**For training:** +``` +Memory (GB) ≈ (Model params in billions) × 20 (full) or × 4 (LoRA) +``` + +**Examples:** +- Qwen2.5-0.5B inference: ~1-2GB ✅ fits t4-small +- Qwen2.5-7B inference: ~14-28GB ✅ fits a10g-large +- Qwen2.5-7B training: ~140GB ❌ not feasible without LoRA + +### Memory Optimization + +If hitting memory limits: + +1. **Reduce batch size** + ```python + batch_size = 1 + ``` + +2. **Process in chunks** + ```python + for chunk in chunks: + process(chunk) + ``` + +3. **Use smaller models** + - Use quantized models + - Use LoRA adapters + +4. **Upgrade hardware** + - cpu → t4 → a10g → a100 + +## Cost Estimation + +### Formula + +``` +Total Cost = (Hours of runtime) × (Cost per hour) +``` + +### Example Calculations + +**Data processing:** +- Hardware: cpu-upgrade ($0.50/hour) +- Time: 1 hour +- Cost: $0.50 + +**Batch inference:** +- Hardware: a10g-large ($5/hour) +- Time: 2 hours +- Cost: $10.00 + +**Experiments:** +- Hardware: a10g-small ($3.50/hour) +- Time: 4 hours +- Cost: $14.00 + +### Cost Optimization Tips + +1. **Start small:** Test on cpu-basic or t4-small +2. **Monitor runtime:** Set appropriate timeouts +3. **Optimize code:** Reduce unnecessary compute +4. **Choose right hardware:** Don't over-provision +5. **Use checkpoints:** Resume if job fails +6. 
**Monitor costs:** Check running jobs regularly + +## Multi-GPU Workloads + +Multi-GPU flavors automatically distribute workloads: + +**Multi-GPU flavors:** +- `l4x4` - 4x L4 GPUs (96GB total VRAM) +- `a10g-largex2` - 2x A10G GPUs (48GB total VRAM) +- `a10g-largex4` - 4x A10G GPUs (96GB total VRAM) + +**When to use:** +- Large models (>13B parameters) +- Need faster processing (linear speedup) +- Large datasets (>100K samples) +- Parallel workloads +- Tensor parallelism for inference + +**MCP Tool Example:** +```python +hf_jobs("uv", { + "script": "process.py", + "flavor": "a10g-largex2", # 2 GPUs + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**CLI Equivalent:** +```bash +hf jobs uv run process.py --flavor a10g-largex2 --timeout 4h +``` + +## Choosing Between Options + +### CPU vs GPU + +**Choose CPU when:** +- No GPU acceleration needed +- Data processing only +- Budget constrained +- Simple workloads + +**Choose GPU when:** +- Model inference/training +- GPU-accelerated libraries +- Need faster processing +- Large models + +### a10g vs a100 + +**Choose a10g when:** +- Model <13B parameters +- Budget conscious +- Processing time not critical + +**Choose a100 when:** +- Model 13B+ parameters +- Need fastest processing +- Memory requirements high +- Budget allows + +### Single vs Multi-GPU + +**Choose single GPU when:** +- Model <7B parameters +- Budget constrained +- Simpler debugging + +**Choose multi-GPU when:** +- Model >13B parameters +- Need faster processing +- Large batch sizes required +- Cost-effective for large jobs + +## Quick Reference + +### All Available Flavors + +```python +# Official flavor list (updated 07/2025) +FLAVORS = { + # CPU + "cpu-basic", # Testing, lightweight + "cpu-upgrade", # Data processing + + # GPU - Single + "t4-small", # 16GB - <1B models + "t4-medium", # 16GB - 1-3B models + "l4x1", # 24GB - 3-7B models + "a10g-small", # 24GB - 3-7B production + "a10g-large", # 24GB - 7-13B models + "a100-large", # 40GB - 
13B+ models + + # GPU - Multi + "l4x4", # 4x L4 (96GB total) + "a10g-largex2", # 2x A10G (48GB total) + "a10g-largex4", # 4x A10G (96GB total) + + # TPU + "v5e-1x1", # TPU v5e 1x1 + "v5e-2x2", # TPU v5e 2x2 + "v5e-2x4", # TPU v5e 2x4 +} +``` + +### Workload → Hardware Mapping + +```python +HARDWARE_MAP = { + "data_processing": "cpu-upgrade", + "batch_inference_small": "t4-small", + "batch_inference_medium": "a10g-large", + "batch_inference_large": "a100-large", + "experiments": "a10g-small", + "tpu_workloads": "v5e-1x1", + "training": "see model-trainer skill" +} +``` + +### CLI Examples + +```bash +# CPU job +hf jobs run python:3.12 python script.py + +# GPU job +hf jobs run --flavor a10g-large pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel python script.py + +# TPU job +hf jobs run --flavor v5e-1x1 your-tpu-image python script.py + +# UV script with GPU +hf jobs uv run --flavor a10g-small my_script.py +``` + diff --git a/skills/hugging-face-jobs/references/hub_saving.md b/skills/hugging-face-jobs/references/hub_saving.md new file mode 100644 index 00000000..e2af0283 --- /dev/null +++ b/skills/hugging-face-jobs/references/hub_saving.md @@ -0,0 +1,352 @@ +# Saving Results to Hugging Face Hub + +**⚠️ CRITICAL:** Job environments are ephemeral. ALL results are lost when a job completes unless persisted to the Hub or external storage. 
+ +## Why Persistence is Required + +When running on Hugging Face Jobs: +- Environment is temporary +- All files deleted on job completion +- No local disk persistence +- Cannot access results after job ends + +**Without persistence, all work is permanently lost.** + +## Persistence Options + +### Option 1: Push to Hugging Face Hub (Recommended) + +**For models:** +```python +from transformers import AutoModel +model.push_to_hub("username/model-name", token=os.environ.get("HF_TOKEN")) +``` + +**For datasets:** +```python +from datasets import Dataset +dataset.push_to_hub("username/dataset-name", token=os.environ.get("HF_TOKEN")) +``` + +**For files/artifacts:** +```python +from huggingface_hub import HfApi +api = HfApi(token=os.environ.get("HF_TOKEN")) +api.upload_file( + path_or_fileobj="results.json", + path_in_repo="results.json", + repo_id="username/results", + repo_type="dataset" +) +``` + +### Option 2: External Storage + +**S3:** +```python +import boto3 +s3 = boto3.client('s3') +s3.upload_file('results.json', 'my-bucket', 'results.json') +``` + +**Google Cloud Storage:** +```python +from google.cloud import storage +client = storage.Client() +bucket = client.bucket('my-bucket') +blob = bucket.blob('results.json') +blob.upload_from_filename('results.json') +``` + +### Option 3: API Endpoint + +```python +import requests +requests.post("https://your-api.com/results", json=results) +``` + +## Required Configuration for Hub Push + +### Job Configuration + +**Always include HF_TOKEN:** +```python +hf_jobs("uv", { + "script": "your_script.py", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Required for Hub operations +}) +``` + +### Script Configuration + +**Verify token exists:** +```python +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN required for Hub operations!" 
+``` + +**Use token for Hub operations:** +```python +from huggingface_hub import HfApi + +# Auto-detects HF_TOKEN from environment +api = HfApi() + +# Or explicitly pass token +api = HfApi(token=os.environ.get("HF_TOKEN")) +``` + +## Complete Examples + +### Example 1: Push Dataset + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["datasets", "huggingface-hub"] +# /// + +import os +from datasets import Dataset +from huggingface_hub import HfApi + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Process data +data = {"text": ["Sample 1", "Sample 2"]} +dataset = Dataset.from_dict(data) + +# Push to Hub +dataset.push_to_hub("username/my-dataset") +print("✅ Dataset pushed!") +""", + "flavor": "cpu-basic", + "timeout": "30m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +### Example 2: Push Model + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["transformers"] +# /// + +import os +from transformers import AutoModel, AutoTokenizer + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Load and process model +model = AutoModel.from_pretrained("base-model") +tokenizer = AutoTokenizer.from_pretrained("base-model") +# ... process model ... + +# Push to Hub +model.push_to_hub("username/my-model") +tokenizer.push_to_hub("username/my-model") +print("✅ Model pushed!") +""", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +### Example 3: Push Artifacts + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["huggingface-hub", "pandas"] +# /// + +import os +import json +import pandas as pd +from huggingface_hub import HfApi + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" 
+ +# Generate results +results = {"accuracy": 0.95, "loss": 0.05} +df = pd.DataFrame([results]) + +# Save files +with open("results.json", "w") as f: + json.dump(results, f) +df.to_csv("results.csv", index=False) + +# Push to Hub +api = HfApi() +api.upload_file("results.json", "results.json", "username/results", repo_type="dataset") +api.upload_file("results.csv", "results.csv", "username/results", repo_type="dataset") +print("✅ Results pushed!") +""", + "flavor": "cpu-basic", + "timeout": "30m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +## Authentication Methods + +### Method 1: Automatic Token (Recommended) + +```python +"secrets": {"HF_TOKEN": "$HF_TOKEN"} +``` + +Uses your logged-in Hugging Face token automatically. + +### Method 2: Explicit Token + +```python +"secrets": {"HF_TOKEN": "hf_abc123..."} +``` + +Provide token explicitly (not recommended for security). + +### Method 3: Environment Variable + +```python +"env": {"HF_TOKEN": "hf_abc123..."} +``` + +Pass as regular environment variable (less secure than secrets). + +**Always prefer Method 1** for security and convenience. + +## Verification Checklist + +Before submitting any job that saves to Hub, verify: + +- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +- [ ] Script checks for token: `assert "HF_TOKEN" in os.environ` +- [ ] Hub push code included in script +- [ ] Repository name doesn't conflict with existing repos +- [ ] You have write access to the target namespace + +## Repository Setup + +### Automatic Creation + +If repository doesn't exist, it's created automatically when first pushing (if token has write permissions). 
+ +### Manual Creation + +Create repository before pushing: + +```python +from huggingface_hub import HfApi + +api = HfApi() +api.create_repo( + repo_id="username/repo-name", + repo_type="model", # or "dataset" + private=False, # or True for private repo +) +``` + +### Repository Naming + +**Valid names:** +- `username/my-model` +- `username/model-name` +- `organization/model-name` + +**Invalid names:** +- `model-name` (missing username) +- `username/model name` (spaces not allowed) +- `username/MODEL` (uppercase discouraged) + +## Troubleshooting + +### Error: 401 Unauthorized + +**Cause:** HF_TOKEN not provided or invalid + +**Solutions:** +1. Verify `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +2. Check you're logged in: `hf_whoami()` +3. Re-login: `hf auth login` + +### Error: 403 Forbidden + +**Cause:** No write access to repository + +**Solutions:** +1. Check repository namespace matches your username +2. Verify you're a member of organization (if using org namespace) +3. Check token has write permissions + +### Error: Repository not found + +**Cause:** Repository doesn't exist and auto-creation failed + +**Solutions:** +1. Manually create repository first +2. Check repository name format +3. Verify namespace exists + +### Error: Push failed + +**Cause:** Network issues or Hub unavailable + +**Solutions:** +1. Check logs for specific error +2. Verify token is valid +3. Retry push operation + +## Best Practices + +1. **Always verify token exists** before Hub operations +2. **Use descriptive repo names** (e.g., `my-experiment-results` not `results`) +3. **Push incrementally** for large results (use checkpoints) +4. **Verify push success** in logs before job completes +5. **Use appropriate repo types** (model vs dataset) +6. **Add README** with result descriptions +7. 
**Tag repos** with relevant tags + +## Monitoring Push Progress + +Check logs for push progress: + +**MCP Tool:** +```python +hf_jobs("logs", {"job_id": "your-job-id"}) +``` + +**CLI:** +```bash +hf jobs logs +``` + +**Python API:** +```python +from huggingface_hub import fetch_job_logs +for log in fetch_job_logs(job_id="your-job-id"): + print(log) +``` + +**Look for:** +``` +Pushing to username/repo-name... +Upload file results.json: 100% +✅ Push successful +``` + +## Key Takeaway + +**Without `secrets={"HF_TOKEN": "$HF_TOKEN"}` and persistence code, all results are permanently lost.** + +Always verify both are configured before submitting any job that produces results. + diff --git a/skills/hugging-face-jobs/references/token_usage.md b/skills/hugging-face-jobs/references/token_usage.md new file mode 100644 index 00000000..89d675d4 --- /dev/null +++ b/skills/hugging-face-jobs/references/token_usage.md @@ -0,0 +1,570 @@ +# Token Usage Guide for Hugging Face Jobs + +**⚠️ CRITICAL:** Proper token usage is essential for any job that interacts with the Hugging Face Hub. + +## Overview + +Hugging Face tokens are authentication credentials that allow your jobs to interact with the Hub. 
They're required for: +- Pushing models/datasets to Hub +- Accessing private repositories +- Creating new repositories +- Using Hub APIs programmatically +- Any authenticated Hub operations + +## Token Types + +### Read Token +- **Permissions:** Download models/datasets, read private repos +- **Use case:** Jobs that only need to download/read content +- **Creation:** https://huggingface.co/settings/tokens + +### Write Token +- **Permissions:** Push models/datasets, create repos, modify content +- **Use case:** Jobs that need to upload results (most common) +- **Creation:** https://huggingface.co/settings/tokens +- **⚠️ Required for:** Pushing models, datasets, or any uploads + +### Organization Token +- **Permissions:** Act on behalf of an organization +- **Use case:** Jobs running under organization namespace +- **Creation:** Organization settings → Tokens + +## Providing Tokens to Jobs + +### Method 1: `hf_jobs` MCP tool with `$HF_TOKEN` (Recommended) ⭐ + +```python +hf_jobs("uv", { + "script": "your_script.py", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Automatic replacement +}) +``` + +**How it works:** +1. `$HF_TOKEN` is a placeholder that gets replaced with your actual token +2. Uses the token from your logged-in session (`hf auth login`) +3. Token is encrypted server-side when passed as a secret +4. Most secure and convenient method + +**Benefits:** +- ✅ No token exposure in code +- ✅ Uses your current login session +- ✅ Automatically updated if you re-login +- ✅ Works seamlessly with MCP tools +- ✅ Token encrypted server-side + +**Requirements:** +- Must be logged in: `hf auth login` or `hf_whoami()` works +- Token must have required permissions + +**⚠️ CRITICAL:** `$HF_TOKEN` auto-replacement is an `hf_jobs` MCP tool feature ONLY. It does NOT work with `HfApi().run_uv_job()` — see Method 1b below. 
+ +### Method 1b: `HfApi().run_uv_job()` with `get_token()` (Required for Python API) + +```python +from huggingface_hub import HfApi, get_token +api = HfApi() +api.run_uv_job( + script="your_script.py", + secrets={"HF_TOKEN": get_token()}, # ✅ Passes actual token value +) +``` + +**How it works:** +1. `get_token()` retrieves the token from your logged-in session +2. The actual token value is passed to the `secrets` parameter +3. Token is encrypted server-side + +**Why `"$HF_TOKEN"` fails with `HfApi().run_uv_job()`:** +- The Python API passes the literal string `"$HF_TOKEN"` (9 characters) as the token +- The Jobs server receives this invalid string instead of a real token +- Result: `401 Unauthorized` errors when the script tries to authenticate +- You MUST use `get_token()` from `huggingface_hub` to get the real token + +### Method 2: Explicit Token (Not Recommended) + +```python +hf_jobs("uv", { + "script": "your_script.py", + "secrets": {"HF_TOKEN": "hf_abc123..."} # ⚠️ Hardcoded token +}) +``` + +**When to use:** +- Only if automatic token doesn't work +- Testing with a specific token +- Organization tokens (use with caution) + +**Security concerns:** +- ❌ Token visible in code/logs +- ❌ Must manually update if token rotates +- ❌ Risk of token exposure +- ❌ Not recommended for production + +### Method 3: Environment Variable (Less Secure) + +```python +hf_jobs("uv", { + "script": "your_script.py", + "env": {"HF_TOKEN": "hf_abc123..."} # ⚠️ Less secure than secrets +}) +``` + +**Difference from secrets:** +- `env` variables are visible in job logs +- `secrets` are encrypted server-side +- Always prefer `secrets` for tokens + +**When to use:** +- Only for non-sensitive configuration +- Never use for tokens (use `secrets` instead) + +## Using Tokens in Scripts + +### Accessing Tokens + +Tokens passed via `secrets` are available as environment variables in your script: + +```python +import os + +# Get token from environment +token = os.environ.get("HF_TOKEN") + 
+# Verify token exists +if not token: + raise ValueError("HF_TOKEN not found in environment!") +``` + +### Using with Hugging Face Hub + +**Option 1: Explicit token parameter** +```python +from huggingface_hub import HfApi + +api = HfApi(token=os.environ.get("HF_TOKEN")) +api.upload_file(...) +``` + +**Option 2: Auto-detection (Recommended)** +```python +from huggingface_hub import HfApi + +# Automatically uses HF_TOKEN env var +api = HfApi() # ✅ Simpler, uses token from environment +api.upload_file(...) +``` + +**Option 3: With transformers/datasets** +```python +from transformers import AutoModel +from datasets import load_dataset + +# Auto-detects HF_TOKEN from environment +model = AutoModel.from_pretrained("username/model") +dataset = load_dataset("username/dataset") + +# For push operations, token is auto-detected +model.push_to_hub("username/new-model") +dataset.push_to_hub("username/new-dataset") +``` + +### Complete Example + +```python +# /// script +# dependencies = ["huggingface-hub", "datasets"] +# /// + +import os +from huggingface_hub import HfApi +from datasets import Dataset + +# Verify token is available +assert "HF_TOKEN" in os.environ, "HF_TOKEN required for Hub operations!" 
+ +# Use token for Hub operations +api = HfApi() # Auto-detects HF_TOKEN + +# Create and push dataset +data = {"text": ["Hello", "World"]} +dataset = Dataset.from_dict(data) + +# Push to Hub (token auto-detected) +dataset.push_to_hub("username/my-dataset") + +print("✅ Dataset pushed successfully!") +``` + +## Token Verification + +### Check Authentication Locally + +```python +from huggingface_hub import whoami + +try: + user_info = whoami() + print(f"✅ Logged in as: {user_info['name']}") +except Exception as e: + print(f"❌ Not authenticated: {e}") +``` + +### Verify Token in Job + +```python +import os + +# Check token exists +if "HF_TOKEN" not in os.environ: + raise ValueError("HF_TOKEN not found in environment!") + +token = os.environ["HF_TOKEN"] + +# Verify token format (should start with "hf_") +if not token.startswith("hf_"): + raise ValueError(f"Invalid token format: {token[:10]}...") + +# Test token works +from huggingface_hub import whoami +try: + user_info = whoami(token=token) + print(f"✅ Token valid for user: {user_info['name']}") +except Exception as e: + raise ValueError(f"Token validation failed: {e}") +``` + +## Common Token Issues + +### Error: 401 Unauthorized + +**Symptoms:** +``` +401 Client Error: Unauthorized for url: https://huggingface.co/api/... +``` + +**Causes:** +1. Token missing from job +2. Token invalid or expired +3. Token not passed correctly + +**Solutions:** +1. Add `secrets={"HF_TOKEN": "$HF_TOKEN"}` to job config +2. Verify `hf_whoami()` works locally +3. Re-login: `hf auth login` +4. Check token hasn't expired + +**Verification:** +```python +# In your script +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN missing!" +``` + +### Error: 403 Forbidden + +**Symptoms:** +``` +403 Client Error: Forbidden for url: https://huggingface.co/api/... +``` + +**Causes:** +1. Token lacks required permissions (read-only token used for write) +2. No access to private repository +3. 
Organization permissions insufficient + +**Solutions:** +1. Ensure token has write permissions +2. Check token type at https://huggingface.co/settings/tokens +3. Verify access to target repository +4. Use organization token if needed + +**Check token permissions:** +```python +from huggingface_hub import whoami + +user_info = whoami() +print(f"User: {user_info['name']}") +print(f"Type: {user_info.get('type', 'user')}") +``` + +### Error: Token not found in environment + +**Symptoms:** +``` +KeyError: 'HF_TOKEN' +ValueError: HF_TOKEN not found +``` + +**Causes:** +1. `secrets` not passed in job config +2. Wrong key name (should be `HF_TOKEN`) +3. Using `env` instead of `secrets` + +**Solutions:** +1. Use `secrets={"HF_TOKEN": "$HF_TOKEN"}` (not `env`) +2. Verify key name is exactly `HF_TOKEN` +3. Check job config syntax + +**Correct configuration:** +```python +# ✅ Correct +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) + +# ❌ Wrong - using env instead of secrets +hf_jobs("uv", { + "script": "...", + "env": {"HF_TOKEN": "$HF_TOKEN"} # Less secure +}) + +# ❌ Wrong - wrong key name +hf_jobs("uv", { + "script": "...", + "secrets": {"TOKEN": "$HF_TOKEN"} # Wrong key +}) +``` + +### Error: Repository access denied + +**Symptoms:** +``` +403 Client Error: Forbidden +Repository not found or access denied +``` + +**Causes:** +1. Token doesn't have access to private repo +2. Repository doesn't exist and can't be created +3. Wrong namespace + +**Solutions:** +1. Use token from account with access +2. Verify repo visibility (public vs private) +3. Check namespace matches token owner +4. Create repo first if needed + +**Check repository access:** +```python +from huggingface_hub import HfApi + +api = HfApi() +try: + repo_info = api.repo_info("username/repo-name") + print(f"✅ Access granted: {repo_info.id}") +except Exception as e: + print(f"❌ Access denied: {e}") +``` + +## Token Security Best Practices + +### 1. 
Never Commit Tokens + +**❌ Bad:** +```python +# Never do this! +token = "hf_abc123xyz..." +api = HfApi(token=token) +``` + +**✅ Good:** +```python +# Use environment variable +token = os.environ.get("HF_TOKEN") +api = HfApi(token=token) +``` + +### 2. Use Secrets, Not Environment Variables + +**❌ Bad:** +```python +hf_jobs("uv", { + "script": "...", + "env": {"HF_TOKEN": "$HF_TOKEN"} # Visible in logs +}) +``` + +**✅ Good:** +```python +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Encrypted server-side +}) +``` + +### 3. Use Automatic Token Replacement + +**❌ Bad:** +```python +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "hf_abc123..."} # Hardcoded +}) +``` + +**✅ Good:** +```python +hf_jobs("uv", { + "script": "...", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Automatic +}) +``` + +### 4. Rotate Tokens Regularly + +- Generate new tokens periodically +- Revoke old tokens +- Update job configurations +- Monitor token usage + +### 5. Use Minimal Permissions + +- Create tokens with only needed permissions +- Use read tokens when write isn't needed +- Don't use admin tokens for regular jobs + +### 6. Don't Share Tokens + +- Each user should use their own token +- Don't commit tokens to repositories +- Don't share tokens in logs or messages + +### 7. Monitor Token Usage + +- Check token activity in Hub settings +- Review job logs for token issues +- Set up alerts for unauthorized access + +## Token Workflow Examples + +### Example 1: Push Model to Hub + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["transformers"] +# /// + +import os +from transformers import AutoModel, AutoTokenizer + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Load and process model +model = AutoModel.from_pretrained("base-model") +# ... process model ... 
+ +# Push to Hub (token auto-detected) +model.push_to_hub("username/my-model") +print("✅ Model pushed!") +""", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Token provided +}) +``` + +### Example 2: Access Private Dataset + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["datasets"] +# /// + +import os +from datasets import load_dataset + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" + +# Load private dataset (token auto-detected) +dataset = load_dataset("private-org/private-dataset") +print(f"✅ Loaded {len(dataset)} examples") +""", + "flavor": "cpu-basic", + "timeout": "30m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Token provided +}) +``` + +### Example 3: Create and Push Dataset + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["datasets", "huggingface-hub"] +# /// + +import os +from datasets import Dataset +from huggingface_hub import HfApi + +# Verify token +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" 
+ +# Create dataset +data = {"text": ["Sample 1", "Sample 2"]} +dataset = Dataset.from_dict(data) + +# Push to Hub +api = HfApi() # Auto-detects HF_TOKEN +dataset.push_to_hub("username/my-dataset") +print("✅ Dataset pushed!") +""", + "flavor": "cpu-basic", + "timeout": "30m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Token provided +}) +``` + +## Quick Reference + +### Token Checklist + +Before submitting a job that uses Hub: + +- [ ] Job includes `secrets={"HF_TOKEN": "$HF_TOKEN"}` +- [ ] Script checks for token: `assert "HF_TOKEN" in os.environ` +- [ ] Token has required permissions (read/write) +- [ ] User is logged in: `hf_whoami()` works +- [ ] Token not hardcoded in script +- [ ] Using `secrets` not `env` for token + +### Common Patterns + +**Pattern 1: Auto-detect token** +```python +from huggingface_hub import HfApi +api = HfApi() # Uses HF_TOKEN from environment +``` + +**Pattern 2: Explicit token** +```python +import os +from huggingface_hub import HfApi +api = HfApi(token=os.environ.get("HF_TOKEN")) +``` + +**Pattern 3: Verify token** +```python +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN required!" +``` + +## Key Takeaways + +1. **Always use `secrets={"HF_TOKEN": "$HF_TOKEN"}`** for Hub operations +2. **Never hardcode tokens** in scripts or job configs +3. **Verify token exists** in script before Hub operations +4. **Use auto-detection** when possible (`HfApi()` without token parameter) +5. **Check permissions** - ensure token has required access +6. **Monitor token usage** - review activity regularly +7. **Rotate tokens** - generate new tokens periodically + diff --git a/skills/hugging-face-jobs/references/troubleshooting.md b/skills/hugging-face-jobs/references/troubleshooting.md new file mode 100644 index 00000000..338b6894 --- /dev/null +++ b/skills/hugging-face-jobs/references/troubleshooting.md @@ -0,0 +1,475 @@ +# Troubleshooting Guide + +Common issues and solutions for Hugging Face Jobs. 
+ +## Authentication Issues + +### Error: 401 Unauthorized + +**Symptoms:** +``` +401 Client Error: Unauthorized for url: https://huggingface.co/api/... +``` + +**Causes:** +- Token missing from job +- Token invalid or expired +- Token not passed correctly + +**Solutions:** +1. Add token to secrets: `hf_jobs` MCP uses `"$HF_TOKEN"` (auto-replaced); `HfApi().run_uv_job()` MUST use `get_token()` from `huggingface_hub` (the literal string `"$HF_TOKEN"` will NOT work with the Python API) +2. Verify `hf_whoami()` works locally +3. Re-login: `hf auth login` +4. Check token hasn't expired + +**Verification:** +```python +# In your script +import os +assert "HF_TOKEN" in os.environ, "HF_TOKEN missing!" +``` + +### Error: 403 Forbidden + +**Symptoms:** +``` +403 Client Error: Forbidden for url: https://huggingface.co/api/... +``` + +**Causes:** +- Token lacks required permissions +- No access to private repository +- Organization permissions insufficient + +**Solutions:** +1. Ensure token has write permissions +2. Check token type at https://huggingface.co/settings/tokens +3. Verify access to target repository +4. Use organization token if needed + +### Error: Token not found in environment + +**Symptoms:** +``` +KeyError: 'HF_TOKEN' +ValueError: HF_TOKEN not found +``` + +**Causes:** +- `secrets` not passed in job config +- Wrong key name (should be `HF_TOKEN`) +- Using `env` instead of `secrets` + +**Solutions:** +1. Use `secrets` (not `env`) — with `hf_jobs` MCP: `"$HF_TOKEN"`; with `HfApi().run_uv_job()`: `get_token()` +2. Verify key name is exactly `HF_TOKEN` +3. Check job config syntax + +## Job Execution Issues + +### Error: Job Timeout + +**Symptoms:** +- Job stops unexpectedly +- Status shows "TIMEOUT" +- Partial results only + +**Causes:** +- Default 30min timeout exceeded +- Job takes longer than expected +- No timeout specified + +**Solutions:** +1. Check logs for actual runtime +2. Increase timeout with buffer: `"timeout": "3h"` +3. 
Optimize code for faster execution +4. Process data in chunks +5. Add 20-30% buffer to estimated time + +**MCP Tool Example:** +```python +hf_jobs("uv", { + "script": "...", + "timeout": "2h" # Set appropriate timeout +}) +``` + +**Python API Example:** +```python +from huggingface_hub import run_uv_job, inspect_job, fetch_job_logs + +job = run_uv_job("script.py", timeout="4h") + +# Check if job failed +job_info = inspect_job(job_id=job.id) +if job_info.status.stage == "ERROR": + print(f"Job failed: {job_info.status.message}") + # Check logs for details + for log in fetch_job_logs(job_id=job.id): + print(log) +``` + +### Error: Out of Memory (OOM) + +**Symptoms:** +``` +RuntimeError: CUDA out of memory +MemoryError: Unable to allocate array +``` + +**Causes:** +- Batch size too large +- Model too large for hardware +- Insufficient GPU memory + +**Solutions:** +1. Reduce batch size +2. Process data in smaller chunks +3. Upgrade hardware: cpu → t4 → a10g → a100 +4. Use smaller models or quantization +5. Enable gradient checkpointing (for training) + +**Example:** +```python +# Reduce batch size +batch_size = 1 + +# Process in chunks +for chunk in chunks: + process(chunk) +``` + +### Error: Missing Dependencies + +**Symptoms:** +``` +ModuleNotFoundError: No module named 'package_name' +ImportError: cannot import name 'X' +``` + +**Causes:** +- Package not in dependencies +- Wrong package name +- Version mismatch + +**Solutions:** +1. Add to PEP 723 header: + ```python + # /// script + # dependencies = ["package-name>=1.0.0"] + # /// + ``` +2. Check package name spelling +3. Specify version if needed +4. Check package availability + +### Error: Script Not Found + +**Symptoms:** +``` +FileNotFoundError: script.py not found +``` + +**Causes:** +- Local file path used (not supported) +- URL incorrect +- Script not accessible + +**Solutions:** +1. Use inline script (recommended) +2. Use publicly accessible URL +3. Upload script to Hub first +4. 
Check URL is correct + +**Correct approaches:** +```python +# ✅ Inline code +hf_jobs("uv", {"script": "# /// script\n# dependencies = [...]\n# ///\n\n"}) + +# ✅ From URL +hf_jobs("uv", {"script": "https://huggingface.co/user/repo/resolve/main/script.py"}) +``` + +## Hub Push Issues + +### Error: Push Failed + +**Symptoms:** +``` +Error pushing to Hub +Upload failed +``` + +**Causes:** +- Network issues +- Token missing or invalid +- Repository access denied +- File too large + +**Solutions:** +1. Check token: `assert "HF_TOKEN" in os.environ` +2. Verify repository exists or can be created +3. Check network connectivity in logs +4. Retry push operation +5. Split large files into chunks + +### Error: Repository Not Found + +**Symptoms:** +``` +404 Client Error: Not Found +Repository not found +``` + +**Causes:** +- Repository doesn't exist +- Wrong repository name +- No access to private repo + +**Solutions:** +1. Create repository first: + ```python + from huggingface_hub import HfApi + api = HfApi() + api.create_repo("username/repo-name", repo_type="dataset") + ``` +2. Check repository name format +3. Verify namespace exists +4. Check repository visibility + +### Error: Results Not Saved + +**Symptoms:** +- Job completes successfully +- No results visible on Hub +- Files not persisted + +**Causes:** +- No persistence code in script +- Push code not executed +- Push failed silently + +**Solutions:** +1. Add persistence code to script +2. Verify push executes successfully +3. Check logs for push errors +4. Add error handling around push + +**Example:** +```python +try: + dataset.push_to_hub("username/dataset") + print("✅ Push successful") +except Exception as e: + print(f"❌ Push failed: {e}") + raise +``` + +## Hardware Issues + +### Error: GPU Not Available + +**Symptoms:** +``` +CUDA not available +No GPU found +``` + +**Causes:** +- CPU flavor used instead of GPU +- GPU not requested +- CUDA not installed in image + +**Solutions:** +1. 
Use GPU flavor: `"flavor": "a10g-large"` +2. Check image has CUDA support +3. Verify GPU availability in logs + +### Error: Slow Performance + +**Symptoms:** +- Job takes longer than expected +- Low GPU utilization +- CPU bottleneck + +**Causes:** +- Wrong hardware selected +- Inefficient code +- Data loading bottleneck + +**Solutions:** +1. Upgrade hardware +2. Optimize code +3. Use batch processing +4. Profile code to find bottlenecks + +## General Issues + +### Error: Job Status Unknown + +**Symptoms:** +- Can't check job status +- Status API returns error + +**Solutions:** +1. Use job URL: `https://huggingface.co/jobs/username/job-id` +2. Check logs: `hf_jobs("logs", {"job_id": "..."})` +3. Inspect job: `hf_jobs("inspect", {"job_id": "..."})` + +### Error: Logs Not Available + +**Symptoms:** +- No logs visible +- Logs delayed + +**Causes:** +- Job just started (logs delayed 30-60s) +- Job failed before logging +- Logs not yet generated + +**Solutions:** +1. Wait 30-60 seconds after job start +2. Check job status first +3. Use job URL for web interface + +### Error: Cost Unexpectedly High + +**Symptoms:** +- Job costs more than expected +- Longer runtime than estimated + +**Causes:** +- Job ran longer than timeout +- Wrong hardware selected +- Inefficient code + +**Solutions:** +1. Monitor job runtime +2. Set appropriate timeout +3. Optimize code +4. Choose right hardware +5. Check cost estimates before running + +## Debugging Tips + +### 1. Add Logging + +```python +import logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +logger.info("Starting processing...") +logger.info(f"Processed {count} items") +``` + +### 2. Verify Environment + +```python +import os +print(f"Python version: {os.sys.version}") +print(f"CUDA available: {torch.cuda.is_available()}") +print(f"HF_TOKEN present: {'HF_TOKEN' in os.environ}") +``` + +### 3. 
Test Locally First + +Run script locally before submitting to catch errors early: +```bash +python script.py +# Or with uv +uv run script.py +``` + +### 4. Check Job Logs + +**MCP Tool:** +```python +# View logs +hf_jobs("logs", {"job_id": "your-job-id"}) +``` + +**CLI:** +```bash +hf jobs logs +``` + +**Python API:** +```python +from huggingface_hub import fetch_job_logs +for log in fetch_job_logs(job_id="your-job-id"): + print(log) +``` + +**Or use job URL:** `https://huggingface.co/jobs/username/job-id` + +### 5. Add Error Handling + +```python +try: + # Your code + process_data() +except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() + raise +``` + +### 6. Check Job Status Programmatically + +```python +from huggingface_hub import inspect_job, fetch_job_logs + +job_info = inspect_job(job_id="your-job-id") +print(f"Status: {job_info.status.stage}") +print(f"Message: {job_info.status.message}") + +if job_info.status.stage == "ERROR": + print("Job failed! Logs:") + for log in fetch_job_logs(job_id="your-job-id"): + print(log) +``` + +## Quick Reference + +### Common Error Codes + +| Code | Meaning | Solution | +|------|---------|----------| +| 401 | Unauthorized | Add token to secrets: MCP uses `"$HF_TOKEN"`, Python API uses `get_token()` | +| 403 | Forbidden | Check token permissions | +| 404 | Not Found | Verify repository exists | +| 500 | Server Error | Retry or contact support | + +### Checklist Before Submitting + +- [ ] Token configured: MCP uses `secrets={"HF_TOKEN": "$HF_TOKEN"}`, Python API uses `secrets={"HF_TOKEN": get_token()}` +- [ ] Script checks for token: `assert "HF_TOKEN" in os.environ` +- [ ] Timeout set appropriately +- [ ] Hardware selected correctly +- [ ] Dependencies listed in PEP 723 header +- [ ] Persistence code included +- [ ] Error handling added +- [ ] Logging added for debugging + +## Getting Help + +If issues persist: + +1. **Check logs** - Most errors include detailed messages +2. 
**Review documentation** - See main SKILL.md +3. **Check Hub status** - https://status.huggingface.co +4. **Community forums** - https://discuss.huggingface.co +5. **GitHub issues** - For bugs in huggingface_hub + +## Key Takeaways + +1. **Always include token** - MCP: `secrets={"HF_TOKEN": "$HF_TOKEN"}`, Python API: `secrets={"HF_TOKEN": get_token()}` +2. **Set appropriate timeout** - Default 30min may be insufficient +3. **Verify persistence** - Results won't persist without code +4. **Check logs** - Most issues visible in job logs +5. **Test locally** - Catch errors before submitting +6. **Add error handling** - Better debugging information +7. **Monitor costs** - Set timeouts to avoid unexpected charges + diff --git a/skills/hugging-face-jobs/scripts/cot-self-instruct.py b/skills/hugging-face-jobs/scripts/cot-self-instruct.py new file mode 100644 index 00000000..5388438b --- /dev/null +++ b/skills/hugging-face-jobs/scripts/cot-self-instruct.py @@ -0,0 +1,718 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "datasets", +# "transformers", +# "vllm>=0.6.5", +# "huggingface-hub[hf_transfer]", +# "torch", +# "numpy", +# "tqdm", +# "scikit-learn", +# ] +# /// +""" +Generate high-quality synthetic data using Chain-of-Thought Self-Instruct methodology. + +This script implements the CoT-Self-Instruct approach from the paper "CoT-Self-Instruct: +Building high-quality synthetic prompts for reasoning and non-reasoning tasks" (2025). + +It supports two modes: +1. Reasoning tasks: Generates both questions and answers with Chain-of-Thought +2. 
Instruction tasks: Generates diverse prompts for general instruction following + +Example usage: + # Reasoning tasks with Answer-Consistency filtering + uv run cot-self-instruct.py \\ + --seed-dataset davanstrien/s1k-reasoning \\ + --output-dataset username/synthetic-math \\ + --task-type reasoning \\ + --num-samples 5000 \\ + --filter-method answer-consistency + + # Instruction tasks with RIP filtering + uv run cot-self-instruct.py \\ + --seed-dataset wildchat-filtered \\ + --output-dataset username/synthetic-prompts \\ + --task-type instruction \\ + --filter-method rip \\ + --reward-model Nexusflow/Athene-RM-8B + + # HF Jobs execution + hf jobs uv run --flavor l4x4 \\ + --image vllm/vllm-openai \\ + -e HF_TOKEN=$(python3 -c "from huggingface_hub import get_token; print(get_token())") \\ + https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\ + [args...] +""" + +import argparse +import json +import logging +import os +import random +import re +import sys +from collections import Counter +from datetime import datetime +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import torch +from datasets import Dataset, load_dataset +from huggingface_hub import DatasetCard, login +from sklearn.cluster import KMeans +from tqdm.auto import tqdm +from transformers import AutoTokenizer +from vllm import LLM, SamplingParams + +# Enable HF Transfer for faster downloads +os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# Prompt templates from the paper +REASONING_PROMPT_TEMPLATE = """You are a reasoning question generator assistant. Your goal is to create a novel, and challenging reasoning question. You are provided the following seed questions: +Seed Question 1: {seed1} +Seed Question 2: {seed2} +Your task is to: +1. 
REASONING_PROMPT_TEMPLATE = """You are a reasoning question generator assistant. Your goal is to create a novel, and challenging reasoning question. You are provided the following seed questions:
Seed Question 1: {seed1}
Seed Question 2: {seed2}
Your task is to:
1. Write a brand-new, self-contained reasoning question that meets the following requirements:
(a) The question draws inspiration from the seed question without copying it verbatim, remaining novel and of comparable difficulty.
(b) The question's final answer should be a single, unambiguous scalar value (e.g., an integer, reduced fraction, exact radical), or another answer type that can be verified in one step (e.g., 'yes/no,' a choice from A to D).
2. Then reason step by step, solve the new question and format your output as follows:
[New Question Begin]{{your_generated_question}}[New Question End]
[Final Answer to New Question Begin]\\boxed{{your_final_answer}}[Final Answer to New Question End]"""

INSTRUCTION_PROMPT_TEMPLATE = """You are a prompt generator assistant. Your goal is to create diverse and creative synthetic prompts.
Please follow the steps below to create synthetic prompts.
Step 1: Carefully read #Prompt 1# and #Prompt 2#. Identify and list all the common elements between these two prompts. If no common elements are found, list the main elements from each prompt.
Step 2: Develop a comprehensive plan based on the #Common Elements List# or #Main Elements List# from Step 1. This plan will guide the generation of new synthetic prompts that are similar to the original prompts.
Step 3: Execute the plan step by step and provide one #Synthetic Prompt#.
Please reply strictly in the following format:
- Step 1 #Common Elements List# or #Main Elements List#:
- Step 2 #Plan#:
- Step 3 #Synthetic Prompt#:
#Prompt 1#:
{prompt1}
#Prompt 2#:
{prompt2}"""


def check_gpu_availability() -> int:
    """Check that CUDA is available and return the number of visible GPUs.

    Exits the process with status 1 when no GPU is present, since vLLM
    generation below requires one.
    """
    if not torch.cuda.is_available():
        logger.error("CUDA is not available. This script requires a GPU.")
        logger.error(
            "Please run on a machine with NVIDIA GPU or use HF Jobs with GPU flavor."
        )
        sys.exit(1)

    num_gpus = torch.cuda.device_count()
    for i in range(num_gpus):
        gpu_name = torch.cuda.get_device_name(i)
        gpu_memory = torch.cuda.get_device_properties(i).total_memory / 1024**3
        logger.info(f"GPU {i}: {gpu_name} with {gpu_memory:.1f} GB memory")

    return num_gpus


def parse_thinking_output(text: str) -> str:
    """Remove <think>...</think> blocks from model output.

    BUG FIX: the previous pattern was ``r'.*?'`` (the think tags had been
    lost), which only matches empty strings and therefore removed nothing —
    thinking tokens leaked into the downstream extraction regexes.
    """
    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    return text.strip()


def extract_reasoning_output(text: str) -> Tuple[Optional[str], Optional[str]]:
    """Extract (question, answer) from reasoning task output.

    Returns (None, None) when no question marker is found, and
    (question, None) when a question is present but no answer can be parsed.
    """
    text = parse_thinking_output(text)

    # Extract question
    question_match = re.search(r'\[New Question Begin\](.*?)\[New Question End\]', text, re.DOTALL)
    if not question_match:
        return None, None
    question = question_match.group(1).strip()

    # Extract answer — prefer the \boxed{...} form the template requests.
    answer_match = re.search(r'\[Final Answer to New Question Begin\]\\?boxed\{(.*?)\}\[Final Answer to New Question End\]', text, re.DOTALL)
    if not answer_match:
        # Try without \boxed
        answer_match = re.search(r'\[Final Answer to New Question Begin\](.*?)\[Final Answer to New Question End\]', text, re.DOTALL)

    if not answer_match:
        return question, None

    answer = answer_match.group(1).strip()
    return question, answer


def extract_instruction_output(text: str) -> Optional[str]:
    """Extract the synthetic prompt from instruction task output, or None."""
    text = parse_thinking_output(text)

    # Look for the synthetic prompt after "Step 3 #Synthetic Prompt#:"
    match = re.search(r'Step 3 #Synthetic Prompt#:\s*(.+)', text, re.DOTALL)
    if match:
        return match.group(1).strip()
    return None
def categorize_prompts(prompts: List[str], num_categories: int = 8) -> Dict[int, List[int]]:
    """Cluster prompts for category-aware seed sampling (instruction tasks).

    Embeds each prompt with a small sentence-transformer encoder
    (mean-pooled last hidden state) and groups embeddings with KMeans.

    Returns:
        Mapping of cluster label -> list of prompt indices.
    """
    # Imported lazily: AutoModel is only needed on the instruction path.
    from transformers import AutoModel

    logger.info(f"Categorizing {len(prompts)} prompts into {num_categories} categories...")

    # Use a small model for embeddings
    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
    model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

    # Get embeddings
    embeddings = []
    for prompt in tqdm(prompts, desc="Computing embeddings"):
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)
        embedding = outputs.last_hidden_state.mean(dim=1).numpy()
        embeddings.append(embedding[0])

    # Cluster
    kmeans = KMeans(n_clusters=num_categories, random_state=42)
    labels = kmeans.fit_predict(embeddings)

    # Group prompt indices by cluster label.
    categories = {}
    for idx, label in enumerate(labels):
        if label not in categories:
            categories[label] = []
        categories[label].append(idx)

    return categories


def generate_synthetic_data(
    llm: LLM,
    seed_data: List[Dict],
    task_type: str,
    num_samples: int,
    categories: Optional[Dict[int, List[int]]] = None,
) -> List[Dict]:
    """Generate synthetic examples with CoT-Self-Instruct.

    Args:
        llm: vLLM engine used for generation.
        seed_data: Seed examples (dicts with "question"/"prompt" keys).
        task_type: "reasoning" (produces Q&A pairs) or "instruction"
            (produces prompts).
        num_samples: Target number of synthetic examples.
        categories: Optional cluster->indices map for category-aware
            sampling of instruction seeds.

    Returns:
        Generated examples; each records the seed indices it was built from.
        May contain fewer than ``num_samples`` items if the model rarely
        produces parseable output (see the attempt cap below).
    """
    synthetic_data = []

    # Set up progress bar
    pbar = tqdm(total=num_samples, desc="Generating synthetic data")

    # BUG FIX: the original loop ran unconditionally until num_samples
    # outputs parsed, so a model that never emits the expected markers
    # spun forever. Cap total attempts at 10x the requested count.
    max_attempts = max(num_samples * 10, 100)
    attempts = 0

    while len(synthetic_data) < num_samples and attempts < max_attempts:
        attempts += 1

        # Sample seed *indices* rather than items: the original recorded
        # provenance via seed_data.index(s), which is O(n) per sample and
        # always reports the first occurrence when the seed set contains
        # duplicate rows.
        if task_type == "reasoning":
            # Random sampling for reasoning tasks
            indices = random.sample(range(len(seed_data)), min(2, len(seed_data)))
            seeds = [seed_data[i] for i in indices]
            prompt = REASONING_PROMPT_TEMPLATE.format(
                seed1=seeds[0].get("question", seeds[0].get("prompt", "")),
                seed2=seeds[1].get("question", seeds[1].get("prompt", "")) if len(seeds) > 1 else seeds[0].get("question", seeds[0].get("prompt", "")),
            )
        else:
            # Category-aware sampling for instruction tasks
            if categories:
                # Pick a random category
                category = random.choice(list(categories.keys()))
                category_indices = categories[category]
                indices = random.sample(category_indices, min(2, len(category_indices)))
            else:
                indices = random.sample(range(len(seed_data)), min(2, len(seed_data)))
            seeds = [seed_data[i] for i in indices]

            prompt = INSTRUCTION_PROMPT_TEMPLATE.format(
                prompt1=seeds[0].get("prompt", seeds[0].get("question", "")),
                prompt2=seeds[1].get("prompt", seeds[1].get("question", "")) if len(seeds) > 1 else seeds[0].get("prompt", seeds[0].get("question", "")),
            )

        # Generate — slightly different sampling settings per task type.
        sampling_params = SamplingParams(
            temperature=0.7 if task_type == "reasoning" else 0.8,
            top_p=0.95 if task_type == "reasoning" else 0.9,
            max_tokens=2048,
        )

        outputs = llm.generate([prompt], sampling_params)
        output_text = outputs[0].outputs[0].text

        # Parse output; unparseable generations are simply retried.
        if task_type == "reasoning":
            question, answer = extract_reasoning_output(output_text)
            if question and answer:
                synthetic_data.append({
                    "question": question,
                    "answer": answer,
                    "seed_indices": indices,
                })
                pbar.update(1)
        else:
            synthetic_prompt = extract_instruction_output(output_text)
            if synthetic_prompt:
                synthetic_data.append({
                    "prompt": synthetic_prompt,
                    "seed_indices": indices,
                })
                pbar.update(1)

    pbar.close()
    if len(synthetic_data) < num_samples:
        logger.warning(
            f"Stopped after {attempts} attempts with "
            f"{len(synthetic_data)}/{num_samples} examples generated"
        )
    return synthetic_data


def answer_consistency_filter(
    llm: LLM,
    synthetic_data: List[Dict],
    k_responses: int = 16,
    threshold: float = 0.5,
) -> List[Dict]:
    """Filter reasoning tasks using Answer-Consistency.

    For each synthetic question, samples K solutions and keeps the item
    only when the majority-vote answer equals the answer recorded at
    generation time AND wins at least ``threshold`` of the parsed votes.
    """
    logger.info(f"Applying Answer-Consistency filter with K={k_responses}")

    filtered_data = []

    for item in tqdm(synthetic_data, desc="Answer-Consistency filtering"):
        question = item["question"]
        original_answer = item["answer"]

        # Generate K responses
        prompts = [question] * k_responses
        sampling_params = SamplingParams(
            temperature=0.6,
            top_p=0.95,
            max_tokens=1024,
        )

        outputs = llm.generate(prompts, sampling_params)

        # Extract answers — only \boxed{...} answers are counted as votes.
        answers = []
        for output in outputs:
            text = output.outputs[0].text
            # Try to extract boxed answer
            match = re.search(r'\\boxed\{(.*?)\}', text)
            if match:
                answers.append(match.group(1).strip())

        if not answers:
            continue

        # Get majority answer
        answer_counts = Counter(answers)
        if answer_counts:
            majority_answer, count = answer_counts.most_common(1)[0]

            # Check if majority answer matches original and meets threshold
            if (majority_answer == original_answer and
                    count / len(answers) >= threshold):
                item["consistency_score"] = count / len(answers)
                filtered_data.append(item)

    logger.info(f"Answer-Consistency: kept {len(filtered_data)}/{len(synthetic_data)} examples")
    return filtered_data


def rip_filter(
    llm: LLM,
    synthetic_data: List[Dict],
    reward_model_id: str,
    k_responses: int = 32,
    threshold: float = 0.5,
) -> List[Dict]:
    """Filter using Rejecting Instruction Preferences (RIP).

    NOTE: this is an acknowledged placeholder — responses are scored by
    length instead of by the reward model named in ``reward_model_id``.
    The minimum score across K responses (normalized by 1000 characters)
    must reach ``threshold`` for an item to survive.
    """
    logger.info(f"Applying RIP filter with K={k_responses} and reward model {reward_model_id}")

    # Note: In a full implementation, you would load and use the actual reward model
    # For this example, we'll use a placeholder scoring mechanism
    logger.warning("RIP filtering requires a reward model implementation - using placeholder")

    filtered_data = []

    for item in tqdm(synthetic_data, desc="RIP filtering"):
        prompt = item.get("prompt", item.get("question", ""))

        # Generate K responses
        prompts = [prompt] * k_responses
        sampling_params = SamplingParams(
            temperature=1.0,
            top_p=1.0,
            max_tokens=1024,
        )

        outputs = llm.generate(prompts, sampling_params)

        # In real implementation: score each response with reward model
        # For now, use length as a proxy (longer responses often score higher)
        scores = [len(output.outputs[0].text) for output in outputs]

        # Use minimum score as quality indicator
        min_score = min(scores) if scores else 0
        normalized_score = min_score / 1000  # Normalize to 0-1 range

        if normalized_score >= threshold:
            item["rip_score"] = normalized_score
            filtered_data.append(item)

    logger.info(f"RIP filter: kept {len(filtered_data)}/{len(synthetic_data)} examples")
    return filtered_data
def create_dataset_card(
    task_type: str,
    source_dataset: str,
    generation_model: str,
    filter_method: str,
    num_generated: int,
    num_filtered: int,
    generation_time: str,
    additional_info: Optional[Dict] = None,
) -> str:
    """Create a comprehensive dataset card for the generated dataset.

    Args:
        task_type: "reasoning" or "instruction".
        source_dataset: Hub ID of the seed dataset.
        generation_model: Hub ID of the generation model.
        filter_method: One of "answer-consistency", "rip", "both", "none".
        num_generated: Examples produced before filtering.
        num_filtered: Examples remaining after filtering.
        generation_time: Human-readable timestamp for the card.
        additional_info: Reserved for extra metadata (currently unused).

    Returns:
        Markdown dataset card (with YAML front matter) as a string.
    """
    filter_info = ""
    if filter_method == "answer-consistency":
        filter_info = """
### Answer-Consistency Filtering

This dataset was filtered using Answer-Consistency:
- Generated K responses for each synthetic question
- Kept only examples where majority answer matched the generated answer
- Ensures high-quality, correctly solved problems"""
    elif filter_method == "rip":
        filter_info = """
### RIP (Rejecting Instruction Preferences) Filtering

This dataset was filtered using RIP:
- Generated K responses for each synthetic prompt
- Scored responses using a reward model
- Kept only prompts with high minimum scores"""

    # BUG FIX: the original computed num_filtered/num_generated inline and
    # raised ZeroDivisionError when generation produced nothing.
    acceptance_rate = (num_filtered / num_generated * 100) if num_generated else 0.0

    return f"""---
tags:
- synthetic-data
- cot-self-instruct
- {task_type}
- uv-script
---

# CoT-Self-Instruct Synthetic Data

This dataset contains synthetic {task_type} data generated using the Chain-of-Thought Self-Instruct methodology.

## Generation Details

- **Source Dataset**: [{source_dataset}](https://huggingface.co/datasets/{source_dataset})
- **Generation Model**: [{generation_model}](https://huggingface.co/{generation_model})
- **Task Type**: {task_type}
- **Filter Method**: {filter_method}
- **Generated Examples**: {num_generated:,}
- **After Filtering**: {num_filtered:,} ({acceptance_rate:.1f}% acceptance rate)
- **Generation Date**: {generation_time}
{filter_info}

## Methodology

Generated using CoT-Self-Instruct, which:
1. Uses Chain-of-Thought reasoning to analyze seed examples
2. Generates new synthetic examples of similar quality and complexity
3. Applies quality filtering to ensure high-quality outputs

Based on the paper: "CoT-Self-Instruct: Building high-quality synthetic prompts for reasoning and non-reasoning tasks" (2025)

## Generation Script

Generated using the CoT-Self-Instruct script from [uv-scripts/synthetic-data](https://huggingface.co/datasets/uv-scripts/synthetic-data).

To reproduce:
```bash
uv run https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\
    --seed-dataset {source_dataset} \\
    --output-dataset <your-dataset> \\
    --task-type {task_type} \\
    --generation-model {generation_model} \\
    --filter-method {filter_method}
```
"""


def main():
    """CLI entry point: load seeds, generate, filter, and publish to the Hub."""
    parser = argparse.ArgumentParser(
        description="Generate synthetic data using CoT-Self-Instruct",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    # Dataset arguments
    parser.add_argument(
        "--seed-dataset",
        type=str,
        required=True,
        help="HuggingFace dataset ID containing seed examples",
    )
    parser.add_argument(
        "--output-dataset",
        type=str,
        required=True,
        help="HuggingFace dataset ID for output",
    )

    # Task configuration
    parser.add_argument(
        "--task-type",
        type=str,
        choices=["reasoning", "instruction", "auto"],
        default="auto",
        help="Type of task (reasoning generates Q&A, instruction generates prompts)",
    )
    parser.add_argument(
        "--task-column",
        type=str,
        default=None,
        help="Column name containing tasks (auto-detected if not specified)",
    )

    # Model configuration
    parser.add_argument(
        "--generation-model",
        type=str,
        default="Qwen/Qwen3-30B-A3B-Thinking-2507",
        help="Model for synthetic data generation",
    )
    parser.add_argument(
        "--filter-model",
        type=str,
        default=None,
        help="Model for filtering (defaults to generation model)",
    )
    parser.add_argument(
        "--reward-model",
        type=str,
        default="Nexusflow/Athene-RM-8B",
        help="Reward model for RIP filtering",
    )

    # Generation parameters
    parser.add_argument(
        "--num-samples",
        type=int,
        default=5000,
        help="Number of synthetic examples to generate",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=1,
        help="Batch size for generation",
    )

    # Filtering parameters
    parser.add_argument(
        "--filter-method",
        type=str,
        choices=["answer-consistency", "rip", "both", "none"],
        default="answer-consistency",
        help="Quality filtering method",
    )
    parser.add_argument(
        "--k-responses",
        type=int,
        default=16,
        help="Number of responses for filtering",
    )
    parser.add_argument(
        "--quality-threshold",
        type=float,
        default=0.5,
        help="Minimum quality threshold for filtering",
    )

    # GPU configuration
    parser.add_argument(
        "--tensor-parallel-size",
        type=int,
        default=None,
        help="Number of GPUs for tensor parallelism (auto-detected if not set)",
    )
    parser.add_argument(
        "--gpu-memory-utilization",
        type=float,
        default=0.9,
        help="GPU memory utilization",
    )

    # Other arguments
    parser.add_argument(
        "--hf-token",
        type=str,
        default=None,
        help="HuggingFace API token",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help="Random seed",
    )

    args = parser.parse_args()

    # Set random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Check GPU
    num_gpus = check_gpu_availability()
    tensor_parallel_size = args.tensor_parallel_size or num_gpus

    # Authentication
    hf_token = args.hf_token or os.environ.get("HF_TOKEN")
    if hf_token:
        login(token=hf_token)

    # Load seed dataset
    logger.info(f"Loading seed dataset: {args.seed_dataset}")
    seed_dataset = load_dataset(args.seed_dataset, split="train")

    # Auto-detect task type and column if needed
    if args.task_type == "auto":
        columns = seed_dataset.column_names
        if "question" in columns and "answer" in columns:
            args.task_type = "reasoning"
            logger.info("Auto-detected task type: reasoning")
        else:
            args.task_type = "instruction"
            logger.info("Auto-detected task type: instruction")

    if not args.task_column:
        if args.task_type == "reasoning":
            args.task_column = "question"
        else:
            # Try to find prompt column
            for col in ["prompt", "instruction", "text", "input"]:
                if col in seed_dataset.column_names:
                    args.task_column = col
                    break

    logger.info(f"Using task column: {args.task_column}")

    # Convert to list of dicts
    seed_data = seed_dataset.to_list()

    # Categorize prompts for instruction tasks (only worth it at scale)
    categories = None
    if args.task_type == "instruction" and len(seed_data) > 100:
        prompts = [item.get(args.task_column, "") for item in seed_data]
        categories = categorize_prompts(prompts)

    # Initialize generation model
    logger.info(f"Loading generation model: {args.generation_model}")
    generation_llm = LLM(
        model=args.generation_model,
        tensor_parallel_size=tensor_parallel_size,
        gpu_memory_utilization=args.gpu_memory_utilization,
    )

    # Generate synthetic data
    start_time = datetime.now()
    synthetic_data = generate_synthetic_data(
        generation_llm,
        seed_data,
        args.task_type,
        args.num_samples,
        categories,
    )

    # Apply filtering
    filter_llm = generation_llm
    if args.filter_model and args.filter_model != args.generation_model:
        logger.info(f"Loading filter model: {args.filter_model}")
        # BUG FIX: drop BOTH references to the generation engine before
        # loading the filter model. The original deleted only
        # `generation_llm`, so `filter_llm` kept the engine alive and both
        # models were resident in GPU memory while the new one loaded.
        del filter_llm
        del generation_llm
        torch.cuda.empty_cache()

        filter_llm = LLM(
            model=args.filter_model,
            tensor_parallel_size=tensor_parallel_size,
            gpu_memory_utilization=args.gpu_memory_utilization,
        )

    filtered_data = synthetic_data
    if args.filter_method != "none":
        if args.filter_method == "answer-consistency" and args.task_type == "reasoning":
            filtered_data = answer_consistency_filter(
                filter_llm,
                synthetic_data,
                args.k_responses,
                args.quality_threshold,
            )
        elif args.filter_method == "rip":
            filtered_data = rip_filter(
                filter_llm,
                synthetic_data,
                args.reward_model,
                args.k_responses,
                args.quality_threshold,
            )
        elif args.filter_method == "both":
            if args.task_type == "reasoning":
                filtered_data = answer_consistency_filter(
                    filter_llm,
                    synthetic_data,
                    args.k_responses,
                    args.quality_threshold,
                )
            filtered_data = rip_filter(
                filter_llm,
                filtered_data,
                args.reward_model,
                args.k_responses,
                args.quality_threshold,
            )

    # Create HuggingFace dataset
    logger.info(f"Creating dataset with {len(filtered_data)} examples")
    dataset = Dataset.from_list(filtered_data)

    # Create dataset card
    # NOTE(review): datetime.now() is local time but the label says UTC —
    # consider datetime.now(timezone.utc) here.
    generation_time = start_time.strftime("%Y-%m-%d %H:%M:%S UTC")
    dataset_card = create_dataset_card(
        args.task_type,
        args.seed_dataset,
        args.generation_model,
        args.filter_method,
        len(synthetic_data),
        len(filtered_data),
        generation_time,
    )

    # Push to hub
    logger.info(f"Pushing dataset to: {args.output_dataset}")
    # Create dataset card
    card = DatasetCard(dataset_card)
    dataset.push_to_hub(args.output_dataset)
    # Push card separately
    card.push_to_hub(args.output_dataset)

    logger.info("Done! Dataset available at: https://huggingface.co/datasets/" + args.output_dataset)

    # Print example HF Jobs command if running locally
    if len(sys.argv) > 1:
        print("\nTo run on HF Jobs:")
        print(f"""hf jobs uv run --flavor l4x4 \\
    --image vllm/vllm-openai \\
    -e HF_TOKEN=$(python3 -c "from huggingface_hub import get_token; print(get_token())") \\
    https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\
    --seed-dataset {args.seed_dataset} \\
    --output-dataset {args.output_dataset} \\
    --task-type {args.task_type} \\
    --generation-model {args.generation_model} \\
    --filter-method {args.filter_method} \\
    --num-samples {args.num_samples}""")


if __name__ == "__main__":
    main()
import argparse
import logging
import os
import sys
import time
from pathlib import Path

import polars as pl
from ascii_graph import Pyasciigraph
from datasets import Dataset
from huggingface_hub import HfApi, create_repo, list_repo_tree, login

logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# Common language+script codes for finepdfs-edu
COMMON_LANGUAGES = {
    "eng_Latn": "English (Latin script)",
    "fra_Latn": "French (Latin script)",
    "deu_Latn": "German (Latin script)",
    "spa_Latn": "Spanish (Latin script)",
    "por_Latn": "Portuguese (Latin script)",
    "ita_Latn": "Italian (Latin script)",
    "nld_Latn": "Dutch (Latin script)",
    "pol_Latn": "Polish (Latin script)",
    "rus_Cyrl": "Russian (Cyrillic script)",
    "zho_Hans": "Chinese (Simplified)",
    "zho_Hant": "Chinese (Traditional)",
    "jpn_Jpan": "Japanese",
    "kor_Hang": "Korean",
    "ara_Arab": "Arabic",
    "hin_Deva": "Hindi (Devanagari)",
}


def list_available_languages(dataset_id: str) -> list[str]:
    """List available language subsets in the dataset.

    Falls back to the COMMON_LANGUAGES keys when the Hub listing fails
    (e.g. offline or rate-limited).
    """
    try:
        tree = list_repo_tree(dataset_id, path_in_repo="data", repo_type="dataset")
        # Keep only the immediate children of data/ (the language folders).
        languages = [
            item.path.replace("data/", "")
            for item in tree
            if item.path.startswith("data/")
            and "/" not in item.path.replace("data/", "")
        ]
        return sorted(languages)
    except Exception as e:
        logger.warning(f"Could not list languages: {e}")
        return list(COMMON_LANGUAGES.keys())


def compute_temporal_stats(df: pl.LazyFrame, output_path: Path) -> pl.DataFrame:
    """Single scan: compute stats grouped by dump for temporal analysis.

    Streams the aggregation to ``output_path`` via sink_parquet so the
    source is never fully materialized in memory, then reads the small
    result back.
    """
    query = df.group_by("dump").agg(
        pl.len().alias("doc_count"),
        pl.col("token_count").sum().alias("total_tokens"),
        pl.col("fw_edu_scores").list.mean().mean().alias("avg_edu_score"),
        (pl.col("fw_edu_scores").list.mean() >= 3).sum().alias("high_edu_count"),
    )
    query.sink_parquet(output_path, engine="streaming")
    return pl.read_parquet(output_path)


def compute_global_stats(temporal: pl.DataFrame) -> pl.DataFrame:
    """Compute global stats from the per-dump temporal breakdown.

    Returns a single-row frame; the average educational score is weighted
    by each dump's document count.
    """
    total = temporal["doc_count"].sum()
    # BUG FIX: an empty scan (bad path glob, --limit 0) made total == 0 and
    # the weighted average below raised ZeroDivisionError. Return an
    # all-zero summary row with the same schema instead.
    if not total:
        return pl.DataFrame(
            {
                "total_docs": [0],
                "total_tokens": [0],
                "avg_edu_score": [0.0],
                "high_edu_rate": [0.0],
                "num_dumps": [0],
            }
        )
    return pl.DataFrame(
        {
            "total_docs": [total],
            "total_tokens": [temporal["total_tokens"].sum()],
            "avg_edu_score": [
                (temporal["avg_edu_score"] * temporal["doc_count"]).sum() / total
            ],
            "high_edu_rate": [temporal["high_edu_count"].sum() / total],
            "num_dumps": [len(temporal)],
        }
    )


def format_temporal_stats(temporal: pl.DataFrame) -> pl.DataFrame:
    """Format temporal stats with high_edu_rate, sorted chronologically."""
    return (
        temporal.with_columns(
            (pl.col("high_edu_count") / pl.col("doc_count")).alias("high_edu_rate")
        )
        .select(["dump", "doc_count", "avg_edu_score", "high_edu_rate"])
        .sort(
            "dump"
        )  # Chronological order (CC-MAIN-2017-xx comes before CC-MAIN-2024-xx)
    )
def create_ascii_charts(temporal_stats: pl.DataFrame) -> str:
    """Create ASCII bar charts showing temporal trends.

    Collapses the per-dump rows to per-year averages (cleaner display),
    then renders two ascii_graph charts: the high-educational-content
    rate and the average educational score. Returns the chart text.
    """
    # Extract year from dump name (CC-MAIN-2024-42 -> 2024)
    # Group by year and average the values for cleaner display
    yearly = (
        temporal_stats.with_columns(
            pl.col("dump").str.extract(r"CC-MAIN-(\d{4})", 1).alias("year")
        )
        .group_by("year")
        .agg(
            pl.col("doc_count").sum(),
            pl.col("avg_edu_score").mean(),
            pl.col("high_edu_rate").mean(),
        )
        .sort("year")
    )

    lines = []

    # High edu rate chart (more dramatic differences)
    data_rate = [
        (row["year"], row["high_edu_rate"] * 100)
        for row in yearly.iter_rows(named=True)
    ]
    graph = Pyasciigraph(line_length=60, float_format="{0:.1f}%")
    lines.extend(graph.graph("High Educational Content (edu >= 3)", data_rate))

    lines.append("")

    # Avg edu score chart
    data_score = [
        (row["year"], row["avg_edu_score"]) for row in yearly.iter_rows(named=True)
    ]
    graph2 = Pyasciigraph(line_length=60, float_format="{0:.2f}")
    lines.extend(graph2.graph("Average Educational Score", data_score))

    return "\n".join(lines)


def create_readme(
    args,
    global_stats: pl.DataFrame,
    temporal_stats: pl.DataFrame,
    scan_time: float,
    ascii_charts: str,
) -> str:
    """Create README content for the stats dataset.

    Builds a dataset card with YAML front matter (two viewer configs),
    the trend charts, a first-vs-last-year comparison, and a performance
    summary. NOTE(review): assumes temporal_stats is non-empty —
    yearly.head(1).to_dicts()[0] would raise IndexError otherwise.
    """
    stats = global_stats.to_dicts()[0]
    total_docs = stats.get("total_docs", 0)
    docs_per_sec = total_docs / scan_time if scan_time > 0 else 0

    # Get first and last year averages for trend (more representative than single dumps)
    yearly = (
        temporal_stats.with_columns(
            pl.col("dump").str.extract(r"CC-MAIN-(\d{4})", 1).alias("year")
        )
        .group_by("year")
        .agg(
            pl.col("doc_count").sum(),
            pl.col("avg_edu_score").mean(),
            pl.col("high_edu_rate").mean(),
        )
        .sort("year")
    )
    first_year = yearly.head(1).to_dicts()[0]
    last_year = yearly.tail(1).to_dicts()[0]

    scope = (
        "all languages"
        if args.all_languages
        else COMMON_LANGUAGES.get(args.lang, args.lang)
    )

    return f"""---
tags:
  - uv-script
  - statistics
  - polars
  - finepdfs-edu
  - temporal-analysis
license: odc-by
configs:
  - config_name: global_stats
    data_files: global_stats/train-*.parquet
  - config_name: temporal_stats
    data_files: temporal_stats/train-*.parquet
default_viewer_config: temporal_stats
---

# Is the Web Getting More Educational?

Temporal analysis of educational quality in **{scope}** across {stats.get("num_dumps", 0)} CommonCrawl dumps.

## Trend

```
{ascii_charts}
```

## Key Finding

| Year | Avg Edu Score | High Edu Rate |
|------|---------------|---------------|
| {first_year["year"]} | {first_year["avg_edu_score"]:.2f} | {first_year["high_edu_rate"] * 100:.1f}% |
| {last_year["year"]} | {last_year["avg_edu_score"]:.2f} | {last_year["high_edu_rate"] * 100:.1f}% |

## Performance

- **{total_docs:,} documents** processed in **{scan_time:.0f} seconds**
- **{docs_per_sec:,.0f} docs/sec** using Polars streaming
- Single scan, no full dataset download required

## Summary

| Metric | Value |
|--------|-------|
| Scope | {scope} |
| Total Documents | {total_docs:,} |
| Total Tokens | {stats.get("total_tokens", 0):,} |
| Avg Edu Score | {stats.get("avg_edu_score", 0):.3f} |
| High Edu Rate | {stats.get("high_edu_rate", 0) * 100:.1f}% |
| CommonCrawl Dumps | {stats.get("num_dumps", 0)} |

## Files

- `global_stats` - Overall summary
- `temporal_stats` - Per-dump breakdown (sorted chronologically)

## Reproduce

```bash
uv run https://huggingface.co/datasets/uv-scripts/dataset-stats/raw/main/finepdfs-stats.py \\
    {"--all-languages" if args.all_languages else f"--lang {args.lang}"} --output-repo your-username/stats
```

## Source

- **Dataset**: [{args.source_dataset}](https://huggingface.co/datasets/{args.source_dataset})
- **Script**: [uv-scripts/dataset-stats](https://huggingface.co/datasets/uv-scripts/dataset-stats)
"""
def main():
    """Entry point: parse args, run the streaming scan, report, and upload.

    Flow: build an hf:// parquet glob for the chosen language scope,
    lazily scan it with Polars, aggregate per-dump stats in one pass,
    print a console report with ASCII trend charts, and optionally push
    the result tables plus a README to a Hub dataset repo.
    """
    parser = argparse.ArgumentParser(
        description="Analyze educational quality trends across CommonCrawl dumps",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    parser.add_argument(
        "--source-dataset",
        type=str,
        default="HuggingFaceFW/finepdfs-edu",
        help="Source dataset (default: HuggingFaceFW/finepdfs-edu)",
    )

    parser.add_argument(
        "--lang",
        type=str,
        default="eng_Latn",
        help="Language+script code (default: eng_Latn)",
    )

    parser.add_argument(
        "--all-languages",
        action="store_true",
        help="Analyze all languages (70+) instead of single language",
    )

    parser.add_argument(
        "--show-plan",
        action="store_true",
        help="Show Polars query plan (demonstrates optimization)",
    )

    parser.add_argument(
        "--list-languages",
        action="store_true",
        help="List available languages and exit",
    )

    parser.add_argument(
        "--limit",
        type=int,
        help="Limit to first N rows (for testing)",
    )

    parser.add_argument(
        "--output-repo",
        type=str,
        help="HuggingFace dataset repository to upload results",
    )

    parser.add_argument(
        "--output-dir",
        type=str,
        default="./stats_output",
        help="Local directory for output files",
    )

    parser.add_argument(
        "--hf-token",
        type=str,
        help="HuggingFace API token (or set HF_TOKEN env var)",
    )

    parser.add_argument(
        "--private",
        action="store_true",
        help="Make the output dataset private",
    )

    args = parser.parse_args()

    # Check for high-performance mode
    if os.environ.get("HF_XET_HIGH_PERFORMANCE"):
        logger.info("High-performance mode enabled (HF_XET_HIGH_PERFORMANCE=1)")

    # List languages mode: print codes and exit without scanning anything.
    if args.list_languages:
        print(f"Available language+script codes for {args.source_dataset}:\n")
        print("Common languages:")
        for code, name in COMMON_LANGUAGES.items():
            print(f"  {code:12} - {name}")
        print("\nFetching full list from HF Hub...")
        all_langs = list_available_languages(args.source_dataset)
        print(f"\nAll available ({len(all_langs)} total):")
        for lang in all_langs[:30]:  # Show first 30
            name = COMMON_LANGUAGES.get(lang, "")
            print(f"  {lang:12} {name}")
        if len(all_langs) > 30:
            print(f"  ... and {len(all_langs) - 30} more")
        sys.exit(0)

    # Build the parquet path (hf:// lets Polars stream straight from the Hub)
    if args.all_languages:
        source_path = f"hf://datasets/{args.source_dataset}/data/*/train/*.parquet"
        scope_desc = "all languages"
    else:
        source_path = (
            f"hf://datasets/{args.source_dataset}/data/{args.lang}/train/*.parquet"
        )
        scope_desc = f"{args.lang} ({COMMON_LANGUAGES.get(args.lang, 'unknown')})"

    logger.info(f"Scanning: {source_path}")
    logger.info(f"Scope: {scope_desc}")

    # Create lazy frame - this doesn't load any data yet!
    logger.info("Creating lazy query plan...")
    df = pl.scan_parquet(source_path)

    # Apply limit if specified
    if args.limit:
        logger.info(f"Limiting to first {args.limit:,} rows")
        df = df.head(args.limit)

    # Show query plan if requested
    if args.show_plan:
        # Build a sample query to show the plan
        sample_query = df.select(
            pl.len(),
            pl.col("token_count").sum(),
            pl.col("language").n_unique(),
        )
        print("\nQuery Plan (showing Polars optimization):")
        print("=" * 60)
        print(sample_query.explain())
        print("=" * 60)
        print("\nNote: Polars uses projection pushdown - only reads columns needed!")
        print("The 'text' column is never loaded, making this very fast.\n")

    # Create output directory
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Single scan: compute temporal stats
    logger.info("Computing temporal stats (single scan)...")
    start = time.perf_counter()
    temporal_path = output_dir / "temporal_stats.parquet"
    temporal_raw = compute_temporal_stats(df, temporal_path)
    scan_time = time.perf_counter() - start
    logger.info(f"Scan complete in {scan_time:.2f}s - {len(temporal_raw)} dumps")

    # Compute stats
    global_stats = compute_global_stats(temporal_raw)
    temporal_stats = format_temporal_stats(temporal_raw)

    # Save (the formatted frame overwrites the raw sink written above)
    global_stats.write_parquet(output_dir / "global_stats.parquet")
    temporal_stats.write_parquet(output_dir / "temporal_stats.parquet")

    # Print results
    total_docs = global_stats["total_docs"][0]
    docs_per_sec = total_docs / scan_time if scan_time > 0 else 0

    print("\n" + "=" * 70)
    print("IS THE WEB GETTING MORE EDUCATIONAL?")
    print("=" * 70)

    print(f"\nScope: {scope_desc}")
    print(f"Dataset: {args.source_dataset}")

    print("\n" + "-" * 70)
    print("GLOBAL STATS")
    print("-" * 70)
    print(global_stats)

    print("\n" + "-" * 70)
    print(f"TEMPORAL TREND ({len(temporal_stats)} CommonCrawl dumps)")
    print("-" * 70)
    # Show first 5 and last 5
    if len(temporal_stats) > 10:
        print("Earliest dumps:")
        print(temporal_stats.head(5))
        print("\n...")
        print("\nLatest dumps:")
        print(temporal_stats.tail(5))
    else:
        print(temporal_stats)

    # Create ASCII charts
    ascii_charts = create_ascii_charts(temporal_stats)
    print("\n" + "-" * 70)
    print("TREND VISUALIZATION")
    print("-" * 70)
    print(ascii_charts)

    print("\n" + "-" * 70)
    print("PERFORMANCE")
    print("-" * 70)
    print(f"Scan time: {scan_time:.2f}s")
    print(f"Documents: {total_docs:,}")
    print(f"Throughput: {docs_per_sec:,.0f} docs/sec")

    logger.info(f"Results saved to: {output_dir}")

    # Upload to HF Hub if requested
    if args.output_repo:
        hf_token = args.hf_token or os.environ.get("HF_TOKEN")
        if hf_token:
            login(token=hf_token)

        api = HfApi(token=hf_token)

        logger.info(f"Creating/updating dataset repository: {args.output_repo}")
        create_repo(
            args.output_repo,
            repo_type="dataset",
            private=args.private,
            token=hf_token,
            exist_ok=True,
        )

        # Upload each as a dataset config
        configs = [
            ("global_stats", global_stats),
            ("temporal_stats", temporal_stats),
        ]

        for config_name, stats_df in configs:
            logger.info(f"Uploading {config_name}...")
            ds = Dataset.from_polars(stats_df)
            ds.push_to_hub(
                args.output_repo,
                config_name=config_name,
                token=hf_token,
                private=args.private,
            )
            time.sleep(1)  # Avoid 409 conflicts

        # Upload README
        readme_content = create_readme(
            args, global_stats, temporal_stats, scan_time, ascii_charts
        )
        api.upload_file(
            path_or_fileobj=readme_content.encode(),
            path_in_repo="README.md",
            repo_id=args.output_repo,
            repo_type="dataset",
            token=hf_token,
        )

        dataset_url = f"https://huggingface.co/datasets/{args.output_repo}"
        logger.info(f"Dataset uploaded: {dataset_url}")
        print(f"\nResults uploaded to: {dataset_url}")


if __name__ == "__main__":
    # With no CLI arguments, print a usage cheat-sheet instead of scanning.
    if len(sys.argv) == 1:
        print("Is the Web Getting More Educational?")
        print("=" * 40)
        print("\nAnalyze educational quality trends across CommonCrawl dumps")
        print("using Polars streaming - no download needed!\n")
        print("Example commands:\n")
        print("# Quick test:")
        print("uv run finepdfs-stats.py --limit 10000\n")
        print("# Analyze English PDFs:")
        print("uv run finepdfs-stats.py\n")
        print("# Analyze ALL 70+ languages:")
        print("uv run finepdfs-stats.py --all-languages\n")
        print("# Show query plan (see Polars optimization):")
        print("uv run finepdfs-stats.py --show-plan --limit 1000\n")
        print("# Save results to HF Hub:")
        print("uv run finepdfs-stats.py --output-repo username/temporal-stats\n")
        print("# Run on HF Jobs:")
        print("hf jobs uv run \\")
        print("  -s HF_TOKEN \\")
        print("  -e HF_XET_HIGH_PERFORMANCE=1 \\")
        print(
            "  https://huggingface.co/datasets/uv-scripts/dataset-stats/raw/main/finepdfs-stats.py \\"
        )
        print("  -- --output-repo username/stats")
        sys.exit(0)

    main()
efficient GPU inference. + +This script loads a dataset from Hugging Face Hub containing chat-formatted messages, +applies the model's chat template, generates responses using vLLM, and saves the +results back to the Hub with a comprehensive dataset card. + +Example usage: + # Local execution with auto GPU detection + uv run generate-responses.py \\ + username/input-dataset \\ + username/output-dataset \\ + --messages-column messages + + # With custom model and sampling parameters + uv run generate-responses.py \\ + username/input-dataset \\ + username/output-dataset \\ + --model-id meta-llama/Llama-3.1-8B-Instruct \\ + --temperature 0.9 \\ + --top-p 0.95 \\ + --max-tokens 2048 + + # HF Jobs execution (see script output for full command) + hf jobs uv run --flavor a100x4 ... +""" + +import argparse +import logging +import os +import sys +from datetime import datetime +from typing import Optional + +from datasets import load_dataset +from huggingface_hub import DatasetCard, get_token, login +from torch import cuda +from tqdm.auto import tqdm +from transformers import AutoTokenizer +from vllm import LLM, SamplingParams + +# Enable HF Transfer for faster downloads +os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + + +def check_gpu_availability() -> int: + """Check if CUDA is available and return the number of GPUs.""" + if not cuda.is_available(): + logger.error("CUDA is not available. This script requires a GPU.") + logger.error( + "Please run on a machine with NVIDIA GPU or use HF Jobs with GPU flavor." 
+ ) + sys.exit(1) + + num_gpus = cuda.device_count() + for i in range(num_gpus): + gpu_name = cuda.get_device_name(i) + gpu_memory = cuda.get_device_properties(i).total_memory / 1024**3 + logger.info(f"GPU {i}: {gpu_name} with {gpu_memory:.1f} GB memory") + + return num_gpus + + +def create_dataset_card( + source_dataset: str, + model_id: str, + messages_column: str, + prompt_column: Optional[str], + sampling_params: SamplingParams, + tensor_parallel_size: int, + num_examples: int, + generation_time: str, + num_skipped: int = 0, + max_model_len_used: Optional[int] = None, +) -> str: + """Create a comprehensive dataset card documenting the generation process.""" + filtering_section = "" + if num_skipped > 0: + skip_percentage = (num_skipped / num_examples) * 100 + processed = num_examples - num_skipped + filtering_section = f""" + +### Filtering Statistics + +- **Total Examples**: {num_examples:,} +- **Processed**: {processed:,} ({100 - skip_percentage:.1f}%) +- **Skipped (too long)**: {num_skipped:,} ({skip_percentage:.1f}%) +- **Max Model Length Used**: {max_model_len_used:,} tokens + +Note: Prompts exceeding the maximum model length were skipped and have empty responses.""" + + return f"""--- +tags: +- generated +- vllm +- uv-script +--- + +# Generated Responses Dataset + +This dataset contains generated responses for prompts from [{source_dataset}](https://huggingface.co/datasets/{source_dataset}). 
+ +## Generation Details + +- **Source Dataset**: [{source_dataset}](https://huggingface.co/datasets/{source_dataset}) +- **Input Column**: `{prompt_column if prompt_column else messages_column}` ({"plain text prompts" if prompt_column else "chat messages"}) +- **Model**: [{model_id}](https://huggingface.co/{model_id}) +- **Number of Examples**: {num_examples:,} +- **Generation Date**: {generation_time}{filtering_section} + +### Sampling Parameters + +- **Temperature**: {sampling_params.temperature} +- **Top P**: {sampling_params.top_p} +- **Top K**: {sampling_params.top_k} +- **Min P**: {sampling_params.min_p} +- **Max Tokens**: {sampling_params.max_tokens} +- **Repetition Penalty**: {sampling_params.repetition_penalty} + +### Hardware Configuration + +- **Tensor Parallel Size**: {tensor_parallel_size} +- **GPU Configuration**: {tensor_parallel_size} GPU(s) + +## Dataset Structure + +The dataset contains all columns from the source dataset plus: +- `response`: The generated response from the model + +## Generation Script + +Generated using the vLLM inference script from [uv-scripts/vllm](https://huggingface.co/datasets/uv-scripts/vllm). 
+ +To reproduce this generation: + +```bash +uv run https://huggingface.co/datasets/uv-scripts/vllm/raw/main/generate-responses.py \\ + {source_dataset} \\ + \\ + --model-id {model_id} \\ + {"--prompt-column " + prompt_column if prompt_column else "--messages-column " + messages_column} \\ + --temperature {sampling_params.temperature} \\ + --top-p {sampling_params.top_p} \\ + --top-k {sampling_params.top_k} \\ + --max-tokens {sampling_params.max_tokens}{f" \\\\\\n --max-model-len {max_model_len_used}" if max_model_len_used else ""} +``` +""" + + +def main( + src_dataset_hub_id: str, + output_dataset_hub_id: str, + model_id: str = "Qwen/Qwen3-30B-A3B-Instruct-2507", + messages_column: str = "messages", + prompt_column: Optional[str] = None, + output_column: str = "response", + temperature: float = 0.7, + top_p: float = 0.8, + top_k: int = 20, + min_p: float = 0.0, + max_tokens: int = 16384, + repetition_penalty: float = 1.0, + gpu_memory_utilization: float = 0.90, + max_model_len: Optional[int] = None, + tensor_parallel_size: Optional[int] = None, + skip_long_prompts: bool = True, + max_samples: Optional[int] = None, + hf_token: Optional[str] = None, +): + """ + Main generation pipeline. 
+ + Args: + src_dataset_hub_id: Input dataset on Hugging Face Hub + output_dataset_hub_id: Where to save results on Hugging Face Hub + model_id: Hugging Face model ID for generation + messages_column: Column name containing chat messages + prompt_column: Column name containing plain text prompts (alternative to messages_column) + output_column: Column name for generated responses + temperature: Sampling temperature + top_p: Top-p sampling parameter + top_k: Top-k sampling parameter + min_p: Minimum probability threshold + max_tokens: Maximum tokens to generate + repetition_penalty: Repetition penalty parameter + gpu_memory_utilization: GPU memory utilization factor + max_model_len: Maximum model context length (None uses model default) + tensor_parallel_size: Number of GPUs to use (auto-detect if None) + skip_long_prompts: Skip prompts exceeding max_model_len instead of failing + max_samples: Maximum number of samples to process (None for all) + hf_token: Hugging Face authentication token + """ + generation_start_time = datetime.now().isoformat() + + # GPU check and configuration + num_gpus = check_gpu_availability() + if tensor_parallel_size is None: + tensor_parallel_size = num_gpus + logger.info( + f"Auto-detected {num_gpus} GPU(s), using tensor_parallel_size={tensor_parallel_size}" + ) + else: + logger.info(f"Using specified tensor_parallel_size={tensor_parallel_size}") + if tensor_parallel_size > num_gpus: + logger.warning( + f"Requested {tensor_parallel_size} GPUs but only {num_gpus} available" + ) + + # Authentication - try multiple methods + HF_TOKEN = hf_token or os.environ.get("HF_TOKEN") or get_token() + + if not HF_TOKEN: + logger.error("No HuggingFace token found. Please provide token via:") + logger.error(" 1. --hf-token argument") + logger.error(" 2. HF_TOKEN environment variable") + logger.error(" 3. 
Run 'hf auth login' or use login() in Python") + sys.exit(1) + + logger.info("HuggingFace token found, authenticating...") + login(token=HF_TOKEN) + + # Initialize vLLM + logger.info(f"Loading model: {model_id}") + vllm_kwargs = { + "model": model_id, + "tensor_parallel_size": tensor_parallel_size, + "gpu_memory_utilization": gpu_memory_utilization, + } + if max_model_len is not None: + vllm_kwargs["max_model_len"] = max_model_len + logger.info(f"Using max_model_len={max_model_len}") + + llm = LLM(**vllm_kwargs) + + # Load tokenizer for chat template + logger.info("Loading tokenizer...") + tokenizer = AutoTokenizer.from_pretrained(model_id) + + # Create sampling parameters + sampling_params = SamplingParams( + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + max_tokens=max_tokens, + repetition_penalty=repetition_penalty, + ) + + # Load dataset + logger.info(f"Loading dataset: {src_dataset_hub_id}") + dataset = load_dataset(src_dataset_hub_id, split="train") + + # Apply max_samples if specified + if max_samples is not None and max_samples < len(dataset): + logger.info(f"Limiting dataset to {max_samples} samples") + dataset = dataset.select(range(max_samples)) + + total_examples = len(dataset) + logger.info(f"Dataset loaded with {total_examples:,} examples") + + # Determine which column to use and validate + if prompt_column: + # Use prompt column mode + if prompt_column not in dataset.column_names: + logger.error( + f"Column '{prompt_column}' not found. Available columns: {dataset.column_names}" + ) + sys.exit(1) + logger.info(f"Using prompt column mode with column: '{prompt_column}'") + use_messages = False + else: + # Use messages column mode + if messages_column not in dataset.column_names: + logger.error( + f"Column '{messages_column}' not found. 
Available columns: {dataset.column_names}" + ) + sys.exit(1) + logger.info(f"Using messages column mode with column: '{messages_column}'") + use_messages = True + + # Get effective max length for filtering + if max_model_len is not None: + effective_max_len = max_model_len + else: + # Get model's default max length + effective_max_len = llm.llm_engine.model_config.max_model_len + logger.info(f"Using effective max model length: {effective_max_len}") + + # Process messages and apply chat template + logger.info("Preparing prompts...") + all_prompts = [] + valid_prompts = [] + valid_indices = [] + skipped_info = [] + + for i, example in enumerate(tqdm(dataset, desc="Processing prompts")): + if use_messages: + # Messages mode: use existing chat messages + messages = example[messages_column] + # Apply chat template + prompt = tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + else: + # Prompt mode: convert plain text to messages format + user_prompt = example[prompt_column] + messages = [{"role": "user", "content": user_prompt}] + # Apply chat template + prompt = tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + + all_prompts.append(prompt) + + # Count tokens if filtering is enabled + if skip_long_prompts: + tokens = tokenizer.encode(prompt) + if len(tokens) <= effective_max_len: + valid_prompts.append(prompt) + valid_indices.append(i) + else: + skipped_info.append((i, len(tokens))) + else: + valid_prompts.append(prompt) + valid_indices.append(i) + + # Log filtering results + if skip_long_prompts and skipped_info: + logger.warning( + f"Skipped {len(skipped_info)} prompts that exceed max_model_len ({effective_max_len} tokens)" + ) + logger.info("Skipped prompt details (first 10):") + for idx, (prompt_idx, token_count) in enumerate(skipped_info[:10]): + logger.info( + f" - Example {prompt_idx}: {token_count} tokens (exceeds by {token_count - effective_max_len})" + ) + if len(skipped_info) > 
10: + logger.info(f" ... and {len(skipped_info) - 10} more") + + skip_percentage = (len(skipped_info) / total_examples) * 100 + if skip_percentage > 10: + logger.warning(f"WARNING: {skip_percentage:.1f}% of prompts were skipped!") + + if not valid_prompts: + logger.error("No valid prompts to process after filtering!") + sys.exit(1) + + # Generate responses - vLLM handles batching internally + logger.info(f"Starting generation for {len(valid_prompts):,} valid prompts...") + logger.info("vLLM will handle batching and scheduling automatically") + + outputs = llm.generate(valid_prompts, sampling_params) + + # Extract generated text and create full response list + logger.info("Extracting generated responses...") + responses = [""] * total_examples # Initialize with empty strings + + for idx, output in enumerate(outputs): + original_idx = valid_indices[idx] + response = output.outputs[0].text.strip() + responses[original_idx] = response + + # Add responses to dataset + logger.info("Adding responses to dataset...") + dataset = dataset.add_column(output_column, responses) + + # Create dataset card + logger.info("Creating dataset card...") + card_content = create_dataset_card( + source_dataset=src_dataset_hub_id, + model_id=model_id, + messages_column=messages_column, + prompt_column=prompt_column, + sampling_params=sampling_params, + tensor_parallel_size=tensor_parallel_size, + num_examples=total_examples, + generation_time=generation_start_time, + num_skipped=len(skipped_info) if skip_long_prompts else 0, + max_model_len_used=effective_max_len if skip_long_prompts else None, + ) + + # Push dataset to hub + logger.info(f"Pushing dataset to: {output_dataset_hub_id}") + dataset.push_to_hub(output_dataset_hub_id, token=HF_TOKEN) + + # Push dataset card + card = DatasetCard(card_content) + card.push_to_hub(output_dataset_hub_id, token=HF_TOKEN) + + logger.info("✅ Generation complete!") + logger.info( + f"Dataset available at: 
https://huggingface.co/datasets/{output_dataset_hub_id}" + ) + + +if __name__ == "__main__": + if len(sys.argv) > 1: + parser = argparse.ArgumentParser( + description="Generate responses for dataset prompts using vLLM", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Basic usage with default Qwen model + uv run generate-responses.py input-dataset output-dataset + + # With custom model and parameters + uv run generate-responses.py input-dataset output-dataset \\ + --model-id meta-llama/Llama-3.1-8B-Instruct \\ + --temperature 0.9 \\ + --max-tokens 2048 + + # Force specific GPU configuration + uv run generate-responses.py input-dataset output-dataset \\ + --tensor-parallel-size 2 \\ + --gpu-memory-utilization 0.95 + + # Using environment variable for token + HF_TOKEN=hf_xxx uv run generate-responses.py input-dataset output-dataset + """, + ) + + parser.add_argument( + "src_dataset_hub_id", + help="Input dataset on Hugging Face Hub (e.g., username/dataset-name)", + ) + parser.add_argument( + "output_dataset_hub_id", help="Output dataset name on Hugging Face Hub" + ) + parser.add_argument( + "--model-id", + type=str, + default="Qwen/Qwen3-30B-A3B-Instruct-2507", + help="Model to use for generation (default: Qwen3-30B-A3B-Instruct-2507)", + ) + parser.add_argument( + "--messages-column", + type=str, + default="messages", + help="Column containing chat messages (default: messages)", + ) + parser.add_argument( + "--prompt-column", + type=str, + help="Column containing plain text prompts (alternative to --messages-column)", + ) + parser.add_argument( + "--output-column", + type=str, + default="response", + help="Column name for generated responses (default: response)", + ) + parser.add_argument( + "--max-samples", + type=int, + help="Maximum number of samples to process (default: all)", + ) + parser.add_argument( + "--temperature", + type=float, + default=0.7, + help="Sampling temperature (default: 0.7)", + ) + parser.add_argument( + 
"--top-p", + type=float, + default=0.8, + help="Top-p sampling parameter (default: 0.8)", + ) + parser.add_argument( + "--top-k", + type=int, + default=20, + help="Top-k sampling parameter (default: 20)", + ) + parser.add_argument( + "--min-p", + type=float, + default=0.0, + help="Minimum probability threshold (default: 0.0)", + ) + parser.add_argument( + "--max-tokens", + type=int, + default=16384, + help="Maximum tokens to generate (default: 16384)", + ) + parser.add_argument( + "--repetition-penalty", + type=float, + default=1.0, + help="Repetition penalty (default: 1.0)", + ) + parser.add_argument( + "--gpu-memory-utilization", + type=float, + default=0.90, + help="GPU memory utilization factor (default: 0.90)", + ) + parser.add_argument( + "--max-model-len", + type=int, + help="Maximum model context length (default: model's default)", + ) + parser.add_argument( + "--tensor-parallel-size", + type=int, + help="Number of GPUs to use (default: auto-detect)", + ) + parser.add_argument( + "--hf-token", + type=str, + help="Hugging Face token (can also use HF_TOKEN env var)", + ) + parser.add_argument( + "--skip-long-prompts", + action="store_true", + default=True, + help="Skip prompts that exceed max_model_len instead of failing (default: True)", + ) + parser.add_argument( + "--no-skip-long-prompts", + dest="skip_long_prompts", + action="store_false", + help="Fail on prompts that exceed max_model_len", + ) + + args = parser.parse_args() + + main( + src_dataset_hub_id=args.src_dataset_hub_id, + output_dataset_hub_id=args.output_dataset_hub_id, + model_id=args.model_id, + messages_column=args.messages_column, + prompt_column=args.prompt_column, + output_column=args.output_column, + temperature=args.temperature, + top_p=args.top_p, + top_k=args.top_k, + min_p=args.min_p, + max_tokens=args.max_tokens, + repetition_penalty=args.repetition_penalty, + gpu_memory_utilization=args.gpu_memory_utilization, + max_model_len=args.max_model_len, + 
tensor_parallel_size=args.tensor_parallel_size, + skip_long_prompts=args.skip_long_prompts, + max_samples=args.max_samples, + hf_token=args.hf_token, + ) + else: + # Show HF Jobs example when run without arguments + print(""" +vLLM Response Generation Script +============================== + +This script requires arguments. For usage information: + uv run generate-responses.py --help + +Example HF Jobs command with multi-GPU: + # If you're logged in with hf auth, token will be auto-detected + hf jobs uv run \\ + --flavor l4x4 \\ + https://huggingface.co/datasets/uv-scripts/vllm/raw/main/generate-responses.py \\ + username/input-dataset \\ + username/output-dataset \\ + --messages-column messages \\ + --model-id Qwen/Qwen3-30B-A3B-Instruct-2507 \\ + --temperature 0.7 \\ + --max-tokens 16384 + """) diff --git a/skills/hugging-face-model-trainer/SKILL.md b/skills/hugging-face-model-trainer/SKILL.md index 95994b17..d6d5f742 100644 --- a/skills/hugging-face-model-trainer/SKILL.md +++ b/skills/hugging-face-model-trainer/SKILL.md @@ -1,9 +1,9 @@ --- +source: "https://github.com/huggingface/skills/tree/main/skills/huggingface-llm-trainer" name: hugging-face-model-trainer -description: "Train language models using TRL (Transformer Reinforcement Learning) on fully managed Hugging Face infrastructure. No local GPU setup required—models train on cloud GPUs and results are automatically saved to the Hugging Face Hub." +description: Train or fine-tune TRL language models on Hugging Face Jobs, including SFT, DPO, GRPO, and GGUF export. license: Complete terms in LICENSE.txt risk: unknown -source: community --- # TRL Training on Hugging Face Jobs @@ -60,11 +60,12 @@ When assisting with training jobs: 4. **Use example scripts as templates** - Reference `scripts/train_sft_example.py`, `scripts/train_dpo_example.py`, etc. as starting points. 
-## Local Script Dependencies +## Local Script Execution -To run scripts locally (like `estimate_cost.py`), install dependencies: +Repository scripts use PEP 723 inline dependencies. Run them with `uv run`: ```bash -pip install -r requirements.txt +uv run scripts/estimate_cost.py --help +uv run scripts/dataset_inspector.py --help ``` ## Prerequisites Checklist @@ -240,8 +241,8 @@ hf_jobs("uv", {"script": "https://gist.githubusercontent.com/user/id/raw/train.p **To use local scripts:** Upload to HF Hub first: ```bash -huggingface-cli repo create my-training-scripts --type model -huggingface-cli upload my-training-scripts ./train.py train.py +hf repos create my-training-scripts --type model +hf upload my-training-scripts ./train.py train.py # Use: https://huggingface.co/USERNAME/my-training-scripts/resolve/main/train.py ``` @@ -331,13 +332,10 @@ hf jobs cancel # Cancel a job The `trl-jobs` package provides optimized defaults and one-liner training. ```bash -# Install -pip install trl-jobs - -# Train with SFT (simplest possible) -trl-jobs sft \ +uvx trl-jobs sft \ --model_name Qwen/Qwen2.5-0.5B \ --dataset_name trl-lib/Capybara + ``` **Benefits:** Pre-configured settings, automatic Trackio integration, automatic Hub push, one-line commands @@ -685,6 +683,7 @@ Add to PEP 723 header: - `references/hardware_guide.md` - Hardware specs and selection - `references/hub_saving.md` - Hub authentication troubleshooting - `references/troubleshooting.md` - Common issues and solutions +- `references/local_training_macos.md` - Local training on macOS ### Scripts (In This Skill) - `scripts/train_sft_example.py` - Production SFT template diff --git a/skills/hugging-face-model-trainer/references/gguf_conversion.md b/skills/hugging-face-model-trainer/references/gguf_conversion.md new file mode 100644 index 00000000..a99ea0e8 --- /dev/null +++ b/skills/hugging-face-model-trainer/references/gguf_conversion.md @@ -0,0 +1,296 @@ +# GGUF Conversion Guide + +After training models with TRL on 
Hugging Face Jobs, convert them to **GGUF format** for use with llama.cpp, Ollama, LM Studio, and other local inference tools. + +**This guide provides production-ready, tested code based on successful conversions.** All critical dependencies and build steps are included. + +## What is GGUF? + +**GGUF** (GPT-Generated Unified Format): +- Optimized format for CPU/GPU inference with llama.cpp +- Supports quantization (4-bit, 5-bit, 8-bit) to reduce model size +- Compatible with: Ollama, LM Studio, Jan, GPT4All, llama.cpp +- Typically 2-8GB for 7B models (vs 14GB unquantized) + +## When to Convert to GGUF + +**Convert when:** +- Running models locally with Ollama or LM Studio +- Using CPU-optimized inference +- Reducing model size with quantization +- Deploying to edge devices +- Sharing models for local-first use + +## Critical Success Factors + +Based on production testing, these are **essential** for reliable conversion: + +### 1. ✅ Install Build Tools FIRST +**Before cloning llama.cpp**, install build dependencies: +```python +subprocess.run(["apt-get", "update", "-qq"], check=True, capture_output=True) +subprocess.run(["apt-get", "install", "-y", "-qq", "build-essential", "cmake"], check=True, capture_output=True) +``` + +**Why:** The quantization tool requires gcc and cmake. Installing after cloning doesn't help. + +### 2. 
✅ Use CMake (Not Make) +**Build the quantize tool with CMake:** +```python +# Create build directory +os.makedirs("/tmp/llama.cpp/build", exist_ok=True) + +# Configure +subprocess.run([ + "cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp", + "-DGGML_CUDA=OFF" # Faster build, CUDA not needed for quantization +], check=True, capture_output=True, text=True) + +# Build +subprocess.run([ + "cmake", "--build", "/tmp/llama.cpp/build", + "--target", "llama-quantize", "-j", "4" +], check=True, capture_output=True, text=True) + +# Binary path +quantize_bin = "/tmp/llama.cpp/build/bin/llama-quantize" +``` + +**Why:** CMake is more reliable than `make` and produces consistent binary paths. + +### 3. ✅ Include All Dependencies +**PEP 723 header must include:** +```python +# /// script +# dependencies = [ +# "transformers>=4.36.0", +# "peft>=0.7.0", +# "torch>=2.0.0", +# "accelerate>=0.24.0", +# "huggingface_hub>=0.20.0", +# "sentencepiece>=0.1.99", # Required for tokenizer +# "protobuf>=3.20.0", # Required for tokenizer +# "numpy", +# "gguf", +# ] +# /// +``` + +**Why:** `sentencepiece` and `protobuf` are critical for tokenizer conversion. Missing them causes silent failures. + +### 4. ✅ Verify Names Before Use +**Always verify repos exist:** +```python +# Before submitting job, verify: +hub_repo_details([ADAPTER_MODEL], repo_type="model") +hub_repo_details([BASE_MODEL], repo_type="model") +``` + +**Why:** Non-existent dataset/model names cause job failures that could be caught in seconds. + +## Complete Conversion Script + +See `scripts/convert_to_gguf.py` for the complete, production-ready script. 
+ +**Key features:** +- ✅ All dependencies in PEP 723 header +- ✅ Build tools installed automatically +- ✅ CMake build process (reliable) +- ✅ Comprehensive error handling +- ✅ Environment variable configuration +- ✅ Automatic README generation + +## Quick Conversion Job + +```python +# Before submitting: VERIFY MODELS EXIST +hub_repo_details(["username/my-finetuned-model"], repo_type="model") +hub_repo_details(["Qwen/Qwen2.5-0.5B"], repo_type="model") + +# Submit conversion job +hf_jobs("uv", { + "script": open("trl/scripts/convert_to_gguf.py").read(), # Or inline the script + "flavor": "a10g-large", + "timeout": "45m", + "secrets": {"HF_TOKEN": "$HF_TOKEN"}, + "env": { + "ADAPTER_MODEL": "username/my-finetuned-model", + "BASE_MODEL": "Qwen/Qwen2.5-0.5B", + "OUTPUT_REPO": "username/my-model-gguf", + "HF_USERNAME": "username" # Optional, for README + } +}) +``` + +## Conversion Process + +The script performs these steps: + +1. **Load and Merge** - Load base model and LoRA adapter, merge them +2. **Install Build Tools** - Install gcc, cmake (CRITICAL: before cloning llama.cpp) +3. **Setup llama.cpp** - Clone repo, install Python dependencies +4. **Convert to GGUF** - Create FP16 GGUF using llama.cpp converter +5. **Build Quantize Tool** - Use CMake to build `llama-quantize` +6. **Quantize** - Create Q4_K_M, Q5_K_M, Q8_0 versions +7. **Upload** - Upload all versions + README to Hub + +## Quantization Options + +Common quantization formats (from smallest to largest): + +| Format | Size | Quality | Use Case | +|--------|------|---------|----------| +| **Q4_K_M** | ~300MB | Good | **Recommended** - best balance of size/quality | +| **Q5_K_M** | ~350MB | Better | Higher quality, slightly larger | +| **Q8_0** | ~500MB | Very High | Near-original quality | +| **F16** | ~1GB | Original | Full precision, largest file | + +**Recommendation:** Create Q4_K_M, Q5_K_M, and Q8_0 versions to give users options. 
+ +## Hardware Requirements + +**For conversion:** +- Small models (<1B): CPU-basic works, but slow +- Medium models (1-7B): a10g-large recommended +- Large models (7B+): a10g-large or a100-large + +**Time estimates:** +- 0.5B model: ~15-25 minutes on A10G +- 3B model: ~30-45 minutes on A10G +- 7B model: ~45-60 minutes on A10G + +## Using GGUF Models + +**GGUF models work on both CPU and GPU.** They're optimized for CPU inference but can also leverage GPU acceleration when available. + +### With Ollama (auto-detects GPU) +```bash +# Download GGUF +hf download username/my-model-gguf model-q4_k_m.gguf + +# Create Modelfile +echo "FROM ./model-q4_k_m.gguf" > Modelfile + +# Create and run (uses GPU automatically if available) +ollama create my-model -f Modelfile +ollama run my-model +``` + +### With llama.cpp +```bash +# CPU only +./llama-cli -m model-q4_k_m.gguf -p "Your prompt" + +# With GPU acceleration (offload 32 layers to GPU) +./llama-cli -m model-q4_k_m.gguf -ngl 32 -p "Your prompt" +``` + +### With LM Studio +1. Download the `.gguf` file +2. Import into LM Studio +3. Start chatting + +## Best Practices + +### ✅ DO: +1. **Verify repos exist** before submitting jobs (use `hub_repo_details`) +2. **Install build tools FIRST** before cloning llama.cpp +3. **Use CMake** for building quantize tool (not make) +4. **Include all dependencies** in PEP 723 header (especially sentencepiece, protobuf) +5. **Create multiple quantizations** - Give users choice +6. **Test on known models** before production use +7. **Use A10G GPU** for faster conversion + +### ❌ DON'T: +1. **Assume repos exist** - Always verify with hub tools +2. **Use make** instead of CMake - Less reliable +3. **Remove dependencies** to "simplify" - They're all needed +4. **Skip build tools** - Quantization will fail silently +5. 
**Use default paths** - CMake puts binaries in build/bin/ + +## Common Issues + +### Out of memory during merge +**Fix:** +- Use larger GPU (a10g-large or a100-large) +- Ensure `device_map="auto"` for automatic placement +- Use `dtype=torch.float16` or `torch.bfloat16` + +### Conversion fails with architecture error +**Fix:** +- Ensure llama.cpp supports the model architecture +- Check for standard architecture (Qwen, Llama, Mistral, etc.) +- Update llama.cpp to latest: `git clone --depth 1 https://github.com/ggerganov/llama.cpp.git` +- Check llama.cpp documentation for model support + +### Quantization fails +**Fix:** +- Verify build tools installed: `apt-get install build-essential cmake` +- Use CMake (not make) to build quantize tool +- Check binary path: `/tmp/llama.cpp/build/bin/llama-quantize` +- Verify FP16 GGUF exists before quantizing + +### Missing sentencepiece error +**Fix:** +- Add to PEP 723 header: `"sentencepiece>=0.1.99", "protobuf>=3.20.0"` +- Don't remove dependencies to "simplify" - all are required + +### Upload fails or times out +**Fix:** +- Large models (>2GB) need longer timeout: `"timeout": "1h"` +- Upload quantized versions separately if needed +- Check network/Hub status + +## Lessons Learned + +These are from production testing and real failures: + +### 1. Always Verify Before Use +**Lesson:** Don't assume repos/datasets exist. Check first. +```python +# BEFORE submitting job +hub_repo_details(["trl-lib/argilla-dpo-mix-7k"], repo_type="dataset") # Would catch error +``` +**Prevented failures:** Non-existent dataset names, typos in model names + +### 2. Prioritize Reliability Over Performance +**Lesson:** Default to what's most likely to succeed. +- Use CMake (not make) - more reliable +- Disable CUDA in build - faster, not needed +- Include all dependencies - don't "simplify" + +**Prevented failures:** Build failures, missing binaries + +### 3. Create Atomic, Self-Contained Scripts +**Lesson:** Don't remove dependencies or steps. 
Scripts should work as a unit. +- All dependencies in PEP 723 header +- All build steps included +- Clear error messages + +**Prevented failures:** Missing tokenizer libraries, build tool failures + +## References + +**In this skill:** +- `scripts/convert_to_gguf.py` - Complete, production-ready script + +**External:** +- [llama.cpp Repository](https://github.com/ggerganov/llama.cpp) +- [GGUF Specification](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md) +- [Ollama Documentation](https://ollama.ai) +- [LM Studio](https://lmstudio.ai) + +## Summary + +**Critical checklist for GGUF conversion:** +- [ ] Verify adapter and base models exist on Hub +- [ ] Use production script from `scripts/convert_to_gguf.py` +- [ ] All dependencies in PEP 723 header (including sentencepiece, protobuf) +- [ ] Build tools installed before cloning llama.cpp +- [ ] CMake used for building quantize tool (not make) +- [ ] Correct binary path: `/tmp/llama.cpp/build/bin/llama-quantize` +- [ ] A10G GPU selected for reasonable conversion time +- [ ] Timeout set to 45m minimum +- [ ] HF_TOKEN in secrets for Hub upload + +**The script in `scripts/convert_to_gguf.py` incorporates all these lessons and has been tested successfully in production.** diff --git a/skills/hugging-face-model-trainer/references/hardware_guide.md b/skills/hugging-face-model-trainer/references/hardware_guide.md new file mode 100644 index 00000000..22eba945 --- /dev/null +++ b/skills/hugging-face-model-trainer/references/hardware_guide.md @@ -0,0 +1,283 @@ +# Hardware Selection Guide + +Choosing the right hardware (flavor) is critical for cost-effective training. 
+ +## Available Hardware + +### CPU +- `cpu-basic` - Basic CPU, testing only +- `cpu-upgrade` - Enhanced CPU + +**Use cases:** Dataset validation, preprocessing, testing scripts +**Not recommended for training:** Too slow for any meaningful training + +### GPU Options + +| Flavor | GPU | Memory | Use Case | Cost/hour | +|--------|-----|--------|----------|-----------| +| `t4-small` | NVIDIA T4 | 16GB | <1B models, demos | ~$0.50-1 | +| `t4-medium` | NVIDIA T4 | 16GB | 1-3B models, development | ~$1-2 | +| `l4x1` | NVIDIA L4 | 24GB | 3-7B models, efficient training | ~$2-3 | +| `l4x4` | 4x NVIDIA L4 | 96GB | Multi-GPU training | ~$8-12 | +| `a10g-small` | NVIDIA A10G | 24GB | 3-7B models, production | ~$3-4 | +| `a10g-large` | NVIDIA A10G | 24GB | 7-13B models | ~$4-6 | +| `a10g-largex2` | 2x NVIDIA A10G | 48GB | Multi-GPU, large models | ~$8-12 | +| `a10g-largex4` | 4x NVIDIA A10G | 96GB | Multi-GPU, very large models | ~$16-24 | +| `a100-large` | NVIDIA A100 | 40GB | 13B+ models, fast training | ~$8-12 | + +### TPU Options + +| Flavor | Type | Use Case | +|--------|------|----------| +| `v5e-1x1` | TPU v5e | Small TPU workloads | +| `v5e-2x2` | 4x TPU v5e | Medium TPU workloads | +| `v5e-2x4` | 8x TPU v5e | Large TPU workloads | + +**Note:** TPUs require TPU-optimized code. Most TRL training uses GPUs. 
+ +## Selection Guidelines + +### By Model Size + +**Tiny Models (<1B parameters)** +- **Recommended:** `t4-small` +- **Example:** Qwen2.5-0.5B, TinyLlama +- **Batch size:** 4-8 +- **Training time:** 1-2 hours for 1K examples + +**Small Models (1-3B parameters)** +- **Recommended:** `t4-medium` or `a10g-small` +- **Example:** Qwen2.5-1.5B, Phi-2 +- **Batch size:** 2-4 +- **Training time:** 2-4 hours for 10K examples + +**Medium Models (3-7B parameters)** +- **Recommended:** `a10g-small` or `a10g-large` +- **Example:** Qwen2.5-7B, Mistral-7B +- **Batch size:** 1-2 (or LoRA with 4-8) +- **Training time:** 4-8 hours for 10K examples + +**Large Models (7-13B parameters)** +- **Recommended:** `a10g-large` or `a100-large` +- **Example:** Llama-3-8B, Mixtral-8x7B (with LoRA) +- **Batch size:** 1 (full fine-tuning) or 2-4 (LoRA) +- **Training time:** 6-12 hours for 10K examples +- **Note:** Always use LoRA/PEFT + +**Very Large Models (13B+ parameters)** +- **Recommended:** `a100-large` with LoRA +- **Example:** Llama-3-13B, Llama-3-70B (LoRA only) +- **Batch size:** 1-2 with LoRA +- **Training time:** 8-24 hours for 10K examples +- **Note:** Full fine-tuning not feasible, use LoRA/PEFT + +### By Budget + +**Minimal Budget (<$5 total)** +- Use `t4-small` +- Train on subset of data (100-500 examples) +- Limit to 1-2 epochs +- Use small model (<1B) + +**Small Budget ($5-20)** +- Use `t4-medium` or `a10g-small` +- Train on 1K-5K examples +- 2-3 epochs +- Model up to 3B parameters + +**Medium Budget ($20-50)** +- Use `a10g-small` or `a10g-large` +- Train on 5K-20K examples +- 3-5 epochs +- Model up to 7B parameters + +**Large Budget ($50-200)** +- Use `a10g-large` or `a100-large` +- Full dataset training +- Multiple epochs +- Model up to 13B parameters with LoRA + +### By Training Type + +**Quick Demo/Experiment** +- `t4-small` +- 50-100 examples +- 5-10 steps +- ~10-15 minutes + +**Development/Iteration** +- `t4-medium` or `a10g-small` +- 1K examples +- 1 epoch +- ~30-60 
minutes + +**Production Training** +- `a10g-large` or `a100-large` +- Full dataset +- 3-5 epochs +- 4-12 hours + +**Research/Experimentation** +- `a100-large` +- Multiple runs +- Various hyperparameters +- Budget for 20-50 hours + +## Memory Considerations + +### Estimating Memory Requirements + +**Full fine-tuning:** +``` +Memory (GB) ≈ (Model params in billions) × 20 +``` + +**LoRA fine-tuning:** +``` +Memory (GB) ≈ (Model params in billions) × 4 +``` + +**Examples:** +- Qwen2.5-0.5B full: ~10GB ✅ fits t4-small +- Qwen2.5-1.5B full: ~30GB ❌ exceeds most GPUs +- Qwen2.5-1.5B LoRA: ~6GB ✅ fits t4-small +- Qwen2.5-7B full: ~140GB ❌ not feasible +- Qwen2.5-7B LoRA: ~28GB ⚠️ exceeds a10g-large (24GB); use a100-large or the optimizations below + +### Memory Optimization + +If hitting memory limits: + +1. **Use LoRA/PEFT** + ```python + peft_config=LoraConfig(r=16, lora_alpha=32) + ``` + +2. **Reduce batch size** + ```python + per_device_train_batch_size=1 + ``` + +3. **Increase gradient accumulation** + ```python + gradient_accumulation_steps=8 # Effective batch size = 1×8 + ``` + +4. **Enable gradient checkpointing** + ```python + gradient_checkpointing=True + ``` + +5. **Use mixed precision** + ```python + bf16=True # or fp16=True + ``` + +6. **Upgrade to larger GPU** + - t4 → a10g → a100 + +## Cost Estimation + +### Formula + +``` +Total Cost = (Hours of training) × (Cost per hour) +``` + +### Example Calculations + +**Quick demo:** +- Hardware: t4-small ($0.75/hour) +- Time: 15 minutes (0.25 hours) +- Cost: $0.19 + +**Development training:** +- Hardware: a10g-small ($3.50/hour) +- Time: 2 hours +- Cost: $7.00 + +**Production training:** +- Hardware: a10g-large ($5/hour) +- Time: 6 hours +- Cost: $30.00 + +**Large model with LoRA:** +- Hardware: a100-large ($10/hour) +- Time: 8 hours +- Cost: $80.00 + +### Cost Optimization Tips + +1. **Start small:** Test on t4-small with subset +2. **Use LoRA:** 4-5x cheaper than full fine-tuning +3. **Optimize hyperparameters:** Fewer epochs if possible +4. 
**Set appropriate timeout:** Don't waste compute on stalled jobs +5. **Use checkpointing:** Resume if job fails +6. **Monitor costs:** Check running jobs regularly + +## Multi-GPU Training + +TRL automatically handles multi-GPU training with Accelerate when using multi-GPU flavors. + +**Multi-GPU flavors:** +- `l4x4` - 4x L4 GPUs +- `a10g-largex2` - 2x A10G GPUs +- `a10g-largex4` - 4x A10G GPUs + +**When to use:** +- Models >13B parameters +- Need faster training (linear speedup) +- Large datasets (>50K examples) + +**Example:** +```python +hf_jobs("uv", { + "script": "train.py", + "flavor": "a10g-largex2", # 2 GPUs + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +No code changes needed—TRL/Accelerate handles distribution automatically. + +## Choosing Between Options + +### a10g vs a100 + +**Choose a10g when:** +- Model <13B parameters +- Budget conscious +- Training time not critical + +**Choose a100 when:** +- Model 13B+ parameters +- Need fastest training +- Memory requirements high +- Budget allows + +### Single vs Multi-GPU + +**Choose single GPU when:** +- Model <7B parameters +- Budget constrained +- Simpler debugging + +**Choose multi-GPU when:** +- Model >13B parameters +- Need faster training +- Large batch sizes required +- Cost-effective for large jobs + +## Quick Reference + +```python +# Model size → Hardware selection +HARDWARE_MAP = { + "<1B": "t4-small", + "1-3B": "a10g-small", + "3-7B": "a10g-large", + "7-13B": "a10g-large (LoRA) or a100-large", + ">13B": "a100-large (LoRA required)" +} +``` diff --git a/skills/hugging-face-model-trainer/references/hub_saving.md b/skills/hugging-face-model-trainer/references/hub_saving.md new file mode 100644 index 00000000..734e49b5 --- /dev/null +++ b/skills/hugging-face-model-trainer/references/hub_saving.md @@ -0,0 +1,364 @@ +# Saving Training Results to Hugging Face Hub + +**⚠️ CRITICAL:** Training environments are ephemeral. 
ALL results are lost when a job completes unless pushed to the Hub. + +## Why Hub Push is Required + +When running on Hugging Face Jobs: +- Environment is temporary +- All files deleted on job completion +- No local disk persistence +- Cannot access results after job ends + +**Without Hub push, training is completely wasted.** + +## Required Configuration + +### 1. Training Configuration + +In your SFTConfig or trainer config: + +```python +SFTConfig( + push_to_hub=True, # Enable Hub push + hub_model_id="username/model-name", # Target repository +) +``` + +### 2. Job Configuration + +When submitting the job: + +```python +hf_jobs("uv", { + "script": "train.py", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Provide authentication +}) +``` + +**The `$HF_TOKEN` placeholder is automatically replaced with your Hugging Face token.** + +## Complete Example + +```python +# train.py +# /// script +# dependencies = ["trl"] +# /// + +from trl import SFTTrainer, SFTConfig +from datasets import load_dataset + +dataset = load_dataset("trl-lib/Capybara", split="train") + +# Configure with Hub push +config = SFTConfig( + output_dir="my-model", + num_train_epochs=3, + + # ✅ CRITICAL: Hub push configuration + push_to_hub=True, + hub_model_id="myusername/my-trained-model", + + # Optional: push checkpoints as they are saved + hub_strategy="every_save", + # Optional: explicit token override; defaults to HF_TOKEN from the environment + hub_token=None, +) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + args=config, +) + +trainer.train() + +# ✅ Push final model +trainer.push_to_hub() + +print("✅ Model saved to: https://huggingface.co/myusername/my-trained-model") +``` + +**Submit with authentication:** + +```python +hf_jobs("uv", { + "script": "train.py", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Required! +}) +``` + +## What Gets Saved + +When `push_to_hub=True`: + +1. **Model weights** - Final trained parameters +2. 
**Tokenizer** - Associated tokenizer +3. **Configuration** - Model config (config.json) +4. **Training arguments** - Hyperparameters used +5. **Model card** - Auto-generated documentation +6. **Checkpoints** - If `save_strategy="steps"` enabled + +## Checkpoint Saving + +Save intermediate checkpoints during training: + +```python +SFTConfig( + output_dir="my-model", + push_to_hub=True, + hub_model_id="username/my-model", + + # Checkpoint configuration + save_strategy="steps", + save_steps=100, # Save every 100 steps + save_total_limit=3, # Keep only last 3 checkpoints +) +``` + +**Benefits:** +- Resume training if job fails +- Compare checkpoint performance +- Use intermediate models + +**Checkpoints are pushed to:** `username/my-model` (same repo) + +## Authentication Methods + +### Method 1: Automatic Token (Recommended) + +```python +"secrets": {"HF_TOKEN": "$HF_TOKEN"} +``` + +Uses your logged-in Hugging Face token automatically. + +### Method 2: Explicit Token + +```python +"secrets": {"HF_TOKEN": "hf_abc123..."} +``` + +Provide token explicitly (not recommended for security). + +### Method 3: Environment Variable + +```python +"env": {"HF_TOKEN": "hf_abc123..."} +``` + +Pass as regular environment variable (less secure than secrets). + +**Always prefer Method 1** for security and convenience. + +## Verification Checklist + +Before submitting any training job, verify: + +- [ ] `push_to_hub=True` in training config +- [ ] `hub_model_id` is specified (format: `username/model-name`) +- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +- [ ] Repository name doesn't conflict with existing repos +- [ ] You have write access to the target namespace + +## Repository Setup + +### Automatic Creation + +If repository doesn't exist, it's created automatically when first pushing. 
+ +### Manual Creation + +Create repository before training: + +```python +from huggingface_hub import HfApi + +api = HfApi() +api.create_repo( + repo_id="username/model-name", + repo_type="model", + private=False, # or True for private repo +) +``` + +### Repository Naming + +**Valid names:** +- `username/my-model` +- `username/model-name` +- `organization/model-name` + +**Invalid names:** +- `model-name` (missing username) +- `username/model name` (spaces not allowed) +- `username/MODEL` (uppercase discouraged) + +## Troubleshooting + +### Error: 401 Unauthorized + +**Cause:** HF_TOKEN not provided or invalid + +**Solutions:** +1. Verify `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config +2. Check you're logged in: `hf auth whoami` +3. Re-login: `hf auth login` + +### Error: 403 Forbidden + +**Cause:** No write access to repository + +**Solutions:** +1. Check repository namespace matches your username +2. Verify you're a member of organization (if using org namespace) +3. Check repository isn't private (if accessing org repo) + +### Error: Repository not found + +**Cause:** Repository doesn't exist and auto-creation failed + +**Solutions:** +1. Manually create repository first +2. Check repository name format +3. Verify namespace exists + +### Error: Push failed during training + +**Cause:** Network issues or Hub unavailable + +**Solutions:** +1. Training continues but final push fails +2. Checkpoints may be saved +3. Re-run push manually after job completes + +### Issue: Model saved but not visible + +**Possible causes:** +1. Repository is private—check https://huggingface.co/username +2. Wrong namespace—verify `hub_model_id` matches login +3. 
Push still in progress—wait a few minutes + +## Manual Push After Training + +If training completes but push fails, push manually: + +```python +from transformers import AutoModel, AutoTokenizer + +# Load from local checkpoint +model = AutoModel.from_pretrained("./output_dir") +tokenizer = AutoTokenizer.from_pretrained("./output_dir") + +# Push to Hub +model.push_to_hub("username/model-name", token="hf_abc123...") +tokenizer.push_to_hub("username/model-name", token="hf_abc123...") +``` + +**Note:** Only possible if job hasn't completed (files still exist). + +## Best Practices + +1. **Always enable `push_to_hub=True`** +2. **Use checkpoint saving** for long training runs +3. **Verify Hub push** in logs before job completes +4. **Set appropriate `save_total_limit`** to avoid excessive checkpoints +5. **Use descriptive repo names** (e.g., `qwen-capybara-sft` not `model1`) +6. **Add model card** with training details +7. **Tag models** with relevant tags (e.g., `text-generation`, `fine-tuned`) + +## Monitoring Push Progress + +Check logs for push progress: + +```python +hf_jobs("logs", {"job_id": "your-job-id"}) +``` + +**Look for:** +``` +Pushing model to username/model-name... +Upload file pytorch_model.bin: 100% +✅ Model pushed successfully +``` + +## Example: Full Production Setup + +```python +# production_train.py +# /// script +# dependencies = ["trl>=0.12.0", "peft>=0.7.0"] +# /// + +from datasets import load_dataset +from peft import LoraConfig +from trl import SFTTrainer, SFTConfig +import os + +# Verify token is available +assert "HF_TOKEN" in os.environ, "HF_TOKEN not found in environment!" 
+ +# Load dataset +dataset = load_dataset("trl-lib/Capybara", split="train") +print(f"✅ Dataset loaded: {len(dataset)} examples") + +# Configure with comprehensive Hub settings +config = SFTConfig( + output_dir="qwen-capybara-sft", + + # Hub configuration + push_to_hub=True, + hub_model_id="myusername/qwen-capybara-sft", + hub_strategy="checkpoint", # Push checkpoints + + # Checkpoint configuration + save_strategy="steps", + save_steps=100, + save_total_limit=3, + + # Training settings + num_train_epochs=3, + per_device_train_batch_size=4, + + # Logging + logging_steps=10, + logging_first_step=True, +) + +# Train with LoRA +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + args=config, + peft_config=LoraConfig(r=16, lora_alpha=32), +) + +print("🚀 Starting training...") +trainer.train() + +print("💾 Pushing final model to Hub...") +trainer.push_to_hub() + +print("✅ Training complete!") +print(f"Model available at: https://huggingface.co/myusername/qwen-capybara-sft") +``` + +**Submit:** + +```python +hf_jobs("uv", { + "script": "production_train.py", + "flavor": "a10g-large", + "timeout": "6h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +## Key Takeaway + +**Without `push_to_hub=True` and `secrets={"HF_TOKEN": "$HF_TOKEN"}`, all training results are permanently lost.** + +Always verify both are configured before submitting any training job. diff --git a/skills/hugging-face-model-trainer/references/local_training_macos.md b/skills/hugging-face-model-trainer/references/local_training_macos.md new file mode 100644 index 00000000..fdf5dede --- /dev/null +++ b/skills/hugging-face-model-trainer/references/local_training_macos.md @@ -0,0 +1,231 @@ +# Local Training on macOS (Apple Silicon) + +Run small LoRA fine-tuning jobs locally on Mac for smoke tests and quick iteration before submitting to HF Jobs. 
+ +## When to Use Local Mac vs HF Jobs + +| Local Mac | HF Jobs / Cloud GPU | +|-----------|-------------------| +| Model ≤3B, text-only | Model 7B+ | +| LoRA/PEFT only | QLoRA 4-bit (CUDA/bitsandbytes) | +| Short context (≤1024) | Long context / full fine-tuning | +| Smoke tests, dataset validation | Production runs, VLMs | + +**Typical workflow:** local smoke test → HF Jobs with same config → export/quantize ([gguf_conversion.md](gguf_conversion.md)) + +## Recommended Defaults + +| Setting | Value | Notes | +|---------|-------|-------| +| Model size | 0.5B–1.5B first run | Scale up after verifying | +| Max seq length | 512–1024 | Lower = less memory | +| Batch size | 1 | Scale via gradient accumulation | +| Gradient accumulation | 8–16 | Effective batch = 8–16 | +| LoRA rank (r) | 8–16 | alpha = 2×r | +| Dtype | float32 | fp16 causes NaN on MPS; bf16 only on M1 Pro+ and M2/M3/M4 | + +### Memory by hardware + +| Unified RAM | Max Model Size | +|-------------|---------------| +| 16 GB | ~0.5B–1.5B | +| 32 GB | ~1.5B–3B | +| 64 GB | ~3B (short context) | + +## Setup + +```bash +xcode-select --install +python3 -m venv .venv && source .venv/bin/activate +pip install -U "torch>=2.2" "transformers>=4.40" "trl>=0.12" "peft>=0.10" \ + datasets accelerate safetensors huggingface_hub +``` + +Verify MPS: +```bash +python -c "import torch; print(torch.__version__, '| MPS:', torch.backends.mps.is_available())" +``` + +Optional — configure Accelerate for local Mac (no distributed, no mixed precision, MPS device): +```bash +accelerate config +``` + +## Training Script + +
+train_lora_sft.py + +```python +import os +from dataclasses import dataclass +from typing import Optional +import torch +from datasets import load_dataset +from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed +from peft import LoraConfig +from trl import SFTTrainer, SFTConfig + +set_seed(42) + +@dataclass +class Cfg: + model_id: str = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-0.5B-Instruct") + dataset_id: str = os.environ.get("DATASET_ID", "HuggingFaceH4/ultrachat_200k") + dataset_split: str = os.environ.get("DATASET_SPLIT", "train_sft[:500]") + data_files: Optional[str] = os.environ.get("DATA_FILES", None) + text_field: str = os.environ.get("TEXT_FIELD", "") + messages_field: str = os.environ.get("MESSAGES_FIELD", "messages") + out_dir: str = os.environ.get("OUT_DIR", "outputs/local-lora") + max_seq_length: int = int(os.environ.get("MAX_SEQ_LENGTH", "512")) + max_steps: int = int(os.environ.get("MAX_STEPS", "-1")) + +cfg = Cfg() +device = "mps" if torch.backends.mps.is_available() else "cpu" + +tokenizer = AutoTokenizer.from_pretrained(cfg.model_id, use_fast=True) +if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token +tokenizer.padding_side = "right" + +model = AutoModelForCausalLM.from_pretrained(cfg.model_id, torch_dtype=torch.float32) +model.to(device) +model.config.use_cache = False + +if cfg.data_files: + ds = load_dataset("json", data_files=cfg.data_files, split="train") +else: + ds = load_dataset(cfg.dataset_id, split=cfg.dataset_split) + +def format_example(ex): + if cfg.text_field and isinstance(ex.get(cfg.text_field), str): + ex["text"] = ex[cfg.text_field] + return ex + msgs = ex.get(cfg.messages_field) + if isinstance(msgs, list): + if hasattr(tokenizer, "apply_chat_template"): + try: + ex["text"] = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=False) + return ex + except Exception: + pass + ex["text"] = "\n".join([str(m) for m in msgs]) + return ex + ex["text"] = str(ex) + return ex 
+ +ds = ds.map(format_example) +ds = ds.remove_columns([c for c in ds.column_names if c != "text"]) + +lora = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, bias="none", + task_type="CAUSAL_LM", target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]) + +sft_kwargs = dict( + output_dir=cfg.out_dir, per_device_train_batch_size=1, gradient_accumulation_steps=8, + learning_rate=2e-4, logging_steps=10, save_steps=200, save_total_limit=2, + gradient_checkpointing=True, report_to="none", fp16=False, bf16=False, + max_seq_length=cfg.max_seq_length, dataset_text_field="text", +) +if cfg.max_steps > 0: + sft_kwargs["max_steps"] = cfg.max_steps +else: + sft_kwargs["num_train_epochs"] = 1 + +trainer = SFTTrainer(model=model, train_dataset=ds, peft_config=lora, + args=SFTConfig(**sft_kwargs), processing_class=tokenizer) +trainer.train() +trainer.save_model(cfg.out_dir) +print(f"✅ Saved to: {cfg.out_dir}") +``` + +
+ +### Run + +```bash +python train_lora_sft.py +``` + +**Env overrides:** + +```bash +MODEL_ID="Qwen/Qwen2.5-1.5B-Instruct" python train_lora_sft.py # different model +MAX_STEPS=50 python train_lora_sft.py # quick 50-step test +DATA_FILES="my_data.jsonl" python train_lora_sft.py # local JSONL file +PYTORCH_ENABLE_MPS_FALLBACK=1 python train_lora_sft.py # MPS op fallback to CPU +PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 python train_lora_sft.py # disable MPS memory limit (use with caution) +``` + +**Local JSONL format** — chat messages or plain text: +```jsonl +{"messages": [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi!"}]} +``` +```jsonl +{"text": "User: Hello\nAssistant: Hi!"} +``` +For plain text: `DATA_FILES="file.jsonl" TEXT_FIELD="text" MESSAGES_FIELD="" python train_lora_sft.py` + +### Verify Success + +- Loss decreases over steps +- `outputs/local-lora/` contains `adapter_config.json` + `*.safetensors` + +## Quick Evaluation + +
+eval_generate.py + +```python +import os, torch +from transformers import AutoTokenizer, AutoModelForCausalLM +from peft import PeftModel + +BASE = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-0.5B-Instruct") +ADAPTER = os.environ.get("ADAPTER_DIR", "outputs/local-lora") +device = "mps" if torch.backends.mps.is_available() else "cpu" + +tokenizer = AutoTokenizer.from_pretrained(BASE, use_fast=True) +model = AutoModelForCausalLM.from_pretrained(BASE, torch_dtype=torch.float32) +model.to(device) +model = PeftModel.from_pretrained(model, ADAPTER) + +prompt = os.environ.get("PROMPT", "Explain gradient accumulation in 3 bullet points.") +inputs = tokenizer(prompt, return_tensors="pt").to(model.device) +with torch.no_grad(): + out = model.generate(**inputs, max_new_tokens=120, do_sample=True, temperature=0.7, top_p=0.9) +print(tokenizer.decode(out[0], skip_special_tokens=True)) +``` + +
+ +## Troubleshooting (macOS-Specific) + +For general training issues, see [troubleshooting.md](troubleshooting.md). + +| Problem | Fix | +|---------|-----| +| MPS unsupported op / crash | `PYTORCH_ENABLE_MPS_FALLBACK=1` | +| OOM / system instability | Reduce `MAX_SEQ_LENGTH`, use smaller model, set `PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0` (caution) | +| fp16 NaN / loss explosion | Keep `fp16=False` (default), lower learning rate | +| LoRA "module not found" | Print `model.named_modules()` to find correct target names | +| TRL TypeError on args | Check TRL version; script uses `SFTConfig` + `processing_class` (TRL ≥0.12) | +| Intel Mac | No MPS — use HF Jobs instead | + +**Common LoRA target modules by architecture:** + +| Architecture | target_modules | +|-------------|---------------| +| Llama/Qwen/Mistral | `q_proj`, `k_proj`, `v_proj`, `o_proj` | +| GPT-2/GPT-J | `c_attn`, `c_proj` | +| BLOOM | `query_key_value`, `dense` | + +## MLX Alternative + +[MLX](https://github.com/ml-explore/mlx) offers tighter Apple Silicon integration but has a smaller ecosystem and less mature training APIs. For this skill's workflow (local validation → HF Jobs), PyTorch + MPS is recommended for consistency. See [mlx-lm](https://github.com/ml-explore/mlx-lm) for MLX-based fine-tuning. 
+ +## See Also + +- [troubleshooting.md](troubleshooting.md) — General TRL troubleshooting +- [hardware_guide.md](hardware_guide.md) — GPU selection for HF Jobs +- [gguf_conversion.md](gguf_conversion.md) — Export for on-device inference +- [training_methods.md](training_methods.md) — SFT, DPO, GRPO overview diff --git a/skills/hugging-face-model-trainer/references/reliability_principles.md b/skills/hugging-face-model-trainer/references/reliability_principles.md new file mode 100644 index 00000000..bf2f7458 --- /dev/null +++ b/skills/hugging-face-model-trainer/references/reliability_principles.md @@ -0,0 +1,371 @@ +# Reliability Principles for Training Jobs + +These principles are derived from real production failures and successful fixes. Following them prevents common failure modes and ensures reliable job execution. + +## Principle 1: Always Verify Before Use + +**Rule:** Never assume repos, datasets, or resources exist. Verify with tools first. + +### What It Prevents + +- **Non-existent datasets** - Jobs fail immediately when dataset doesn't exist +- **Typos in names** - Simple mistakes like "argilla-dpo-mix-7k" vs "ultrafeedback_binarized" +- **Incorrect paths** - Old or moved repos, renamed files +- **Missing dependencies** - Undocumented requirements + +### How to Apply + +**Before submitting ANY job:** + +```python +# Verify dataset exists +dataset_search({"query": "dataset-name", "author": "author-name", "limit": 5}) +hub_repo_details(["author/dataset-name"], repo_type="dataset") + +# Verify model exists +hub_repo_details(["org/model-name"], repo_type="model") + +# Check script/file paths (for URL-based scripts) +# Verify before using: https://github.com/user/repo/blob/main/script.py +``` + +**Examples that would have caught errors:** + +```python +# ❌ WRONG: Assumed dataset exists +hf_jobs("uv", { + "script": """...""", + "env": {"DATASET": "trl-lib/argilla-dpo-mix-7k"} # Doesn't exist! 
+}) + +# ✅ CORRECT: Verify first +dataset_search({"query": "argilla dpo", "author": "trl-lib"}) +# Would show: "trl-lib/ultrafeedback_binarized" is the correct name + +hub_repo_details(["trl-lib/ultrafeedback_binarized"], repo_type="dataset") +# Confirms it exists before using +``` + +### Implementation Checklist + +- [ ] Check dataset exists before training +- [ ] Verify base model exists before fine-tuning +- [ ] Confirm adapter model exists before GGUF conversion +- [ ] Test script URLs are valid before submitting +- [ ] Validate file paths in repositories +- [ ] Check for recent updates/renames of resources + +**Time cost:** 5-10 seconds +**Time saved:** Hours of failed job time + debugging + +--- + +## Principle 2: Prioritize Reliability Over Performance + +**Rule:** Default to what is most likely to succeed, not what is theoretically fastest. + +### What It Prevents + +- **Hardware incompatibilities** - Features that fail on certain GPUs +- **Unstable optimizations** - Speed-ups that cause crashes +- **Complex configurations** - More failure points +- **Build system issues** - Unreliable compilation methods + +### How to Apply + +**Choose reliability:** + +```python +# ❌ RISKY: Aggressive optimization that may fail +SFTConfig( + torch_compile=True, # Can fail on T4, A10G GPUs + optim="adamw_bnb_8bit", # Requires specific setup + fp16=False, # May cause training instability + ... +) + +# ✅ SAFE: Proven defaults +SFTConfig( + # torch_compile=True, # Commented with note: "Enable on H100 for 20% speedup" + optim="adamw_torch", # Standard, always works + fp16=True, # Stable and fast + ... 
+) +``` + +**For build processes:** + +```python +# ❌ UNRELIABLE: Uses make (platform-dependent) +subprocess.run(["make", "-C", "/tmp/llama.cpp", "llama-quantize"], check=True) + +# ✅ RELIABLE: Uses CMake (consistent, documented) +subprocess.run([ + "cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp", + "-DGGML_CUDA=OFF" # Disable CUDA for faster, more reliable build +], check=True) + +subprocess.run([ + "cmake", "--build", "/tmp/llama.cpp/build", + "--target", "llama-quantize", "-j", "4" +], check=True) +``` + +### Real-World Example + +**The `torch.compile` failure:** +- Added for "20% speedup" on H100 +- **Failed fatally on T4-medium** with cryptic error +- Misdiagnosed as dataset issue (cost hours) +- **Fix:** Disable by default, add as optional comment + +**Result:** Reliability > 20% performance gain + +### Implementation Checklist + +- [ ] Use proven, standard configurations by default +- [ ] Comment out performance optimizations with hardware notes +- [ ] Use stable build systems (CMake > make) +- [ ] Test on target hardware before production +- [ ] Document known incompatibilities +- [ ] Provide "safe" and "fast" variants when needed + +**Performance loss:** 10-20% in best case +**Reliability gain:** 95%+ success rate vs 60-70% + +--- + +## Principle 3: Create Atomic, Self-Contained Scripts + +**Rule:** Scripts should work as complete, independent units. Don't remove parts to "simplify." 
+ +### What It Prevents + +- **Missing dependencies** - Removed "unnecessary" packages that are actually required +- **Incomplete processes** - Skipped steps that seem redundant +- **Environment assumptions** - Scripts that need pre-setup +- **Partial failures** - Some parts work, others fail silently + +### How to Apply + +**Complete dependency specifications:** + +```python +# ❌ INCOMPLETE: "Simplified" by removing dependencies +# /// script +# dependencies = [ +# "transformers", +# "peft", +# "torch", +# ] +# /// + +# ✅ COMPLETE: All dependencies explicit +# /// script +# dependencies = [ +# "transformers>=4.36.0", +# "peft>=0.7.0", +# "torch>=2.0.0", +# "accelerate>=0.24.0", +# "huggingface_hub>=0.20.0", +# "sentencepiece>=0.1.99", # Required for tokenizers +# "protobuf>=3.20.0", # Required for tokenizers +# "numpy", +# "gguf", +# ] +# /// +``` + +**Complete build processes:** + +```python +# ❌ INCOMPLETE: Assumes build tools exist +subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", "/tmp/llama.cpp"]) +subprocess.run(["make", "-C", "/tmp/llama.cpp", "llama-quantize"]) # FAILS: no gcc/make + +# ✅ COMPLETE: Installs all requirements +subprocess.run(["apt-get", "update", "-qq"], check=True) +subprocess.run(["apt-get", "install", "-y", "-qq", "build-essential", "cmake"], check=True) +subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", "/tmp/llama.cpp"]) +# ... 
then build +``` + +### Real-World Example + +**The `sentencepiece` failure:** +- Original script had it: worked fine +- "Simplified" version removed it: "doesn't look necessary" +- **GGUF conversion failed silently** - tokenizer couldn't convert +- Hard to debug: no obvious error message +- **Fix:** Restore all original dependencies + +**Result:** Don't remove dependencies without thorough testing + +### Implementation Checklist + +- [ ] All dependencies in PEP 723 header with version pins +- [ ] All system packages installed by script +- [ ] No assumptions about pre-existing environment +- [ ] No "optional" steps that are actually required +- [ ] Test scripts in clean environment +- [ ] Document why each dependency is needed + +**Complexity:** Slightly longer scripts +**Reliability:** Scripts "just work" every time + +--- + +## Principle 4: Provide Clear Error Context + +**Rule:** When things fail, make it obvious what went wrong and how to fix it. + +### How to Apply + +**Wrap subprocess calls:** + +```python +# ❌ UNCLEAR: Silent failure +subprocess.run([...], check=True, capture_output=True) + +# ✅ CLEAR: Shows what failed +try: + result = subprocess.run( + [...], + check=True, + capture_output=True, + text=True + ) + print(result.stdout) + if result.stderr: + print("Warnings:", result.stderr) +except subprocess.CalledProcessError as e: + print(f"❌ Command failed!") + print("STDOUT:", e.stdout) + print("STDERR:", e.stderr) + raise +``` + +**Validate inputs:** + +```python +# ❌ UNCLEAR: Fails later with cryptic error +model = load_model(MODEL_NAME) + +# ✅ CLEAR: Fails fast with clear message +if not MODEL_NAME: + raise ValueError("MODEL_NAME environment variable not set!") + +print(f"Loading model: {MODEL_NAME}") +try: + model = load_model(MODEL_NAME) + print(f"✅ Model loaded successfully") +except Exception as e: + print(f"❌ Failed to load model: {MODEL_NAME}") + print(f"Error: {e}") + print("Hint: Check that model exists on Hub") + raise +``` + +### 
Implementation Checklist + +- [ ] Wrap external calls with try/except +- [ ] Print stdout/stderr on failure +- [ ] Validate environment variables early +- [ ] Add progress indicators (✅, ❌, 🔄) +- [ ] Include hints for common failures +- [ ] Log configuration at start + +--- + +## Principle 5: Test the Happy Path on Known-Good Inputs + +**Rule:** Before using new code in production, test with inputs you know work. + +### How to Apply + +**Known-good test inputs:** + +```python +# For training +TEST_DATASET = "trl-lib/Capybara" # Small, well-formatted, widely used +TEST_MODEL = "Qwen/Qwen2.5-0.5B" # Small, fast, reliable + +# For GGUF conversion +TEST_ADAPTER = "evalstate/qwen-capybara-medium" # Known working model +TEST_BASE = "Qwen/Qwen2.5-0.5B" # Compatible base +``` + +**Testing workflow:** + +1. Test with known-good inputs first +2. If that works, try production inputs +3. If production fails, you know it's the inputs (not code) +4. Isolate the difference + +### Implementation Checklist + +- [ ] Maintain list of known-good test models/datasets +- [ ] Test new scripts with test inputs first +- [ ] Document what makes inputs "good" +- [ ] Keep test jobs cheap (small models, short timeouts) +- [ ] Only move to production after test succeeds + +**Time cost:** 5-10 minutes for test run +**Debugging time saved:** Hours + +--- + +## Summary: The Reliability Checklist + +Before submitting ANY job: + +### Pre-Flight Checks +- [ ] **Verified** all repos/datasets exist (hub_repo_details) +- [ ] **Tested** with known-good inputs if new code +- [ ] **Using** proven hardware/configuration +- [ ] **Included** all dependencies in PEP 723 header +- [ ] **Installed** system requirements (build tools, etc.) 
+- [ ] **Set** appropriate timeout (not default 30m) +- [ ] **Configured** Hub push with HF_TOKEN +- [ ] **Added** clear error handling + +### Script Quality +- [ ] Self-contained (no external setup needed) +- [ ] Complete dependencies listed +- [ ] Build tools installed by script +- [ ] Progress indicators included +- [ ] Error messages are clear +- [ ] Configuration logged at start + +### Job Configuration +- [ ] Timeout > expected runtime + 30% buffer +- [ ] Hardware appropriate for model size +- [ ] Secrets include HF_TOKEN +- [ ] Environment variables set correctly +- [ ] Cost estimated and acceptable + +**Following these principles transforms job success rate from ~60-70% to ~95%+** + +--- + +## When Principles Conflict + +Sometimes reliability and performance conflict. Here's how to choose: + +| Scenario | Choose | Rationale | +|----------|--------|-----------| +| Demo/test | Reliability | Fast failure is worse than slow success | +| Production (first run) | Reliability | Prove it works before optimizing | +| Production (proven) | Performance | Safe to optimize after validation | +| Time-critical | Reliability | Failures cause more delay than slow runs | +| Cost-critical | Balanced | Test with small model, then optimize | + +**General rule:** Reliability first, optimize second. + +--- + +## Further Reading + +- `troubleshooting.md` - Common issues and fixes +- `training_patterns.md` - Proven training configurations +- `gguf_conversion.md` - Production GGUF workflow diff --git a/skills/hugging-face-model-trainer/references/trackio_guide.md b/skills/hugging-face-model-trainer/references/trackio_guide.md new file mode 100644 index 00000000..342045ee --- /dev/null +++ b/skills/hugging-face-model-trainer/references/trackio_guide.md @@ -0,0 +1,189 @@ +# Trackio Integration for TRL Training + +**Trackio** is an experiment tracking library that provides real-time metrics visualization for remote training on Hugging Face Jobs infrastructure. 
+ +⚠️ **IMPORTANT**: For Jobs training (remote cloud GPUs): +- Training happens on ephemeral cloud runners (not your local machine) +- Trackio syncs metrics to a Hugging Face Space for real-time monitoring +- Without a Space, metrics are lost when the job completes +- The Space dashboard persists your training metrics permanently + +## Setting Up Trackio for Jobs + +**Step 1: Add trackio dependency** +```python +# /// script +# dependencies = [ +# "trl>=0.12.0", +# "trackio", # Required! +# ] +# /// +``` + +**Step 2: Create a Trackio Space (one-time setup)** + +**Option A: Let Trackio auto-create (Recommended)** +Pass a `space_id` to `trackio.init()` and Trackio will automatically create the Space if it doesn't exist. + +**Option B: Create manually** +- Create Space via Hub UI at https://huggingface.co/new-space +- Select Gradio SDK +- OR use command: `hf repos create my-trackio-dashboard --type space --space-sdk gradio` + +**Step 3: Initialize Trackio with space_id** +```python +import trackio + +trackio.init( + project="my-training", + space_id="username/trackio", # CRITICAL for Jobs! Replace 'username' with your HF username + config={ + "model": "Qwen/Qwen2.5-0.5B", + "dataset": "trl-lib/Capybara", + "learning_rate": 2e-5, + } +) +``` + +**Step 4: Configure TRL to use Trackio** +```python +SFTConfig( + report_to="trackio", + # ... other config +) +``` + +**Step 5: Finish tracking** +```python +trainer.train() +trackio.finish() # Ensures final metrics are synced +``` + +## What Trackio Tracks + +Trackio automatically logs: +- ✅ Training loss +- ✅ Learning rate +- ✅ GPU utilization +- ✅ Memory usage +- ✅ Training throughput +- ✅ Custom metrics + +## How It Works with Jobs + +1. **Training runs** → Metrics logged to local SQLite DB +2. **Every 5 minutes** → Trackio syncs DB to HF Dataset (Parquet) +3. **Space dashboard** → Reads from Dataset, displays metrics in real-time +4. 
**Job completes** → Final sync ensures all metrics persisted + +## Default Configuration Pattern + +**Use sensible defaults for trackio configuration unless user requests otherwise.** + +### Recommended Defaults + +```python +import trackio + +trackio.init( + project="qwen-capybara-sft", + name="baseline-run", # Descriptive name user will recognize + space_id="username/trackio", # Default space: {username}/trackio + config={ + # Keep config minimal - hyperparameters and model/dataset info only + "model": "Qwen/Qwen2.5-0.5B", + "dataset": "trl-lib/Capybara", + "learning_rate": 2e-5, + "num_epochs": 3, + } +) +``` + +**Key principles:** +- **Space ID**: Use `{username}/trackio` with "trackio" as default space name +- **Run naming**: Unless otherwise specified, name the run in a way the user will recognize +- **Config**: Keep minimal - don't automatically capture job metadata unless requested +- **Grouping**: Optional - only use if user requests organizing related experiments + +## Grouping Runs (Optional) + +The `group` parameter helps organize related runs together in the dashboard sidebar. This is useful when user is running multiple experiments with different configurations but wants to compare them together: + +```python +# Example: Group runs by experiment type +trackio.init(project="my-project", run_name="baseline-run-1", group="baseline") +trackio.init(project="my-project", run_name="augmented-run-1", group="augmented") +trackio.init(project="my-project", run_name="tuned-run-1", group="tuned") +``` + +Runs with the same group name can be grouped together in the sidebar, making it easier to compare related experiments. 
You can group by any configuration parameter: + +```python +# Hyperparameter sweep - group by learning rate +trackio.init(project="hyperparam-sweep", run_name="lr-0.001-run", group="lr_0.001") +trackio.init(project="hyperparam-sweep", run_name="lr-0.01-run", group="lr_0.01") +``` + +## Environment Variables for Jobs + +You can configure trackio using environment variables instead of passing parameters to `trackio.init()`. This is useful for managing configuration across multiple jobs. + +**`TRACKIO_PROJECT`** +Sets the project name used when no `project` argument is passed to `trackio.init()`. + +**`TRACKIO_SPACE_ID`** +Sets the dashboard Space used when no `space_id` argument is passed to `trackio.init()`. + +**`HF_TOKEN`** +Required for creating Spaces and writing to datasets (passed via `secrets`): +```python +hf_jobs("uv", { + "script": "...", + "secrets": { + "HF_TOKEN": "$HF_TOKEN" # Enables Space creation and Hub push + } +}) +``` + +### Example with Environment Variables + +```python +hf_jobs("uv", { + "script": """ +# Training script - trackio config from environment +import trackio +from datetime import datetime + +# Auto-generate run name +timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M") +run_name = f"sft_qwen25_{timestamp}" + +# Project and space_id can come from environment variables +trackio.init(run_name=run_name, group="SFT") + +# ... training code ... +trackio.finish() +""", + "flavor": "a10g-large", + "timeout": "2h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**When to use environment variables:** +- Managing multiple jobs with same configuration +- Keeping training scripts portable across projects +- Separating configuration from code + +**When to use direct parameters:** +- Single job with specific configuration +- When clarity in code is preferred +- When each job has different project/space + +## Viewing the Dashboard + +After starting training: +1. Navigate to the Space: `https://huggingface.co/spaces/username/trackio` +2. The Gradio dashboard shows all tracked experiments +3. 
Filter by project, compare runs, view charts with smoothing + +## Recommendation + +- **Trackio**: Best for real-time monitoring during long training runs +- **Weights & Biases**: Best for team collaboration, requires account diff --git a/skills/hugging-face-model-trainer/references/training_methods.md b/skills/hugging-face-model-trainer/references/training_methods.md new file mode 100644 index 00000000..2393d773 --- /dev/null +++ b/skills/hugging-face-model-trainer/references/training_methods.md @@ -0,0 +1,150 @@ +# TRL Training Methods Overview + +TRL (Transformer Reinforcement Learning) provides multiple training methods for fine-tuning and aligning language models. This reference provides a brief overview of each method. + +## Supervised Fine-Tuning (SFT) + +**What it is:** Standard instruction tuning with supervised learning on demonstration data. + +**When to use:** +- Initial fine-tuning of base models on task-specific data +- Teaching new capabilities or domains +- Most common starting point for fine-tuning + +**Dataset format:** Conversational format with "messages" field, OR text field, OR prompt/completion pairs + +**Example:** +```python +from trl import SFTTrainer, SFTConfig + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + args=SFTConfig( + output_dir="my-model", + push_to_hub=True, + hub_model_id="username/my-model", + eval_strategy="no", # Disable eval for simple example + # max_length=1024 is the default - only set if you need different length + ) +) +trainer.train() +``` + +**Note:** For production training with evaluation monitoring, see `scripts/train_sft_example.py` + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/sft_trainer")` + +## Direct Preference Optimization (DPO) + +**What it is:** Alignment method that trains directly on preference pairs (chosen vs rejected responses) without requiring a reward model. 
+ +**When to use:** +- Aligning models to human preferences +- Improving response quality after SFT +- Have paired preference data (chosen/rejected responses) + +**Dataset format:** Preference pairs with "chosen" and "rejected" fields + +**Example:** +```python +from trl import DPOTrainer, DPOConfig + +trainer = DPOTrainer( + model="Qwen/Qwen2.5-0.5B-Instruct", # Use instruct model + train_dataset=dataset, + args=DPOConfig( + output_dir="dpo-model", + beta=0.1, # KL penalty coefficient + eval_strategy="no", # Disable eval for simple example + # max_length=1024 is the default - only set if you need different length + ) +) +trainer.train() +``` + +**Note:** For production training with evaluation monitoring, see `scripts/train_dpo_example.py` + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/dpo_trainer")` + +## Group Relative Policy Optimization (GRPO) + +**What it is:** Online RL method that optimizes relative to group performance, useful for tasks with verifiable rewards. + +**When to use:** +- Tasks with automatic reward signals (code execution, math verification) +- Online learning scenarios +- When DPO offline data is insufficient + +**Dataset format:** Prompt-only format (model generates responses, reward computed online) + +**Example:** +```python +# Use TRL maintained script +hf_jobs("uv", { + "script": "https://raw.githubusercontent.com/huggingface/trl/main/examples/scripts/grpo.py", + "script_args": [ + "--model_name_or_path", "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset_name", "trl-lib/math_shepherd", + "--output_dir", "grpo-model" + ], + "flavor": "a10g-large", + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/grpo_trainer")` + +## Reward Modeling + +**What it is:** Train a reward model to score responses, used as a component in RLHF pipelines. 
+ +**When to use:** +- Building RLHF pipeline +- Need automatic quality scoring +- Creating reward signals for PPO training + +**Dataset format:** Preference pairs with "chosen" and "rejected" responses + +**Documentation:** `hf_doc_fetch("https://huggingface.co/docs/trl/reward_trainer")` + +## Method Selection Guide + +| Method | Complexity | Data Required | Use Case | +|--------|-----------|---------------|----------| +| **SFT** | Low | Demonstrations | Initial fine-tuning | +| **DPO** | Medium | Paired preferences | Post-SFT alignment | +| **GRPO** | Medium | Prompts + reward fn | Online RL with automatic rewards | +| **Reward** | Medium | Paired preferences | Building RLHF pipeline | + +## Recommended Pipeline + +**For most use cases:** +1. **Start with SFT** - Fine-tune base model on task data +2. **Follow with DPO** - Align to preferences using paired data +3. **Optional: GGUF conversion** - Deploy for local inference + +**For advanced RL scenarios:** +1. **Start with SFT** - Fine-tune base model +2. 
**Train reward model** - On preference data + +## Dataset Format Reference + +For complete dataset format specifications, use: +```python +hf_doc_fetch("https://huggingface.co/docs/trl/dataset_formats") +``` + +Or validate your dataset: +```bash +uv run https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py \ + --dataset your/dataset --split train +``` + +## See Also + +- `references/training_patterns.md` - Common training patterns and examples +- `scripts/train_sft_example.py` - Complete SFT template +- `scripts/train_dpo_example.py` - Complete DPO template +- [Dataset Inspector](https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py) - Dataset format validation tool diff --git a/skills/hugging-face-model-trainer/references/training_patterns.md b/skills/hugging-face-model-trainer/references/training_patterns.md new file mode 100644 index 00000000..2101e12a --- /dev/null +++ b/skills/hugging-face-model-trainer/references/training_patterns.md @@ -0,0 +1,203 @@ +# Common Training Patterns + +This guide provides common training patterns and use cases for TRL on Hugging Face Jobs. + +## Multi-GPU Training + +Automatic distributed training across multiple GPUs. 
TRL/Accelerate handles distribution automatically: + +```python +hf_jobs("uv", { + "script": """ +# Your training script here (same as single GPU) +# No changes needed - Accelerate detects multiple GPUs +""", + "flavor": "a10g-largex2", # 2x A10G GPUs + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**Tips for multi-GPU:** +- No code changes needed +- Use `per_device_train_batch_size` (per GPU, not total) +- Effective batch size = `per_device_train_batch_size` × `num_gpus` × `gradient_accumulation_steps` +- Monitor GPU utilization to ensure both GPUs are being used + +## DPO Training (Preference Learning) + +Train with preference data for alignment: + +```python +hf_jobs("uv", { + "script": """ +# /// script +# dependencies = ["trl>=0.12.0", "trackio"] +# /// + +from datasets import load_dataset +from trl import DPOTrainer, DPOConfig +import trackio + +dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train") + +# Create train/eval split +dataset_split = dataset.train_test_split(test_size=0.1, seed=42) + +config = DPOConfig( + output_dir="dpo-model", + push_to_hub=True, + hub_model_id="username/dpo-model", + num_train_epochs=1, + beta=0.1, # KL penalty coefficient + eval_strategy="steps", + eval_steps=50, + report_to="trackio", + run_name="baseline_run", # use a meaningful run name + # max_length=1024, # Default - only set if you need different sequence length +) + +trainer = DPOTrainer( + model="Qwen/Qwen2.5-0.5B-Instruct", # Use instruct model as base + train_dataset=dataset_split["train"], + eval_dataset=dataset_split["test"], # IMPORTANT: Provide eval_dataset when eval_strategy is enabled + args=config, +) + +trainer.train() +trainer.push_to_hub() +trackio.finish() +""", + "flavor": "a10g-large", + "timeout": "3h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**For DPO documentation:** Use `hf_doc_fetch("https://huggingface.co/docs/trl/dpo_trainer")` + +## GRPO Training (Online RL) + +Group Relative Policy Optimization 
for online reinforcement learning: + +```python +hf_jobs("uv", { + "script": "https://raw.githubusercontent.com/huggingface/trl/main/examples/scripts/grpo.py", + "script_args": [ + "--model_name_or_path", "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset_name", "trl-lib/math_shepherd", + "--output_dir", "grpo-model", + "--push_to_hub", + "--hub_model_id", "username/grpo-model" + ], + "flavor": "a10g-large", + "timeout": "4h", + "secrets": {"HF_TOKEN": "$HF_TOKEN"} +}) +``` + +**For GRPO documentation:** Use `hf_doc_fetch("https://huggingface.co/docs/trl/grpo_trainer")` + +## Trackio Configuration + +**Use sensible defaults for trackio setup.** See `references/trackio_guide.md` for complete documentation including grouping runs for experiments. + +### Basic Pattern + +```python +import trackio + +trackio.init( + project="my-training", + run_name="baseline-run", # Descriptive name user will recognize + space_id="username/trackio", # Default space: {username}/trackio + config={ + # Keep config minimal - hyperparameters and model/dataset info only + "model": "Qwen/Qwen2.5-0.5B", + "dataset": "trl-lib/Capybara", + "learning_rate": 2e-5, + } +) + +# Your training code... 
+ +trackio.finish() +``` + +### Grouping for Experiments (Optional) + +When user wants to compare related runs, use the `group` parameter: + +```python +# Hyperparameter sweep +trackio.init(project="hyperparam-sweep", run_name="lr-0.001", group="lr_0.001") +trackio.init(project="hyperparam-sweep", run_name="lr-0.01", group="lr_0.01") +``` + +## Pattern Selection Guide + +| Use Case | Pattern | Hardware | Time | +|----------|---------|----------|------| +| SFT training | `scripts/train_sft_example.py` | a10g-large | 2-6 hours | +| Large dataset (>10K) | Multi-GPU | a10g-largex2 | 4-12 hours | +| Preference learning | DPO Training | a10g-large | 2-4 hours | +| Online RL | GRPO Training | a10g-large | 3-6 hours | + +## Critical: Evaluation Dataset Requirements + +**⚠️ IMPORTANT**: If you set `eval_strategy="steps"` or `eval_strategy="epoch"`, you **MUST** provide an `eval_dataset` to the trainer, or the training will hang. + +### ✅ CORRECT - With eval dataset: +```python +dataset_split = dataset.train_test_split(test_size=0.1, seed=42) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset_split["train"], + eval_dataset=dataset_split["test"], # ← MUST provide when eval_strategy is enabled + args=SFTConfig(eval_strategy="steps", ...), +) +``` + +### ❌ WRONG - Will hang: +```python +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + # NO eval_dataset but eval_strategy="steps" ← WILL HANG + args=SFTConfig(eval_strategy="steps", ...), +) +``` + +### Option: Disable evaluation if no eval dataset +```python +config = SFTConfig( + eval_strategy="no", # ← Explicitly disable evaluation + # ... other config +) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + # No eval_dataset needed + args=config, +) +``` + +## Best Practices + +1. **Use train/eval splits** - Create evaluation split for monitoring progress +2. **Enable Trackio** - Monitor progress in real-time +3. 
**Add 20-30% buffer to timeout** - Account for loading/saving overhead +4. **Test with TRL official scripts first** - Use maintained examples before custom code +5. **Always provide eval_dataset** - When using eval_strategy, or set to "no" +6. **Use multi-GPU for large models** - 7B+ models benefit significantly + +## See Also + +- `scripts/train_sft_example.py` - Complete SFT template with Trackio and eval split +- `scripts/train_dpo_example.py` - Complete DPO template +- `scripts/train_grpo_example.py` - Complete GRPO template +- `references/hardware_guide.md` - Detailed hardware specifications +- `references/training_methods.md` - Overview of all TRL training methods +- `references/troubleshooting.md` - Common issues and solutions diff --git a/skills/hugging-face-model-trainer/references/troubleshooting.md b/skills/hugging-face-model-trainer/references/troubleshooting.md new file mode 100644 index 00000000..430816ce --- /dev/null +++ b/skills/hugging-face-model-trainer/references/troubleshooting.md @@ -0,0 +1,282 @@ +# Troubleshooting TRL Training Jobs + +Common issues and solutions when training with TRL on Hugging Face Jobs. + +## Training Hangs at "Starting training..." Step + +**Problem:** Job starts but hangs at the training step - never progresses, never times out, just sits there. + +**Root Cause:** Using `eval_strategy="steps"` or `eval_strategy="epoch"` without providing an `eval_dataset` to the trainer. + +**Solution:** + +**Option A: Provide eval_dataset (recommended)** +```python +# Create train/eval split +dataset_split = dataset.train_test_split(test_size=0.1, seed=42) + +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset_split["train"], + eval_dataset=dataset_split["test"], # ← MUST provide when eval_strategy is enabled + args=SFTConfig( + eval_strategy="steps", + eval_steps=50, + ... 
+ ), +) +``` + +**Option B: Disable evaluation** +```python +trainer = SFTTrainer( + model="Qwen/Qwen2.5-0.5B", + train_dataset=dataset, + # No eval_dataset + args=SFTConfig( + eval_strategy="no", # ← Explicitly disable + ... + ), +) +``` + +**Prevention:** +- Always create train/eval split for better monitoring +- Use `dataset.train_test_split(test_size=0.1, seed=42)` +- Check example scripts: `scripts/train_sft_example.py` includes proper eval setup + +## Job Times Out + +**Problem:** Job terminates before training completes, all progress lost. + +**Solutions:** +- Increase timeout parameter (e.g., `"timeout": "4h"`) +- Reduce `num_train_epochs` or use smaller dataset slice +- Use smaller model or enable LoRA/PEFT to speed up training +- Add 20-30% buffer to estimated time for loading/saving overhead + +**Prevention:** +- Always start with a quick demo run to estimate timing +- Use `scripts/estimate_cost.py` to get time estimates +- Monitor first runs closely via Trackio or logs + +## Model Not Saved to Hub + +**Problem:** Training completes but model doesn't appear on Hub - all work lost. + +**Check:** +- [ ] `push_to_hub=True` in training config +- [ ] `hub_model_id` specified with username (e.g., `"username/model-name"`) +- [ ] `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job submission +- [ ] User has write access to target repo +- [ ] Token has write permissions (check at https://huggingface.co/settings/tokens) +- [ ] Training script calls `trainer.push_to_hub()` at the end + +**See:** `references/hub_saving.md` for detailed Hub authentication troubleshooting + +## Out of Memory (OOM) + +**Problem:** Job fails with CUDA out of memory error. + +**Solutions (in order of preference):** +1. **Reduce batch size:** Lower `per_device_train_batch_size` (try 4 → 2 → 1) +2. **Increase gradient accumulation:** Raise `gradient_accumulation_steps` to maintain effective batch size +3. 
**Disable evaluation:** Remove `eval_dataset` and `eval_strategy` (saves ~40% memory, good for demos) +4. **Enable LoRA/PEFT:** Use `peft_config=LoraConfig(r=8, lora_alpha=16)` to train adapters only (smaller rank = less memory) +5. **Use larger GPU:** Switch from `t4-small` → `l4x1` → `a10g-large` → `a100-large` +6. **Enable gradient checkpointing:** Set `gradient_checkpointing=True` in config (slower but saves memory) +7. **Use smaller model:** Try a smaller variant (e.g., 0.5B instead of 3B) + +**Memory guidelines:** +- T4 (16GB): <1B models with LoRA +- A10G (24GB): 1-3B models with LoRA, <1B full fine-tune +- A100 (40GB/80GB): 7B+ models with LoRA, 3B full fine-tune + +## Parameter Naming Issues + +**Problem:** `TypeError: SFTConfig.__init__() got an unexpected keyword argument 'max_seq_length'` + +**Cause:** TRL config classes use `max_length`, not `max_seq_length`. + +**Solution:** +```python +# ✅ CORRECT - TRL uses max_length +SFTConfig(max_length=512) +DPOConfig(max_length=512) + +# ❌ WRONG - This will fail +SFTConfig(max_seq_length=512) +``` + +**Note:** Most TRL configs don't require explicit max_length - the default (1024) works well. Only set if you need a specific value. + +## Dataset Format Error + +**Problem:** Training fails with dataset format errors or missing fields. + +**Solutions:** +1. **Check format documentation:** + ```python + hf_doc_fetch("https://huggingface.co/docs/trl/dataset_formats") + ``` + +2. **Validate dataset before training:** + ```bash + uv run https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py \ + --dataset your/dataset --split train + ``` + Or via hf_jobs: + ```python + hf_jobs("uv", { + "script": "https://huggingface.co/datasets/mcp-tools/skills/raw/main/dataset_inspector.py", + "script_args": ["--dataset", "dataset-name", "--split", "train"] + }) + ``` + +3. 
**Verify field names:** + - **SFT:** Needs "messages" field (conversational), OR "text" field, OR "prompt"/"completion" + - **DPO:** Needs "chosen" and "rejected" fields + - **GRPO:** Needs prompt-only format + +4. **Check dataset split:** + - Ensure split exists (e.g., `split="train"`) + - Preview dataset: `load_dataset("name", split="train[:5]")` + +## Import/Module Errors + +**Problem:** Job fails with "ModuleNotFoundError" or import errors. + +**Solutions:** +1. **Add PEP 723 header with dependencies:** + ```python + # /// script + # dependencies = [ + # "trl>=0.12.0", + # "peft>=0.7.0", + # "transformers>=4.36.0", + # ] + # /// + ``` + +2. **Verify exact format:** + - Must have `# ///` delimiters (with space after `#`) + - Dependencies must be valid PyPI package names + - Check spelling and version constraints + +3. **Test locally first:** + ```bash + uv run train.py # Tests if dependencies are correct + ``` + +## Authentication Errors + +**Problem:** Job fails with authentication or permission errors when pushing to Hub. + +**Solutions:** +1. **Verify authentication:** + ```python + mcp__huggingface__hf_whoami() # Check who's authenticated + ``` + +2. **Check token permissions:** + - Go to https://huggingface.co/settings/tokens + - Ensure token has "write" permission + - Token must not be "read-only" + +3. **Verify token in job:** + ```python + "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Must be in job config + ``` + +4. **Check repo permissions:** + - User must have write access to target repo + - If org repo, user must be member with write access + - Repo must exist or user must have permission to create + +## Job Stuck or Not Starting + +**Problem:** Job shows "pending" or "starting" for extended period. 
+ +**Solutions:** +- Check Jobs dashboard for status: https://huggingface.co/jobs +- Verify hardware availability (some GPU types may have queues) +- Try different hardware flavor if one is heavily utilized +- Check for account billing issues (Jobs requires paid plan) + +**Typical startup times:** +- CPU jobs: 10-30 seconds +- GPU jobs: 30-90 seconds +- If >3 minutes: likely queued or stuck + +## Training Loss Not Decreasing + +**Problem:** Training runs but loss stays flat or doesn't improve. + +**Solutions:** +1. **Check learning rate:** May be too low (try 2e-5 to 5e-5) or too high (try 1e-6) +2. **Verify dataset quality:** Inspect examples to ensure they're reasonable +3. **Check model size:** Very small models may not have capacity for task +4. **Increase training steps:** May need more epochs or larger dataset +5. **Verify dataset format:** Wrong format may cause degraded training + +## Logs Not Appearing + +**Problem:** Cannot see training logs or progress. + +**Solutions:** +1. **Wait 30-60 seconds:** Initial logs can be delayed +2. **Check logs via MCP tool:** + ```python + hf_jobs("logs", {"job_id": "your-job-id"}) + ``` +3. **Use Trackio for real-time monitoring:** See `references/trackio_guide.md` +4. **Verify job is actually running:** + ```python + hf_jobs("inspect", {"job_id": "your-job-id"}) + ``` + +## Checkpoint/Resume Issues + +**Problem:** Cannot resume from checkpoint or checkpoint not saved. + +**Solutions:** +1. **Enable checkpoint saving:** + ```python + SFTConfig( + save_strategy="steps", + save_steps=100, + hub_strategy="every_save", # Push each checkpoint + ) + ``` + +2. **Verify checkpoints pushed to Hub:** Check model repo for checkpoint folders + +3. **Resume from checkpoint:** + ```python + trainer = SFTTrainer( + model="username/model-name", # Can be checkpoint path + resume_from_checkpoint="username/model-name/checkpoint-1000", + ) + ``` + +## Getting Help + +If issues persist: + +1. 
**Check TRL documentation:** + ```python + hf_doc_search("your issue", product="trl") + ``` + +2. **Check Jobs documentation:** + ```python + hf_doc_fetch("https://huggingface.co/docs/huggingface_hub/guides/jobs") + ``` + +3. **Review related guides:** + - `references/hub_saving.md` - Hub authentication issues + - `references/hardware_guide.md` - Hardware selection and specs + - `references/training_patterns.md` - Eval dataset requirements + - SKILL.md "Working with Scripts" section - Script format and URL issues + +4. **Ask in HF forums:** https://discuss.huggingface.co/ diff --git a/skills/hugging-face-model-trainer/references/unsloth.md b/skills/hugging-face-model-trainer/references/unsloth.md new file mode 100644 index 00000000..83e1e116 --- /dev/null +++ b/skills/hugging-face-model-trainer/references/unsloth.md @@ -0,0 +1,313 @@ +# Unsloth: Fast Fine-Tuning with Memory Optimization + +**Unsloth** is a fine-tuning library that provides ~2x faster training and ~60% less VRAM usage for LLM training. It's particularly useful when working with limited GPU memory or when speed is critical. 
+ +- **GitHub**: [unslothai/unsloth](https://github.com/unslothai/unsloth) +- **Docs**: [unsloth.ai/docs](https://unsloth.ai/docs) + +## When to Use Unsloth + +Use Unsloth if instructed to do so, or one of the following use cases applies: + +| Use Case | Recommendation | +|----------|----------------| +| Standard text LLM fine-tuning | TRL is sufficient, but Unsloth is faster | +| Limited GPU memory | **Use Unsloth** - 60% less VRAM | +| Need maximum speed | **Use Unsloth** - 2x faster | +| Large models (>13B) | **Use Unsloth** - memory efficiency critical | + +## Supported Models + +Unsloth supports many popular models including: +- **Text LLMs**: Llama 3/3.1/3.2/3.3, Qwen 2.5/3, Mistral, Phi-4, Gemma 2/3, LFM2/2.5 +- **Vision LLMs**: Qwen3-VL, Gemma 3, Llama 3.2 Vision, Pixtral + +Use Unsloth's pre-optimized model variants when available: +```python +# Unsloth-optimized models load faster and use less memory +model_id = "unsloth/LFM2.5-1.2B-Instruct" # 4-bit quantized +model_id = "unsloth/gemma-3-4b-pt" # Vision model +model_id = "unsloth/Qwen3-VL-8B-Instruct" # Vision model +``` + +## Installation + +```python +# /// script +# dependencies = [ +# "unsloth", +# "trl", +# "datasets", +# "trackio", +# ] +# /// +``` + +## Basic Usage: Text LLM + +```python +from unsloth import FastLanguageModel +from trl import SFTTrainer, SFTConfig +from datasets import load_dataset + +# Load model with Unsloth optimizations +model, tokenizer = FastLanguageModel.from_pretrained( + model_name="LiquidAI/LFM2.5-1.2B-Instruct", + max_seq_length=4096, +) + +# Add LoRA adapters +model = FastLanguageModel.get_peft_model( + model, + r=16, + lora_alpha=16, + target_modules=["q_proj", "k_proj", "v_proj", "out_proj", "in_proj", "w1", "w2", "w3"], + lora_dropout=0, + bias="none", + use_gradient_checkpointing="unsloth", + random_state=3407, +) + +# Load dataset +dataset = load_dataset("trl-lib/Capybara", split="train") + +# Train with TRL +trainer = SFTTrainer( + model=model, + 
tokenizer=tokenizer, + train_dataset=dataset, + args=SFTConfig( + output_dir="./output", + per_device_train_batch_size=2, + gradient_accumulation_steps=4, + max_steps=500, + learning_rate=2e-4, + report_to="trackio", + ), +) + +trainer.train() +``` + +## LFM2.5 Specific Settings + +For LFM2.5 inference, use these recommended generation parameters: + +**Instruct models:** +```python +temperature = 0.1 +top_k = 50 +top_p = 0.1 +repetition_penalty = 1.05 +``` + +**Thinking models:** +```python +temperature = 0.05 +top_k = 50 +repetition_penalty = 1.05 +``` + +## Vision-Language Models (VLMs) + +Unsloth provides specialized support for VLMs with `FastVisionModel`: + +```python +from unsloth import FastVisionModel, get_chat_template +from unsloth.trainer import UnslothVisionDataCollator +from trl import SFTTrainer, SFTConfig +from datasets import load_dataset + +# Load VLM with Unsloth +model, processor = FastVisionModel.from_pretrained( + "unsloth/gemma-3-4b-pt", # or "unsloth/Qwen3-VL-8B-Instruct" + load_in_4bit=True, + use_gradient_checkpointing="unsloth", +) + +# Add LoRA for all modalities +model = FastVisionModel.get_peft_model( + model, + finetune_vision_layers=True, # Train vision encoder + finetune_language_layers=True, # Train language model + finetune_attention_modules=True, # Train attention + finetune_mlp_modules=True, # Train MLPs + r=16, + lora_alpha=32, + target_modules="all-linear", +) + +# Apply chat template (required for base models) +processor = get_chat_template(processor, "gemma-3") + +# Load VLM dataset (with images and messages) +dataset = load_dataset("your-vlm-dataset", split="train", streaming=True) + +# Enable training mode +FastVisionModel.for_training(model) + +# Train with VLM-specific collator +trainer = SFTTrainer( + model=model, + train_dataset=dataset, + processing_class=processor.tokenizer, + data_collator=UnslothVisionDataCollator(model, processor), + args=SFTConfig( + output_dir="./vlm-output", + per_device_train_batch_size=2, + 
gradient_accumulation_steps=4, + max_steps=500, + learning_rate=2e-4, + # VLM-specific settings + remove_unused_columns=False, + dataset_text_field="", + dataset_kwargs={"skip_prepare_dataset": True}, + report_to="trackio", + ), +) + +trainer.train() +``` + +## Key Differences from Standard TRL + +| Aspect | Standard TRL | Unsloth | +|--------|--------------|---------| +| Model loading | `AutoModelForCausalLM.from_pretrained()` | `FastLanguageModel.from_pretrained()` | +| LoRA setup | `PeftModel` / `LoraConfig` | `FastLanguageModel.get_peft_model()` | +| VLM loading | Limited support | `FastVisionModel.from_pretrained()` | +| VLM collator | Manual | `UnslothVisionDataCollator` | +| Memory usage | Standard | ~60% less | +| Training speed | Standard | ~2x faster | + +## VLM Dataset Format + +VLM datasets should have: +- `images`: List of PIL images or image paths +- `messages`: Conversation format with image references + +```python +{ + "images": [<PIL.Image.Image>, ...], + "messages": [ + {"role": "user", "content": [ + {"type": "image"}, + {"type": "text", "text": "Describe this image"} + ]}, + {"role": "assistant", "content": "This image shows..."} + ] +} +``` + +## Streaming Datasets + +For large VLM datasets, use streaming to avoid disk space issues: + +```python +dataset = load_dataset( + "your-vlm-dataset", + split="train", + streaming=True, # Stream from Hub +) + +# Must use max_steps with streaming (no epoch-based training) +SFTConfig(max_steps=500, ...) 
+``` + +## Saving Models + +### Save LoRA Adapter + +```python +model.save_pretrained("./adapter") +processor.save_pretrained("./adapter") + +# Push to Hub +model.push_to_hub("username/my-vlm-adapter") +processor.push_to_hub("username/my-vlm-adapter") +``` + +### Merge and Save Full Model + +```python +# Merge LoRA weights into base model +model = model.merge_and_unload() + +# Save merged model +model.save_pretrained("./merged") +tokenizer.save_pretrained("./merged") +``` + +### Convert to GGUF + +Unsloth models can be converted to GGUF for llama.cpp/Ollama: + +```python +# Save in 16-bit for GGUF conversion +model.save_pretrained_gguf("./gguf", tokenizer, quantization_method="f16") + +# Or directly quantize +model.save_pretrained_gguf("./gguf", tokenizer, quantization_method="q4_k_m") +``` + +## Qwen3-VL Specific Settings + +For Qwen3-VL models, use these recommended settings: + +**Instruct models:** +```python +temperature = 0.7 +top_p = 0.8 +presence_penalty = 1.5 +``` + +**Thinking models:** +```python +temperature = 1.0 +top_p = 0.95 +presence_penalty = 0.0 +``` + +## Hardware Requirements + +| Model | Min VRAM (Unsloth 4-bit) | Recommended GPU | +|-------|--------------------------|-----------------| +| 2B-4B | 8GB | T4, L4 | +| 7B-8B | 16GB | A10G, L4x4 | +| 13B | 24GB | A10G-large | +| 30B+ | 48GB+ | A100 | + +## Example: Full VLM Training Script + +See `scripts/unsloth_sft_example.py` for a complete production-ready example that includes: +- Unsloth VLM setup +- Streaming dataset support +- Trackio monitoring +- Hub push +- CLI arguments + +Run locally: +```bash +uv run scripts/unsloth_sft_example.py \ + --dataset trl-lib/Capybara \ + --max-steps 500 \ + --output-repo username/my-model +``` + +Run on HF Jobs: +```python +hf_jobs("uv", { + "script": " +``` + +## Core Concepts + +### 1. Pipeline API +The pipeline API is the easiest way to use models. 
It groups together preprocessing, model inference, and postprocessing: + +```javascript +import { pipeline } from '@huggingface/transformers'; + +// Create a pipeline for a specific task +const pipe = await pipeline('sentiment-analysis'); + +// Use the pipeline +const result = await pipe('I love transformers!'); +// Output: [{ label: 'POSITIVE', score: 0.999817686 }] + +// IMPORTANT: Always dispose when done to free memory +await pipe.dispose(); +``` + +**⚠️ Memory Management:** All pipelines must be disposed with `pipe.dispose()` when finished to prevent memory leaks. See examples in [Code Examples](./references/EXAMPLES.md) for cleanup patterns across different environments. + +### 2. Model Selection +You can specify a custom model as the second argument: + +```javascript +const pipe = await pipeline( + 'sentiment-analysis', + 'Xenova/bert-base-multilingual-uncased-sentiment' +); +``` + +**Finding Models:** + +Browse available Transformers.js models on Hugging Face Hub: +- **All models**: https://huggingface.co/models?library=transformers.js&sort=trending +- **By task**: Add `pipeline_tag` parameter + - Text generation: https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending + - Image classification: https://huggingface.co/models?pipeline_tag=image-classification&library=transformers.js&sort=trending + - Speech recognition: https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers.js&sort=trending + +**Tip:** Filter by task type, sort by trending/downloads, and check model cards for performance metrics and usage examples. + +### 3. Device Selection +Choose where to run the model: + +```javascript +// Run on CPU (default for WASM) +const pipe = await pipeline('sentiment-analysis', 'model-id'); + +// Run on GPU (WebGPU - experimental) +const pipe = await pipeline('sentiment-analysis', 'model-id', { + device: 'webgpu', +}); +``` + +### 4. 
Quantization Options +Control model precision vs. performance: + +```javascript +// Use quantized model (faster, smaller) +const pipe = await pipeline('sentiment-analysis', 'model-id', { + dtype: 'q4', // Options: 'fp32', 'fp16', 'q8', 'q4' +}); +``` + +## Supported Tasks + +**Note:** All examples below show basic usage. + +### Natural Language Processing + +#### Text Classification +```javascript +const classifier = await pipeline('text-classification'); +const result = await classifier('This movie was amazing!'); +``` + +#### Named Entity Recognition (NER) +```javascript +const ner = await pipeline('token-classification'); +const entities = await ner('My name is John and I live in New York.'); +``` + +#### Question Answering +```javascript +const qa = await pipeline('question-answering'); +const answer = await qa({ + question: 'What is the capital of France?', + context: 'Paris is the capital and largest city of France.' +}); +``` + +#### Text Generation +```javascript +const generator = await pipeline('text-generation', 'onnx-community/gemma-3-270m-it-ONNX'); +const text = await generator('Once upon a time', { + max_new_tokens: 100, + temperature: 0.7 +}); +``` + +**For streaming and chat:** See **[Text Generation Guide](./references/TEXT_GENERATION.md)** for: +- Streaming token-by-token output with `TextStreamer` +- Chat/conversation format with system/user/assistant roles +- Generation parameters (temperature, top_k, top_p) +- Browser and Node.js examples +- React components and API endpoints + +#### Translation +```javascript +const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M'); +const output = await translator('Hello, how are you?', { + src_lang: 'eng_Latn', + tgt_lang: 'fra_Latn' +}); +``` + +#### Summarization +```javascript +const summarizer = await pipeline('summarization'); +const summary = await summarizer(longText, { + max_length: 100, + min_length: 30 +}); +``` + +#### Zero-Shot Classification +```javascript +const 
classifier = await pipeline('zero-shot-classification'); +const result = await classifier('This is a story about sports.', ['politics', 'sports', 'technology']); +``` + +### Computer Vision + +#### Image Classification +```javascript +const classifier = await pipeline('image-classification'); +const result = await classifier('https://example.com/image.jpg'); +// Or with local file +const result = await classifier(imageUrl); +``` + +#### Object Detection +```javascript +const detector = await pipeline('object-detection'); +const objects = await detector('https://example.com/image.jpg'); +// Returns: [{ label: 'person', score: 0.95, box: { xmin, ymin, xmax, ymax } }, ...] +``` + +#### Image Segmentation +```javascript +const segmenter = await pipeline('image-segmentation'); +const segments = await segmenter('https://example.com/image.jpg'); +``` + +#### Depth Estimation +```javascript +const depthEstimator = await pipeline('depth-estimation'); +const depth = await depthEstimator('https://example.com/image.jpg'); +``` + +#### Zero-Shot Image Classification +```javascript +const classifier = await pipeline('zero-shot-image-classification'); +const result = await classifier('image.jpg', ['cat', 'dog', 'bird']); +``` + +### Audio Processing + +#### Automatic Speech Recognition +```javascript +const transcriber = await pipeline('automatic-speech-recognition'); +const result = await transcriber('audio.wav'); +// Returns: { text: 'transcribed text here' } +``` + +#### Audio Classification +```javascript +const classifier = await pipeline('audio-classification'); +const result = await classifier('audio.wav'); +``` + +#### Text-to-Speech +```javascript +const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts'); +const audio = await synthesizer('Hello, this is a test.', { + speaker_embeddings: speakerEmbeddings +}); +``` + +### Multimodal + +#### Image-to-Text (Image Captioning) +```javascript +const captioner = await pipeline('image-to-text'); +const caption 
= await captioner('image.jpg'); +``` + +#### Document Question Answering +```javascript +const docQA = await pipeline('document-question-answering'); +const answer = await docQA('document-image.jpg', 'What is the total amount?'); +``` + +#### Zero-Shot Object Detection +```javascript +const detector = await pipeline('zero-shot-object-detection'); +const objects = await detector('image.jpg', ['person', 'car', 'tree']); +``` + +### Feature Extraction (Embeddings) + +```javascript +const extractor = await pipeline('feature-extraction'); +const embeddings = await extractor('This is a sentence to embed.'); +// Returns: tensor of shape [1, sequence_length, hidden_size] + +// For sentence embeddings (mean pooling) +const extractor = await pipeline('feature-extraction', 'onnx-community/all-MiniLM-L6-v2-ONNX'); +const embeddings = await extractor('Text to embed', { pooling: 'mean', normalize: true }); +``` + +## Finding and Choosing Models + +### Browsing the Hugging Face Hub + +Discover compatible Transformers.js models on Hugging Face Hub: + +**Base URL (all models):** +``` +https://huggingface.co/models?library=transformers.js&sort=trending +``` + +**Filter by task** using the `pipeline_tag` parameter: + +| Task | URL | +|------|-----| +| **Text Generation** | https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending | +| **Text Classification** | https://huggingface.co/models?pipeline_tag=text-classification&library=transformers.js&sort=trending | +| **Translation** | https://huggingface.co/models?pipeline_tag=translation&library=transformers.js&sort=trending | +| **Summarization** | https://huggingface.co/models?pipeline_tag=summarization&library=transformers.js&sort=trending | +| **Question Answering** | https://huggingface.co/models?pipeline_tag=question-answering&library=transformers.js&sort=trending | +| **Image Classification** | 
https://huggingface.co/models?pipeline_tag=image-classification&library=transformers.js&sort=trending | +| **Object Detection** | https://huggingface.co/models?pipeline_tag=object-detection&library=transformers.js&sort=trending | +| **Image Segmentation** | https://huggingface.co/models?pipeline_tag=image-segmentation&library=transformers.js&sort=trending | +| **Speech Recognition** | https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers.js&sort=trending | +| **Audio Classification** | https://huggingface.co/models?pipeline_tag=audio-classification&library=transformers.js&sort=trending | +| **Image-to-Text** | https://huggingface.co/models?pipeline_tag=image-to-text&library=transformers.js&sort=trending | +| **Feature Extraction** | https://huggingface.co/models?pipeline_tag=feature-extraction&library=transformers.js&sort=trending | +| **Zero-Shot Classification** | https://huggingface.co/models?pipeline_tag=zero-shot-classification&library=transformers.js&sort=trending | + +**Sort options:** +- `&sort=trending` - Most popular recently +- `&sort=downloads` - Most downloaded overall +- `&sort=likes` - Most liked by community +- `&sort=modified` - Recently updated + +### Choosing the Right Model + +Consider these factors when selecting a model: + +**1. Model Size** +- **Small (< 100MB)**: Fast, suitable for browsers, limited accuracy +- **Medium (100MB - 500MB)**: Balanced performance, good for most use cases +- **Large (> 500MB)**: High accuracy, slower, better for Node.js or powerful devices + +**2. Quantization** +Models are often available in different quantization levels: +- `fp32` - Full precision (largest, most accurate) +- `fp16` - Half precision (smaller, still accurate) +- `q8` - 8-bit quantized (much smaller, slight accuracy loss) +- `q4` - 4-bit quantized (smallest, noticeable accuracy loss) + +**3. 
Task Compatibility** +Check the model card for: +- Supported tasks (some models support multiple tasks) +- Input/output formats +- Language support (multilingual vs. English-only) +- License restrictions + +**4. Performance Metrics** +Model cards typically show: +- Accuracy scores +- Benchmark results +- Inference speed +- Memory requirements + +### Example: Finding a Text Generation Model + +```javascript +// 1. Visit: https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending + +// 2. Browse and select a model (e.g., onnx-community/gemma-3-270m-it-ONNX) + +// 3. Check model card for: +// - Model size: ~270M parameters +// - Quantization: q4 available +// - Language: English +// - Use case: Instruction-following chat + +// 4. Use the model: +import { pipeline } from '@huggingface/transformers'; + +const generator = await pipeline( + 'text-generation', + 'onnx-community/gemma-3-270m-it-ONNX', + { dtype: 'q4' } // Use quantized version for faster inference +); + +const output = await generator('Explain quantum computing in simple terms.', { + max_new_tokens: 100 +}); + +await generator.dispose(); +``` + +### Tips for Model Selection + +1. **Start Small**: Test with a smaller model first, then upgrade if needed +2. **Check ONNX Support**: Ensure the model has ONNX files (look for `onnx` folder in model repo) +3. **Read Model Cards**: Model cards contain usage examples, limitations, and benchmarks +4. **Test Locally**: Benchmark inference speed and memory usage in your environment +5. **Community Models**: Look for models by `Xenova` (Transformers.js maintainer) or `onnx-community` +6. **Version Pin**: Use specific git commits in production for stability: + ```javascript + const pipe = await pipeline('task', 'model-id', { revision: 'abc123' }); + ``` + +## Advanced Configuration + +### Environment Configuration (`env`) + +The `env` object provides comprehensive control over Transformers.js execution, caching, and model loading. 
+ +**Quick Overview:** + +```javascript +import { env } from '@huggingface/transformers'; + +// View version +console.log(env.version); // e.g., '3.8.1' + +// Common settings +env.allowRemoteModels = true; // Load from Hugging Face Hub +env.allowLocalModels = false; // Load from file system +env.localModelPath = '/models/'; // Local model directory +env.useFSCache = true; // Cache models on disk (Node.js) +env.useBrowserCache = true; // Cache models in browser +env.cacheDir = './.cache'; // Cache directory location +``` + +**Configuration Patterns:** + +```javascript +// Development: Fast iteration with remote models +env.allowRemoteModels = true; +env.useFSCache = true; + +// Production: Local models only +env.allowRemoteModels = false; +env.allowLocalModels = true; +env.localModelPath = '/app/models/'; + +// Custom CDN +env.remoteHost = 'https://cdn.example.com/models'; + +// Disable caching (testing) +env.useFSCache = false; +env.useBrowserCache = false; +``` + +For complete documentation on all configuration options, caching strategies, cache management, pre-downloading models, and more, see: + +**→ [Configuration Reference](./references/CONFIGURATION.md)** + +### Working with Tensors + +```javascript +import { AutoTokenizer, AutoModel } from '@huggingface/transformers'; + +// Load tokenizer and model separately for more control +const tokenizer = await AutoTokenizer.from_pretrained('bert-base-uncased'); +const model = await AutoModel.from_pretrained('bert-base-uncased'); + +// Tokenize input +const inputs = await tokenizer('Hello world!'); + +// Run model +const outputs = await model(inputs); +``` + +### Batch Processing + +```javascript +const classifier = await pipeline('sentiment-analysis'); + +// Process multiple texts +const results = await classifier([ + 'I love this!', + 'This is terrible.', + 'It was okay.' 
+]); +``` + +## Browser-Specific Considerations + +### WebGPU Usage +WebGPU provides GPU acceleration in browsers: + +```javascript +const pipe = await pipeline('text-generation', 'onnx-community/gemma-3-270m-it-ONNX', { + device: 'webgpu', + dtype: 'fp32' +}); +``` + +**Note**: WebGPU is experimental. Check browser compatibility and file issues if problems occur. + +### WASM Performance +Default browser execution uses WASM: + +```javascript +// Optimized for browsers with quantization +const pipe = await pipeline('sentiment-analysis', 'model-id', { + dtype: 'q8' // or 'q4' for even smaller size +}); +``` + +### Progress Tracking & Loading Indicators + +Models can be large (ranging from a few MB to several GB) and consist of multiple files. Track download progress by passing a callback to the `pipeline()` function: + +```javascript +import { pipeline } from '@huggingface/transformers'; + +// Track progress for each file +const fileProgress = {}; + +function onProgress(info) { + console.log(`${info.status}: ${info.file}`); + + if (info.status === 'progress') { + fileProgress[info.file] = info.progress; + console.log(`${info.file}: ${info.progress.toFixed(1)}%`); + } + + if (info.status === 'done') { + console.log(`✓ ${info.file} complete`); + } +} + +// Pass callback to pipeline +const classifier = await pipeline('sentiment-analysis', null, { + progress_callback: onProgress +}); +``` + +**Progress Info Properties:** + +```typescript +interface ProgressInfo { + status: 'initiate' | 'download' | 'progress' | 'done' | 'ready'; + name: string; // Model id or path + file: string; // File being processed + progress?: number; // Percentage (0-100, only for 'progress' status) + loaded?: number; // Bytes downloaded (only for 'progress' status) + total?: number; // Total bytes (only for 'progress' status) +} +``` + +For complete examples including browser UIs, React components, CLI progress bars, and retry logic, see: + +**→ [Pipeline Options - Progress 
Callback](./references/PIPELINE_OPTIONS.md#progress-callback)** + +## Error Handling + +```javascript +try { + const pipe = await pipeline('sentiment-analysis', 'model-id'); + const result = await pipe('text to analyze'); +} catch (error) { + if (error.message.includes('fetch')) { + console.error('Model download failed. Check internet connection.'); + } else if (error.message.includes('ONNX')) { + console.error('Model execution failed. Check model compatibility.'); + } else { + console.error('Unknown error:', error); + } +} +``` + +## Performance Tips + +1. **Reuse Pipelines**: Create pipeline once, reuse for multiple inferences +2. **Use Quantization**: Start with `q8` or `q4` for faster inference +3. **Batch Processing**: Process multiple inputs together when possible +4. **Cache Models**: Models are cached automatically (see **[Caching Reference](./references/CACHE.md)** for details on browser Cache API, Node.js filesystem cache, and custom implementations) +5. **WebGPU for Large Models**: Use WebGPU for models that benefit from GPU acceleration +6. **Prune Context**: For text generation, limit `max_new_tokens` to avoid memory issues +7. **Clean Up Resources**: Call `pipe.dispose()` when done to free memory + +## Memory Management + +**IMPORTANT:** Always call `pipe.dispose()` when finished to prevent memory leaks. + +```javascript +const pipe = await pipeline('sentiment-analysis'); +const result = await pipe('Great product!'); +await pipe.dispose(); // ✓ Free memory (100MB - several GB per model) +``` + +**When to dispose:** +- Application shutdown or component unmount +- Before loading a different model +- After batch processing in long-running apps + +Models consume significant memory and hold GPU/CPU resources. Disposal is critical for browser memory limits and server stability. 
+ +For detailed patterns (React cleanup, servers, browser), see **[Code Examples](./references/EXAMPLES.md)** + +## Troubleshooting + +### Model Not Found +- Verify model exists on Hugging Face Hub +- Check model name spelling +- Ensure model has ONNX files (look for `onnx` folder in model repo) + +### Memory Issues +- Use smaller models or quantized versions (`dtype: 'q4'`) +- Reduce batch size +- Limit sequence length with `max_length` + +### WebGPU Errors +- Check browser compatibility (Chrome 113+, Edge 113+) +- Try `dtype: 'fp16'` if `fp32` fails +- Fall back to WASM if WebGPU unavailable + +## Reference Documentation + +### This Skill +- **[Pipeline Options](./references/PIPELINE_OPTIONS.md)** - Configure `pipeline()` with `progress_callback`, `device`, `dtype`, etc. +- **[Configuration Reference](./references/CONFIGURATION.md)** - Global `env` configuration for caching and model loading +- **[Caching Reference](./references/CACHE.md)** - Browser Cache API, Node.js filesystem cache, and custom cache implementations +- **[Text Generation Guide](./references/TEXT_GENERATION.md)** - Streaming, chat format, and generation parameters +- **[Model Architectures](./references/MODEL_ARCHITECTURES.md)** - Supported models and selection tips +- **[Code Examples](./references/EXAMPLES.md)** - Real-world implementations for different runtimes + +### Official Transformers.js +- Official docs: https://huggingface.co/docs/transformers.js +- API reference: https://huggingface.co/docs/transformers.js/api/pipelines +- Model hub: https://huggingface.co/models?library=transformers.js +- GitHub: https://github.com/huggingface/transformers.js +- Examples: https://github.com/huggingface/transformers.js/tree/main/examples + +## Best Practices + +1. **Always Dispose Pipelines**: Call `pipe.dispose()` when done - critical for preventing memory leaks +2. **Start with Pipelines**: Use the pipeline API unless you need fine-grained control +3. 
**Test Locally First**: Test models with small inputs before deploying +4. **Monitor Model Sizes**: Be aware of model download sizes for web applications +5. **Handle Loading States**: Show progress indicators for better UX +6. **Version Pin**: Pin specific model versions for production stability +7. **Error Boundaries**: Always wrap pipeline calls in try-catch blocks +8. **Progressive Enhancement**: Provide fallbacks for unsupported browsers +9. **Reuse Models**: Load once, use many times - don't recreate pipelines unnecessarily +10. **Graceful Shutdown**: Dispose models on SIGTERM/SIGINT in servers + +## Quick Reference: Task IDs + +| Task | Task ID | +|------|---------| +| Text classification | `text-classification` or `sentiment-analysis` | +| Token classification | `token-classification` or `ner` | +| Question answering | `question-answering` | +| Fill mask | `fill-mask` | +| Summarization | `summarization` | +| Translation | `translation` | +| Text generation | `text-generation` | +| Text-to-text generation | `text2text-generation` | +| Zero-shot classification | `zero-shot-classification` | +| Image classification | `image-classification` | +| Image segmentation | `image-segmentation` | +| Object detection | `object-detection` | +| Depth estimation | `depth-estimation` | +| Image-to-image | `image-to-image` | +| Zero-shot image classification | `zero-shot-image-classification` | +| Zero-shot object detection | `zero-shot-object-detection` | +| Automatic speech recognition | `automatic-speech-recognition` | +| Audio classification | `audio-classification` | +| Text-to-speech | `text-to-speech` or `text-to-audio` | +| Image-to-text | `image-to-text` | +| Document question answering | `document-question-answering` | +| Feature extraction | `feature-extraction` | +| Sentence similarity | `sentence-similarity` | + +--- + +This skill enables you to integrate state-of-the-art machine learning capabilities directly into JavaScript applications without requiring 
separate ML servers or Python environments. diff --git a/skills/transformers-js/references/CACHE.md b/skills/transformers-js/references/CACHE.md new file mode 100644 index 00000000..6f97b2cd --- /dev/null +++ b/skills/transformers-js/references/CACHE.md @@ -0,0 +1,339 @@ +# Caching Reference + +Complete guide to caching strategies for Transformers.js models across different environments. + +## Table of Contents + +1. [Overview](#overview) +2. [Browser Caching](#browser-caching) +3. [Node.js Caching](#nodejs-caching) +4. [Custom Cache Implementation](#custom-cache-implementation) +5. [Cache Configuration](#cache-configuration) + +## Overview + +Transformers.js models can be large (from a few MB to several GB), so caching is critical for performance. The caching strategy differs based on the environment: + +- **Browser**: Uses the Cache API (browser cache storage) +- **Node.js**: Uses filesystem cache in `~/.cache/huggingface/` +- **Custom**: Implement your own cache (database, cloud storage, etc.) + +### Default Behavior + +```javascript +import { pipeline } from '@huggingface/transformers'; + +// First load: downloads model +const pipe = await pipeline('sentiment-analysis'); + +// Subsequent loads: uses cached model +const pipe2 = await pipeline('sentiment-analysis'); // Fast! +``` + +Caching is **automatic** and enabled by default. Models are cached after the first download. + +## Browser Caching + +### Using the Cache API + +In browser environments, Transformers.js uses the [Cache API](https://developer.mozilla.org/en-US/docs/Web/API/Cache) to store models: + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Browser cache is enabled by default +console.log(env.useBrowserCache); // true + +// Load model (cached automatically) +const classifier = await pipeline('sentiment-analysis'); +``` + +**How it works:** + +1. Model files are downloaded from Hugging Face Hub +2. Files are stored in the browser's Cache Storage +3. 
Subsequent loads retrieve from cache (no network request) +4. Cache persists across page reloads and browser sessions + +### Cache Location + +Browser caches are stored in: +- **Chrome/Edge**: `Cache Storage` in DevTools → Application tab → Cache storage +- **Firefox**: `about:cache` → Storage +- **Safari**: Web Inspector → Storage tab + +### Disable Browser Cache + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable browser caching (not recommended) +env.useBrowserCache = false; + +// Models will be re-downloaded on every page load +``` + +**Use case:** Testing, development, or debugging cache issues. + +### Browser Storage Limits + +Browsers impose storage quotas: + +- **Chrome**: ~60% of available disk space (but can evict data) +- **Firefox**: ~50% of available disk space +- **Safari**: ~1GB per origin (prompt for more) + +**Tip:** Monitor storage usage with the [Storage API](https://developer.mozilla.org/en-US/docs/Web/API/Storage_API): + +```javascript +if ('storage' in navigator && 'estimate' in navigator.storage) { + const estimate = await navigator.storage.estimate(); + const percentUsed = (estimate.usage / estimate.quota) * 100; + console.log(`Storage: ${percentUsed.toFixed(2)}% used`); + console.log(`Available: ${((estimate.quota - estimate.usage) / 1024 / 1024).toFixed(2)} MB`); +} +``` + +## Node.js Caching + +### Filesystem Cache + +In Node.js, models are cached to the filesystem: + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Default cache directory (Node.js) +console.log(env.cacheDir); // './.cache' (relative to current directory) + +// Filesystem cache is enabled by default +console.log(env.useFSCache); // true + +// Load model (cached to disk) +const classifier = await pipeline('sentiment-analysis'); +``` + +### Default Cache Location + +**Default behavior:** +- Cache directory: `./.cache` (relative to where Node.js process runs) +- Full default path: `~/.cache/huggingface/` when using 
Hugging Face tools + +**Note:** The statement "Models are cached automatically in `~/.cache/huggingface/`" from performance tips is specific to Hugging Face's Python tooling convention. In Transformers.js for Node.js, the default is `./.cache` unless configured otherwise. + +### Custom Cache Directory + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Set custom cache directory +env.cacheDir = '/var/cache/transformers'; + +// Or use environment variable (Node.js convention) +env.cacheDir = process.env.HF_HOME || '~/.cache/huggingface'; + +// Now load model +const classifier = await pipeline('sentiment-analysis'); +// Cached to: /var/cache/transformers/models--Xenova--distilbert-base-uncased-finetuned-sst-2-english/ +``` + +**Pattern:** `models--{organization}--{model-name}/` + +### Disable Filesystem Cache + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable filesystem caching (not recommended) +env.useFSCache = false; + +// Models will be re-downloaded on every load +``` + +**Use case:** Testing, CI/CD environments, or containers with ephemeral storage. + +## Custom Cache Implementation + +Implement your own cache for specialized storage backends. 
+ +### Custom Cache Interface + +```typescript +interface CacheInterface { + /** + * Check if a URL is cached + */ + match(url: string): Promise<Response | undefined>; + + /** + * Store a URL and its response + */ + put(url: string, response: Response): Promise<void>; +} +``` + +### Example: Cloud Storage Cache (S3) + +```javascript +import { env, pipeline } from '@huggingface/transformers'; +import { S3Client, GetObjectCommand, PutObjectCommand } from '@aws-sdk/client-s3'; +import { Readable } from 'stream'; + +class S3Cache { + constructor(bucket, region = 'us-east-1') { + this.bucket = bucket; + this.s3 = new S3Client({ region }); + } + + async match(url) { + const key = this.urlToKey(url); + + try { + const command = new GetObjectCommand({ + Bucket: this.bucket, + Key: key + }); + const response = await this.s3.send(command); + + // Convert stream to buffer + const chunks = []; + for await (const chunk of response.Body) { + chunks.push(chunk); + } + const body = Buffer.concat(chunks); + + return new Response(body, { + status: 200, + headers: JSON.parse(response.Metadata.headers || '{}') + }); + } catch (error) { + if (error.name === 'NoSuchKey') return undefined; + throw error; + } + } + + async put(url, response) { + const key = this.urlToKey(url); + const clonedResponse = response.clone(); + const body = Buffer.from(await clonedResponse.arrayBuffer()); + const headers = JSON.stringify(Object.fromEntries(response.headers.entries())); + + const command = new PutObjectCommand({ + Bucket: this.bucket, + Key: key, + Body: body, + Metadata: { headers } + }); + + await this.s3.send(command); + } + + urlToKey(url) { + // Convert URL to S3 key (remove protocol, replace slashes) + return url.replace(/^https?:\/\//, '').replace(/\//g, '_'); + } +} + +// Configure S3 cache +env.useCustomCache = true; +env.customCache = new S3Cache('my-transformers-cache', 'us-east-1'); +env.useFSCache = false; + +// Use S3 cache +const classifier = await pipeline('sentiment-analysis'); +``` + +## Cache 
Configuration + +### Environment Variables + +Use environment variables to configure caching: + +```javascript +import { env } from '@huggingface/transformers'; + +// Configure cache directory from environment +env.cacheDir = process.env.TRANSFORMERS_CACHE || './.cache'; + +// Disable caching in CI/CD +if (process.env.CI === 'true') { + env.useFSCache = false; + env.useBrowserCache = false; +} + +// Production: use pre-cached models +if (process.env.NODE_ENV === 'production') { + env.allowRemoteModels = false; + env.allowLocalModels = true; + env.localModelPath = process.env.MODEL_PATH || '/app/models'; +} +``` + +### Configuration Patterns + +#### Development: Enable All Caching + +```javascript +import { env } from '@huggingface/transformers'; + +env.allowRemoteModels = true; +env.useFSCache = true; // Node.js +env.useBrowserCache = true; // Browser +env.cacheDir = './.cache'; +``` + +#### Production: Local Models Only + +```javascript +import { env } from '@huggingface/transformers'; + +env.allowRemoteModels = false; +env.allowLocalModels = true; +env.localModelPath = '/app/models'; +env.useFSCache = true; +``` + +#### Testing: Disable Caching + +```javascript +import { env } from '@huggingface/transformers'; + +env.useFSCache = false; +env.useBrowserCache = false; +env.allowRemoteModels = true; // Download every time +``` + +#### Hybrid: Cache + Remote Fallback + +```javascript +import { env } from '@huggingface/transformers'; + +// Try local cache first, fall back to remote +env.allowRemoteModels = true; +env.allowLocalModels = true; +env.useFSCache = true; +env.localModelPath = './models'; +``` + +--- + +## Summary + +Transformers.js provides flexible caching options: + +- **Browser**: Cache API (automatic, persistent) +- **Node.js**: Filesystem cache (default `./.cache`, configurable) +- **Custom**: Implement your own (database, cloud storage, etc.) + +**Key takeaways:** + +1. Caching is enabled by default and automatic +2. 
Configure cache **before** loading models +3. Browser uses Cache API, Node.js uses filesystem +4. Custom caches enable advanced storage backends +5. Monitor cache size and implement cleanup strategies +6. Pre-download models for production deployments + +For more configuration options, see: +- [Configuration Reference](./CONFIGURATION.md) +- [Pipeline Options](./PIPELINE_OPTIONS.md) diff --git a/skills/transformers-js/references/CONFIGURATION.md b/skills/transformers-js/references/CONFIGURATION.md new file mode 100644 index 00000000..52e18d96 --- /dev/null +++ b/skills/transformers-js/references/CONFIGURATION.md @@ -0,0 +1,390 @@ +# Environment Configuration Reference + +Complete guide to configuring Transformers.js behavior using the `env` object. + +## Table of Contents + +1. [Overview](#overview) +2. [Remote Model Configuration](#remote-model-configuration) +3. [Local Model Configuration](#local-model-configuration) +4. [Cache Configuration](#cache-configuration) +5. [WASM Configuration](#wasm-configuration) +6. [Common Configuration Patterns](#common-configuration-patterns) +7. 
[Environment Best Practices](#environment-best-practices) + +## Overview + +The `env` object provides comprehensive control over Transformers.js execution, caching, and model loading: + +```javascript +import { env } from '@huggingface/transformers'; + +// View current version +console.log(env.version); // e.g., '3.8.1' +``` + +### Available Properties + +```typescript +interface TransformersEnvironment { + // Version info + version: string; + + // Backend configuration + backends: { + onnx: Partial; + }; + + // Remote model settings + allowRemoteModels: boolean; + remoteHost: string; + remotePathTemplate: string; + + // Local model settings + allowLocalModels: boolean; + localModelPath: string; + useFS: boolean; + + // Cache settings + useBrowserCache: boolean; + useFSCache: boolean; + cacheDir: string | null; + useCustomCache: boolean; + customCache: CacheInterface | null; + useWasmCache: boolean; + cacheKey: string; +} +``` + +## Remote Model Configuration + +Control how models are loaded from remote sources (default: Hugging Face Hub). + +### Disable Remote Loading + +```javascript +import { env } from '@huggingface/transformers'; + +// Force local-only mode (no network requests) +env.allowRemoteModels = false; +``` + +**Use case:** Offline applications, security requirements, or air-gapped environments. + +### Custom Model Host + +```javascript +import { env } from '@huggingface/transformers'; + +// Use your own CDN or model server +env.remoteHost = 'https://cdn.example.com/models'; + +// Customize the URL pattern +// Default: '{model}/resolve/{revision}/{file}' +env.remotePathTemplate = 'custom/{model}/{file}'; +``` + +**Use case:** Self-hosting models, using a CDN for faster downloads, or corporate proxies. 
+ +### Example: Private Model Server + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// Configure custom model host +env.remoteHost = 'https://models.mycompany.com'; +env.remotePathTemplate = '{model}/{file}'; + +// Models will be loaded from: +// https://models.mycompany.com/my-model/model.onnx +const pipe = await pipeline('sentiment-analysis', 'my-model'); +``` + +## Local Model Configuration + +Control loading models from the local file system. + +### Enable Local Models + +```javascript +import { env } from '@huggingface/transformers'; + +// Enable local file system loading +env.allowLocalModels = true; + +// Set the base path for local models +env.localModelPath = '/path/to/models/'; +``` + +**Default values:** +- Browser: `allowLocalModels = false`, `localModelPath = '/models/'` +- Node.js: `allowLocalModels = true`, `localModelPath = '/models/'` + +### File System Control + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable file system usage entirely (Node.js only) +env.useFS = false; +``` + +### Example: Local Model Directory Structure + +``` +/app/models/ +├── onnx-community/ +│ ├── Supertonic-TTS-ONNX/ +│ │ ├── config.json +│ │ ├── tokenizer.json +│ │ ├── model.onnx +│ │ └── ... +│ └── yolo26l-pose-ONNX/ +│ ├── config.json +│ ├── preprocessor_config.json +│ ├── model.onnx +│ └── ... +``` + +```javascript +env.allowLocalModels = true; +env.localModelPath = '/app/models/'; +env.allowRemoteModels = false; // Offline mode + +const classifier = await pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english'); +``` + +## Cache Configuration + +Transformers.js supports multiple caching strategies to improve performance and reduce network usage. 
+ +### Quick Configuration + +```javascript +import { env } from '@huggingface/transformers'; + +// Browser cache (Cache API) +env.useBrowserCache = true; // default: true +env.cacheKey = 'my-app-transformers-cache'; // default: 'transformers-cache' + +// Node.js filesystem cache +env.useFSCache = true; // default: true +env.cacheDir = './custom-cache-dir'; // default: './.cache' + +// Custom cache implementation +env.useCustomCache = true; +env.customCache = new CustomCache(); // Implement Cache API interface + +// WASM binary caching +env.useWasmCache = true; // default: true +``` + +### Disable Caching + +```javascript +import { env } from '@huggingface/transformers'; + +// Disable all caching (re-download on every load) +env.useFSCache = false; +env.useBrowserCache = false; +env.useWasmCache = false; +env.cacheDir = null; +``` + +For comprehensive caching documentation including: +- Browser Cache API details and storage limits +- Node.js filesystem cache structure and management +- Custom cache implementations (Redis, database, S3) +- Cache clearing and monitoring strategies +- Best practices and troubleshooting + +See **[Caching Reference](./CACHE.md)** + +## WASM Configuration + +Configure ONNX Runtime Web Assembly backend settings. 
+ +### Basic WASM Settings + +```javascript +import { env } from '@huggingface/transformers'; + +// Set custom WASM paths +env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/'; + +// Configure number of threads (Node.js only) +env.backends.onnx.wasm.numThreads = 4; + +// Enable/disable SIMD (single instruction, multiple data) +env.backends.onnx.wasm.simd = true; +``` + +### Proxy Configuration + +```javascript +import { env } from '@huggingface/transformers'; + +// Configure proxy for WASM downloads +env.backends.onnx.wasm.proxy = true; +``` + +### Self-Hosted WASM Files + +```javascript +import { env } from '@huggingface/transformers'; + +// Host WASM files on your own server +env.backends.onnx.wasm.wasmPaths = '/static/wasm/'; +``` + +**Required files:** +- `ort-wasm.wasm` - Main WASM binary +- `ort-wasm-simd.wasm` - SIMD-enabled WASM binary +- `ort-wasm-threaded.wasm` - Multi-threaded WASM binary +- `ort-wasm-simd-threaded.wasm` - SIMD + multi-threaded WASM binary + +## Common Configuration Patterns + +### Development Setup + +```javascript +import { env } from '@huggingface/transformers'; + +// Fast iteration with caching +env.allowRemoteModels = true; +env.useBrowserCache = true; // Browser +env.useFSCache = true; // Node.js +env.cacheDir = './.cache'; +``` + +### Production (Local Models) + +```javascript +import { env } from '@huggingface/transformers'; + +// Secure, offline-capable setup +env.allowRemoteModels = false; +env.allowLocalModels = true; +env.localModelPath = '/app/models/'; +env.useFSCache = false; // Models already local +``` + +### Offline-First Application + +```javascript +import { env } from '@huggingface/transformers'; + +// Try local first, fall back to remote +env.allowLocalModels = true; +env.localModelPath = './models/'; +env.allowRemoteModels = true; +env.useFSCache = true; +env.cacheDir = './cache'; +``` + +### Custom CDN + +```javascript +import { env } from '@huggingface/transformers'; + +// Use 
your own model hosting +env.remoteHost = 'https://cdn.example.com/ml-models'; +env.remotePathTemplate = '{model}/{file}'; +env.useBrowserCache = true; +``` + +### Memory-Constrained Environment + +```javascript +import { env } from '@huggingface/transformers'; + +// Minimize disk/memory usage +env.useFSCache = false; +env.useBrowserCache = false; +env.useWasmCache = false; +env.cacheDir = null; +``` + +### Testing/CI Environment + +```javascript +import { env } from '@huggingface/transformers'; + +// Predictable, isolated testing +env.allowRemoteModels = false; +env.allowLocalModels = true; +env.localModelPath = './test-fixtures/models/'; +env.useFSCache = false; +``` + + + +## Environment Best Practices + +### 1. Configure Early + +Set `env` properties before loading any models: + +```javascript +import { env, pipeline } from '@huggingface/transformers'; + +// ✓ Good: Configure before loading +env.allowRemoteModels = false; +env.localModelPath = '/app/models/'; +const pipe = await pipeline('sentiment-analysis'); + +// ✗ Bad: Configuring after loading may not take effect +const pipe = await pipeline('sentiment-analysis'); +env.allowRemoteModels = false; // Too late! +``` + +### 2. Use Environment Variables + +```javascript +import { env } from '@huggingface/transformers'; + +// Configure based on environment +env.allowRemoteModels = process.env.NODE_ENV === 'development'; +env.cacheDir = process.env.MODEL_CACHE_DIR || './.cache'; +env.localModelPath = process.env.LOCAL_MODELS_PATH || '/app/models/'; +``` + +### 3. Handle Errors Gracefully + +```javascript +import { pipeline, env } from '@huggingface/transformers'; + +try { + env.allowRemoteModels = false; + const pipe = await pipeline('sentiment-analysis', 'my-model'); +} catch (error) { + if (error.message.includes('not found')) { + console.error('Model not found locally. Enable remote models or download the model.'); + } + throw error; +} +``` + +### 4. 
Log Configuration + +```javascript +import { env } from '@huggingface/transformers'; + +console.log('Transformers.js Configuration:', { + version: env.version, + allowRemoteModels: env.allowRemoteModels, + allowLocalModels: env.allowLocalModels, + localModelPath: env.localModelPath, + cacheDir: env.cacheDir, + useFSCache: env.useFSCache, + useBrowserCache: env.useBrowserCache +}); +``` + +## Related Documentation + +- **[Caching Reference](./CACHE.md)** - Comprehensive caching guide (browser, Node.js, custom implementations) +- [Pipeline Options](./PIPELINE_OPTIONS.md) - Configure pipeline loading with `progress_callback`, `device`, `dtype`, etc. +- [Model Architectures](./MODEL_ARCHITECTURES.md) - Supported models and architectures +- [Examples](./EXAMPLES.md) - Code examples for different runtimes +- [Main Skill Guide](../SKILL.md) - Getting started and common usage diff --git a/skills/transformers-js/references/EXAMPLES.md b/skills/transformers-js/references/EXAMPLES.md new file mode 100644 index 00000000..6a6e9b74 --- /dev/null +++ b/skills/transformers-js/references/EXAMPLES.md @@ -0,0 +1,605 @@ +# Transformers.js Code Examples + +Working examples showing how to use Transformers.js across different runtimes and frameworks. + +All examples use the same task and model for consistency: +- **Task**: `feature-extraction` +- **Model**: `onnx-community/all-MiniLM-L6-v2-ONNX` + +## Table of Contents +1. [Browser (Vanilla JS)](#browser-vanilla-js) +2. [Node.js](#nodejs) +3. [React](#react) +4. [Express API](#express-api) + +## Browser (Vanilla JS) + +### Basic Usage + +```html + + + + Feature Extraction + + +

<h1>Text Embedding Generator</h1>
+  <textarea id="input" rows="4" cols="50">Hello, world!</textarea>
+  <br />
+  <button id="generate">Generate Embedding</button>
+  <pre id="output"></pre>
+
+  <script type="module">
+    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers';
+
+    const extractor = await pipeline(
+      'feature-extraction',
+      'onnx-community/all-MiniLM-L6-v2-ONNX'
+    );
+
+    document.getElementById('generate').addEventListener('click', async () => {
+      const text = document.getElementById('input').value;
+      const output = await extractor(text, { pooling: 'mean', normalize: true });
+      document.getElementById('output').textContent =
+        `Dimensions: ${output.data.length}\n` +
+        `First 5 values: ${Array.from(output.data).slice(0, 5).join(', ')}`;
+    });
+  </script>
+ + + + + +``` + +### With Progress Tracking + +```html + + + + Feature Extraction with Progress + + + +

Text Embedding Generator

+
+

Loading model...

+
+
+ + + + + +``` + +## Node.js + +### Basic Script + +```javascript +// embed.js +import { pipeline } from '@huggingface/transformers'; + +async function generateEmbedding(text) { + const extractor = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX' + ); + + const output = await extractor(text, { pooling: 'mean', normalize: true }); + + console.log('Text:', text); + console.log('Embedding dimensions:', output.data.length); + console.log('First 5 values:', Array.from(output.data).slice(0, 5)); + + await extractor.dispose(); +} + +generateEmbedding('Hello, world!'); +``` + +### Batch Processing + +```javascript +// batch-embed.js +import { pipeline } from '@huggingface/transformers'; +import fs from 'fs/promises'; + +async function embedDocuments(documents) { + const extractor = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX' + ); + + console.log(`Processing ${documents.length} documents...`); + + const embeddings = []; + + for (let i = 0; i < documents.length; i++) { + const output = await extractor(documents[i], { + pooling: 'mean', + normalize: true + }); + + embeddings.push({ + text: documents[i], + embedding: Array.from(output.data) + }); + + console.log(`Processed ${i + 1}/${documents.length}`); + } + + await fs.writeFile( + 'embeddings.json', + JSON.stringify(embeddings, null, 2) + ); + + console.log('Saved to embeddings.json'); + + await extractor.dispose(); +} + +const documents = [ + 'The cat sat on the mat', + 'A dog played in the park', + 'Machine learning is fascinating' +]; + +embedDocuments(documents); +``` + +### CLI with Progress + +```javascript +// cli-embed.js +import { pipeline } from '@huggingface/transformers'; + +async function main() { + const text = process.argv[2] || 'Hello, world!'; + + console.log('Loading model...'); + + const fileProgress = {}; + + const extractor = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX', + { + progress_callback: 
(info) => { + if (info.status === 'progress') { + fileProgress[info.file] = info.progress; + + // Show all files progress + const progressLines = Object.entries(fileProgress) + .map(([file, progress]) => ` ${file}: ${progress.toFixed(1)}%`) + .join('\n'); + + process.stdout.write(`\r\x1b[K${progressLines}`); + } + + if (info.status === 'done') { + console.log(`\n✓ ${info.file} complete`); + } + + if (info.status === 'ready') { + console.log('\nModel ready!'); + } + } + } + ); + + console.log('Generating embedding...'); + const output = await extractor(text, { pooling: 'mean', normalize: true }); + + console.log(`\nText: "${text}"`); + console.log(`Dimensions: ${output.data.length}`); + console.log(`First 5 values: ${Array.from(output.data).slice(0, 5).join(', ')}`); + + await extractor.dispose(); +} + +main(); +``` + +## React + +### Basic Component + +```jsx +// EmbeddingGenerator.jsx +import { useState, useRef, useEffect } from 'react'; +import { pipeline } from '@huggingface/transformers'; + +export function EmbeddingGenerator() { + const extractorRef = useRef(null); + const [text, setText] = useState(''); + const [embedding, setEmbedding] = useState(null); + const [loading, setLoading] = useState(false); + + const generate = async () => { + if (!text) return; + + setLoading(true); + + // Load model on first generate + if (!extractorRef.current) { + extractorRef.current = await pipeline( + 'feature-extraction', + 'onnx-community/all-MiniLM-L6-v2-ONNX' + ); + } + + const output = await extractorRef.current(text, { + pooling: 'mean', + normalize: true + }); + setEmbedding(Array.from(output.data)); + setLoading(false); + }; + + // Cleanup on unmount + useEffect(() => { + return () => { + if (extractorRef.current) { + extractorRef.current.dispose(); + } + }; + }, []); + + return ( +
+

Text Embedding Generator

+ + + +
+ + + + +``` + +### React + +```jsx +import { useState, useRef, useEffect } from 'react'; +import { pipeline, TextStreamer } from '@huggingface/transformers'; + +function StreamingGenerator() { + const generatorRef = useRef(null); + const [output, setOutput] = useState(''); + const [loading, setLoading] = useState(false); + + const handleGenerate = async (prompt) => { + if (!prompt) return; + + setLoading(true); + setOutput(''); + + // Load model on first generate + if (!generatorRef.current) { + generatorRef.current = await pipeline( + 'text-generation', + 'onnx-community/Qwen2.5-0.5B-Instruct', + { dtype: 'q4' } + ); + } + + const streamer = new TextStreamer(generatorRef.current.tokenizer, { + skip_prompt: true, + skip_special_tokens: true, + callback_function: (token) => { + setOutput((prev) => prev + token); + }, + }); + + await generatorRef.current(prompt, { + max_new_tokens: 200, + temperature: 0.7, + streamer, + }); + + setLoading(false); + }; + + // Cleanup on unmount + useEffect(() => { + return () => { + if (generatorRef.current) { + generatorRef.current.dispose(); + } + }; + }, []); + + return ( +
+    <div>
+      <button
+        onClick={() => handleGenerate('Write a short poem about the sea')}
+        disabled={loading}
+      >
+        {loading ? 'Generating…' : 'Generate'}
+      </button>
+      <pre>{output}</pre>
+    </div>
+ ); +} +``` + +## Chat Format + +Use structured messages for conversations. Works with both basic generation and streaming (just add `streamer` parameter). + +### Single Turn + +```javascript +import { pipeline } from '@huggingface/transformers'; + +const generator = await pipeline( + 'text-generation', + 'onnx-community/Qwen2.5-0.5B-Instruct', + { dtype: 'q4' } +); + +const messages = [ + { role: 'system', content: 'You are a helpful assistant.' }, + { role: 'user', content: 'How do I create an async function?' } +]; + +const result = await generator(messages, { + max_new_tokens: 256, + temperature: 0.7, +}); + +console.log(result[0].generated_text); +``` + +### Multi-turn Conversation + +```javascript +const conversation = [ + { role: 'system', content: 'You are a helpful assistant.' }, + { role: 'user', content: 'What is JavaScript?' }, + { role: 'assistant', content: 'JavaScript is a programming language...' }, + { role: 'user', content: 'Can you show an example?' } +]; + +const result = await generator(conversation, { + max_new_tokens: 200, + temperature: 0.7, +}); + +// To add streaming, just pass a streamer: +// streamer: new TextStreamer(generator.tokenizer, {...}) +``` + +## Generation Parameters + +### Common Parameters + +```javascript +await generator(prompt, { + // Token limits + max_new_tokens: 512, // Maximum tokens to generate + min_new_tokens: 0, // Minimum tokens to generate + + // Sampling + temperature: 0.7, // Randomness (0.0-2.0) + top_k: 50, // Consider top K tokens + top_p: 0.95, // Nucleus sampling + do_sample: true, // Use random sampling (false = always pick most likely token) + + // Repetition control + repetition_penalty: 1.0, // Penalty for repeating (1.0 = no penalty) + no_repeat_ngram_size: 0, // Prevent repeating n-grams + + // Streaming + streamer: streamer, // TextStreamer instance +}); +``` + +### Parameter Effects + +**Temperature:** +- Low (0.1-0.5): More focused and deterministic +- Medium (0.6-0.9): Balanced creativity and 
coherence +- High (1.0-2.0): More creative and random + +```javascript +// Focused output +await generator(prompt, { temperature: 0.3, max_new_tokens: 100 }); + +// Creative output +await generator(prompt, { temperature: 1.2, max_new_tokens: 100 }); +``` + +**Sampling Methods:** + +```javascript +// Greedy (deterministic) +await generator(prompt, { + do_sample: false, + max_new_tokens: 100 +}); + +// Top-k sampling +await generator(prompt, { + top_k: 50, + temperature: 0.7, + max_new_tokens: 100 +}); + +// Top-p (nucleus) sampling +await generator(prompt, { + top_p: 0.95, + temperature: 0.7, + max_new_tokens: 100 +}); +``` + +## Model Selection + +Browse available text generation models on Hugging Face Hub: + +**https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending** + +### Selection Tips + +- **Small models (< 1B params)**: Fast, browser-friendly, use `dtype: 'q4'` +- **Medium models (1-3B params)**: Balanced quality/speed, use `dtype: 'q4'` or `fp16` +- **Large models (> 3B params)**: High quality, slower, best for Node.js with `dtype: 'fp16'` + +Check model cards for: +- Parameter count and model size +- Supported languages +- Benchmark scores +- License restrictions + +## Best Practices + +1. **Model Size**: Use quantized models (`q4`) for browsers, larger models (`fp16`) for servers +2. **Streaming**: Use streaming for better UX - shows progress and feels responsive +3. **Token Limits**: Set `max_new_tokens` to prevent runaway generation +4. **Temperature**: Tune based on use case (creative: 0.8-1.2, factual: 0.3-0.7) +5. **Memory**: Always call `dispose()` when done +6. 
**Caching**: Load model once, reuse for multiple requests + +## Related Documentation + +- [Pipeline Options](./PIPELINE_OPTIONS.md) - Configure pipeline loading +- [Configuration Reference](./CONFIGURATION.md) - Environment settings +- [Code Examples](./EXAMPLES.md) - More examples for different runtimes +- [Main Skill Guide](../SKILL.md) - Getting started guide diff --git a/skills_index.json b/skills_index.json index 64fd7c5e..d1d99fff 100644 --- a/skills_index.json +++ b/skills_index.json @@ -14598,10 +14598,32 @@ "path": "skills/hugging-face-cli", "category": "ai-ml", "name": "hugging-face-cli", - "description": "The hf CLI provides direct terminal access to the Hugging Face Hub for downloading, uploading, and managing repositories, cache, and compute resources.", - "risk": "safe", - "source": "https://github.com/huggingface/skills/tree/main/skills/hugging-face-cli", - "date_added": "2026-02-27", + "description": "Use the Hugging Face Hub CLI (`hf`) to download, upload, and manage models, datasets, and Spaces.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/hf-cli", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, + { + "id": "hugging-face-community-evals", + "path": "skills/hugging-face-community-evals", + "category": "ai-ml", + "name": "hugging-face-community-evals", + "description": "Run local evaluations for Hugging Face Hub models with inspect-ai or lighteval.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-community-evals", + "date_added": null, "plugin": { "targets": { "codex": "supported", @@ -14620,9 +14642,9 @@ "path": "skills/hugging-face-dataset-viewer", "category": "ai-ml", "name": "hugging-face-dataset-viewer", - "description": "Use this skill for Hugging Face Dataset Viewer API workflows that fetch 
subset/split metadata, paginate rows, search text, apply filters, download parquet URLs, and read size or statistics.", + "description": "Query Hugging Face datasets through the Dataset Viewer API for splits, rows, search, filters, and parquet links.", "risk": "unknown", - "source": "community", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-datasets", "date_added": null, "plugin": { "targets": { @@ -14681,15 +14703,37 @@ "reasons": [] } }, + { + "id": "hugging-face-gradio", + "path": "skills/hugging-face-gradio", + "category": "ai-ml", + "name": "hugging-face-gradio", + "description": "Build or edit Gradio apps, layouts, components, and chat interfaces in Python.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-gradio", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, { "id": "hugging-face-jobs", "path": "skills/hugging-face-jobs", "category": "ai-ml", "name": "hugging-face-jobs", - "description": "Run any workload on fully managed Hugging Face infrastructure. 
No local setup required\u2014jobs run on cloud CPUs, GPUs, or TPUs and can persist results to the Hugging Face Hub.", - "risk": "safe", - "source": "https://github.com/huggingface/skills/tree/main/skills/hugging-face-jobs", - "date_added": "2026-02-27", + "description": "Run workloads on Hugging Face Jobs with managed CPUs, GPUs, TPUs, secrets, and Hub persistence.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-jobs", + "date_added": null, "plugin": { "targets": { "codex": "supported", @@ -14708,9 +14752,9 @@ "path": "skills/hugging-face-model-trainer", "category": "ai-ml", "name": "hugging-face-model-trainer", - "description": "Train language models using TRL (Transformer Reinforcement Learning) on fully managed Hugging Face infrastructure. No local GPU setup required\u2014models train on cloud GPUs and results are automatically saved to the Hugging Face Hub.", + "description": "Train or fine-tune TRL language models on Hugging Face Jobs, including SFT, DPO, GRPO, and GGUF export.", "risk": "unknown", - "source": "community", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-llm-trainer", "date_added": null, "plugin": { "targets": { @@ -14732,7 +14776,29 @@ "name": "hugging-face-paper-publisher", "description": "Publish and manage research papers on Hugging Face Hub. 
Supports creating paper pages, linking papers to models/datasets, claiming authorship, and generating professional markdown-based research articles.", "risk": "unknown", - "source": "community", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-paper-publisher", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, + { + "id": "hugging-face-papers", + "path": "skills/hugging-face-papers", + "category": "ai-ml", + "name": "hugging-face-papers", + "description": "Read and analyze Hugging Face paper pages or arXiv papers with markdown and papers API metadata.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-papers", "date_added": null, "plugin": { "targets": { @@ -14769,6 +14835,50 @@ "reasons": [] } }, + { + "id": "hugging-face-trackio", + "path": "skills/hugging-face-trackio", + "category": "ai-ml", + "name": "hugging-face-trackio", + "description": "Track ML experiments with Trackio using Python logging, alerts, and CLI metric retrieval.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-trackio", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, + { + "id": "hugging-face-vision-trainer", + "path": "skills/hugging-face-vision-trainer", + "category": "ai-ml", + "name": "hugging-face-vision-trainer", + "description": "Train or fine-tune vision models on Hugging Face Jobs for detection, classification, and SAM or SAM2 segmentation.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/huggingface-vision-trainer", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + 
}, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, { "id": "hybrid-cloud-architect", "path": "skills/hybrid-cloud-architect", @@ -15583,6 +15693,28 @@ "reasons": [] } }, + { + "id": "jq", + "path": "skills/jq", + "category": "development", + "name": "jq", + "description": "Expert jq usage for JSON querying, filtering, transformation, and pipeline integration. Practical patterns for real shell workflows.", + "risk": "safe", + "source": "community", + "date_added": "2026-03-28", + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, { "id": "json-canvas", "path": "skills/json-canvas", @@ -26897,6 +27029,28 @@ "reasons": [] } }, + { + "id": "tmux", + "path": "skills/tmux", + "category": "development", + "name": "tmux", + "description": "Expert tmux session, window, and pane management for terminal multiplexing, persistent remote workflows, and shell scripting automation.", + "risk": "safe", + "source": "community", + "date_added": "2026-03-28", + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, { "id": "todoist-automation", "path": "skills/todoist-automation", @@ -27007,6 +27161,28 @@ "reasons": [] } }, + { + "id": "transformers-js", + "path": "skills/transformers-js", + "category": "web-development", + "name": "transformers-js", + "description": "Run Hugging Face models in JavaScript or TypeScript with Transformers.js in Node.js or the browser.", + "risk": "unknown", + "source": "https://github.com/huggingface/skills/tree/main/skills/transformers-js", + "date_added": null, + "plugin": { + "targets": { + "codex": "supported", + "claude": "supported" + }, + "setup": { + "type": "none", + "summary": "", + "docs": null + }, + "reasons": [] + } + }, { "id": "travel-health-analyzer", 
"path": "skills/travel-health-analyzer",