diff --git a/skills/apify-actorization/SKILL.md b/skills/apify-actorization/SKILL.md index 4f90b1d0..bf1a726d 100644 --- a/skills/apify-actorization/SKILL.md +++ b/skills/apify-actorization/SKILL.md @@ -33,11 +33,10 @@ apify --help If not installed: ```bash -curl -fsSL https://apify.com/install-cli.sh | bash +brew install apify-cli -# Or (Mac): brew install apify-cli -# Or (Windows): irm https://apify.com/install-cli.ps1 | iex # Or: npm install -g apify-cli +# Or install from an official release package that your OS package manager verifies ``` Verify CLI is logged in: @@ -49,7 +48,8 @@ apify info # Should return your username If not logged in, check if `APIFY_TOKEN` environment variable is defined. If not, ask the user to generate one at https://console.apify.com/settings/integrations, then: ```bash -apify login -t $APIFY_TOKEN +export APIFY_TOKEN="your_token_here" +apify login ``` ## Actorization Checklist diff --git a/skills/audio-transcriber/examples/basic-transcription.sh b/skills/audio-transcriber/examples/basic-transcription.sh index 9d74d0ac..ee679d62 100755 --- a/skills/audio-transcriber/examples/basic-transcription.sh +++ b/skills/audio-transcriber/examples/basic-transcription.sh @@ -112,18 +112,24 @@ fi info "Step 2: Transcribing audio..." 
OUTPUT_FILE="${AUDIO_FILE%.*}.md" -TEMP_JSON="/tmp/transcription_$$.json" +TEMP_JSON="$(mktemp "${TMPDIR:-/tmp}/transcription.XXXXXX")" -python3 << EOF +AUDIO_FILE_ENV="$AUDIO_FILE" MODEL_ENV="$MODEL" TRANSCRIBER_ENV="$TRANSCRIBER" TEMP_JSON_ENV="$TEMP_JSON" python3 << 'EOF' +import os import sys import json from datetime import datetime try: - if "$TRANSCRIBER" == "faster-whisper": + audio_file = os.environ["AUDIO_FILE_ENV"] + model_name = os.environ["MODEL_ENV"] + transcriber = os.environ["TRANSCRIBER_ENV"] + temp_json = os.environ["TEMP_JSON_ENV"] + + if transcriber == "faster-whisper": from faster_whisper import WhisperModel - model = WhisperModel("$MODEL", device="cpu", compute_type="int8") - segments, info = model.transcribe("$AUDIO_FILE", language=None, vad_filter=True) + model = WhisperModel(model_name, device="cpu", compute_type="int8") + segments, info = model.transcribe(audio_file, language=None, vad_filter=True) data = { "language": info.language, @@ -140,8 +146,8 @@ try: }) else: import whisper - model = whisper.load_model("$MODEL") - result = model.transcribe("$AUDIO_FILE") + model = whisper.load_model(model_name) + result = model.transcribe(audio_file) data = { "language": result["language"], @@ -149,7 +155,7 @@ try: "segments": result["segments"] } - with open("$TEMP_JSON", "w") as f: + with open(temp_json, "w", encoding="utf-8") as f: json.dump(data, f) print(f"✅ Language detected: {data['language']}") @@ -168,22 +174,24 @@ fi # Step 3: Generate Markdown output info "Step 3: Generating Markdown report..." 
-python3 << 'EOF' +AUDIO_FILE_ENV="$AUDIO_FILE" FILE_SIZE_ENV="$FILE_SIZE" DURATION_HMS_ENV="$DURATION_HMS" TRANSCRIBER_ENV="$TRANSCRIBER" MODEL_ENV="$MODEL" TEMP_JSON_ENV="$TEMP_JSON" OUTPUT_FILE_ENV="$OUTPUT_FILE" python3 << 'EOF' import json -import sys +import os from datetime import datetime # Load transcription data -with open("${TEMP_JSON}") as f: +with open(os.environ["TEMP_JSON_ENV"], encoding="utf-8") as f: data = json.load(f) # Prepare metadata -filename = "${AUDIO_FILE}".split("/")[-1] -file_size = "${FILE_SIZE}" -duration_hms = "${DURATION_HMS}" +filename = os.path.basename(os.environ["AUDIO_FILE_ENV"]) +file_size = os.environ["FILE_SIZE_ENV"] +duration_hms = os.environ["DURATION_HMS_ENV"] language = data["language"] process_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") num_segments = len(data["segments"]) +transcriber = os.environ["TRANSCRIBER_ENV"] +model_name = os.environ["MODEL_ENV"] # Generate Markdown markdown = f"""# Audio Transcription Report @@ -198,7 +206,7 @@ markdown = f"""# Audio Transcription Report | **Language** | {language.upper()} | | **Processed Date** | {process_date} | | **Segments** | {num_segments} | -| **Transcription Engine** | ${TRANSCRIBER} (model: ${MODEL}) | +| **Transcription Engine** | {transcriber} (model: {model_name}) | --- @@ -222,14 +230,14 @@ markdown += """--- --- *Generated by audio-transcriber skill example script* -*Transcription engine: ${TRANSCRIBER} | Model: ${MODEL}* +*Transcription engine: """ + f"{transcriber} | Model: {model_name}" + """* """ # Write to file -with open("${OUTPUT_FILE}", "w") as f: +with open(os.environ["OUTPUT_FILE_ENV"], "w", encoding="utf-8") as f: f.write(markdown) -print(f"✅ Markdown report saved: ${OUTPUT_FILE}") +print(f"✅ Markdown report saved: {os.environ['OUTPUT_FILE_ENV']}") EOF # Clean up @@ -247,4 +255,3 @@ info "Next steps:" echo " 1. Review the transcription: cat $OUTPUT_FILE" echo " 2. Edit if needed: vim $OUTPUT_FILE" echo " 3. 
Share with team or archive" -EOF diff --git a/tools/scripts/tests/docs_security_content.test.js b/tools/scripts/tests/docs_security_content.test.js new file mode 100644 index 00000000..a3d953dd --- /dev/null +++ b/tools/scripts/tests/docs_security_content.test.js @@ -0,0 +1,21 @@ +const assert = require("assert"); +const fs = require("fs"); +const path = require("path"); + +const repoRoot = path.resolve(__dirname, "../..", ".."); + +const apifySkill = fs.readFileSync( + path.join(repoRoot, "skills", "apify-actorization", "SKILL.md"), + "utf8", +); +const audioExample = fs.readFileSync( + path.join(repoRoot, "skills", "audio-transcriber", "examples", "basic-transcription.sh"), + "utf8", +); + +assert.strictEqual(/\|\s*(bash|sh)\b/.test(apifySkill), false, "SKILL.md must not recommend pipe-to-shell installs"); +assert.strictEqual(/\|\s*iex\b/i.test(apifySkill), false, "SKILL.md must not recommend PowerShell pipe-to-iex installs"); +assert.strictEqual(/apify login -t\b/.test(apifySkill), false, "SKILL.md must not put tokens on the command line"); + +assert.match(audioExample, /python3 << 'EOF'/, "audio example should use a quoted heredoc for Python"); +assert.match(audioExample, /AUDIO_FILE_ENV/, "audio example should pass shell variables through the environment");