fix: remove unsafe shell guidance

2026-03-15 08:41:29 +01:00
parent a8b1e88f11
commit e677e29015
3 changed files with 51 additions and 23 deletions
--- a/skills/apify-actorization/SKILL.md
+++ b/skills/apify-actorization/SKILL.md
@@ -33,11 +33,10 @@ apify --help
 If not installed:

 ```bash
-curl -fsSL https://apify.com/install-cli.sh | bash
+brew install apify-cli  # macOS/Linux with Homebrew

-# Or (Mac): brew install apify-cli
-# Or (Windows): irm https://apify.com/install-cli.ps1 | iex
 # Or: npm install -g apify-cli
+# Or install from an official release package that your OS package manager verifies
 ```

 Verify CLI is logged in:
@@ -49,7 +48,8 @@ apify info  # Should return your username
 If not logged in, check if `APIFY_TOKEN` environment variable is defined. If not, ask the user to generate one at https://console.apify.com/settings/integrations, then:

 ```bash
-apify login -t $APIFY_TOKEN
+export APIFY_TOKEN="your_token_here"
+apify login
 ```

 ## Actorization Checklist
--- a/skills/audio-transcriber/examples/basic-transcription.sh
+++ b/skills/audio-transcriber/examples/basic-transcription.sh
@@ -112,18 +112,24 @@ fi
 info "Step 2: Transcribing audio..."

 OUTPUT_FILE="${AUDIO_FILE%.*}.md"
-TEMP_JSON="/tmp/transcription_$$.json"
+TEMP_JSON="$(mktemp "${TMPDIR:-/tmp}/transcription.XXXXXX")"  # X's must end the template: BSD/macOS mktemp does not expand them before a suffix

-python3 << EOF
+AUDIO_FILE_ENV="$AUDIO_FILE" MODEL_ENV="$MODEL" TRANSCRIBER_ENV="$TRANSCRIBER" TEMP_JSON_ENV="$TEMP_JSON" python3 << 'EOF'
+import os
 import sys
 import json
 from datetime import datetime

 try:
-    if "$TRANSCRIBER" == "faster-whisper":
+    audio_file = os.environ["AUDIO_FILE_ENV"]
+    model_name = os.environ["MODEL_ENV"]
+    transcriber = os.environ["TRANSCRIBER_ENV"]
+    temp_json = os.environ["TEMP_JSON_ENV"]
+
+    if transcriber == "faster-whisper":
        from faster_whisper import WhisperModel
-        model = WhisperModel("$MODEL", device="cpu", compute_type="int8")
-        segments, info = model.transcribe("$AUDIO_FILE", language=None, vad_filter=True)
+        model = WhisperModel(model_name, device="cpu", compute_type="int8")
+        segments, info = model.transcribe(audio_file, language=None, vad_filter=True)
        
        data = {
            "language": info.language,
@@ -140,8 +146,8 @@ try:
            })
    else:
        import whisper
-        model = whisper.load_model("$MODEL")
-        result = model.transcribe("$AUDIO_FILE")
+        model = whisper.load_model(model_name)
+        result = model.transcribe(audio_file)
        
        data = {
            "language": result["language"],
@@ -149,7 +155,7 @@ try:
            "segments": result["segments"]
        }
    
-    with open("$TEMP_JSON", "w") as f:
+    with open(temp_json, "w", encoding="utf-8") as f:
        json.dump(data, f)
    
    print(f"✅ Language detected: {data['language']}")
@@ -168,22 +174,24 @@ fi
 # Step 3: Generate Markdown output
 info "Step 3: Generating Markdown report..."

-python3 << 'EOF'
+AUDIO_FILE_ENV="$AUDIO_FILE" FILE_SIZE_ENV="$FILE_SIZE" DURATION_HMS_ENV="$DURATION_HMS" TRANSCRIBER_ENV="$TRANSCRIBER" MODEL_ENV="$MODEL" TEMP_JSON_ENV="$TEMP_JSON" OUTPUT_FILE_ENV="$OUTPUT_FILE" python3 << 'EOF'
 import json
-import sys
+import os
 from datetime import datetime

 # Load transcription data
-with open("${TEMP_JSON}") as f:
+with open(os.environ["TEMP_JSON_ENV"], encoding="utf-8") as f:
    data = json.load(f)

 # Prepare metadata
-filename = "${AUDIO_FILE}".split("/")[-1]
-file_size = "${FILE_SIZE}"
-duration_hms = "${DURATION_HMS}"
+filename = os.path.basename(os.environ["AUDIO_FILE_ENV"])
+file_size = os.environ["FILE_SIZE_ENV"]
+duration_hms = os.environ["DURATION_HMS_ENV"]
 language = data["language"]
 process_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 num_segments = len(data["segments"])
+transcriber = os.environ["TRANSCRIBER_ENV"]
+model_name = os.environ["MODEL_ENV"]

 # Generate Markdown
 markdown = f"""# Audio Transcription Report
@@ -198,7 +206,7 @@ markdown = f"""# Audio Transcription Report
 | **Language** | {language.upper()} |
 | **Processed Date** | {process_date} |
 | **Segments** | {num_segments} |
-| **Transcription Engine** | ${TRANSCRIBER} (model: ${MODEL}) |
+| **Transcription Engine** | {transcriber} (model: {model_name}) |

 ---

@@ -222,14 +230,14 @@ markdown += """---
 ---

 *Generated by audio-transcriber skill example script*  
-*Transcription engine: ${TRANSCRIBER} | Model: ${MODEL}*
+*Transcription engine: {transcriber} | Model: {model_name}*
 """

 # Write to file
-with open("${OUTPUT_FILE}", "w") as f:
+with open(os.environ["OUTPUT_FILE_ENV"], "w", encoding="utf-8") as f:
    f.write(markdown)

-print(f"✅ Markdown report saved: ${OUTPUT_FILE}")
+print(f"✅ Markdown report saved: {os.environ['OUTPUT_FILE_ENV']}")
 EOF

 # Clean up
@@ -247,4 +255,3 @@ info "Next steps:"
 echo "  1. Review the transcription: cat $OUTPUT_FILE"
 echo "  2. Edit if needed: vim $OUTPUT_FILE"
 echo "  3. Share with team or archive"
-EOF
--- a/tools/scripts/tests/docs_security_content.test.js
+++ b/tools/scripts/tests/docs_security_content.test.js
@@ -0,0 +1,21 @@
+const assert = require("assert");
+const fs = require("fs");
+const path = require("path");
+
+const repoRoot = path.resolve(__dirname, "../..", "..");
+
+const apifySkill = fs.readFileSync(
+  path.join(repoRoot, "skills", "apify-actorization", "SKILL.md"),
+  "utf8",
+);
+const audioExample = fs.readFileSync(
+  path.join(repoRoot, "skills", "audio-transcriber", "examples", "basic-transcription.sh"),
+  "utf8",
+);
+
+assert.strictEqual(/\|\s*(bash|sh)\b/.test(apifySkill), false, "SKILL.md must not recommend pipe-to-shell installs");
+assert.strictEqual(/\|\s*iex\b/i.test(apifySkill), false, "SKILL.md must not recommend PowerShell pipe-to-iex installs");
+assert.strictEqual(/apify login -t\b/.test(apifySkill), false, "SKILL.md must not put tokens on the command line");
+
+assert.match(audioExample, /python3 << 'EOF'/, "audio example should use a quoted heredoc for Python");
+assert.match(audioExample, /AUDIO_FILE_ENV/, "audio example should pass shell variables through the environment");