- Add audio-transcriber skill (v1.2.0): Transform audio to Markdown with Whisper - Add youtube-summarizer skill (v1.2.0): Generate summaries from YouTube videos - Update prompt-engineer skill: Enhanced with 11 optimization frameworks - Update skill-creator skill: Improved automation workflow All skills are zero-config, cross-platform (Claude Code, Copilot CLI, Codex) and follow Quality Bar V4 standards. Source: https://github.com/ericgandrade/cli-ai-skills
251 lines
6.3 KiB
Bash
Executable File
251 lines
6.3 KiB
Bash
Executable File
#!/usr/bin/env bash
|
||
|
||
# Basic Audio Transcription Example
|
||
# Demonstrates how to use the audio-transcriber skill manually
|
||
|
||
set -euo pipefail
|
||
|
||
# Configuration
|
||
AUDIO_FILE="${1:-}"
|
||
MODEL="${MODEL:-base}" # Options: tiny, base, small, medium, large
|
||
OUTPUT_FORMAT="${OUTPUT_FORMAT:-markdown}" # Options: markdown, txt, srt, vtt, json
|
||
|
||
# Colors for output
|
||
RED='\033[0;31m'
|
||
GREEN='\033[0;32m'
|
||
YELLOW='\033[1;33m'
|
||
BLUE='\033[0;34m'
|
||
NC='\033[0m' # No Color
|
||
|
||
# Helper functions
|
||
error() {
|
||
echo -e "${RED}❌ Error: $1${NC}" >&2
|
||
exit 1
|
||
}
|
||
|
||
success() {
|
||
echo -e "${GREEN}✅ $1${NC}"
|
||
}
|
||
|
||
info() {
|
||
echo -e "${BLUE}ℹ️ $1${NC}"
|
||
}
|
||
|
||
warn() {
|
||
echo -e "${YELLOW}⚠️ $1${NC}"
|
||
}
|
||
|
||
# Check if audio file is provided
|
||
if [[ -z "$AUDIO_FILE" ]]; then
|
||
error "Usage: $0 <audio_file>"
|
||
fi
|
||
|
||
# Verify file exists
|
||
if [[ ! -f "$AUDIO_FILE" ]]; then
|
||
error "File not found: $AUDIO_FILE"
|
||
fi
|
||
|
||
# Step 0: Discovery - Check for transcription tools
|
||
info "Step 0: Discovering transcription tools..."
|
||
|
||
TRANSCRIBER=""
|
||
if python3 -c "import faster_whisper" 2>/dev/null; then
|
||
TRANSCRIBER="faster-whisper"
|
||
success "Faster-Whisper detected (optimized)"
|
||
elif python3 -c "import whisper" 2>/dev/null; then
|
||
TRANSCRIBER="whisper"
|
||
success "OpenAI Whisper detected"
|
||
else
|
||
error "No transcription tool found. Install with: pip install faster-whisper"
|
||
fi
|
||
|
||
# Check for ffmpeg
|
||
if command -v ffmpeg &>/dev/null; then
|
||
success "ffmpeg available (format conversion enabled)"
|
||
else
|
||
warn "ffmpeg not found (limited format support)"
|
||
fi
|
||
|
||
# Step 1: Extract metadata
|
||
info "Step 1: Extracting audio metadata..."
|
||
|
||
FILE_SIZE=$(du -h "$AUDIO_FILE" | cut -f1)
|
||
info "File size: $FILE_SIZE"
|
||
|
||
# Get duration if ffprobe is available
|
||
if command -v ffprobe &>/dev/null; then
|
||
DURATION=$(ffprobe -v error -show_entries format=duration \
|
||
-of default=noprint_wrappers=1:nokey=1 "$AUDIO_FILE" 2>/dev/null || echo "0")
|
||
|
||
# Convert to HH:MM:SS
|
||
if command -v date &>/dev/null; then
|
||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||
# macOS
|
||
DURATION_HMS=$(date -u -r "${DURATION%.*}" +%H:%M:%S 2>/dev/null || echo "Unknown")
|
||
else
|
||
# Linux
|
||
DURATION_HMS=$(date -u -d @"${DURATION%.*}" +%H:%M:%S 2>/dev/null || echo "Unknown")
|
||
fi
|
||
else
|
||
DURATION_HMS="Unknown"
|
||
fi
|
||
|
||
info "Duration: $DURATION_HMS"
|
||
else
|
||
warn "ffprobe not found - cannot extract duration"
|
||
DURATION="0"
|
||
DURATION_HMS="Unknown"
|
||
fi
|
||
|
||
# Check file size warning
|
||
SIZE_MB=$(du -m "$AUDIO_FILE" | cut -f1)
|
||
if [[ $SIZE_MB -gt 25 ]]; then
|
||
warn "Large file ($FILE_SIZE) - processing may take several minutes"
|
||
read -p "Continue? [Y/n]: " CONTINUE
|
||
if [[ "$CONTINUE" =~ ^[Nn] ]]; then
|
||
info "Transcription cancelled"
|
||
exit 0
|
||
fi
|
||
fi
|
||
|
||
# Step 2: Transcribe using Python
|
||
info "Step 2: Transcribing audio..."
|
||
|
||
OUTPUT_FILE="${AUDIO_FILE%.*}.md"
|
||
TEMP_JSON="/tmp/transcription_$$.json"
|
||
|
||
python3 << EOF
|
||
import sys
|
||
import json
|
||
from datetime import datetime
|
||
|
||
try:
|
||
if "$TRANSCRIBER" == "faster-whisper":
|
||
from faster_whisper import WhisperModel
|
||
model = WhisperModel("$MODEL", device="cpu", compute_type="int8")
|
||
segments, info = model.transcribe("$AUDIO_FILE", language=None, vad_filter=True)
|
||
|
||
data = {
|
||
"language": info.language,
|
||
"language_probability": round(info.language_probability, 2),
|
||
"duration": info.duration,
|
||
"segments": []
|
||
}
|
||
|
||
for segment in segments:
|
||
data["segments"].append({
|
||
"start": round(segment.start, 2),
|
||
"end": round(segment.end, 2),
|
||
"text": segment.text.strip()
|
||
})
|
||
else:
|
||
import whisper
|
||
model = whisper.load_model("$MODEL")
|
||
result = model.transcribe("$AUDIO_FILE")
|
||
|
||
data = {
|
||
"language": result["language"],
|
||
"duration": result["segments"][-1]["end"] if result["segments"] else 0,
|
||
"segments": result["segments"]
|
||
}
|
||
|
||
with open("$TEMP_JSON", "w") as f:
|
||
json.dump(data, f)
|
||
|
||
print(f"✅ Language detected: {data['language']}")
|
||
print(f"📝 Transcribed {len(data['segments'])} segments")
|
||
|
||
except Exception as e:
|
||
print(f"❌ Error: {e}", file=sys.stderr)
|
||
sys.exit(1)
|
||
EOF
|
||
|
||
# Check if transcription succeeded
|
||
if [[ ! -f "$TEMP_JSON" ]]; then
|
||
error "Transcription failed"
|
||
fi
|
||
|
||
# Step 3: Generate Markdown output
|
||
info "Step 3: Generating Markdown report..."
|
||
|
||
python3 << 'EOF'
|
||
import json
|
||
import sys
|
||
from datetime import datetime
|
||
|
||
# Load transcription data
|
||
with open("${TEMP_JSON}") as f:
|
||
data = json.load(f)
|
||
|
||
# Prepare metadata
|
||
filename = "${AUDIO_FILE}".split("/")[-1]
|
||
file_size = "${FILE_SIZE}"
|
||
duration_hms = "${DURATION_HMS}"
|
||
language = data["language"]
|
||
process_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||
num_segments = len(data["segments"])
|
||
|
||
# Generate Markdown
|
||
markdown = f"""# Audio Transcription Report
|
||
|
||
## 📊 Metadata
|
||
|
||
| Field | Value |
|
||
|-------|-------|
|
||
| **File Name** | {filename} |
|
||
| **File Size** | {file_size} |
|
||
| **Duration** | {duration_hms} |
|
||
| **Language** | {language.upper()} |
|
||
| **Processed Date** | {process_date} |
|
||
| **Segments** | {num_segments} |
|
||
| **Transcription Engine** | ${TRANSCRIBER} (model: ${MODEL}) |
|
||
|
||
---
|
||
|
||
## 🎙️ Full Transcription
|
||
|
||
"""
|
||
|
||
# Add transcription with timestamps
|
||
for seg in data["segments"]:
|
||
start_time = f"{int(seg['start'] // 60):02d}:{int(seg['start'] % 60):02d}"
|
||
end_time = f"{int(seg['end'] // 60):02d}:{int(seg['end'] % 60):02d}"
|
||
markdown += f"**[{start_time} → {end_time}]** \n{seg['text']}\n\n"
|
||
|
||
markdown += """---
|
||
|
||
## 📝 Summary
|
||
|
||
*Automatic summary generation requires AI integration (Claude/GPT).*
|
||
*For now, review the full transcription above.*
|
||
|
||
---
|
||
|
||
*Generated by audio-transcriber skill example script*
|
||
*Transcription engine: ${TRANSCRIBER} | Model: ${MODEL}*
|
||
"""
|
||
|
||
# Write to file
|
||
with open("${OUTPUT_FILE}", "w") as f:
|
||
f.write(markdown)
|
||
|
||
print(f"✅ Markdown report saved: ${OUTPUT_FILE}")
|
||
EOF
|
||
|
||
# Clean up
|
||
rm -f "$TEMP_JSON"
|
||
|
||
# Step 4: Display summary
|
||
success "Transcription complete!"
|
||
echo ""
|
||
echo "📊 Results:"
|
||
echo " Output file: $OUTPUT_FILE"
|
||
echo " Transcription engine: $TRANSCRIBER"
|
||
echo " Model: $MODEL"
|
||
echo ""
|
||
info "Next steps:"
|
||
echo " 1. Review the transcription: cat $OUTPUT_FILE"
|
||
echo " 2. Edit if needed: vim $OUTPUT_FILE"
|
||
echo " 3. Share with team or archive"
|
||
EOF
|