Files
antigravity-skills-reference/skills/audio-transcriber/examples/basic-transcription.sh
Eric Andrade 801c8fa475 feat: add 4 universal skills from cli-ai-skills
- Add audio-transcriber skill (v1.2.0): Transform audio to Markdown with Whisper
- Add youtube-summarizer skill (v1.2.0): Generate summaries from YouTube videos
- Update prompt-engineer skill: Enhanced with 11 optimization frameworks
- Update skill-creator skill: Improved automation workflow

All skills are zero-config, cross-platform (Claude Code, Copilot CLI, Codex)
and follow Quality Bar V4 standards.

Source: https://github.com/ericgandrade/cli-ai-skills
2026-02-04 17:37:45 -03:00

251 lines
6.3 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# Basic Audio Transcription Example
# Demonstrates how to use the audio-transcriber skill manually
# Strict mode: abort on command errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ---- Configuration ---------------------------------------------------------
# Audio file path comes from the first positional argument (empty if absent).
AUDIO_FILE="${1-}"
# MODEL and OUTPUT_FORMAT keep a non-empty value inherited from the
# environment and otherwise fall back to the defaults below.
: "${MODEL:=base}" # Options: tiny, base, small, medium, large
# NOTE(review): no later step visible in this script reads OUTPUT_FORMAT -
# confirm whether formats other than Markdown are actually supported.
: "${OUTPUT_FORMAT:=markdown}" # Options: markdown, txt, srt, vtt, json

# ---- Colors for output -----------------------------------------------------
# Stored as literal backslash sequences, not as raw escape bytes.
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Helper functions
# Report a fatal problem and stop the script.
# Globals:   RED, NC (read)
# Arguments: $1 - message text
# Outputs:   the coloured message on stderr
# Returns:   never; exits the shell with status 1
error() {
  local message="$1"
  >&2 echo -e "${RED}❌ Error: ${message}${NC}"
  exit 1
}
# Print a success message in green.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text
# Outputs:   the coloured message on stdout
success() {
  local message="$1"
  echo -e "${GREEN}${message}${NC}"
}
# Print an informational message in blue.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text
# Outputs:   the coloured message on stdout
info() {
  local message="$1"
  echo -e "${BLUE} ${message}${NC}"
}
# Print a warning in yellow. The script keeps running afterwards.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text
# Outputs:   the coloured message on stdout
warn() {
  local message="$1"
  echo -e "${YELLOW}⚠️ ${message}${NC}"
}
# Input validation (guard clauses; error() exits the script):
#   1. an audio file argument must have been supplied
#   2. it must name an existing regular file
[[ -n "$AUDIO_FILE" ]] || error "Usage: $0 <audio_file>"
[[ -f "$AUDIO_FILE" ]] || error "File not found: $AUDIO_FILE"
# Step 0: Discovery - Check for transcription tools
info "Step 0: Discovering transcription tools..."

# Succeeds when python3 can import the module named in $1.
# Import errors (and a missing python3) are silenced and reported as failure.
python_can_import() {
  python3 -c "import $1" 2>/dev/null
}

# Prefer faster-whisper, fall back to openai-whisper, otherwise give up.
TRANSCRIBER=""
if python_can_import faster_whisper; then
  TRANSCRIBER="faster-whisper"
  success "Faster-Whisper detected (optimized)"
elif python_can_import whisper; then
  TRANSCRIBER="whisper"
  success "OpenAI Whisper detected"
else
  error "No transcription tool found. Install with: pip install faster-whisper"
fi

# Check for ffmpeg; a missing ffmpeg only produces a warning.
if ! command -v ffmpeg &>/dev/null; then
  warn "ffmpeg not found (limited format support)"
else
  success "ffmpeg available (format conversion enabled)"
fi
# Step 1: Extract metadata
info "Step 1: Extracting audio metadata..."

#######################################
# Convert a duration in seconds to HH:MM:SS using shell arithmetic only.
# Unlike a `date`-based conversion this needs no GNU/BSD-specific flags and
# does not wrap around after 24 hours.
# Arguments: $1 - duration in seconds, optionally fractional ("3661.52")
# Outputs:   HH:MM:SS on stdout, or "Unknown" when the integer part of $1
#            is not a plain number (empty, "N/A", ...)
#######################################
format_duration() {
  local whole="${1%%.*}"
  if [[ ! "$whole" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "Unknown"
    return 0
  fi
  # Force base 10 so a value with a leading zero is not parsed as octal.
  local total=$((10#$whole))
  printf '%02d:%02d:%02d\n' \
    "$((total / 3600))" "$((total % 3600 / 60))" "$((total % 60))"
}

FILE_SIZE=$(du -h "$AUDIO_FILE" | cut -f1)
info "File size: $FILE_SIZE"

# Get duration if ffprobe is available
if command -v ffprobe &>/dev/null; then
  if DURATION=$(ffprobe -v error -show_entries format=duration \
    -of default=noprint_wrappers=1:nokey=1 "$AUDIO_FILE" 2>/dev/null); then
    DURATION_HMS=$(format_duration "$DURATION")
  else
    # ffprobe could not read the file: report "Unknown" rather than a
    # misleading 00:00:00.
    DURATION="0"
    DURATION_HMS="Unknown"
  fi
  info "Duration: $DURATION_HMS"
else
  warn "ffprobe not found - cannot extract duration"
  DURATION="0"
  DURATION_HMS="Unknown"
fi
# Check file size warning
# Files larger than 25 MB trigger a warning and a confirmation prompt.
# Any answer that does not start with "n" or "N" (including an empty one)
# continues.
SIZE_MB=$(du -m "$AUDIO_FILE" | cut -f1)
if ((SIZE_MB > 25)); then
  warn "Large file ($FILE_SIZE) - processing may take several minutes"
  CONTINUE=""
  # -r keeps backslashes literal. `|| true` stops an EOF on stdin (cron, CI,
  # closed pipe) from aborting the script under `set -e`; the default answer
  # (continue) then applies.
  read -r -p "Continue? [Y/n]: " CONTINUE || true
  if [[ "$CONTINUE" =~ ^[Nn] ]]; then
    info "Transcription cancelled"
    exit 0
  fi
fi
# Step 2: Transcribe using Python
info "Step 2: Transcribing audio..."

#######################################
# Derive the Markdown report path for an audio file: same directory, same
# base name, extension replaced by ".md". Only the file name is inspected
# for an extension, so a dot in a directory name is never cut.
# Arguments: $1 - path to the audio file
# Outputs:   the report path on stdout
#######################################
markdown_path_for() {
  local path="$1"
  local base="${path##*/}"
  local dir="${path%"$base"}"
  local stem="${base%.*}"
  # Dot-files such as ".voice" have an empty stem; keep the whole name then.
  [[ -n "$stem" ]] || stem="$base"
  printf '%s\n' "${dir}${stem}.md"
}

OUTPUT_FILE=$(markdown_path_for "$AUDIO_FILE")

# Use an unpredictable temp file and remove it on every exit path, including
# failures further down the script.
TEMP_JSON=$(mktemp "${TMPDIR:-/tmp}/transcription.XXXXXX") ||
  error "Could not create a temporary file"
trap 'rm -f -- "$TEMP_JSON"' EXIT

# The heredoc delimiter is quoted, so the shell expands nothing inside the
# Python source. Values travel through the environment instead, which keeps
# file names containing quotes, backslashes or "$" from corrupting the code.
if ! TRANSCRIBER="$TRANSCRIBER" MODEL="$MODEL" AUDIO_FILE="$AUDIO_FILE" \
  TEMP_JSON="$TEMP_JSON" python3 - << 'PYEOF'
import json
import os
import sys

engine = os.environ["TRANSCRIBER"]
model_name = os.environ["MODEL"]
audio_path = os.environ["AUDIO_FILE"]
json_path = os.environ["TEMP_JSON"]

try:
    if engine == "faster-whisper":
        from faster_whisper import WhisperModel

        model = WhisperModel(model_name, device="cpu", compute_type="int8")
        segments, info = model.transcribe(audio_path, language=None, vad_filter=True)
        data = {
            "language": info.language,
            "language_probability": round(info.language_probability, 2),
            "duration": info.duration,
            "segments": [],
        }
        for segment in segments:
            data["segments"].append({
                "start": round(segment.start, 2),
                "end": round(segment.end, 2),
                "text": segment.text.strip(),
            })
    else:
        import whisper

        model = whisper.load_model(model_name)
        result = model.transcribe(audio_path)
        data = {
            "language": result["language"],
            "duration": result["segments"][-1]["end"] if result["segments"] else 0,
            "segments": result["segments"],
        }

    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(data, f)

    print(f"✅ Language detected: {data['language']}")
    print(f"📝 Transcribed {len(data['segments'])} segments")
except Exception as e:
    print(f"❌ Error: {e}", file=sys.stderr)
    sys.exit(1)
PYEOF
then
  error "Transcription failed"
fi

# Check if transcription succeeded: mktemp already created the file, so test
# for content rather than for existence.
if [[ ! -s "$TEMP_JSON" ]]; then
  error "Transcription failed"
fi
# Step 3: Generate Markdown output

#######################################
# Render the transcription JSON as a Markdown report.
# The Python source sits in a quoted heredoc, so the shell expands nothing
# inside it; every value is handed over through the environment of the
# python3 process and read with os.environ.
# Globals (read): TEMP_JSON, AUDIO_FILE, FILE_SIZE, DURATION_HMS,
#                 TRANSCRIBER, MODEL, OUTPUT_FILE
# Outputs:        writes the report to OUTPUT_FILE; progress on stdout
# Returns:        the exit status of python3
#######################################
generate_markdown_report() {
  info "Step 3: Generating Markdown report..."
  TEMP_JSON="${TEMP_JSON}" AUDIO_FILE="${AUDIO_FILE}" \
    FILE_SIZE="${FILE_SIZE}" DURATION_HMS="${DURATION_HMS}" \
    TRANSCRIBER="${TRANSCRIBER}" MODEL="${MODEL}" \
    OUTPUT_FILE="${OUTPUT_FILE}" python3 - << 'PYEOF'
import json
import os
from datetime import datetime

env = os.environ
engine = env["TRANSCRIBER"]
model_name = env["MODEL"]
output_path = env["OUTPUT_FILE"]

# Load transcription data
with open(env["TEMP_JSON"], encoding="utf-8") as f:
    data = json.load(f)

# Prepare metadata
filename = os.path.basename(env["AUDIO_FILE"])
file_size = env["FILE_SIZE"]
duration_hms = env["DURATION_HMS"]
language = data["language"]
process_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
num_segments = len(data["segments"])


def mmss(seconds):
    """Format a number of seconds as MM:SS (minutes are not capped at 59)."""
    return f"{int(seconds // 60):02d}:{int(seconds % 60):02d}"


# Generate Markdown
parts = [f"""# Audio Transcription Report

## 📊 Metadata

| Field | Value |
|-------|-------|
| **File Name** | {filename} |
| **File Size** | {file_size} |
| **Duration** | {duration_hms} |
| **Language** | {language.upper()} |
| **Processed Date** | {process_date} |
| **Segments** | {num_segments} |
| **Transcription Engine** | {engine} (model: {model_name}) |

---

## 🎙️ Full Transcription

"""]

# Add transcription with timestamps. The two spaces before the newline are
# a Markdown hard line break between the timestamp and the text.
for seg in data["segments"]:
    parts.append(
        f"**[{mmss(seg['start'])} → {mmss(seg['end'])}]**  \n{seg['text']}\n\n"
    )

parts.append(f"""---

## 📝 Summary

*Automatic summary generation requires AI integration (Claude/GPT).*
*For now, review the full transcription above.*

---

*Generated by audio-transcriber skill example script*
*Transcription engine: {engine} | Model: {model_name}*
""")

# Write to file; UTF-8 is forced because the report contains emoji.
with open(output_path, "w", encoding="utf-8") as f:
    f.write("".join(parts))

print(f"✅ Markdown report saved: {output_path}")
PYEOF
}

generate_markdown_report
# Clean up the intermediate JSON produced by the transcription step.
rm -f -- "$TEMP_JSON"

# Step 4: Display summary
success "Transcription complete!"
echo ""
echo "📊 Results:"
echo " Output file: $OUTPUT_FILE"
echo " Transcription engine: $TRANSCRIBER"
echo " Model: $MODEL"
echo ""
info "Next steps:"
# %q shell-quotes the path so the suggested commands can be pasted as-is even
# when the file name contains spaces or other special characters.
printf ' 1. Review the transcription: cat %q\n' "$OUTPUT_FILE"
printf ' 2. Edit if needed: vim %q\n' "$OUTPUT_FILE"
echo " 3. Share with team or archive"