Files
antigravity-skills-reference/skills/pipecat-friday-agent/scripts/friday_agent.py
Takura 7b9b1095d0 pipecat-friday-agent — Iron Man-inspired tactical voice assistant (F.R.I.D.A.Y.) with Pipecat, Gemini, and OpenAI. (#261)
* feat: add pipecat-friday-agent skill and sync generated files

* chore: sync generated files and update changelog for pipecat-friday-agent

* fix: normalize path separators in data registry for cross-platform CI

* chore: sync generated registry files and resolve CI drift

* feat: Add initial skill catalog, aliases, and bundles data files and update README.

* fix: normalize path separators in skill utils and resolve CI drift

* feat: Introduce skill catalog, aliases, and bundles with a new utility and updated documentation.

* feat: Add new skill catalog data files for aliases, bundles, and the main catalog, and update total skill count.

* feat: Update skill and star counts, and registry sync metadata in README.md.

* chore: sync generated registry files

* chore: fix drift

---------

Co-authored-by: sck_0 <samujackson1337@gmail.com>
2026-03-10 19:13:49 +01:00

147 lines
7.1 KiB
Python

import asyncio
import os
import sys
from dotenv import load_dotenv
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask, PipelineParams
from pipecat.services.openai.stt import OpenAISTTService
from pipecat.services.openai.tts import OpenAITTSService
from pipecat.services.google.llm import GoogleLLMService
from pipecat.processors.aggregators.llm_response import LLMUserContextAggregator, LLMAssistantContextAggregator
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
# ── Config ────────────────────────────────────────────────────────────────────
# run test_audio_output.py to find your device index
# [4] Speaker (Realtek) Windows default speakers
# [6] Headphones (soundcore Space One) Bluetooth headphones
OUTPUT_DEVICE = 6  # audio output device index passed to the local transport
# "whisper-1" (classic) or "gpt-4o-transcribe" (GPT-4o powered, higher accuracy)
WHISPER_MODEL = "whisper-1"  # OpenAI STT model name
# OpenAI TTS voice — alloy, ash, coral, echo, fable, nova, onyx, sage, shimmer
# "nova" is calm and professional; "shimmer" is warm; "onyx" is deep
TTS_VOICE = "nova"  # OpenAI TTS voice name
# ── Google compatibility shim ─────────────────────────────────────────────────
# Pipecat's context aggregators use OpenAI-style {role, content} messages,
# but GoogleLLMService expects {role, parts: [{text}]}.
# These wrapper classes handle that translation.
class GoogleSafeMessage(dict):
    """A chat message stored in OpenAI style ({role, content}) that can
    render itself in the Google GenAI style ({role, parts: [{text}]}).

    Subclasses dict so code expecting a plain message mapping still works,
    while also exposing `role`/`content` as attributes.
    """

    def __init__(self, role, content):
        """Keep role/content both as dict entries and as attributes."""
        super().__init__(role=role, content=content)
        self.role = role
        self.content = content

    def to_json_dict(self):
        """Return the Google-format equivalent of this message."""
        parts = [{"text": self.content}]
        return {"role": self.role, "parts": parts}
class GoogleSafeContext:
    """Minimal LLM context holder that Pipecat's aggregators can drive.

    Stores messages as GoogleSafeMessage objects so GoogleLLMService gets
    the {role, parts} shape it expects. `tools`/`tool_choice` exist only to
    satisfy the aggregator interface; no tool calling is configured.
    """

    def __init__(self, messages=None):
        """Seed the context from an optional list of {role, content} dicts."""
        seed = messages or []
        self.messages = [GoogleSafeMessage(m['role'], m['content']) for m in seed]
        self.tools = []
        self.tool_choice = None

    def add_message(self, message):
        """Append a message; accepts a {role, content} dict or any object
        with a `.text` attribute (treated as a user utterance)."""
        if isinstance(message, dict):
            role = message.get("role", "user")
            content = message.get("content", "")
            self.messages.append(GoogleSafeMessage(role, content))
        elif hasattr(message, "text"):
            self.messages.append(GoogleSafeMessage("user", message.text))

    def get_messages(self, *args, **kwargs):
        """Return the message list (extra args accepted for API compat)."""
        return self.messages

    def get_messages_for_token_count(self):
        """Return the messages used for token accounting."""
        return self.messages

    def clear(self):
        """Drop all accumulated messages."""
        self.messages = []
# ─────────────────────────────────────────────────────────────────────────────
# Load OPENAI_API_KEY / GOOGLE_API_KEY (and anything else) from a local .env.
load_dotenv()
async def main():
    """Assemble and run the F.R.I.D.A.Y. voice pipeline.

    Flow: mic → Silero VAD → OpenAI Whisper STT → Gemini LLM → OpenAI TTS
    → speaker. Exits the process via sys.exit(1) if either required API
    key is missing from the environment. Runs until cancelled (Ctrl+C).
    """
    # Startup banner so the user can see which models are active.
    print("\n" + "="*60)
    print("🛡️ F.R.I.D.A.Y. — FULL OPENAI EDITION")
    print(f" STT: OpenAI {WHISPER_MODEL}")
    print(" LLM: Gemini 2.5 Flash")
    print(f" TTS: OpenAI TTS ({TTS_VOICE})")
    print("="*60)
    # ── API key check ─────────────────────────────────────────────────────
    openai_key = os.getenv("OPENAI_API_KEY")
    google_key = os.getenv("GOOGLE_API_KEY")
    # Fail fast with a clear message rather than erroring mid-pipeline.
    if not openai_key: print("❌ OPENAI_API_KEY missing in .env"); sys.exit(1)
    if not google_key: print("❌ GOOGLE_API_KEY missing in .env"); sys.exit(1)
    print("✅ All API keys loaded\n")
    # ── 1. Transport ──────────────────────────────────────────────────────
    # Local mic/speaker transport with Silero VAD on the input side.
    transport = LocalAudioTransport(
        params=LocalAudioTransportParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            audio_in_sample_rate=16000,
            audio_out_sample_rate=24000,  # OpenAI TTS only outputs 24kHz
            output_device_index=OUTPUT_DEVICE,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        )
    )
    # ── 2. STT — OpenAI Whisper ───────────────────────────────────────────
    # Whisper receives the full audio segment (after VAD detects silence)
    # and returns a transcript. No streaming — waits for the full utterance.
    stt = OpenAISTTService(
        api_key=openai_key,
        model=WHISPER_MODEL,
    )
    # ── 3. LLM — Gemini 2.5 Flash ─────────────────────────────────────────
    llm = GoogleLLMService(
        api_key=google_key,
        model="gemini-2.5-flash",
    )
    # ── 4. TTS — OpenAI TTS ───────────────────────────────────────────────
    # OpenAI TTS streams audio at 24kHz PCM. Change TTS_VOICE at the top of
    # the file to swap voices.
    tts = OpenAITTSService(
        api_key=openai_key,
        voice=TTS_VOICE,
        model="gpt-4o-mini-tts",
        sample_rate=24000,
    )
    # ── 5. Personality ────────────────────────────────────────────────────
    # System prompt seeds the context; aggregators append user/assistant
    # turns to the same GoogleSafeContext as the conversation proceeds.
    system_prompt = (
        "You are F.R.I.D.A.Y., a tactical support AI replacing JARVIS. "
        "Address me as 'Boss'. "
        "Be concise, soft-spoken, and focus on situational awareness. "
        "Prioritize clear data over polite formalities. "
        "If asked about status, report 'Systems nominal'."
    )
    context = GoogleSafeContext([{"role": "system", "content": system_prompt}])
    user_agg = LLMUserContextAggregator(context)
    assistant_agg = LLMAssistantContextAggregator(context)
    # ── 6. Pipeline ───────────────────────────────────────────────────────
    # Mic → VAD → Whisper STT → LLM → OpenAI TTS → Speaker
    pipeline = Pipeline([
        transport.input(),   # mic audio
        stt,                 # Whisper: audio → transcript
        user_agg,            # add transcript to context
        llm,                 # Gemini: context → response
        tts,                 # OpenAI TTS: text → speech
        transport.output(),  # speaker
        assistant_agg,       # store response in context
    ])
    task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
    runner = PipelineRunner()
    print("🎤 Ready. Speak after silence — Whisper transcribes on each pause.")
    print(" Press Ctrl+C to stop.\n")
    # Blocks here until the runner is cancelled (e.g. KeyboardInterrupt).
    await runner.run(task)
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is the normal shutdown path — exit with a friendly sign-off
        # instead of a traceback.
        print("\n👋 Systems offline.")