From 7b9b1095d08e1af7200fe92f396e143195295bf8 Mon Sep 17 00:00:00 2001 From: Takura <73918732+taksrules@users.noreply.github.com> Date: Tue, 10 Mar 2026 20:13:49 +0200 Subject: [PATCH] =?UTF-8?q?pipecat-friday-agent=20=E2=80=94=20Iron=20Man-i?= =?UTF-8?q?nspired=20tactical=20voice=20assistant=20(F.R.I.D.A.Y.)=20with?= =?UTF-8?q?=20Pipecat,=20Gemini,=20and=20OpenAI.=20(#261)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add pipecat-friday-agent skill and sync generated files * chore: sync generated files and update changelog for pipecat-friday-agent * fix: normalize path separators in data registry for cross-platform CI * chore: sync generated registry files and resolve CI drift * feat: Add initial skill catalog, aliases, and bundles data files and update README. * fix: normalize path separators in skill utils and resolve CI drift * feat: Introduce skill catalog, aliases, and bundles with a new utility and updated documentation. * feat: Add new skill catalog data files for aliases, bundles, and the main catalog, and update total skill count. * feat: Update skill and star counts, and registry sync metadata in README.md. 
* chore: sync generated registry files * chore: fix drift --------- Co-authored-by: sck_0 --- CATALOG.md | 5 +- CHANGELOG.md | 2 + README.md | 12 +- data/bundles.json | 1 + data/catalog.json | 30 +++- skills/pipecat-friday-agent/SKILL.md | 79 ++++++++++ .../scripts/friday_agent.py | 146 ++++++++++++++++++ skills_index.json | 10 ++ 8 files changed, 276 insertions(+), 9 deletions(-) create mode 100644 skills/pipecat-friday-agent/SKILL.md create mode 100644 skills/pipecat-friday-agent/scripts/friday_agent.py diff --git a/CATALOG.md b/CATALOG.md index c8ab676f..35391c64 100644 --- a/CATALOG.md +++ b/CATALOG.md @@ -2,7 +2,7 @@ Generated at: 2026-02-08T00:00:00.000Z -Total skills: 1236 +Total skills: 1237 ## architecture (80) @@ -408,7 +408,7 @@ Scope::with_data, save state, load state, serde, | `yann-lecun` | Agente que simula Yann LeCun β€” inventor das Convolutional Neural Networks, Chief AI Scientist da Meta, PrΓͺmio Turing 2018. Use quando quiser: perspectivas so... | persona, cnn, meta, ai-safety-critic, open-source | persona, cnn, meta, ai-safety-critic, open-source, yann, lecun, agente, que, simula, inventor, das | | `youtube-automation` | Automate YouTube tasks via Rube MCP (Composio): upload videos, manage playlists, search content, get analytics, and handle comments. Always search tools firs... | youtube | youtube, automation, automate, tasks, via, rube, mcp, composio, upload, videos, playlists, search | -## development (178) +## development (179) | Skill | Description | Tags | Triggers | | --- | --- | --- | --- | @@ -547,6 +547,7 @@ no matching field, parse error, widget... | makepad, reference | makepad, refere | `odoo-shopify-integration` | Connect Odoo with Shopify: sync products, inventory, orders, and customers using the Shopify API and Odoo's external API or connector modules. 
| odoo, shopify, integration | odoo, shopify, integration, connect, sync, products, inventory, orders, customers, api, external, connector | | `odoo-woocommerce-bridge` | Sync Odoo with WooCommerce: products, inventory, orders, and customers via WooCommerce REST API and Odoo external API. | odoo, woocommerce, bridge | odoo, woocommerce, bridge, sync, products, inventory, orders, customers, via, rest, api, external | | `pakistan-payments-stack` | Design and implement production-grade Pakistani payment integrations (JazzCash, Easypaisa, bank/PSP rails, optional Raast) for SaaS with PKR billing, webhook... | saas, payments, pakistan, nextjs, b2b, pkr, reconciliation | saas, payments, pakistan, nextjs, b2b, pkr, reconciliation, stack, grade, pakistani, payment, integrations | +| `pipecat-friday-agent` | Build a low-latency, Iron Man-inspired tactical voice assistant (F.R.I.D.A.Y.) using Pipecat, Gemini, and OpenAI. | pipecat, voice, gemini, openai, python | pipecat, voice, gemini, openai, python, friday, agent, low, latency, iron, man, inspired | | `playwright-java` | Scaffold, write, debug, and enhance enterprise-grade Playwright E2E tests in Java using Page Object Model, JUnit 5, Allure reporting, and parallel execution. | playwright, java, e2e-testing, junit5, page-object-model, allure, selenium-alternative | playwright, java, e2e-testing, junit5, page-object-model, allure, selenium-alternative, scaffold, write, debug, enhance, enterprise | | `product-manager-toolkit` | Comprehensive toolkit for product managers including RICE prioritization, customer interview analysis, PRD templates, discovery frameworks, and go-to-market ... | product, manager | product, manager, toolkit, managers, including, rice, prioritization, customer, interview, analysis, prd, discovery | | `python-development-python-scaffold` | You are a Python project architecture expert specializing in scaffolding production-ready Python applications. Generate complete project structures with mode... 
| python | python, development, scaffold, architecture, specializing, scaffolding, applications, generate, complete, structures, tooling, uv | diff --git a/CHANGELOG.md b/CHANGELOG.md index b4bb6b20..f8e824bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- **pipecat-friday-agent** β€” Iron Man-inspired tactical voice assistant (F.R.I.D.A.Y.) with Pipecat, Gemini, and OpenAI. + --- ## [7.4.0] - 2026-03-10 - "Planning & Dashboards" diff --git a/README.md b/README.md index 9ddb0006..a9e69461 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ - -# 🌌 Antigravity Awesome Skills: 1,236+ Agentic Skills for Claude Code, Gemini CLI, Cursor, Copilot & More + +# 🌌 Antigravity Awesome Skills: 1,237+ Agentic Skills for Claude Code, Gemini CLI, Cursor, Copilot & More -> **The Ultimate Collection of 1,236+ Universal Agentic Skills for AI Coding Assistants β€” Claude Code, Gemini CLI, Codex CLI, Antigravity IDE, GitHub Copilot, Cursor, OpenCode, AdaL** +> **The Ultimate Collection of 1,237+ Universal Agentic Skills for AI Coding Assistants β€” Claude Code, Gemini CLI, Codex CLI, Antigravity IDE, GitHub Copilot, Cursor, OpenCode, AdaL** [![GitHub stars](https://img.shields.io/badge/⭐%2021%2C000%2B%20Stars-gold?style=for-the-badge)](https://github.com/sickn33/antigravity-awesome-skills/stargazers) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) @@ -18,7 +18,7 @@ [![Web App](https://img.shields.io/badge/Web%20App-Browse%20Skills-blue)](apps/web-app) [![Buy Me a Book](https://img.shields.io/badge/Buy%20me%20a-book-d13610?logo=buymeacoffee&logoColor=white)](https://buymeacoffee.com/sickn33) -**Antigravity Awesome Skills** is a curated, battle-tested library of **1,236+ high-performance agentic skills** designed to work seamlessly across the major AI coding assistants. 
+**Antigravity Awesome Skills** is a curated, battle-tested library of **1,237+ high-performance agentic skills** designed to work seamlessly across the major AI coding assistants. **Welcome to the V7.4.0 Release!** This repository gives your agent reusable playbooks for planning, coding, debugging, testing, security review, infrastructure work, product thinking, and much more. @@ -32,7 +32,7 @@ - [🎁 Curated Collections (Bundles)](#curated-collections) - [🧭 Antigravity Workflows](#antigravity-workflows) - [πŸ“¦ Features & Categories](#features--categories) -- [πŸ“š Browse 1,236+ Skills](#browse-1236-skills) +- [πŸ“š Browse 1,237+ Skills](#browse-1237-skills) - [🀝 How to Contribute](#how-to-contribute) - [πŸ’¬ Community](#community) - [β˜• Support the Project](#support-the-project) @@ -281,7 +281,7 @@ The repository is organized into specialized domains to transform your AI into a Counts change as new skills are added. For the current full registry, see [CATALOG.md](CATALOG.md). -## Browse 1,236+ Skills +## Browse 1,237+ Skills - Open the interactive browser in [`apps/web-app`](apps/web-app). - Read the full catalog in [`CATALOG.md`](CATALOG.md). diff --git a/data/bundles.json b/data/bundles.json index 1e6b5342..83c9a40c 100644 --- a/data/bundles.json +++ b/data/bundles.json @@ -237,6 +237,7 @@ "openapi-spec-generation", "pakistan-payments-stack", "php-pro", + "pipecat-friday-agent", "plaid-fintech", "playwright-java", "polars", diff --git a/data/catalog.json b/data/catalog.json index 4bc6acf8..3c78fecb 100644 --- a/data/catalog.json +++ b/data/catalog.json @@ -1,6 +1,6 @@ { "generatedAt": "2026-02-08T00:00:00.000Z", - "total": 1236, + "total": 1237, "skills": [ { "id": "00-andruia-consultant", @@ -21598,6 +21598,34 @@ ], "path": "skills/php-pro/SKILL.md" }, + { + "id": "pipecat-friday-agent", + "name": "pipecat-friday-agent", + "description": "Build a low-latency, Iron Man-inspired tactical voice assistant (F.R.I.D.A.Y.) 
using Pipecat, Gemini, and OpenAI.", + "category": "development", + "tags": [ + "pipecat", + "voice", + "gemini", + "openai", + "python" + ], + "triggers": [ + "pipecat", + "voice", + "gemini", + "openai", + "python", + "friday", + "agent", + "low", + "latency", + "iron", + "man", + "inspired" + ], + "path": "skills/pipecat-friday-agent/SKILL.md" + }, { "id": "pipedrive-automation", "name": "pipedrive-automation", diff --git a/skills/pipecat-friday-agent/SKILL.md b/skills/pipecat-friday-agent/SKILL.md new file mode 100644 index 00000000..f61b5078 --- /dev/null +++ b/skills/pipecat-friday-agent/SKILL.md @@ -0,0 +1,79 @@ +--- +name: pipecat-friday-agent +description: "Build a low-latency, Iron Man-inspired tactical voice assistant (F.R.I.D.A.Y.) using Pipecat, Gemini, and OpenAI." +category: voice-agents +risk: safe +source: community +date_added: "2026-03-10" +tags: [pipecat, voice, gemini, openai, python] +tools: [pipecat] +--- + +# Pipecat Friday Agent + +## Overview + +This skill provides a blueprint for building **F.R.I.D.A.Y.** (Female Replacement Intelligent Digital Assistant Youth), a local voice assistant inspired by the tactical AI from the Iron Man films. It uses the **Pipecat** framework to orchestrate a low-latency pipeline: +- **STT**: OpenAI Whisper (`whisper-1`) or `gpt-4o-transcribe` +- **LLM**: Google Gemini 2.5 Flash (via a compatibility shim) +- **TTS**: OpenAI TTS (`nova` voice) +- **Transport**: Local Audio (Hardware Mic/Speakers) + +## When to Use This Skill + +- Use when you want to build a real-time, conversational voice agent. +- Use when working with the Pipecat framework for pipeline-based AI. +- Use when you need to integrate multiple providers (Google and OpenAI) into a single voice loop. +- Use when building Iron Man-themed or tactical-themed voice applications. 
+ +## How It Works + +### Step 1: Install Dependencies + +You will need the Pipecat framework, its service providers, and the local-audio extra (PyAudio, required by `LocalAudioTransport`) installed: +```bash +pip install "pipecat-ai[openai,google,silero,local]" python-dotenv +``` + +### Step 2: Configure Environment + +Create a `.env` file with your API keys: +```env +OPENAI_API_KEY=your_openai_key +GOOGLE_API_KEY=your_google_key +``` + +### Step 3: Run the Agent + +Execute the provided Python script to start the interface: +```bash +python scripts/friday_agent.py +``` + +## Core Concepts + +### Pipeline Architecture +The agent follows a linear pipeline: `Mic -> VAD -> STT -> LLM -> TTS -> Speaker`. This allows for granular control over each stage, unlike end-to-end speech-to-speech models. + +### Google Compatibility Shim +Since Google's Gemini API has a different message format than OpenAI's standard (which Pipecat aggregators expect), the script includes the `GoogleSafeContext` and `GoogleSafeMessage` classes to bridge the gap. + +## Best Practices + +- ✅ **Use Silero VAD**: It is robust for local hardware and prevents background noise from triggering the LLM. +- ✅ **Concise Prompts**: Tactical agents should give short, data-dense responses to minimize latency. +- ✅ **Sample Rate Match**: OpenAI TTS outputs at 24kHz; ensure your `audio_out_sample_rate` matches to avoid high-pitched or slowed audio. +- ❌ **No Polite Fillers**: Avoid "Hello, how can I help you today?" Instead, use "Systems nominal. Ready for commands." + +## Troubleshooting + +- **Problem:** Audio is choppy or delayed. + - **Solution:** Check your `OUTPUT_DEVICE` index. Run a script like `test_audio_output.py` to find the correct hardware index for your OS. +- **Problem:** "Validation error" for message format. + - **Solution:** Ensure the `GoogleSafeContext` shim is correctly translating OpenAI-style dicts to Gemini-style schema. + +## Related Skills + +- `@voice-agents` - General principles of voice AI. +- `@agent-tool-builder` - Add tools (Search, Lights, etc.) 
to your Friday agent. +- `@llm-architect` - Optimizing the LLM layer. diff --git a/skills/pipecat-friday-agent/scripts/friday_agent.py b/skills/pipecat-friday-agent/scripts/friday_agent.py new file mode 100644 index 00000000..3a6ba644 --- /dev/null +++ b/skills/pipecat-friday-agent/scripts/friday_agent.py @@ -0,0 +1,146 @@ +import asyncio +import os +import sys +from dotenv import load_dotenv + +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineTask, PipelineParams +from pipecat.services.openai.stt import OpenAISTTService +from pipecat.services.openai.tts import OpenAITTSService +from pipecat.services.google.llm import GoogleLLMService +from pipecat.processors.aggregators.llm_response import LLMUserContextAggregator, LLMAssistantContextAggregator +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams + +# ── Config ──────────────────────────────────────────────────────────────────── +# run test_audio_output.py to find your device index +# [4] Speaker (Realtek) Windows default speakers +# [6] Headphones (soundcore Space One) Bluetooth headphones +OUTPUT_DEVICE = 6 + +# "whisper-1" (classic) or "gpt-4o-transcribe" (GPT-4o powered, higher accuracy) +WHISPER_MODEL = "whisper-1" + +# OpenAI TTS voice β€” alloy, ash, coral, echo, fable, nova, onyx, sage, shimmer +# "nova" is calm and professional; "shimmer" is warm; "onyx" is deep +TTS_VOICE = "nova" + +# ── Google compatibility shim ───────────────────────────────────────────────── +# Pipecat's context aggregators use OpenAI-style {role, content} messages, +# but GoogleLLMService expects {role, parts: [{text}]}. +# These wrapper classes handle that translation. 
# OpenAI chat roles -> the only two roles Gemini accepts inside `contents`.
# Roles not listed here ("user", "model", ...) are passed through unchanged.
_GEMINI_ROLES = {"assistant": "model", "system": "user"}


class GoogleSafeMessage(dict):
    """One chat message, readable both OpenAI-style and Gemini-style.

    The instance *is* the dict ``{"role": role, "content": content}`` and also
    exposes both values as the attributes ``role`` and ``content``.
    ``to_json_dict()`` renders the same message in the
    ``{"role", "parts": [{"text"}]}`` shape.

    Args:
        role: OpenAI-style role name ("system", "user", "assistant", ...).
        content: Message text. ``None`` is stored as given and rendered as an
            empty string by ``to_json_dict()``.
    """

    def __init__(self, role, content):
        super().__init__(role=role, content=content)
        self.role = role
        self.content = content

    def to_json_dict(self):
        """Return the message as ``{"role": ..., "parts": [{"text": ...}]}``.

        "assistant" is emitted as "model" and "system" as "user", because
        those are the only role names valid in a Gemini payload. The stored
        dict keys and attributes keep the original OpenAI role.
        """
        role = _GEMINI_ROLES.get(self.role, self.role)
        text = "" if self.content is None else self.content
        return {"role": role, "parts": [{"text": text}]}


class GoogleSafeContext:
    """Minimal conversation store handed to the Pipecat context aggregators.

    Holds an ordered list of ``GoogleSafeMessage`` objects plus the ``tools``
    and ``tool_choice`` attributes (always empty / ``None`` here).

    Args:
        messages: Optional iterable of initial messages. Each item is accepted
            under the same rules as ``add_message()``.
    """

    def __init__(self, messages=None):
        self.messages = []
        self.tools = []
        self.tool_choice = None
        # Route the initial messages through add_message() so construction and
        # later appends apply identical defaults and type handling.
        for message in messages or ():
            self.add_message(message)

    def add_message(self, message):
        """Append one message to the conversation.

        Args:
            message: Either a dict with optional "role" (default "user") and
                "content" (default "") keys, or any object with a ``text``
                attribute, which is stored as a "user" message.

        Anything else is skipped with a warning on stderr instead of being
        dropped silently, so a missing turn can be diagnosed without crashing
        the running pipeline.
        """
        if isinstance(message, dict):
            role = message.get("role", "user")
            content = message.get("content", "")
        elif hasattr(message, "text"):
            role, content = "user", message.text
        else:
            print(
                "GoogleSafeContext: ignoring unsupported message of type "
                f"{type(message).__name__}",
                file=sys.stderr,
            )
            return
        self.messages.append(GoogleSafeMessage(role, content))

    def get_messages(self, *args, **kwargs):
        """Return the live message list; any arguments are ignored."""
        return self.messages

    def get_messages_for_token_count(self):
        """Return the live message list (same object as ``get_messages()``)."""
        return self.messages

    def clear(self):
        """Forget the conversation by rebinding ``messages`` to a new list."""
        self.messages = []


async def main():
    """Build the Mic -> STT -> LLM -> TTS -> Speaker pipeline and run it.

    Prints a banner, loads ``.env``, checks that both API keys are present
    (exiting with status 1 and one line per missing key otherwise), wires the
    local audio transport, OpenAI STT, Gemini LLM and OpenAI TTS into a single
    Pipecat pipeline, and awaits the runner until it finishes.
    """
    # Load .env here rather than at import time: the environment is only read
    # below, and importing this module should not touch process state.
    load_dotenv()

    print("\n" + "=" * 60)
    # The LLM stage is Gemini, so the banner must not claim an all-OpenAI stack.
    print("🛡️ F.R.I.D.A.Y. — OPENAI + GEMINI EDITION")
    print(f" STT: OpenAI {WHISPER_MODEL}")
    print(" LLM: Gemini 2.5 Flash")
    print(f" TTS: OpenAI TTS ({TTS_VOICE})")
    print("=" * 60)

    # ── API key check ─────────────────────────────────────────────────────────
    openai_key = os.getenv("OPENAI_API_KEY")
    google_key = os.getenv("GOOGLE_API_KEY")

    # Report every missing key in one run instead of stopping at the first.
    required = (("OPENAI_API_KEY", openai_key), ("GOOGLE_API_KEY", google_key))
    missing = [name for name, value in required if not value]
    if missing:
        for name in missing:
            print(f"❌ {name} missing in .env")
        sys.exit(1)
    print("✅ All API keys loaded\n")

    # ── 1. Transport ──────────────────────────────────────────────────────────
    transport = LocalAudioTransport(
        params=LocalAudioTransportParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            audio_in_sample_rate=16000,
            audio_out_sample_rate=24000,  # OpenAI TTS only outputs 24kHz
            output_device_index=OUTPUT_DEVICE,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        )
    )

    # ── 2. STT — OpenAI Whisper ───────────────────────────────────────────────
    # Whisper receives the full audio segment (after VAD detects silence)
    # and returns a transcript. No streaming — waits for the full utterance.
    stt = OpenAISTTService(
        api_key=openai_key,
        model=WHISPER_MODEL,
    )

    # ── 3. LLM — Gemini 2.5 Flash ─────────────────────────────────────────────
    llm = GoogleLLMService(
        api_key=google_key,
        model="gemini-2.5-flash",
    )

    # ── 4. TTS — OpenAI TTS ───────────────────────────────────────────────────
    # OpenAI TTS streams audio at 24kHz PCM. Change TTS_VOICE at the top of the file.
    tts = OpenAITTSService(
        api_key=openai_key,
        voice=TTS_VOICE,
        model="gpt-4o-mini-tts",
        sample_rate=24000,
    )

    # ── 5. Personality ────────────────────────────────────────────────────────
    system_prompt = (
        "You are F.R.I.D.A.Y., a tactical support AI replacing JARVIS. "
        "Address me as 'Boss'. "
        "Be concise, soft-spoken, and focus on situational awareness. "
        "Prioritize clear data over polite formalities. "
        "If asked about status, report 'Systems nominal'."
    )
    # Both aggregators share one context object so user transcripts and
    # assistant replies land in the same conversation history.
    context = GoogleSafeContext([{"role": "system", "content": system_prompt}])
    user_agg = LLMUserContextAggregator(context)
    assistant_agg = LLMAssistantContextAggregator(context)

    # ── 6. Pipeline ───────────────────────────────────────────────────────────
    # Mic → VAD → Whisper STT → LLM → OpenAI TTS → Speaker
    pipeline = Pipeline([
        transport.input(),   # mic audio
        stt,                 # Whisper: audio → transcript
        user_agg,            # add transcript to context
        llm,                 # Gemini: context → response
        tts,                 # OpenAI TTS: text → speech
        transport.output(),  # speaker
        assistant_agg,       # store response in context
    ])

    task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
    runner = PipelineRunner()

    print("🎤 Ready. Speak after silence — Whisper transcribes on each pause.")
    print(" Press Ctrl+C to stop.\n")
    await runner.run(task)


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is the documented way to stop; exit without a traceback.
        print("\n👋 Systems offline.")


# ── Trailing patch hunk (not part of friday_agent.py) ─────────────────────────
# The physical lines this block replaces also carried the patch's final hunk.
# It is kept here, with its line breaks restored, so that replacing those
# lines does not drop it:
#
# diff --git a/skills_index.json b/skills_index.json
# index a38d406a..af28238e 100644
# --- a/skills_index.json
# +++ b/skills_index.json
# @@ -8729,6 +8729,16 @@
#      "source": "community",
#      "date_added": "2026-02-27"
#    },
# +  {
# +    "id": "pipecat-friday-agent",
# +    "path": "skills/pipecat-friday-agent",
# +    "category": "voice-agents",
# +    "name": "pipecat-friday-agent",
# +    "description": "Build a low-latency, Iron Man-inspired tactical voice assistant (F.R.I.D.A.Y.) using Pipecat, Gemini, and OpenAI.",
# +    "risk": "safe",
# +    "source": "community",
# +    "date_added": "2026-03-10"
# +  },
#    {
#      "id": "pipedrive-automation",
#      "path": "skills/pipedrive-automation",