add voice skill

2026-04-01 10:26:17 +08:00 · 2026-04-01 10:26:17 +08:00 · 6d6c7a92ef
commit 6d6c7a92ef
parent a0e0c8c7b6
6 changed files with 231 additions and 5 deletions
--- a/agent/plugin_hook_loader.py
+++ b/agent/plugin_hook_loader.py
@ -166,11 +166,11 @@ async def _execute_command(skill_path: str, command: str, hook_type: str, config
    try:
        # 设置环境变量，传递给子进程
        env = os.environ.copy()
-        env['ASSISTANT_ID'] = getattr(config, 'bot_id', '')
-        env['USER_IDENTIFIER'] = getattr(config, 'user_identifier', '')
-        env['TRACE_ID'] = getattr(config, 'trace_id', '')
-        env['SESSION_ID'] = getattr(config, 'session_id', '')
-        env['LANGUAGE'] = getattr(config, 'language', '')
+        env['ASSISTANT_ID'] = str(getattr(config, 'bot_id', ''))
+        env['USER_IDENTIFIER'] = str(getattr(config, 'user_identifier', ''))
+        env['TRACE_ID'] = str(getattr(config, 'trace_id', ''))
+        env['SESSION_ID'] = str(getattr(config, 'session_id', ''))
+        env['LANGUAGE'] = str(getattr(config, 'language', ''))
        env['HOOK_TYPE'] = hook_type

        # 合并 config 中的自定义 shell 环境变量
--- a/routes/bot_manager.py
+++ b/routes/bot_manager.py
@ -705,6 +705,7 @@ class BotSettingsResponse(BaseModel):
    voice_speaker: Optional[str] = None  # 语音音色
    voice_system_role: Optional[str] = None  # 语音对话系统角色
    voice_speaking_style: Optional[str] = None  # 语音说话风格
+    enable_voice: bool = False  # 语音对话开关
    mcp_settings: Optional[str] = None  # MCP 服务器配置 (JSON 字符串)
    updated_at: str

@ -1881,6 +1882,7 @@ async def get_bot_settings(bot_uuid: str, authorization: Optional[str] = Header(
                voice_speaker=settings.get('voice_speaker'),
                voice_system_role=settings.get('voice_system_role'),
                voice_speaking_style=settings.get('voice_speaking_style'),
+                enable_voice=settings.get('enable_voice', False),
                mcp_settings=settings.get('mcp_settings'),
                updated_at=datetime_to_str(updated_at)
            )
@ -1998,6 +2000,8 @@ async def update_bot_settings(
        update_json['voice_system_role'] = request.voice_system_role
    if request.voice_speaking_style is not None:
        update_json['voice_speaking_style'] = request.voice_speaking_style
+    if request.enable_voice is not None:
+        update_json['enable_voice'] = request.enable_voice
    if request.mcp_settings is not None:
        update_json['mcp_settings'] = request.mcp_settings

--- a/routes/voice.py
+++ b/routes/voice.py
@ -5,6 +5,7 @@ import logging
 from typing import Optional

 from fastapi import APIRouter, WebSocket, WebSocketDisconnect
+from pydantic import BaseModel

 from services.voice_session_manager import VoiceSession
 from utils.settings import VOICE_DEFAULT_MODE
@ -13,6 +14,27 @@ logger = logging.getLogger('app')

 router = APIRouter()

+# Global message queue for broadcast feature
+_pending_messages: dict[str, list[str]] = {}
+
+
+def _get_queue_key(bot_id: str, user_identifier: str) -> str:
+    return f"{bot_id}_{user_identifier}"
+
+
+class BroadcastRequest(BaseModel):
+    bot_id: str
+    user_identifier: str
+    message: str
+
+
+@router.post("/api/v3/voice/broadcast")
+async def voice_broadcast(req: BroadcastRequest):
+    """Push a message to be spoken by an active voice session."""
+    key = _get_queue_key(req.bot_id, req.user_identifier)
+    _pending_messages.setdefault(key, []).append(req.message)
+    return {"success": True, "queued": True}
+

@router.websocket("/api/v3/voice/realtime")
 async def voice_realtime(websocket: WebSocket):
@ -111,6 +133,14 @@ async def voice_realtime(websocket: WebSocket):

                if voice_mode == "lite":
                    from services.voice_lite_session import VoiceLiteSession
+                    # Create callback for broadcast messages
+                    queue_key = _get_queue_key(bot_id, msg.get("user_identifier", ""))
+
+                    async def get_pending_message() -> Optional[str]:
+                        msgs = _pending_messages.get(queue_key, [])
+                        return msgs.pop(0) if msgs else None
+
+                    session_kwargs["get_pending_message"] = get_pending_message
                    session = VoiceLiteSession(**session_kwargs)
                    logger.info(f"[Voice] Using lite mode for bot_id={bot_id}")
                else:
@ -118,6 +148,9 @@ async def voice_realtime(websocket: WebSocket):

                try:
                    await session.start()
+                    # Clear old messages on new session connection
+                    if voice_mode == "lite":
+                        _pending_messages[queue_key] = []
                except Exception as e:
                    logger.error(f"Failed to start voice session: {e}", exc_info=True)
                    await send_json({"type": "error", "message": f"Failed to connect: {str(e)}"})
--- a/services/voice_lite_session.py
+++ b/services/voice_lite_session.py
@ -35,6 +35,7 @@ class VoiceLiteSession:
        on_llm_text: Optional[Callable[[str], Awaitable[None]]] = None,
        on_status: Optional[Callable[[str], Awaitable[None]]] = None,
        on_error: Optional[Callable[[str], Awaitable[None]]] = None,
+        get_pending_message: Optional[Callable[[], Awaitable[Optional[str]]]] = None,
    ):
        self.bot_id = bot_id
        self.session_id = session_id or str(uuid.uuid4())
@ -52,8 +53,11 @@ class VoiceLiteSession:
        self._on_llm_text = on_llm_text
        self._on_status = on_status
        self._on_error = on_error
+        self._get_pending_message = get_pending_message

        self._running = False
+        self._status: str = "ready"  # Current session status
+        self._idle_check_task: Optional[asyncio.Task] = None
        self._asr_client: Optional[StreamingASRClient] = None
        self._asr_receive_task: Optional[asyncio.Task] = None
        self._agent_task: Optional[asyncio.Task] = None
@ -86,10 +90,17 @@ class VoiceLiteSession:
        self._running = True
        await self._emit_status("ready")

+        # Start idle check task for broadcast messages
+        if self._get_pending_message:
+            self._idle_check_task = asyncio.create_task(self._idle_check_loop())
+
    async def stop(self) -> None:
        """Gracefully stop the session."""
        self._running = False

+        if self._idle_check_task and not self._idle_check_task.done():
+            self._idle_check_task.cancel()
+
        if self._vad_finish_task and not self._vad_finish_task.done():
            self._vad_finish_task.cancel()

@ -511,9 +522,49 @@ class VoiceLiteSession:
                    await self._on_audio(audio_chunk)

    async def _emit_status(self, status: str) -> None:
+        self._status = status
        if self._on_status:
            await self._on_status(status)

    async def _emit_error(self, message: str) -> None:
        if self._on_error:
            await self._on_error(message)
+
+    async def _idle_check_loop(self) -> None:
+        """Background task: check and play pending broadcast messages when idle."""
+        while self._running:
+            try:
+                await asyncio.sleep(1.0)  # Check every second
+                # Check in both "ready" and "idle" states
+                if self._status in ("ready", "idle") and self._get_pending_message:
+                    msg = await self._get_pending_message()
+                    if msg:
+                        await self.speak_text(msg)
+            except asyncio.CancelledError:
+                break
+            except Exception as e:
+                logger.warning(f"[VoiceLite] Idle check error: {e}")
+
+    async def speak_text(self, text: str) -> None:
+        """Play text directly via TTS (skip agent, used for broadcast messages)."""
+        if not text.strip():
+            return
+
+        logger.info(f"[VoiceLite] Broadcasting: '{text[:80]}'")
+        await self._emit_status("speaking")
+
+        try:
+            tts_client = StreamingTTSClient(speaker=self._speaker)
+            if self._client_sample_rate != 24000:
+                async for audio_chunk in tts_client.synthesize_raw(text):
+                    if self._on_audio:
+                        await self._on_audio(self._resample_output(audio_chunk))
+            else:
+                async for audio_chunk in tts_client.synthesize(text):
+                    if self._on_audio:
+                        await self._on_audio(audio_chunk)
+        except Exception as e:
+            logger.error(f"[VoiceLite] Broadcast TTS error: {e}", exc_info=True)
+        finally:
+            if self._running:
+                await self._emit_status("idle")
--- a/skills/voice-notification/SKILL.md
+++ b/skills/voice-notification/SKILL.md
@ -0,0 +1,57 @@
+---
+name: voice-notification
+description: Voice Notification - Push voice broadcast messages to active voice sessions for real-time TTS playback
+---
+
+# Voice Notification - Voice Broadcast
+
+Push voice broadcast messages to users' active voice sessions. The message will be played via TTS when the session is in idle state.
+
+## Quick Start
+
+When a user requests to send a voice notification:
+1. Compose the message content
+2. Call voice_notify.py to send the broadcast
+
+## Instructions
+
+### Tool Path
+
+```bash
+python {skill_dir}/scripts/voice_notify.py broadcast --message "Your message here"
+```
+
+### Parameters
+
+| Parameter | Required | Description |
+|-----------|----------|-------------|
+| `--message` | Yes | The message content to be spoken via TTS |
+
+### Response
+
+- Success: `{"success": true, "queued": true}`
+- Error: `{"success": false, "error": "..."}`
+
+## Examples
+
+**User**: "Send a voice notification: the meeting is starting"
+
+```bash
+python {skill_dir}/scripts/voice_notify.py broadcast \
+  --message "The meeting is starting soon, please get ready"
+```
+
+**User**: "Notify me via voice that my coffee is ready"
+
+```bash
+python {skill_dir}/scripts/voice_notify.py broadcast \
+  --message "Your coffee is ready, please come pick it up"
+```
+
+## Guidelines
+
+- The target user must have an active voice session connected to `/api/v3/voice/realtime`
+- The voice session must be in lite mode (`voice_mode: "lite"`)
+- Messages are queued and played when the session enters idle state
+- Keep messages concise for better TTS experience
+- Message language should match the user's preferred language
--- a/skills/voice-notification/scripts/voice_notify.py
+++ b/skills/voice-notification/scripts/voice_notify.py
@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+"""Voice notification script for broadcasting messages to active voice sessions."""
+
+import argparse
+import json
+import os
+import sys
+from urllib.request import Request, urlopen
+from urllib.error import URLError, HTTPError
+
+# Default API endpoint
+DEFAULT_API_URL = "http://localhost:8001/api/v3/voice/broadcast"
+
+
+def broadcast_message(message: str, api_url: str = DEFAULT_API_URL) -> dict:
+    """Send a broadcast message to the voice API.
+
+    Args:
+        message: The message content to be spoken
+        api_url: The API endpoint URL
+
+    Returns:
+        Response dict from the API
+    """
+    bot_id = os.environ.get("BOT_ID", "")
+    user_identifier = os.environ.get("USER_IDENTIFIER", "")
+
+    if not bot_id:
+        return {"success": False, "error": "BOT_ID environment variable not set"}
+    if not user_identifier:
+        return {"success": False, "error": "USER_IDENTIFIER environment variable not set"}
+
+    payload = {
+        "bot_id": bot_id,
+        "user_identifier": user_identifier,
+        "message": message
+    }
+
+    req = Request(
+        api_url,
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+        method="POST"
+    )
+
+    try:
+        with urlopen(req, timeout=10) as response:
+            return json.loads(response.read().decode("utf-8"))
+    except HTTPError as e:
+        return {"success": False, "error": f"HTTP {e.code}: {e.reason}"}
+    except URLError as e:
+        return {"success": False, "error": f"Connection error: {e.reason}"}
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Voice notification broadcast tool")
+    subparsers = parser.add_subparsers(dest="command", help="Available commands")
+
+    # Broadcast command
+    broadcast_parser = subparsers.add_parser("broadcast", help="Broadcast a message to active voice session")
+    broadcast_parser.add_argument("--message", required=True, help="Message content to be spoken")
+    broadcast_parser.add_argument("--api-url", default=DEFAULT_API_URL, help="API endpoint URL")
+
+    args = parser.parse_args()
+
+    if args.command == "broadcast":
+        result = broadcast_message(
+            message=args.message,
+            api_url=args.api_url
+        )
+        print(json.dumps(result, ensure_ascii=False, indent=2))
+        sys.exit(0 if result.get("success") else 1)
+    else:
+        parser.print_help()
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()