diff --git a/agent/plugin_hook_loader.py b/agent/plugin_hook_loader.py index 7be815c..ffc1fdc 100644 --- a/agent/plugin_hook_loader.py +++ b/agent/plugin_hook_loader.py @@ -166,11 +166,11 @@ async def _execute_command(skill_path: str, command: str, hook_type: str, config try: # 设置环境变量,传递给子进程 env = os.environ.copy() - env['ASSISTANT_ID'] = getattr(config, 'bot_id', '') - env['USER_IDENTIFIER'] = getattr(config, 'user_identifier', '') - env['TRACE_ID'] = getattr(config, 'trace_id', '') - env['SESSION_ID'] = getattr(config, 'session_id', '') - env['LANGUAGE'] = getattr(config, 'language', '') + env['ASSISTANT_ID'] = str(getattr(config, 'bot_id', '')) + env['USER_IDENTIFIER'] = str(getattr(config, 'user_identifier', '')) + env['TRACE_ID'] = str(getattr(config, 'trace_id', '')) + env['SESSION_ID'] = str(getattr(config, 'session_id', '')) + env['LANGUAGE'] = str(getattr(config, 'language', '')) env['HOOK_TYPE'] = hook_type # 合并 config 中的自定义 shell 环境变量 diff --git a/routes/bot_manager.py b/routes/bot_manager.py index 3e53050..3b2c5c9 100644 --- a/routes/bot_manager.py +++ b/routes/bot_manager.py @@ -705,6 +705,7 @@ class BotSettingsResponse(BaseModel): voice_speaker: Optional[str] = None # 语音音色 voice_system_role: Optional[str] = None # 语音对话系统角色 voice_speaking_style: Optional[str] = None # 语音说话风格 + enable_voice: bool = False # 语音对话开关 mcp_settings: Optional[str] = None # MCP 服务器配置 (JSON 字符串) updated_at: str @@ -1881,6 +1882,7 @@ async def get_bot_settings(bot_uuid: str, authorization: Optional[str] = Header( voice_speaker=settings.get('voice_speaker'), voice_system_role=settings.get('voice_system_role'), voice_speaking_style=settings.get('voice_speaking_style'), + enable_voice=settings.get('enable_voice', False), mcp_settings=settings.get('mcp_settings'), updated_at=datetime_to_str(updated_at) ) @@ -1998,6 +2000,8 @@ async def update_bot_settings( update_json['voice_system_role'] = request.voice_system_role if request.voice_speaking_style is not None: update_json['voice_speaking_style'] = request.voice_speaking_style + if request.enable_voice is not None: + update_json['enable_voice'] = request.enable_voice if request.mcp_settings is not None: update_json['mcp_settings'] = request.mcp_settings diff --git a/routes/voice.py b/routes/voice.py index 499f14d..fbf67d7 100644 --- a/routes/voice.py +++ b/routes/voice.py @@ -5,6 +5,7 @@ import logging from typing import Optional from fastapi import APIRouter, WebSocket, WebSocketDisconnect +from pydantic import BaseModel from services.voice_session_manager import VoiceSession from utils.settings import VOICE_DEFAULT_MODE @@ -13,6 +14,27 @@ logger = logging.getLogger('app') router = APIRouter() +# Global message queue for broadcast feature +_pending_messages: dict[str, list[str]] = {} + + +def _get_queue_key(bot_id: str, user_identifier: str) -> str: + return f"{bot_id}_{user_identifier}" + + +class BroadcastRequest(BaseModel): + bot_id: str + user_identifier: str + message: str + + +@router.post("/api/v3/voice/broadcast") +async def voice_broadcast(req: BroadcastRequest): + """Push a message to be spoken by an active voice session.""" + key = _get_queue_key(req.bot_id, req.user_identifier) + _pending_messages.setdefault(key, []).append(req.message) + return {"success": True, "queued": True} + @router.websocket("/api/v3/voice/realtime") async def voice_realtime(websocket: WebSocket): @@ -111,6 +133,14 @@ async def voice_realtime(websocket: WebSocket): if voice_mode == "lite": from services.voice_lite_session import VoiceLiteSession + # Create callback for broadcast messages + queue_key = _get_queue_key(bot_id, msg.get("user_identifier", "")) + + async def get_pending_message() -> Optional[str]: + msgs = _pending_messages.get(queue_key, []) + return msgs.pop(0) if msgs else None + + session_kwargs["get_pending_message"] = get_pending_message session = VoiceLiteSession(**session_kwargs) logger.info(f"[Voice] Using lite mode for bot_id={bot_id}") else: @@ -118,6 +148,9 @@ async def voice_realtime(websocket: WebSocket): try: await session.start() + # Clear old messages on new session connection + if voice_mode == "lite": + _pending_messages[queue_key] = [] except Exception as e: logger.error(f"Failed to start voice session: {e}", exc_info=True) await send_json({"type": "error", "message": f"Failed to connect: {str(e)}"}) diff --git a/services/voice_lite_session.py b/services/voice_lite_session.py index c3db322..7770a2c 100644 --- a/services/voice_lite_session.py +++ b/services/voice_lite_session.py @@ -35,6 +35,7 @@ class VoiceLiteSession: on_llm_text: Optional[Callable[[str], Awaitable[None]]] = None, on_status: Optional[Callable[[str], Awaitable[None]]] = None, on_error: Optional[Callable[[str], Awaitable[None]]] = None, + get_pending_message: Optional[Callable[[], Awaitable[Optional[str]]]] = None, ): self.bot_id = bot_id self.session_id = session_id or str(uuid.uuid4()) @@ -52,8 +53,11 @@ class VoiceLiteSession: self._on_llm_text = on_llm_text self._on_status = on_status self._on_error = on_error + self._get_pending_message = get_pending_message self._running = False + self._status: str = "ready" # Current session status + self._idle_check_task: Optional[asyncio.Task] = None self._asr_client: Optional[StreamingASRClient] = None self._asr_receive_task: Optional[asyncio.Task] = None self._agent_task: Optional[asyncio.Task] = None @@ -86,10 +90,17 @@ class VoiceLiteSession: self._running = True await self._emit_status("ready") + # Start idle check task for broadcast messages + if self._get_pending_message: + self._idle_check_task = asyncio.create_task(self._idle_check_loop()) + async def stop(self) -> None: """Gracefully stop the session.""" self._running = False + if self._idle_check_task and not self._idle_check_task.done(): + self._idle_check_task.cancel() + if self._vad_finish_task and not self._vad_finish_task.done(): self._vad_finish_task.cancel() @@ -511,9 +522,49 @@ class VoiceLiteSession: await self._on_audio(audio_chunk) async def _emit_status(self, status: str) -> None: + self._status = status if self._on_status: await self._on_status(status) async def _emit_error(self, message: str) -> None: if self._on_error: await self._on_error(message) + + async def _idle_check_loop(self) -> None: + """Background task: check and play pending broadcast messages when idle.""" + while self._running: + try: + await asyncio.sleep(1.0) # Check every second + # Check in both "ready" and "idle" states + if self._status in ("ready", "idle") and self._get_pending_message: + msg = await self._get_pending_message() + if msg: + await self.speak_text(msg) + except asyncio.CancelledError: + break + except Exception as e: + logger.warning(f"[VoiceLite] Idle check error: {e}") + + async def speak_text(self, text: str) -> None: + """Play text directly via TTS (skip agent, used for broadcast messages).""" + if not text.strip(): + return + + logger.info(f"[VoiceLite] Broadcasting: '{text[:80]}'") + await self._emit_status("speaking") + + try: + tts_client = StreamingTTSClient(speaker=self._speaker) + if self._client_sample_rate != 24000: + async for audio_chunk in tts_client.synthesize_raw(text): + if self._on_audio: + await self._on_audio(self._resample_output(audio_chunk)) + else: + async for audio_chunk in tts_client.synthesize(text): + if self._on_audio: + await self._on_audio(audio_chunk) + except Exception as e: + logger.error(f"[VoiceLite] Broadcast TTS error: {e}", exc_info=True) + finally: + if self._running: + await self._emit_status("idle") diff --git a/skills/voice-notification/SKILL.md b/skills/voice-notification/SKILL.md new file mode 100644 index 0000000..6ad7861 --- /dev/null +++ b/skills/voice-notification/SKILL.md @@ -0,0 +1,57 @@ +--- +name: voice-notification +description: Voice Notification - Push voice broadcast messages to active voice sessions for real-time TTS playback +--- + +# Voice Notification - Voice Broadcast + +Push voice broadcast messages to users' active voice sessions. The message will be played via TTS when the session is in idle state. + +## Quick Start + +When a user requests to send a voice notification: +1. Compose the message content +2. Call voice_notify.py to send the broadcast + +## Instructions + +### Tool Path + +```bash +python {skill_dir}/scripts/voice_notify.py broadcast --message "Your message here" +``` + +### Parameters + +| Parameter | Required | Description | +|-----------|----------|-------------| +| `--message` | Yes | The message content to be spoken via TTS | + +### Response + +- Success: `{"success": true, "queued": true}` +- Error: `{"success": false, "error": "..."}` + +## Examples + +**User**: "Send a voice notification: the meeting is starting" + +```bash +python {skill_dir}/scripts/voice_notify.py broadcast \ + --message "The meeting is starting soon, please get ready" +``` + +**User**: "Notify me via voice that my coffee is ready" + +```bash +python {skill_dir}/scripts/voice_notify.py broadcast \ + --message "Your coffee is ready, please come pick it up" +``` + +## Guidelines + +- The target user must have an active voice session connected to `/api/v3/voice/realtime` +- The voice session must be in lite mode (`voice_mode: "lite"`) +- Messages are queued and played when the session enters idle state +- Keep messages concise for better TTS experience +- Message language should match the user's preferred language diff --git a/skills/voice-notification/scripts/voice_notify.py b/skills/voice-notification/scripts/voice_notify.py new file mode 100644 index 0000000..bbce0c2 --- /dev/null +++ b/skills/voice-notification/scripts/voice_notify.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +"""Voice notification script for broadcasting messages to active voice sessions.""" + +import argparse +import json +import os +import sys +from urllib.request import Request, urlopen +from urllib.error import URLError, HTTPError + +# Default API endpoint +DEFAULT_API_URL = "http://localhost:8001/api/v3/voice/broadcast" + + +def broadcast_message(message: str, api_url: str = DEFAULT_API_URL) -> dict: + """Send a broadcast message to the voice API. + + Args: + message: The message content to be spoken + api_url: The API endpoint URL + + Returns: + Response dict from the API + """ + bot_id = os.environ.get("BOT_ID", "") + user_identifier = os.environ.get("USER_IDENTIFIER", "") + + if not bot_id: + return {"success": False, "error": "BOT_ID environment variable not set"} + if not user_identifier: + return {"success": False, "error": "USER_IDENTIFIER environment variable not set"} + + payload = { + "bot_id": bot_id, + "user_identifier": user_identifier, + "message": message + } + + req = Request( + api_url, + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + method="POST" + ) + + try: + with urlopen(req, timeout=10) as response: + return json.loads(response.read().decode("utf-8")) + except HTTPError as e: + return {"success": False, "error": f"HTTP {e.code}: {e.reason}"} + except URLError as e: + return {"success": False, "error": f"Connection error: {e.reason}"} + except Exception as e: + return {"success": False, "error": str(e)} + + +def main(): + parser = argparse.ArgumentParser(description="Voice notification broadcast tool") + subparsers = parser.add_subparsers(dest="command", help="Available commands") + + # Broadcast command + broadcast_parser = subparsers.add_parser("broadcast", help="Broadcast a message to active voice session") + broadcast_parser.add_argument("--message", required=True, help="Message content to be spoken") + broadcast_parser.add_argument("--api-url", default=DEFAULT_API_URL, help="API endpoint URL") + + args = parser.parse_args() + + if args.command == "broadcast": + result = broadcast_message( + message=args.message, + api_url=args.api_url + ) + print(json.dumps(result, ensure_ascii=False, indent=2)) + sys.exit(0 if result.get("success") else 1) + else: + parser.print_help() + sys.exit(1) + + +if __name__ == "__main__": + main()