add voice skill

This commit is contained in:
朱潮 2026-04-01 10:26:17 +08:00
parent a0e0c8c7b6
commit 6d6c7a92ef
6 changed files with 231 additions and 5 deletions

View File

@ -166,11 +166,11 @@ async def _execute_command(skill_path: str, command: str, hook_type: str, config
try: try:
# 设置环境变量,传递给子进程 # 设置环境变量,传递给子进程
env = os.environ.copy() env = os.environ.copy()
env['ASSISTANT_ID'] = getattr(config, 'bot_id', '') env['ASSISTANT_ID'] = str(getattr(config, 'bot_id', ''))
env['USER_IDENTIFIER'] = getattr(config, 'user_identifier', '') env['USER_IDENTIFIER'] = str(getattr(config, 'user_identifier', ''))
env['TRACE_ID'] = getattr(config, 'trace_id', '') env['TRACE_ID'] = str(getattr(config, 'trace_id', ''))
env['SESSION_ID'] = getattr(config, 'session_id', '') env['SESSION_ID'] = str(getattr(config, 'session_id', ''))
env['LANGUAGE'] = getattr(config, 'language', '') env['LANGUAGE'] = str(getattr(config, 'language', ''))
env['HOOK_TYPE'] = hook_type env['HOOK_TYPE'] = hook_type
# 合并 config 中的自定义 shell 环境变量 # 合并 config 中的自定义 shell 环境变量

View File

@ -705,6 +705,7 @@ class BotSettingsResponse(BaseModel):
voice_speaker: Optional[str] = None # 语音音色 voice_speaker: Optional[str] = None # 语音音色
voice_system_role: Optional[str] = None # 语音对话系统角色 voice_system_role: Optional[str] = None # 语音对话系统角色
voice_speaking_style: Optional[str] = None # 语音说话风格 voice_speaking_style: Optional[str] = None # 语音说话风格
enable_voice: bool = False # 语音对话开关
mcp_settings: Optional[str] = None # MCP 服务器配置 (JSON 字符串) mcp_settings: Optional[str] = None # MCP 服务器配置 (JSON 字符串)
updated_at: str updated_at: str
@ -1881,6 +1882,7 @@ async def get_bot_settings(bot_uuid: str, authorization: Optional[str] = Header(
voice_speaker=settings.get('voice_speaker'), voice_speaker=settings.get('voice_speaker'),
voice_system_role=settings.get('voice_system_role'), voice_system_role=settings.get('voice_system_role'),
voice_speaking_style=settings.get('voice_speaking_style'), voice_speaking_style=settings.get('voice_speaking_style'),
enable_voice=settings.get('enable_voice', False),
mcp_settings=settings.get('mcp_settings'), mcp_settings=settings.get('mcp_settings'),
updated_at=datetime_to_str(updated_at) updated_at=datetime_to_str(updated_at)
) )
@ -1998,6 +2000,8 @@ async def update_bot_settings(
update_json['voice_system_role'] = request.voice_system_role update_json['voice_system_role'] = request.voice_system_role
if request.voice_speaking_style is not None: if request.voice_speaking_style is not None:
update_json['voice_speaking_style'] = request.voice_speaking_style update_json['voice_speaking_style'] = request.voice_speaking_style
if request.enable_voice is not None:
update_json['enable_voice'] = request.enable_voice
if request.mcp_settings is not None: if request.mcp_settings is not None:
update_json['mcp_settings'] = request.mcp_settings update_json['mcp_settings'] = request.mcp_settings

View File

@ -5,6 +5,7 @@ import logging
from typing import Optional from typing import Optional
from fastapi import APIRouter, WebSocket, WebSocketDisconnect from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from pydantic import BaseModel
from services.voice_session_manager import VoiceSession from services.voice_session_manager import VoiceSession
from utils.settings import VOICE_DEFAULT_MODE from utils.settings import VOICE_DEFAULT_MODE
@ -13,6 +14,27 @@ logger = logging.getLogger('app')
router = APIRouter() router = APIRouter()
# In-process queue of broadcast messages waiting to be spoken, keyed by the
# string returned by _get_queue_key(bot_id, user_identifier). The broadcast
# endpoint appends to a key's list, a lite voice session pops from the front
# of it, and the list is reset when a lite session starts.
# NOTE(review): nothing visible here removes entries for keys that never get
# a session, so this may grow without bound - consider a cap or TTL.
_pending_messages: dict[str, list[str]] = {}
def _get_queue_key(bot_id: str, user_identifier: str) -> str:
    """Return the pending-message queue key for a bot/user pair.

    The key is the bot id and the user identifier joined by one underscore.
    """
    return "{}_{}".format(bot_id, user_identifier)
class BroadcastRequest(BaseModel):
    """Request body for POST /api/v3/voice/broadcast."""

    # Bot whose voice session should speak the message.
    bot_id: str
    # User identifier; combined with bot_id to select the message queue.
    user_identifier: str
    # Text to queue for playback.
    message: str
@router.post("/api/v3/voice/broadcast")
async def voice_broadcast(req: BroadcastRequest):
    """Queue a message for the voice session of the given bot/user pair.

    The message is only appended to the in-memory pending queue; nothing is
    spoken by this handler. Always responds with
    ``{"success": True, "queued": True}``.
    """
    queue_key = _get_queue_key(req.bot_id, req.user_identifier)
    # Create the per-session backlog on first use.
    if queue_key not in _pending_messages:
        _pending_messages[queue_key] = []
    _pending_messages[queue_key].append(req.message)
    return {"success": True, "queued": True}
@router.websocket("/api/v3/voice/realtime") @router.websocket("/api/v3/voice/realtime")
async def voice_realtime(websocket: WebSocket): async def voice_realtime(websocket: WebSocket):
@ -111,6 +133,14 @@ async def voice_realtime(websocket: WebSocket):
if voice_mode == "lite": if voice_mode == "lite":
from services.voice_lite_session import VoiceLiteSession from services.voice_lite_session import VoiceLiteSession
# Create callback for broadcast messages
queue_key = _get_queue_key(bot_id, msg.get("user_identifier", ""))
async def get_pending_message() -> Optional[str]:
msgs = _pending_messages.get(queue_key, [])
return msgs.pop(0) if msgs else None
session_kwargs["get_pending_message"] = get_pending_message
session = VoiceLiteSession(**session_kwargs) session = VoiceLiteSession(**session_kwargs)
logger.info(f"[Voice] Using lite mode for bot_id={bot_id}") logger.info(f"[Voice] Using lite mode for bot_id={bot_id}")
else: else:
@ -118,6 +148,9 @@ async def voice_realtime(websocket: WebSocket):
try: try:
await session.start() await session.start()
# Clear old messages on new session connection
if voice_mode == "lite":
_pending_messages[queue_key] = []
except Exception as e: except Exception as e:
logger.error(f"Failed to start voice session: {e}", exc_info=True) logger.error(f"Failed to start voice session: {e}", exc_info=True)
await send_json({"type": "error", "message": f"Failed to connect: {str(e)}"}) await send_json({"type": "error", "message": f"Failed to connect: {str(e)}"})

View File

@ -35,6 +35,7 @@ class VoiceLiteSession:
on_llm_text: Optional[Callable[[str], Awaitable[None]]] = None, on_llm_text: Optional[Callable[[str], Awaitable[None]]] = None,
on_status: Optional[Callable[[str], Awaitable[None]]] = None, on_status: Optional[Callable[[str], Awaitable[None]]] = None,
on_error: Optional[Callable[[str], Awaitable[None]]] = None, on_error: Optional[Callable[[str], Awaitable[None]]] = None,
get_pending_message: Optional[Callable[[], Awaitable[Optional[str]]]] = None,
): ):
self.bot_id = bot_id self.bot_id = bot_id
self.session_id = session_id or str(uuid.uuid4()) self.session_id = session_id or str(uuid.uuid4())
@ -52,8 +53,11 @@ class VoiceLiteSession:
self._on_llm_text = on_llm_text self._on_llm_text = on_llm_text
self._on_status = on_status self._on_status = on_status
self._on_error = on_error self._on_error = on_error
self._get_pending_message = get_pending_message
self._running = False self._running = False
self._status: str = "ready" # Current session status
self._idle_check_task: Optional[asyncio.Task] = None
self._asr_client: Optional[StreamingASRClient] = None self._asr_client: Optional[StreamingASRClient] = None
self._asr_receive_task: Optional[asyncio.Task] = None self._asr_receive_task: Optional[asyncio.Task] = None
self._agent_task: Optional[asyncio.Task] = None self._agent_task: Optional[asyncio.Task] = None
@ -86,10 +90,17 @@ class VoiceLiteSession:
self._running = True self._running = True
await self._emit_status("ready") await self._emit_status("ready")
# Start idle check task for broadcast messages
if self._get_pending_message:
self._idle_check_task = asyncio.create_task(self._idle_check_loop())
async def stop(self) -> None: async def stop(self) -> None:
"""Gracefully stop the session.""" """Gracefully stop the session."""
self._running = False self._running = False
if self._idle_check_task and not self._idle_check_task.done():
self._idle_check_task.cancel()
if self._vad_finish_task and not self._vad_finish_task.done(): if self._vad_finish_task and not self._vad_finish_task.done():
self._vad_finish_task.cancel() self._vad_finish_task.cancel()
@ -511,9 +522,49 @@ class VoiceLiteSession:
await self._on_audio(audio_chunk) await self._on_audio(audio_chunk)
async def _emit_status(self, status: str) -> None:
    """Record ``status`` as the current session status and notify the status callback, if any."""
    self._status = status
    notify = self._on_status
    if not notify:
        return
    await notify(status)
async def _emit_error(self, message: str) -> None:
    """Forward ``message`` to the error callback when one is registered."""
    report = self._on_error
    if not report:
        return
    await report(message)
async def _idle_check_loop(self) -> None:
    """Poll for queued broadcast messages and speak them while the session is free.

    Runs until ``self._running`` is false or the task is cancelled. Any other
    exception raised during an iteration is logged as a warning and the loop
    continues.
    """
    speakable_states = ("ready", "idle")
    while self._running:
        try:
            # Poll once per second.
            await asyncio.sleep(1.0)
            # Only fetch a message when the session status allows playback.
            if self._status not in speakable_states or not self._get_pending_message:
                continue
            pending = await self._get_pending_message()
            if pending:
                await self.speak_text(pending)
        except asyncio.CancelledError:
            break
        except Exception as exc:
            logger.warning(f"[VoiceLite] Idle check error: {exc}")
async def speak_text(self, text: str) -> None:
    """Synthesize ``text`` with TTS and stream the audio to the audio callback.

    Bypasses the agent; used for broadcast messages. Blank text is ignored.
    TTS failures are logged rather than raised. The status is set to
    "speaking" before synthesis and to "idle" afterwards if the session is
    still running.
    """
    if text.strip() == "":
        return

    logger.info(f"[VoiceLite] Broadcasting: '{text[:80]}'")
    await self._emit_status("speaking")

    try:
        tts_client = StreamingTTSClient(speaker=self._speaker)
        # When the client sample rate is not 24000, use the raw stream and
        # pass each chunk through _resample_output before delivery.
        needs_resample = self._client_sample_rate != 24000
        if needs_resample:
            stream = tts_client.synthesize_raw(text)
        else:
            stream = tts_client.synthesize(text)

        async for chunk in stream:
            if not self._on_audio:
                continue
            payload = self._resample_output(chunk) if needs_resample else chunk
            await self._on_audio(payload)
    except Exception as exc:
        logger.error(f"[VoiceLite] Broadcast TTS error: {exc}", exc_info=True)
    finally:
        if self._running:
            await self._emit_status("idle")

View File

@ -0,0 +1,57 @@
---
name: voice-notification
description: Voice Notification - Push voice broadcast messages to active voice sessions for real-time TTS playback
---
# Voice Notification - Voice Broadcast
Push voice broadcast messages to users' active voice sessions. The message will be played via TTS when the session is in idle state.
## Quick Start
When a user requests to send a voice notification:
1. Compose the message content
2. Call voice_notify.py to send the broadcast
## Instructions
### Tool Path
```bash
python {skill_dir}/scripts/voice_notify.py broadcast --message "Your message here"
```
### Parameters
| Parameter | Required | Description |
|-----------|----------|-------------|
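| `--message` | Yes | The message content to be spoken via TTS |
| `--api-url` | No | Broadcast API endpoint (default: `http://localhost:8001/api/v3/voice/broadcast`) |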
### Response
- Success: `{"success": true, "queued": true}`
- Error: `{"success": false, "error": "..."}`
## Examples
**User**: "Send a voice notification: the meeting is starting"
```bash
python {skill_dir}/scripts/voice_notify.py broadcast \
--message "The meeting is starting soon, please get ready"
```
**User**: "Notify me via voice that my coffee is ready"
```bash
python {skill_dir}/scripts/voice_notify.py broadcast \
--message "Your coffee is ready, please come pick it up"
```
## Guidelines
- The target user must have an active voice session connected to `/api/v3/voice/realtime`; messages queued before the session starts are cleared when it connects
- The voice session must be in lite mode (`voice_mode: "lite"`)
- Messages are queued and played one at a time while the session is not busy (status `ready` or `idle`); the queue is checked about once per second
- Keep messages concise for better TTS experience
- Message language should match the user's preferred language

View File

@ -0,0 +1,81 @@
#!/usr/bin/env python3
"""Voice notification script for broadcasting messages to active voice sessions."""
import argparse
import json
import os
import sys
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
# Default API endpoint
DEFAULT_API_URL = "http://localhost:8001/api/v3/voice/broadcast"


def broadcast_message(message: str, api_url: str = DEFAULT_API_URL) -> dict:
    """Send a broadcast message to the voice API.

    The target session is taken from the environment: the bot id from
    ``BOT_ID`` (falling back to ``ASSISTANT_ID``) and the user from
    ``USER_IDENTIFIER``.

    Args:
        message: The message content to be spoken.
        api_url: The API endpoint URL.

    Returns:
        The decoded JSON response from the API on success, otherwise
        ``{"success": False, "error": "<reason>"}``. Missing configuration
        and request failures are reported through that dict, not raised.
    """
    # BOT_ID wins when both are set. ASSISTANT_ID is accepted as a fallback
    # because the skill hook executor exports the bot id under that name.
    bot_id = os.environ.get("BOT_ID") or os.environ.get("ASSISTANT_ID", "")
    user_identifier = os.environ.get("USER_IDENTIFIER", "")

    if not bot_id:
        return {"success": False, "error": "BOT_ID (or ASSISTANT_ID) environment variable not set"}
    if not user_identifier:
        return {"success": False, "error": "USER_IDENTIFIER environment variable not set"}

    payload = {
        "bot_id": bot_id,
        "user_identifier": user_identifier,
        "message": message,
    }

    req = Request(
        api_url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urlopen(req, timeout=10) as response:
            return json.loads(response.read().decode("utf-8"))
    except HTTPError as e:
        # HTTPError is a URLError subclass, so it must be handled first.
        return {"success": False, "error": f"HTTP {e.code}: {e.reason}"}
    except URLError as e:
        return {"success": False, "error": f"Connection error: {e.reason}"}
    except Exception as e:
        # Best-effort CLI helper: report anything else (e.g. invalid JSON)
        # as a failure result instead of a traceback.
        return {"success": False, "error": str(e)}
def main():
    """Parse the command line and run the requested sub-command.

    ``broadcast`` prints the API result as JSON and exits with status 0 when
    the result reports success, 1 otherwise. Any other invocation prints the
    help text and exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Voice notification broadcast tool")
    commands = parser.add_subparsers(dest="command", help="Available commands")

    # Broadcast command
    broadcast_cmd = commands.add_parser("broadcast", help="Broadcast a message to active voice session")
    broadcast_cmd.add_argument("--message", required=True, help="Message content to be spoken")
    broadcast_cmd.add_argument("--api-url", default=DEFAULT_API_URL, help="API endpoint URL")

    args = parser.parse_args()

    # No (or unknown) sub-command: show usage and signal failure.
    if args.command != "broadcast":
        parser.print_help()
        sys.exit(1)

    outcome = broadcast_message(message=args.message, api_url=args.api_url)
    print(json.dumps(outcome, ensure_ascii=False, indent=2))
    exit_code = 0 if outcome.get("success") else 1
    sys.exit(exit_code)


if __name__ == "__main__":
    main()