add voice skill
This commit is contained in:
parent
a0e0c8c7b6
commit
6d6c7a92ef
@ -166,11 +166,11 @@ async def _execute_command(skill_path: str, command: str, hook_type: str, config
|
|||||||
try:
|
try:
|
||||||
# 设置环境变量,传递给子进程
|
# 设置环境变量,传递给子进程
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
env['ASSISTANT_ID'] = getattr(config, 'bot_id', '')
|
env['ASSISTANT_ID'] = str(getattr(config, 'bot_id', ''))
|
||||||
env['USER_IDENTIFIER'] = getattr(config, 'user_identifier', '')
|
env['USER_IDENTIFIER'] = str(getattr(config, 'user_identifier', ''))
|
||||||
env['TRACE_ID'] = getattr(config, 'trace_id', '')
|
env['TRACE_ID'] = str(getattr(config, 'trace_id', ''))
|
||||||
env['SESSION_ID'] = getattr(config, 'session_id', '')
|
env['SESSION_ID'] = str(getattr(config, 'session_id', ''))
|
||||||
env['LANGUAGE'] = getattr(config, 'language', '')
|
env['LANGUAGE'] = str(getattr(config, 'language', ''))
|
||||||
env['HOOK_TYPE'] = hook_type
|
env['HOOK_TYPE'] = hook_type
|
||||||
|
|
||||||
# 合并 config 中的自定义 shell 环境变量
|
# 合并 config 中的自定义 shell 环境变量
|
||||||
|
|||||||
@ -705,6 +705,7 @@ class BotSettingsResponse(BaseModel):
|
|||||||
voice_speaker: Optional[str] = None # 语音音色
|
voice_speaker: Optional[str] = None # 语音音色
|
||||||
voice_system_role: Optional[str] = None # 语音对话系统角色
|
voice_system_role: Optional[str] = None # 语音对话系统角色
|
||||||
voice_speaking_style: Optional[str] = None # 语音说话风格
|
voice_speaking_style: Optional[str] = None # 语音说话风格
|
||||||
|
enable_voice: bool = False # 语音对话开关
|
||||||
mcp_settings: Optional[str] = None # MCP 服务器配置 (JSON 字符串)
|
mcp_settings: Optional[str] = None # MCP 服务器配置 (JSON 字符串)
|
||||||
updated_at: str
|
updated_at: str
|
||||||
|
|
||||||
@ -1881,6 +1882,7 @@ async def get_bot_settings(bot_uuid: str, authorization: Optional[str] = Header(
|
|||||||
voice_speaker=settings.get('voice_speaker'),
|
voice_speaker=settings.get('voice_speaker'),
|
||||||
voice_system_role=settings.get('voice_system_role'),
|
voice_system_role=settings.get('voice_system_role'),
|
||||||
voice_speaking_style=settings.get('voice_speaking_style'),
|
voice_speaking_style=settings.get('voice_speaking_style'),
|
||||||
|
enable_voice=settings.get('enable_voice', False),
|
||||||
mcp_settings=settings.get('mcp_settings'),
|
mcp_settings=settings.get('mcp_settings'),
|
||||||
updated_at=datetime_to_str(updated_at)
|
updated_at=datetime_to_str(updated_at)
|
||||||
)
|
)
|
||||||
@ -1998,6 +2000,8 @@ async def update_bot_settings(
|
|||||||
update_json['voice_system_role'] = request.voice_system_role
|
update_json['voice_system_role'] = request.voice_system_role
|
||||||
if request.voice_speaking_style is not None:
|
if request.voice_speaking_style is not None:
|
||||||
update_json['voice_speaking_style'] = request.voice_speaking_style
|
update_json['voice_speaking_style'] = request.voice_speaking_style
|
||||||
|
if request.enable_voice is not None:
|
||||||
|
update_json['enable_voice'] = request.enable_voice
|
||||||
if request.mcp_settings is not None:
|
if request.mcp_settings is not None:
|
||||||
update_json['mcp_settings'] = request.mcp_settings
|
update_json['mcp_settings'] = request.mcp_settings
|
||||||
|
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import logging
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
|
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from services.voice_session_manager import VoiceSession
|
from services.voice_session_manager import VoiceSession
|
||||||
from utils.settings import VOICE_DEFAULT_MODE
|
from utils.settings import VOICE_DEFAULT_MODE
|
||||||
@ -13,6 +14,27 @@ logger = logging.getLogger('app')
|
|||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
|
# Global message queue for broadcast feature
|
||||||
|
_pending_messages: dict[str, list[str]] = {}
|
||||||
|
|
||||||
|
|
||||||
|
def _get_queue_key(bot_id: str, user_identifier: str) -> str:
    """Build the key under which broadcast messages for one bot/user pair are queued.

    The key is the bot id and the user identifier joined by a single underscore.
    """
    return "{}_{}".format(bot_id, user_identifier)
|
||||||
|
|
||||||
|
|
||||||
|
# Request body accepted by the voice broadcast endpoint.
class BroadcastRequest(BaseModel):
    # Bot whose voice session should speak the message.
    bot_id: str
    # User whose voice session should speak the message; combined with
    # bot_id to form the queue key.
    user_identifier: str
    # Text to be queued for playback.
    message: str
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/v3/voice/broadcast")
async def voice_broadcast(req: BroadcastRequest):
    """Push a message to be spoken by an active voice session."""
    # Messages are grouped per bot/user pair; the queue is created on first use.
    queue_key = _get_queue_key(req.bot_id, req.user_identifier)
    backlog = _pending_messages.setdefault(queue_key, [])
    backlog.append(req.message)
    # The message is only queued here; nothing in this handler checks whether
    # a session is currently connected for that key.
    return {"success": True, "queued": True}
|
||||||
|
|
||||||
|
|
||||||
@router.websocket("/api/v3/voice/realtime")
|
@router.websocket("/api/v3/voice/realtime")
|
||||||
async def voice_realtime(websocket: WebSocket):
|
async def voice_realtime(websocket: WebSocket):
|
||||||
@ -111,6 +133,14 @@ async def voice_realtime(websocket: WebSocket):
|
|||||||
|
|
||||||
if voice_mode == "lite":
|
if voice_mode == "lite":
|
||||||
from services.voice_lite_session import VoiceLiteSession
|
from services.voice_lite_session import VoiceLiteSession
|
||||||
|
# Create callback for broadcast messages
|
||||||
|
queue_key = _get_queue_key(bot_id, msg.get("user_identifier", ""))
|
||||||
|
|
||||||
|
async def get_pending_message() -> Optional[str]:
    """Remove and return the oldest queued broadcast for this session's key.

    Returns None when nothing is queued under the key.
    """
    backlog = _pending_messages.get(queue_key, [])
    if not backlog:
        return None
    # Oldest message first, so broadcasts are spoken in arrival order.
    return backlog.pop(0)
|
||||||
|
|
||||||
|
session_kwargs["get_pending_message"] = get_pending_message
|
||||||
session = VoiceLiteSession(**session_kwargs)
|
session = VoiceLiteSession(**session_kwargs)
|
||||||
logger.info(f"[Voice] Using lite mode for bot_id={bot_id}")
|
logger.info(f"[Voice] Using lite mode for bot_id={bot_id}")
|
||||||
else:
|
else:
|
||||||
@ -118,6 +148,9 @@ async def voice_realtime(websocket: WebSocket):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
await session.start()
|
await session.start()
|
||||||
|
# Clear old messages on new session connection
|
||||||
|
if voice_mode == "lite":
|
||||||
|
_pending_messages[queue_key] = []
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to start voice session: {e}", exc_info=True)
|
logger.error(f"Failed to start voice session: {e}", exc_info=True)
|
||||||
await send_json({"type": "error", "message": f"Failed to connect: {str(e)}"})
|
await send_json({"type": "error", "message": f"Failed to connect: {str(e)}"})
|
||||||
|
|||||||
@ -35,6 +35,7 @@ class VoiceLiteSession:
|
|||||||
on_llm_text: Optional[Callable[[str], Awaitable[None]]] = None,
|
on_llm_text: Optional[Callable[[str], Awaitable[None]]] = None,
|
||||||
on_status: Optional[Callable[[str], Awaitable[None]]] = None,
|
on_status: Optional[Callable[[str], Awaitable[None]]] = None,
|
||||||
on_error: Optional[Callable[[str], Awaitable[None]]] = None,
|
on_error: Optional[Callable[[str], Awaitable[None]]] = None,
|
||||||
|
get_pending_message: Optional[Callable[[], Awaitable[Optional[str]]]] = None,
|
||||||
):
|
):
|
||||||
self.bot_id = bot_id
|
self.bot_id = bot_id
|
||||||
self.session_id = session_id or str(uuid.uuid4())
|
self.session_id = session_id or str(uuid.uuid4())
|
||||||
@ -52,8 +53,11 @@ class VoiceLiteSession:
|
|||||||
self._on_llm_text = on_llm_text
|
self._on_llm_text = on_llm_text
|
||||||
self._on_status = on_status
|
self._on_status = on_status
|
||||||
self._on_error = on_error
|
self._on_error = on_error
|
||||||
|
self._get_pending_message = get_pending_message
|
||||||
|
|
||||||
self._running = False
|
self._running = False
|
||||||
|
self._status: str = "ready" # Current session status
|
||||||
|
self._idle_check_task: Optional[asyncio.Task] = None
|
||||||
self._asr_client: Optional[StreamingASRClient] = None
|
self._asr_client: Optional[StreamingASRClient] = None
|
||||||
self._asr_receive_task: Optional[asyncio.Task] = None
|
self._asr_receive_task: Optional[asyncio.Task] = None
|
||||||
self._agent_task: Optional[asyncio.Task] = None
|
self._agent_task: Optional[asyncio.Task] = None
|
||||||
@ -86,10 +90,17 @@ class VoiceLiteSession:
|
|||||||
self._running = True
|
self._running = True
|
||||||
await self._emit_status("ready")
|
await self._emit_status("ready")
|
||||||
|
|
||||||
|
# Start idle check task for broadcast messages
|
||||||
|
if self._get_pending_message:
|
||||||
|
self._idle_check_task = asyncio.create_task(self._idle_check_loop())
|
||||||
|
|
||||||
async def stop(self) -> None:
|
async def stop(self) -> None:
|
||||||
"""Gracefully stop the session."""
|
"""Gracefully stop the session."""
|
||||||
self._running = False
|
self._running = False
|
||||||
|
|
||||||
|
if self._idle_check_task and not self._idle_check_task.done():
|
||||||
|
self._idle_check_task.cancel()
|
||||||
|
|
||||||
if self._vad_finish_task and not self._vad_finish_task.done():
|
if self._vad_finish_task and not self._vad_finish_task.done():
|
||||||
self._vad_finish_task.cancel()
|
self._vad_finish_task.cancel()
|
||||||
|
|
||||||
@ -511,9 +522,49 @@ class VoiceLiteSession:
|
|||||||
await self._on_audio(audio_chunk)
|
await self._on_audio(audio_chunk)
|
||||||
|
|
||||||
async def _emit_status(self, status: str) -> None:
    """Record *status* as the current session status and notify the status callback.

    The status is always stored on the session; the callback is awaited only
    when one was supplied.
    """
    self._status = status
    if not self._on_status:
        return
    await self._on_status(status)
|
||||||
|
|
||||||
async def _emit_error(self, message: str) -> None:
    """Forward *message* to the error callback; do nothing when none was supplied."""
    if not self._on_error:
        return
    await self._on_error(message)
|
||||||
|
|
||||||
|
async def _idle_check_loop(self) -> None:
    """Poll for pending broadcast messages and speak them while the session is free.

    Runs for as long as the session is marked running. About once per second
    it checks whether the status is "ready" or "idle" and, if a pending-message
    getter is configured, fetches one message and plays it via ``speak_text``.
    Cancellation ends the loop; any other exception is logged as a warning and
    polling continues.
    """
    while self._running:
        try:
            # Pause first, so at most one check happens per second.
            await asyncio.sleep(1.0)

            session_is_free = self._status in ("ready", "idle")
            if not (session_is_free and self._get_pending_message):
                continue

            pending = await self._get_pending_message()
            if pending:
                await self.speak_text(pending)
        except asyncio.CancelledError:
            break
        except Exception as exc:
            logger.warning(f"[VoiceLite] Idle check error: {exc}")
|
||||||
|
|
||||||
|
async def speak_text(self, text: str) -> None:
    """Synthesize *text* with TTS and stream the audio to the audio callback.

    Used for broadcast messages, which bypass the agent. Whitespace-only text
    is ignored. The status is set to "speaking" before synthesis and, if the
    session is still running afterwards, to "idle". Exceptions raised during
    synthesis are logged rather than propagated.
    """
    if not text.strip():
        return

    logger.info(f"[VoiceLite] Broadcasting: '{text[:80]}'")
    await self._emit_status("speaking")

    try:
        tts_client = StreamingTTSClient(speaker=self._speaker)
        # Clients not running at 24000 Hz get the raw stream, resampled per chunk.
        needs_resample = self._client_sample_rate != 24000
        if needs_resample:
            chunks = tts_client.synthesize_raw(text)
        else:
            chunks = tts_client.synthesize(text)

        async for chunk in chunks:
            if not self._on_audio:
                continue
            payload = self._resample_output(chunk) if needs_resample else chunk
            await self._on_audio(payload)
    except Exception as exc:
        logger.error(f"[VoiceLite] Broadcast TTS error: {exc}", exc_info=True)
    finally:
        # Do not report a status change on a session that has been stopped.
        if self._running:
            await self._emit_status("idle")
|
||||||
|
|||||||
57
skills/voice-notification/SKILL.md
Normal file
57
skills/voice-notification/SKILL.md
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
---
|
||||||
|
name: voice-notification
|
||||||
|
description: Voice Notification - Push voice broadcast messages to active voice sessions for real-time TTS playback
|
||||||
|
---
|
||||||
|
|
||||||
|
# Voice Notification - Voice Broadcast
|
||||||
|
|
||||||
|
Push voice broadcast messages to users' active voice sessions. Each message is queued and played via TTS once the session is not busy (session status `ready` or `idle`).
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
When a user requests to send a voice notification:
|
||||||
|
1. Compose the message content
|
||||||
|
2. Call voice_notify.py to send the broadcast
|
||||||
|
|
||||||
|
## Instructions
|
||||||
|
|
||||||
|
### Tool Path
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python {skill_dir}/scripts/voice_notify.py broadcast --message "Your message here"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Parameters
|
||||||
|
|
||||||
|
| Parameter | Required | Description |
|
||||||
|
|-----------|----------|-------------|
|
||||||
|
| `--message` | Yes | The message content to be spoken via TTS |
|
||||||
|
|
||||||
|
### Response
|
||||||
|
|
||||||
|
- Success: `{"success": true, "queued": true}`
|
||||||
|
- Error: `{"success": false, "error": "..."}`
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
**User**: "Send a voice notification: the meeting is starting"
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python {skill_dir}/scripts/voice_notify.py broadcast \
|
||||||
|
--message "The meeting is starting soon, please get ready"
|
||||||
|
```
|
||||||
|
|
||||||
|
**User**: "Notify me via voice that my coffee is ready"
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python {skill_dir}/scripts/voice_notify.py broadcast \
|
||||||
|
--message "Your coffee is ready, please come pick it up"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Guidelines
|
||||||
|
|
||||||
|
- The target user must have an active voice session connected to `/api/v3/voice/realtime`
|
||||||
|
- The voice session must be in lite mode (`voice_mode: "lite"`)
|
||||||
|
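- Messages are queued and played one at a time while the session status is `ready` or `idle` (the queue is checked about once per second); the queue is reset when a new lite session starts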
|
||||||
|
- Keep messages concise for better TTS experience
|
||||||
|
- Message language should match the user's preferred language
|
||||||
81
skills/voice-notification/scripts/voice_notify.py
Normal file
81
skills/voice-notification/scripts/voice_notify.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Voice notification script for broadcasting messages to active voice sessions."""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from urllib.request import Request, urlopen
|
||||||
|
from urllib.error import URLError, HTTPError
|
||||||
|
|
||||||
|
# Default API endpoint
DEFAULT_API_URL = "http://localhost:8001/api/v3/voice/broadcast"


def broadcast_message(message: str, api_url: str = DEFAULT_API_URL) -> dict:
    """Send a broadcast message to the voice API.

    The target session is taken from the environment: the bot id from
    ``BOT_ID`` (falling back to ``ASSISTANT_ID`` when ``BOT_ID`` is unset or
    empty) and the user from ``USER_IDENTIFIER``.

    Args:
        message: The message content to be spoken
        api_url: The API endpoint URL

    Returns:
        Response dict from the API. When the environment is incomplete or the
        request fails, a dict of the form
        ``{"success": False, "error": "<reason>"}`` is returned instead of
        raising.
    """
    # The hook command runner exports the bot id as ASSISTANT_ID rather than
    # BOT_ID, so accept either name; BOT_ID wins when both are present.
    bot_id = os.environ.get("BOT_ID") or os.environ.get("ASSISTANT_ID", "")
    user_identifier = os.environ.get("USER_IDENTIFIER", "")

    if not bot_id:
        return {"success": False, "error": "BOT_ID environment variable not set"}
    if not user_identifier:
        return {"success": False, "error": "USER_IDENTIFIER environment variable not set"}

    payload = {
        "bot_id": bot_id,
        "user_identifier": user_identifier,
        "message": message,
    }

    req = Request(
        api_url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urlopen(req, timeout=10) as response:
            return json.loads(response.read().decode("utf-8"))
    # HTTPError is a subclass of URLError, so it must be handled first.
    except HTTPError as e:
        return {"success": False, "error": f"HTTP {e.code}: {e.reason}"}
    except URLError as e:
        return {"success": False, "error": f"Connection error: {e.reason}"}
    except Exception as e:
        # Best-effort CLI helper: report anything else (e.g. a non-JSON
        # response body) as a failure result rather than a traceback.
        return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the command line, run the requested command and exit.

    The only command is ``broadcast``: its result is printed as JSON and the
    exit code is 0 when the result reports success, 1 otherwise. Without a
    recognised command, the help text is printed and the exit code is 1.
    """
    parser = argparse.ArgumentParser(description="Voice notification broadcast tool")
    commands = parser.add_subparsers(dest="command", help="Available commands")

    # Broadcast command
    broadcast_cmd = commands.add_parser("broadcast", help="Broadcast a message to active voice session")
    broadcast_cmd.add_argument("--message", required=True, help="Message content to be spoken")
    broadcast_cmd.add_argument("--api-url", default=DEFAULT_API_URL, help="API endpoint URL")

    args = parser.parse_args()

    if args.command != "broadcast":
        parser.print_help()
        sys.exit(1)

    outcome = broadcast_message(message=args.message, api_url=args.api_url)
    print(json.dumps(outcome, ensure_ascii=False, indent=2))
    exit_code = 0 if outcome.get("success") else 1
    sys.exit(exit_code)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Loading…
Reference in New Issue
Block a user