add voice skill
This commit is contained in:
parent
a0e0c8c7b6
commit
6d6c7a92ef
@ -166,11 +166,11 @@ async def _execute_command(skill_path: str, command: str, hook_type: str, config
|
||||
try:
|
||||
# 设置环境变量,传递给子进程
|
||||
env = os.environ.copy()
|
||||
env['ASSISTANT_ID'] = getattr(config, 'bot_id', '')
|
||||
env['USER_IDENTIFIER'] = getattr(config, 'user_identifier', '')
|
||||
env['TRACE_ID'] = getattr(config, 'trace_id', '')
|
||||
env['SESSION_ID'] = getattr(config, 'session_id', '')
|
||||
env['LANGUAGE'] = getattr(config, 'language', '')
|
||||
env['ASSISTANT_ID'] = str(getattr(config, 'bot_id', ''))
|
||||
env['USER_IDENTIFIER'] = str(getattr(config, 'user_identifier', ''))
|
||||
env['TRACE_ID'] = str(getattr(config, 'trace_id', ''))
|
||||
env['SESSION_ID'] = str(getattr(config, 'session_id', ''))
|
||||
env['LANGUAGE'] = str(getattr(config, 'language', ''))
|
||||
env['HOOK_TYPE'] = hook_type
|
||||
|
||||
# 合并 config 中的自定义 shell 环境变量
|
||||
|
||||
@ -705,6 +705,7 @@ class BotSettingsResponse(BaseModel):
|
||||
voice_speaker: Optional[str] = None # 语音音色
|
||||
voice_system_role: Optional[str] = None # 语音对话系统角色
|
||||
voice_speaking_style: Optional[str] = None # 语音说话风格
|
||||
enable_voice: bool = False # 语音对话开关
|
||||
mcp_settings: Optional[str] = None # MCP 服务器配置 (JSON 字符串)
|
||||
updated_at: str
|
||||
|
||||
@ -1881,6 +1882,7 @@ async def get_bot_settings(bot_uuid: str, authorization: Optional[str] = Header(
|
||||
voice_speaker=settings.get('voice_speaker'),
|
||||
voice_system_role=settings.get('voice_system_role'),
|
||||
voice_speaking_style=settings.get('voice_speaking_style'),
|
||||
enable_voice=settings.get('enable_voice', False),
|
||||
mcp_settings=settings.get('mcp_settings'),
|
||||
updated_at=datetime_to_str(updated_at)
|
||||
)
|
||||
@ -1998,6 +2000,8 @@ async def update_bot_settings(
|
||||
update_json['voice_system_role'] = request.voice_system_role
|
||||
if request.voice_speaking_style is not None:
|
||||
update_json['voice_speaking_style'] = request.voice_speaking_style
|
||||
if request.enable_voice is not None:
|
||||
update_json['enable_voice'] = request.enable_voice
|
||||
if request.mcp_settings is not None:
|
||||
update_json['mcp_settings'] = request.mcp_settings
|
||||
|
||||
|
||||
@ -5,6 +5,7 @@ import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.voice_session_manager import VoiceSession
|
||||
from utils.settings import VOICE_DEFAULT_MODE
|
||||
@ -13,6 +14,27 @@ logger = logging.getLogger('app')
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Global message queue for broadcast feature
|
||||
_pending_messages: dict[str, list[str]] = {}
|
||||
|
||||
|
||||
def _get_queue_key(bot_id: str, user_identifier: str) -> str:
|
||||
return f"{bot_id}_{user_identifier}"
|
||||
|
||||
|
||||
class BroadcastRequest(BaseModel):
|
||||
bot_id: str
|
||||
user_identifier: str
|
||||
message: str
|
||||
|
||||
|
||||
@router.post("/api/v3/voice/broadcast")
|
||||
async def voice_broadcast(req: BroadcastRequest):
|
||||
"""Push a message to be spoken by an active voice session."""
|
||||
key = _get_queue_key(req.bot_id, req.user_identifier)
|
||||
_pending_messages.setdefault(key, []).append(req.message)
|
||||
return {"success": True, "queued": True}
|
||||
|
||||
|
||||
@router.websocket("/api/v3/voice/realtime")
|
||||
async def voice_realtime(websocket: WebSocket):
|
||||
@ -111,6 +133,14 @@ async def voice_realtime(websocket: WebSocket):
|
||||
|
||||
if voice_mode == "lite":
|
||||
from services.voice_lite_session import VoiceLiteSession
|
||||
# Create callback for broadcast messages
|
||||
queue_key = _get_queue_key(bot_id, msg.get("user_identifier", ""))
|
||||
|
||||
async def get_pending_message() -> Optional[str]:
|
||||
msgs = _pending_messages.get(queue_key, [])
|
||||
return msgs.pop(0) if msgs else None
|
||||
|
||||
session_kwargs["get_pending_message"] = get_pending_message
|
||||
session = VoiceLiteSession(**session_kwargs)
|
||||
logger.info(f"[Voice] Using lite mode for bot_id={bot_id}")
|
||||
else:
|
||||
@ -118,6 +148,9 @@ async def voice_realtime(websocket: WebSocket):
|
||||
|
||||
try:
|
||||
await session.start()
|
||||
# Clear old messages on new session connection
|
||||
if voice_mode == "lite":
|
||||
_pending_messages[queue_key] = []
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to start voice session: {e}", exc_info=True)
|
||||
await send_json({"type": "error", "message": f"Failed to connect: {str(e)}"})
|
||||
|
||||
@ -35,6 +35,7 @@ class VoiceLiteSession:
|
||||
on_llm_text: Optional[Callable[[str], Awaitable[None]]] = None,
|
||||
on_status: Optional[Callable[[str], Awaitable[None]]] = None,
|
||||
on_error: Optional[Callable[[str], Awaitable[None]]] = None,
|
||||
get_pending_message: Optional[Callable[[], Awaitable[Optional[str]]]] = None,
|
||||
):
|
||||
self.bot_id = bot_id
|
||||
self.session_id = session_id or str(uuid.uuid4())
|
||||
@ -52,8 +53,11 @@ class VoiceLiteSession:
|
||||
self._on_llm_text = on_llm_text
|
||||
self._on_status = on_status
|
||||
self._on_error = on_error
|
||||
self._get_pending_message = get_pending_message
|
||||
|
||||
self._running = False
|
||||
self._status: str = "ready" # Current session status
|
||||
self._idle_check_task: Optional[asyncio.Task] = None
|
||||
self._asr_client: Optional[StreamingASRClient] = None
|
||||
self._asr_receive_task: Optional[asyncio.Task] = None
|
||||
self._agent_task: Optional[asyncio.Task] = None
|
||||
@ -86,10 +90,17 @@ class VoiceLiteSession:
|
||||
self._running = True
|
||||
await self._emit_status("ready")
|
||||
|
||||
# Start idle check task for broadcast messages
|
||||
if self._get_pending_message:
|
||||
self._idle_check_task = asyncio.create_task(self._idle_check_loop())
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Gracefully stop the session."""
|
||||
self._running = False
|
||||
|
||||
if self._idle_check_task and not self._idle_check_task.done():
|
||||
self._idle_check_task.cancel()
|
||||
|
||||
if self._vad_finish_task and not self._vad_finish_task.done():
|
||||
self._vad_finish_task.cancel()
|
||||
|
||||
@ -511,9 +522,49 @@ class VoiceLiteSession:
|
||||
await self._on_audio(audio_chunk)
|
||||
|
||||
async def _emit_status(self, status: str) -> None:
|
||||
self._status = status
|
||||
if self._on_status:
|
||||
await self._on_status(status)
|
||||
|
||||
async def _emit_error(self, message: str) -> None:
|
||||
if self._on_error:
|
||||
await self._on_error(message)
|
||||
|
||||
async def _idle_check_loop(self) -> None:
|
||||
"""Background task: check and play pending broadcast messages when idle."""
|
||||
while self._running:
|
||||
try:
|
||||
await asyncio.sleep(1.0) # Check every second
|
||||
# Check in both "ready" and "idle" states
|
||||
if self._status in ("ready", "idle") and self._get_pending_message:
|
||||
msg = await self._get_pending_message()
|
||||
if msg:
|
||||
await self.speak_text(msg)
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(f"[VoiceLite] Idle check error: {e}")
|
||||
|
||||
async def speak_text(self, text: str) -> None:
|
||||
"""Play text directly via TTS (skip agent, used for broadcast messages)."""
|
||||
if not text.strip():
|
||||
return
|
||||
|
||||
logger.info(f"[VoiceLite] Broadcasting: '{text[:80]}'")
|
||||
await self._emit_status("speaking")
|
||||
|
||||
try:
|
||||
tts_client = StreamingTTSClient(speaker=self._speaker)
|
||||
if self._client_sample_rate != 24000:
|
||||
async for audio_chunk in tts_client.synthesize_raw(text):
|
||||
if self._on_audio:
|
||||
await self._on_audio(self._resample_output(audio_chunk))
|
||||
else:
|
||||
async for audio_chunk in tts_client.synthesize(text):
|
||||
if self._on_audio:
|
||||
await self._on_audio(audio_chunk)
|
||||
except Exception as e:
|
||||
logger.error(f"[VoiceLite] Broadcast TTS error: {e}", exc_info=True)
|
||||
finally:
|
||||
if self._running:
|
||||
await self._emit_status("idle")
|
||||
|
||||
57
skills/voice-notification/SKILL.md
Normal file
57
skills/voice-notification/SKILL.md
Normal file
@ -0,0 +1,57 @@
|
||||
---
|
||||
name: voice-notification
|
||||
description: Voice Notification - Push voice broadcast messages to active voice sessions for real-time TTS playback
|
||||
---
|
||||
|
||||
# Voice Notification - Voice Broadcast
|
||||
|
||||
Push voice broadcast messages to users' active voice sessions. The message will be played via TTS when the session is in idle state.
|
||||
|
||||
## Quick Start
|
||||
|
||||
When a user requests to send a voice notification:
|
||||
1. Compose the message content
|
||||
2. Call voice_notify.py to send the broadcast
|
||||
|
||||
## Instructions
|
||||
|
||||
### Tool Path
|
||||
|
||||
```bash
|
||||
python {skill_dir}/scripts/voice_notify.py broadcast --message "Your message here"
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
| Parameter | Required | Description |
|
||||
|-----------|----------|-------------|
|
||||
| `--message` | Yes | The message content to be spoken via TTS |
|
||||
|
||||
### Response
|
||||
|
||||
- Success: `{"success": true, "queued": true}`
|
||||
- Error: `{"success": false, "error": "..."}`
|
||||
|
||||
## Examples
|
||||
|
||||
**User**: "Send a voice notification: the meeting is starting"
|
||||
|
||||
```bash
|
||||
python {skill_dir}/scripts/voice_notify.py broadcast \
|
||||
--message "The meeting is starting soon, please get ready"
|
||||
```
|
||||
|
||||
**User**: "Notify me via voice that my coffee is ready"
|
||||
|
||||
```bash
|
||||
python {skill_dir}/scripts/voice_notify.py broadcast \
|
||||
--message "Your coffee is ready, please come pick it up"
|
||||
```
|
||||
|
||||
## Guidelines
|
||||
|
||||
- The target user must have an active voice session connected to `/api/v3/voice/realtime`
|
||||
- The voice session must be in lite mode (`voice_mode: "lite"`)
|
||||
- Messages are queued and played when the session enters idle state
|
||||
- Keep messages concise for better TTS experience
|
||||
- Message language should match the user's preferred language
|
||||
81
skills/voice-notification/scripts/voice_notify.py
Normal file
81
skills/voice-notification/scripts/voice_notify.py
Normal file
@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Voice notification script for broadcasting messages to active voice sessions."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from urllib.request import Request, urlopen
|
||||
from urllib.error import URLError, HTTPError
|
||||
|
||||
# Default API endpoint
|
||||
DEFAULT_API_URL = "http://localhost:8001/api/v3/voice/broadcast"
|
||||
|
||||
|
||||
def broadcast_message(message: str, api_url: str = DEFAULT_API_URL) -> dict:
|
||||
"""Send a broadcast message to the voice API.
|
||||
|
||||
Args:
|
||||
message: The message content to be spoken
|
||||
api_url: The API endpoint URL
|
||||
|
||||
Returns:
|
||||
Response dict from the API
|
||||
"""
|
||||
bot_id = os.environ.get("BOT_ID", "")
|
||||
user_identifier = os.environ.get("USER_IDENTIFIER", "")
|
||||
|
||||
if not bot_id:
|
||||
return {"success": False, "error": "BOT_ID environment variable not set"}
|
||||
if not user_identifier:
|
||||
return {"success": False, "error": "USER_IDENTIFIER environment variable not set"}
|
||||
|
||||
payload = {
|
||||
"bot_id": bot_id,
|
||||
"user_identifier": user_identifier,
|
||||
"message": message
|
||||
}
|
||||
|
||||
req = Request(
|
||||
api_url,
|
||||
data=json.dumps(payload).encode("utf-8"),
|
||||
headers={"Content-Type": "application/json"},
|
||||
method="POST"
|
||||
)
|
||||
|
||||
try:
|
||||
with urlopen(req, timeout=10) as response:
|
||||
return json.loads(response.read().decode("utf-8"))
|
||||
except HTTPError as e:
|
||||
return {"success": False, "error": f"HTTP {e.code}: {e.reason}"}
|
||||
except URLError as e:
|
||||
return {"success": False, "error": f"Connection error: {e.reason}"}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Voice notification broadcast tool")
|
||||
subparsers = parser.add_subparsers(dest="command", help="Available commands")
|
||||
|
||||
# Broadcast command
|
||||
broadcast_parser = subparsers.add_parser("broadcast", help="Broadcast a message to active voice session")
|
||||
broadcast_parser.add_argument("--message", required=True, help="Message content to be spoken")
|
||||
broadcast_parser.add_argument("--api-url", default=DEFAULT_API_URL, help="API endpoint URL")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command == "broadcast":
|
||||
result = broadcast_message(
|
||||
message=args.message,
|
||||
api_url=args.api_url
|
||||
)
|
||||
print(json.dumps(result, ensure_ascii=False, indent=2))
|
||||
sys.exit(0 if result.get("success") else 1)
|
||||
else:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Reference in New Issue
Block a user