语音播放优化

2026-03-21 17:24:42 +08:00 · 2026-03-21 17:24:42 +08:00 · 43a77b3015
commit 43a77b3015
parent 3ee80a637e
1 changed file with 33 additions and 0 deletions
--- a/services/voice_session_manager.py
+++ b/services/voice_session_manager.py
@@ -25,9 +25,11 @@ class _StreamTagFilter:
        self.state = "idle"  # idle, answer, skip
        self.found_any_tag = False
        self._pending = ""  # buffer for partial tag like "[TOO..."
+        self.answer_ended = False  # True when ANSWER block ends (e.g. hit [TOOL_CALL])

    def feed(self, chunk: str) -> str:
        """Feed a chunk, return text that should be passed to TTS."""
+        self.answer_ended = False
        self._pending += chunk
        output = []

@@ -61,6 +63,8 @@ class _StreamTagFilter:
                    if tag_name == "ANSWER":
                        self.state = "answer"
                    else:
+                        if self.state == "answer":
+                            self.answer_ended = True
                        self.state = "skip"

            elif self.state == "skip":
@@ -332,6 +336,35 @@ class VoiceSession:
                passthrough = tag_filter.feed(chunk)

                if not passthrough:
+                    # ANSWER block ended (e.g. hit [TOOL_CALL]), flush sentence_buf immediately
+                    if tag_filter.answer_ended and sentence_buf:
+                        flush = sentence_buf.strip()
+                        sentence_buf = ""
+                        if flush:
+                            flush = self._clean_markdown(flush)
+                        if flush:
+                            if tts_started and self._tts_segment_done:
+                                logger.info(f"[Voice] TTS segment done, closing session and waiting for delivery (answer ended)")
+                                await self.realtime_client.chat_tts_text(content="", start=False, end=True)
+                                self._tts_complete_event.clear()
+                                try:
+                                    await asyncio.wait_for(self._tts_complete_event.wait(), timeout=10)
+                                except asyncio.TimeoutError:
+                                    logger.warning(f"[Voice] Timeout waiting for TTS complete, proceeding anyway")
+                                tts_started = False
+                                self._tts_segment_done = False
+                                logger.info(f"[Voice] TTS delivery done, starting new session (answer ended)")
+
+                            logger.info(f"[Voice] Sending TTS sentence (answer ended): '{flush[:80]}'")
+                            await self.realtime_client.chat_tts_text(
+                                content=flush,
+                                start=not tts_started,
+                                end=False,
+                            )
+                            if not tts_started:
+                                await self._emit_status("speaking")
+                            tts_started = True
+                            self._tts_segment_done = False
                    continue

                sentence_buf += passthrough