This commit is contained in:
朱潮 2026-03-21 02:44:56 +08:00
parent 4b70da5bb0
commit ba65c44755

View File

@ -268,6 +268,27 @@ class VoiceSession:
# Sentence-ending punctuation pattern for splitting TTS # Sentence-ending punctuation pattern for splitting TTS
_SENTENCE_END_RE = re.compile(r'[。!?;\n.!?;]') _SENTENCE_END_RE = re.compile(r'[。!?;\n.!?;]')
# Markdown syntax to strip before TTS.
# Single combined pattern covering: heading hashes, `*`/`_` emphasis runs that
# are not enclosed by word characters on both sides, `~~`, backtick runs,
# blockquote markers, bullet / numbered list markers, and link / image syntax.
# Compiled with re.MULTILINE so the `^` anchors apply at every line start.
# NOTE(review): _clean_markdown applies its own inline patterns and does not
# reference this constant in the visible code -- confirm whether it is used
# elsewhere or can be removed.
_MD_CLEAN_RE = re.compile(r'#{1,6}\s*|(?<!\w)\*{1,3}|(?<!\w)_{1,3}|\*{1,3}(?!\w)|_{1,3}(?!\w)|~~|`{1,3}|^>\s*|^\s*[-*+]\s+|^\s*\d+\.\s+|\[([^\]]*)\]\([^)]*\)|!\[([^\]]*)\]\([^)]*\)', re.MULTILINE)
@staticmethod
def _clean_markdown(text: str) -> str:
    """Strip Markdown formatting so the text reads naturally when sent to TTS.

    Line-level constructs (horizontal rules, headings, blockquotes, list
    markers) are removed first, while their marker characters are still
    intact; inline emphasis/code markers are stripped afterwards. Running
    the steps in this order keeps a spaced rule such as ``- - -`` from being
    half-consumed as a list item, and keeps ``* item`` from losing its
    bullet to the emphasis pass and leaving a stray leading space.

    Args:
        text: A sentence or text fragment that may contain Markdown.

    Returns:
        The text with Markdown syntax removed, runs of blank lines collapsed
        to a single newline, and surrounding whitespace stripped. May be an
        empty string if the input consisted only of Markdown syntax.
    """
    # Links/images -> their display text. Images go first so the leading "!"
    # is consumed together with the bracketed part.
    text = re.sub(r'!\[([^\]]*)\]\([^)]*\)', r'\1', text)
    text = re.sub(r'\[([^\]]*)\]\([^)]*\)', r'\1', text)

    # Horizontal rules: a line made only of three or more -, * or _ markers,
    # optionally separated by spaces. Must run before list/emphasis stripping.
    text = re.sub(
        r'^[ \t]*[-*_](?:[ \t]*[-*_]){2,}[ \t]*$', '', text, flags=re.MULTILINE
    )

    # ATX headings: hashes are only syntax at the start of a line. Anchoring
    # avoids mangling inline text such as "C# language". An optional closing
    # hash sequence ("## Title ##") is dropped as well.
    text = re.sub(
        r'^[ \t]*#+[ \t]*(.*?)(?:[ \t]+#+)?[ \t]*$', r'\1', text,
        flags=re.MULTILINE,
    )

    # Blockquote markers, including nested ones (">> text").
    text = re.sub(r'^(?:>\s*)+', '', text, flags=re.MULTILINE)

    # Bullet and numbered list markers.
    text = re.sub(r'^\s*[-*+]\s+', '', text, flags=re.MULTILINE)
    text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)

    # Inline emphasis, strikethrough and code marks. These are stripped
    # unconditionally rather than via \w lookarounds: \w matches CJK
    # characters, so boundary checks would miss markers in unspaced text.
    text = re.sub(r'[*_`]+|~~', '', text)

    # Collapse blank lines left behind by removed constructs.
    text = re.sub(r'\n{2,}', '\n', text)
    return text.strip()
async def _on_asr_text_received(self, text: str) -> None: async def _on_asr_text_received(self, text: str) -> None:
"""Called when ASR text is received — stream agent output, send TTS sentence by sentence""" """Called when ASR text is received — stream agent output, send TTS sentence by sentence"""
@ -307,6 +328,8 @@ class VoiceSession:
sentence = sentence_buf[:end_pos].strip() sentence = sentence_buf[:end_pos].strip()
sentence_buf = sentence_buf[end_pos:] sentence_buf = sentence_buf[end_pos:]
if sentence:
sentence = self._clean_markdown(sentence)
if sentence: if sentence:
logger.info(f"[Voice] Sending TTS sentence: '{sentence[:80]}'") logger.info(f"[Voice] Sending TTS sentence: '{sentence[:80]}'")
await self.realtime_client.chat_tts_text( await self.realtime_client.chat_tts_text(
@ -318,6 +341,8 @@ class VoiceSession:
# Handle remaining text in buffer (last sentence without ending punctuation) # Handle remaining text in buffer (last sentence without ending punctuation)
remaining = sentence_buf.strip() remaining = sentence_buf.strip()
if remaining:
remaining = self._clean_markdown(remaining)
if remaining: if remaining:
logger.info(f"[Voice] Sending TTS remaining: '{remaining[:80]}'") logger.info(f"[Voice] Sending TTS remaining: '{remaining[:80]}'")
await self.realtime_client.chat_tts_text( await self.realtime_client.chat_tts_text(