NOTE(review): this patch was reconstructed from a whitespace-collapsed copy. The indentation of the context lines in the second and third hunks is a best guess - confirm against services/voice_session_manager.py before applying. The blob hashes on the "index" line predate the changes to the first hunk.

diff --git a/services/voice_session_manager.py b/services/voice_session_manager.py
index f3a35a4..7c8ca2b 100644
--- a/services/voice_session_manager.py
+++ b/services/voice_session_manager.py
@@ -268,6 +268,45 @@ class VoiceSession:
 
     # Sentence-ending punctuation pattern for splitting TTS
     _SENTENCE_END_RE = re.compile(r'[。!?;\n.!?;]')
+    # Markdown clean-up rules applied in order before TTS: (pattern, replacement).
+    # Line anchors use [ \t] rather than \s so a rule never spans two lines.
+    _MD_CLEAN_RULES = (
+        # Images, then links: keep only the display text.
+        (re.compile(r'!\[([^\]]*)\]\([^)]*\)'), r'\1'),
+        (re.compile(r'\[([^\]]*)\]\([^)]*\)'), r'\1'),
+        # Horizontal rules; must run before emphasis marks eat "***" / "___".
+        (re.compile(r'^[ \t]*[-*_]{3,}[ \t]*$', re.MULTILINE), ''),
+        # ATX headings, only at line start so "C#" and "#1" keep their "#".
+        (re.compile(r'^[ \t]*#{1,6}(?:[ \t]+|$)', re.MULTILINE), ''),
+        # Blockquote markers, including nested "> > ".
+        (re.compile(r'^[ \t]*(?:>[ \t]*)+', re.MULTILINE), ''),
+        # Bulleted and numbered list markers.
+        (re.compile(r'^[ \t]*(?:[-*+]|\d+\.)[ \t]+', re.MULTILINE), ''),
+        # Bold, italic, strikethrough and code marks; underscores inside a
+        # word (snake_case) are not emphasis and are kept.
+        (re.compile(r'\*{1,3}|~~|`{1,3}|(?<!\w)_{1,3}|_{1,3}(?!\w)'), ''),
+        # Collapse runs of blank lines.
+        (re.compile(r'\n{2,}'), '\n'),
+    )
+
+    @staticmethod
+    def _clean_markdown(text: str) -> str:
+        """Strip Markdown formatting characters for TTS readability.
+
+        Links and images are replaced by their display text; heading,
+        blockquote, list, horizontal-rule and emphasis/code markers are
+        removed; runs of blank lines are collapsed.
+
+        Args:
+            text: Agent output fragment that may contain Markdown.
+
+        Returns:
+            The cleaned text with surrounding whitespace stripped; may be
+            empty when the fragment was markup only.
+        """
+        for pattern, replacement in VoiceSession._MD_CLEAN_RULES:
+            text = pattern.sub(replacement, text)
+        return text.strip()
 
     async def _on_asr_text_received(self, text: str) -> None:
         """Called when ASR text is received — stream agent output, send TTS sentence by sentence"""
@@ -307,6 +346,8 @@ class VoiceSession:
 
                     sentence = sentence_buf[:end_pos].strip()
                     sentence_buf = sentence_buf[end_pos:]
+                    if sentence:
+                        sentence = self._clean_markdown(sentence)
                     if sentence:
                         logger.info(f"[Voice] Sending TTS sentence: '{sentence[:80]}'")
                         await self.realtime_client.chat_tts_text(
@@ -318,6 +359,8 @@ class VoiceSession:
 
             # Handle remaining text in buffer (last sentence without ending punctuation)
             remaining = sentence_buf.strip()
+            if remaining:
+                remaining = self._clean_markdown(remaining)
             if remaining:
                 logger.info(f"[Voice] Sending TTS remaining: '{remaining[:80]}'")
                 await self.realtime_client.chat_tts_text(