diff --git a/services/voice_session_manager.py b/services/voice_session_manager.py
index f3a35a4..7c8ca2b 100644
--- a/services/voice_session_manager.py
+++ b/services/voice_session_manager.py
@@ -268,6 +268,27 @@ class VoiceSession:
 
     # Sentence-ending punctuation pattern for splitting TTS
     _SENTENCE_END_RE = re.compile(r'[。！？；\n.!?;]')
+    # Combined Markdown-syntax pattern (emphasis marks guarded by \w lookarounds). NOTE(review): not referenced by _clean_markdown in this hunk — confirm it is used elsewhere or remove it
+    _MD_CLEAN_RE = re.compile(r'#{1,6}\s*|(?<!\w)\*{1,3}|(?<!\w)_{1,3}|\*{1,3}(?!\w)|_{1,3}(?!\w)|~~|`{1,3}|^>\s*|^\s*[-*+]\s+|^\s*\d+\.\s+|\[([^\]]*)\]\([^)]*\)|!\[([^\]]*)\]\([^)]*\)', re.MULTILINE)
+
+    @staticmethod
+    def _clean_markdown(text: str) -> str:
+        """Return ``text`` with Markdown markup removed so TTS speaks only the words; may return ''."""
+        # Images before links (an image embeds link syntax); keep only the alt/display text
+        text = re.sub(r'!\[([^\]]*)\]\([^)]*\)', r'\1', text)
+        text = re.sub(r'\[([^\]]*)\]\([^)]*\)', r'\1', text)
+        # Thematic breaks ('---', '* * *', '- - -') first, before list/emphasis passes eat part of them
+        text = re.sub(r'^\s*([-*_])(?:[ \t]*\1){2,}\s*$', '', text, flags=re.MULTILINE)
+        # Heading marks only at line start, so an inline '#' ("C#", "issue #42") is still spoken
+        text = re.sub(r'^\s*#{1,6}\s*', '', text, flags=re.MULTILINE)
+        # Emphasis/strikethrough/code marks; a '_' between two word chars (snake_case) is literal, keep it
+        text = re.sub(r'\*{1,3}|(?<!\w)_{1,3}|_{1,3}(?!\w)|~~|`{1,3}', '', text)
+        text = re.sub(r'^>\s*', '', text, flags=re.MULTILINE)
+        # List markers at line start: bullets ('-', '*', '+') and '1.' style numbering
+        text = re.sub(r'^\s*[-*+]\s+', '', text, flags=re.MULTILINE)
+        text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
+        # Collapse the blank lines left behind by removed markup, then trim both ends
+        return re.sub(r'\n{2,}', '\n', text).strip()
 
     async def _on_asr_text_received(self, text: str) -> None:
         """Called when ASR text is received — stream agent output, send TTS sentence by sentence"""
@@ -307,6 +328,8 @@ class VoiceSession:
                     sentence = sentence_buf[:end_pos].strip()
                     sentence_buf = sentence_buf[end_pos:]
 
+                    if sentence:
+                        sentence = self._clean_markdown(sentence)
                     if sentence:
                         logger.info(f"[Voice] Sending TTS sentence: '{sentence[:80]}'")
                         await self.realtime_client.chat_tts_text(
@@ -318,6 +341,8 @@ class VoiceSession:
 
             # Handle remaining text in buffer (last sentence without ending punctuation)
             remaining = sentence_buf.strip()
+            if remaining:
+                remaining = self._clean_markdown(remaining)
             if remaining:
                 logger.info(f"[Voice] Sending TTS remaining: '{remaining[:80]}'")
                 await self.realtime_client.chat_tts_text(