vad
This commit is contained in:
parent
7a547322e3
commit
2d2e1dbcdf
@ -71,6 +71,8 @@ class VoiceLiteSession:
|
||||
self._vad_silence_start: float = 0 # When silence started
|
||||
self._vad_finish_task: Optional[asyncio.Task] = None
|
||||
self._pre_buffer: list = [] # Buffer audio before VAD triggers
|
||||
self._vad_voice_streak: int = 0 # Consecutive voiced chunks count
|
||||
self._vad_silence_streak: int = 0 # Consecutive silent chunks count
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Fetch bot config, mark session as running."""
|
||||
@ -113,6 +115,8 @@ class VoiceLiteSession:
|
||||
VAD_SOURCE_RATE = 24000 # Input audio sample rate
|
||||
VAD_TARGET_RATE = 16000 # webrtcvad supported sample rate
|
||||
VAD_FRAME_DURATION_MS = 30 # Frame duration for webrtcvad (10, 20, or 30 ms)
|
||||
VAD_SPEECH_CHUNKS = 3 # Consecutive voiced chunks required to start speech
|
||||
VAD_SILENCE_CHUNKS = 5 # Consecutive silent chunks required to confirm silence
|
||||
|
||||
_audio_chunk_count = 0
|
||||
|
||||
@ -164,13 +168,21 @@ class VoiceLiteSession:
|
||||
has_voice = self._webrtcvad_detect(audio_data)
|
||||
now = asyncio.get_event_loop().time()
|
||||
|
||||
# Update consecutive streaks
|
||||
if has_voice:
|
||||
self._vad_voice_streak += 1
|
||||
self._vad_silence_streak = 0
|
||||
else:
|
||||
self._vad_silence_streak += 1
|
||||
self._vad_voice_streak = 0
|
||||
|
||||
if has_voice:
|
||||
# Cancel any pending finish
|
||||
if self._vad_finish_task and not self._vad_finish_task.done():
|
||||
self._vad_finish_task.cancel()
|
||||
self._vad_finish_task = None
|
||||
|
||||
if not self._vad_speaking:
|
||||
if not self._vad_speaking and self._vad_voice_streak >= self.VAD_SPEECH_CHUNKS:
|
||||
# Speech just started — connect ASR
|
||||
self._vad_speaking = True
|
||||
logger.info(f"[VoiceLite] VAD: speech started (webrtcvad), connecting ASR...")
|
||||
@ -205,8 +217,9 @@ class VoiceLiteSession:
|
||||
if self._vad_silence_start == 0:
|
||||
self._vad_silence_start = now
|
||||
|
||||
# Silence exceeded threshold -> send finish
|
||||
if (now - self._vad_silence_start) >= self.VAD_SILENCE_DURATION:
|
||||
# Require both consecutive silent chunks AND time threshold
|
||||
if (self._vad_silence_streak >= self.VAD_SILENCE_CHUNKS
|
||||
and (now - self._vad_silence_start) >= self.VAD_SILENCE_DURATION):
|
||||
if not self._vad_finish_task or self._vad_finish_task.done():
|
||||
self._vad_finish_task = asyncio.create_task(self._vad_send_finish())
|
||||
else:
|
||||
@ -220,6 +233,8 @@ class VoiceLiteSession:
|
||||
logger.info(f"[VoiceLite] VAD: silence detected, sending finish to ASR")
|
||||
self._vad_speaking = False
|
||||
self._vad_silence_start = 0
|
||||
self._vad_voice_streak = 0
|
||||
self._vad_silence_streak = 0
|
||||
if self._asr_client:
|
||||
try:
|
||||
await self._asr_client.send_finish()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user