This commit is contained in:
朱潮 2025-09-19 19:44:17 +08:00
parent 53d53e4555
commit e432417299
7 changed files with 56 additions and 22 deletions

BIN
.DS_Store vendored

Binary file not shown.

View File

@@ -103,6 +103,11 @@ class DialogSession:
self.pre_pause_time = 0 # 预暂停时间 self.pre_pause_time = 0 # 预暂停时间
self.last_recording_state = False # 上次录音状态 self.last_recording_state = False # 上次录音状态
self.say_hello_completed = False # say hello 是否已完成 self.say_hello_completed = False # say hello 是否已完成
# 新增:音频输入流控制
self.input_stream_paused = False # 输入流是否被暂停
self.force_silence_mode = False # 强制静音模式
self.echo_suppression_start_time = 0 # 回声抑制开始时间
signal.signal(signal.SIGINT, self._keyboard_signal) signal.signal(signal.SIGINT, self._keyboard_signal)
self.audio_queue = queue.Queue() self.audio_queue = queue.Queue()
@@ -134,7 +139,8 @@ class DialogSession:
if audio_data is not None: if audio_data is not None:
with self.audio_queue_lock: with self.audio_queue_lock:
# 第三重保险:播放开始时最终确认暂停状态 # 第三重保险:播放开始时最终确认暂停状态
if not hasattr(self, 'last_audio_time') or not self.is_playing_audio: was_not_playing = not self.is_playing_audio
if not hasattr(self, 'last_audio_time') or was_not_playing:
# 从非播放状态进入播放状态 # 从非播放状态进入播放状态
self.is_playing_audio = True self.is_playing_audio = True
# 确保录音已暂停 # 确保录音已暂停
@@ -145,6 +151,13 @@ class DialogSession:
# 更新最后音频时间 # 更新最后音频时间
self.last_audio_time = time.time() self.last_audio_time = time.time()
# 播放前额外发送静音数据清理管道
if was_not_playing:
print("播放开始前,额外发送静音数据清理管道")
for _ in range(3):
self.output_stream.write(b'\x00' * len(audio_data))
time.sleep(0.1)
# 播放音频数据 # 播放音频数据
self.output_stream.write(audio_data) self.output_stream.write(audio_data)
@@ -157,6 +170,8 @@ class DialogSession:
# 超过1秒没有新音频认为播放结束 # 超过1秒没有新音频认为播放结束
self.is_playing_audio = False self.is_playing_audio = False
self.is_recording_paused = False self.is_recording_paused = False
self.force_silence_mode = False # 关闭强制静音模式
self.input_stream_paused = False # 恢复输入流
# 标记 say hello 完成 # 标记 say hello 完成
if hasattr(self, 'say_hello_completed') and not self.say_hello_completed: if hasattr(self, 'say_hello_completed') and not self.say_hello_completed:
self.say_hello_completed = True self.say_hello_completed = True
@@ -256,17 +271,20 @@ class DialogSession:
if not self.is_recording_paused: if not self.is_recording_paused:
self.is_recording_paused = True self.is_recording_paused = True
self.is_playing_audio = True # 同时设置播放状态,双重保险 self.is_playing_audio = True # 同时设置播放状态,双重保险
self.pre_pause_time = time.time() self.pre_pause_time = time.time() - 2.0 # 提前2秒预暂停
self.force_silence_mode = True # 启用强制静音模式
self.echo_suppression_start_time = time.time() # 记录回声抑制开始时间
print("服务器开始响应,预暂停录音防止回声") print("服务器开始响应,预暂停录音防止回声")
# 立即发送静音数据清理管道防止前1-2秒回声 # 立即发送静音数据清理管道防止前1-2秒回声
print("预暂停期间立即发送静音数据清理管道") print("预暂停期间立即发送静音数据清理管道")
# 设置批量静音发送,确保管道完全清理 # 设置批量静音发送,确保管道完全清理
self.silence_send_count = 8 # 增加到8组,确保彻底清理 self.silence_send_count = 20 # 增加到20组,确保彻底清理
self.should_send_silence = True self.should_send_silence = True
# 强制重置录音状态 # 强制重置录音状态
self.last_recording_state = True # 标记为已暂停 self.last_recording_state = True # 标记为已暂停
self.input_stream_paused = True # 暂停输入流
if event == 350 and self.is_sending_chat_tts_text and payload_msg.get("tts_type") in ["chat_tts_text", "external_rag"]: if event == 350 and self.is_sending_chat_tts_text and payload_msg.get("tts_type") in ["chat_tts_text", "external_rag"]:
while not self.audio_queue.empty(): while not self.audio_queue.empty():
@@ -283,6 +301,8 @@ class DialogSession:
was_paused = self.is_recording_paused was_paused = self.is_recording_paused
self.is_recording_paused = False self.is_recording_paused = False
self.is_playing_audio = False self.is_playing_audio = False
self.force_silence_mode = False # 关闭强制静音模式
self.input_stream_paused = False # 恢复输入流
if was_paused: if was_paused:
print("服务器响应完成,立即恢复录音") print("服务器响应完成,立即恢复录音")
# 设置标志发送静音数据 # 设置标志发送静音数据
@@ -516,17 +536,35 @@ class DialogSession:
try: try:
current_time = time.time() current_time = time.time()
# say hello 期间强制静音处理 # 强制静音模式检查:包括回声抑制窗口期
with self.audio_queue_lock: with self.audio_queue_lock:
is_currently_playing = self.is_playing_audio should_force_silence = (self.force_silence_mode or
(self.echo_suppression_start_time > 0 and
current_time - self.echo_suppression_start_time < 3.0) or # 3秒回声抑制窗口
self.is_playing_audio or
not self.say_hello_completed)
if is_currently_playing or not self.say_hello_completed: if should_force_silence:
# 如果正在播放或者 say hello 未完成,发送静音数据 # 强制静音模式:完全停止任何音频录制
if current_time - last_silence_time > 0.05: # 每50ms发送一次 if current_time - last_silence_time > 0.05: # 每50ms发送一次
await self.client.task_request(silence_data) await self.client.task_request(silence_data)
last_silence_time = current_time last_silence_time = current_time
if not self.say_hello_completed and not is_currently_playing:
print("say hello 期间发送静音数据") # 调试信息
if not hasattr(self, 'last_silence_debug_time') or current_time - self.last_silence_debug_time > 2:
mode_desc = []
if self.force_silence_mode:
mode_desc.append("强制静音")
if self.is_playing_audio:
mode_desc.append("播放中")
if not self.say_hello_completed:
mode_desc.append("say_hello")
if self.echo_suppression_start_time > 0 and current_time - self.echo_suppression_start_time < 3.0:
mode_desc.append("回声抑制")
print(f"强制静音模式: {', '.join(mode_desc)}")
self.last_silence_debug_time = current_time
await asyncio.sleep(0.01) await asyncio.sleep(0.01)
continue continue

View File

@@ -11,7 +11,7 @@ ws_connect_config = {
"X-Api-Resource-Id": "volc.speech.dialog", # 固定值 "X-Api-Resource-Id": "volc.speech.dialog", # 固定值
"X-Api-App-Key": "PlgvMymc7f3tQnJ6", # 固定值 "X-Api-App-Key": "PlgvMymc7f3tQnJ6", # 固定值
"X-Api-Connect-Id": str(uuid.uuid4()), "X-Api-Connect-Id": str(uuid.uuid4()),
} },
} }
start_session_req = { start_session_req = {
@@ -21,14 +21,10 @@ start_session_req = {
}, },
}, },
"tts": { "tts": {
"speaker": "zh_male_yunzhou_jupiter_bigtts", "speaker": "zh_female_vv_jupiter_bigtts",
# "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest # "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest
# "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色不需要填character_manifest # "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色不需要填character_manifest
"audio_config": { "audio_config": {"channel": 1, "format": "pcm", "sample_rate": 24000},
"channel": 1,
"format": "pcm",
"sample_rate": 24000
},
}, },
"dialog": { "dialog": {
"bot_name": "豆包", "bot_name": "豆包",
@@ -36,15 +32,15 @@ start_session_req = {
"speaking_style": "你的说话风格简洁明了,语速适中,语调自然。", "speaking_style": "你的说话风格简洁明了,语速适中,语调自然。",
# "character_manifest": "外貌与穿着\n26岁短发干净利落眉眼分明笑起来露出整齐有力的牙齿。体态挺拔肌肉线条不夸张但明显。常穿简单的衬衫或夹克看似随意但每件衣服都干净整洁给人一种干练可靠的感觉。平时冷峻眼神锐利专注时让人不自觉紧张。\n\n性格特点\n平时话不多不喜欢多说废话通常用“嗯”或者短句带过。但内心极为细腻特别在意身边人的感受只是不轻易表露。嘴硬是常态“少管我”是他的常用台词但会悄悄做些体贴的事情比如把对方喜欢的饮料放在手边。战斗或训练后常说“没事”但动作中透露出疲惫习惯用小动作缓解身体酸痛。\n性格上坚毅果断但不会冲动做事有条理且有原则。\n\n常用表达方式与口头禅\n\t•\t认可对方时\n“行吧这次算你靠谱。”声音稳重手却不自觉放松一下心里松口气\n\t•\t关心对方时\n“快点回去别磨蹭。”语气干脆但眼神一直追着对方的背影\n\t•\t想了解情况时\n“刚刚……你看到那道光了吗话语随意手指敲着桌面但内心紧张小心隐藏身份", # "character_manifest": "外貌与穿着\n26岁短发干净利落眉眼分明笑起来露出整齐有力的牙齿。体态挺拔肌肉线条不夸张但明显。常穿简单的衬衫或夹克看似随意但每件衣服都干净整洁给人一种干练可靠的感觉。平时冷峻眼神锐利专注时让人不自觉紧张。\n\n性格特点\n平时话不多不喜欢多说废话通常用“嗯”或者短句带过。但内心极为细腻特别在意身边人的感受只是不轻易表露。嘴硬是常态“少管我”是他的常用台词但会悄悄做些体贴的事情比如把对方喜欢的饮料放在手边。战斗或训练后常说“没事”但动作中透露出疲惫习惯用小动作缓解身体酸痛。\n性格上坚毅果断但不会冲动做事有条理且有原则。\n\n常用表达方式与口头禅\n\t•\t认可对方时\n“行吧这次算你靠谱。”声音稳重手却不自觉放松一下心里松口气\n\t•\t关心对方时\n“快点回去别磨蹭。”语气干脆但眼神一直追着对方的背影\n\t•\t想了解情况时\n“刚刚……你看到那道光了吗话语随意手指敲着桌面但内心紧张小心隐藏身份",
"location": { "location": {
"city": "北京", "city": "北京",
}, },
"extra": { "extra": {
"strict_audit": False, "strict_audit": False,
"audit_response": "支持客户自定义安全审核回复话术。", "audit_response": "支持客户自定义安全审核回复话术。",
"recv_timeout": 10, "recv_timeout": 10,
"input_mod": "audio" "input_mod": "audio",
} },
} },
} }
input_audio_config = { input_audio_config = {
@@ -52,7 +48,7 @@ input_audio_config = {
"format": "pcm", "format": "pcm",
"channels": 1, "channels": 1,
"sample_rate": 16000, "sample_rate": 16000,
"bit_size": pyaudio.paInt16 "bit_size": pyaudio.paInt16,
} }
output_audio_config = { output_audio_config = {
@@ -60,5 +56,5 @@ output_audio_config = {
"format": "pcm", "format": "pcm",
"channels": 1, "channels": 1,
"sample_rate": 24000, "sample_rate": 24000,
"bit_size": pyaudio.paFloat32 "bit_size": pyaudio.paFloat32,
} }

Binary file not shown.

Binary file not shown.