doubao
This commit is contained in:
parent
53d53e4555
commit
e432417299
Binary file not shown.
Binary file not shown.
@ -103,6 +103,11 @@ class DialogSession:
|
|||||||
self.pre_pause_time = 0 # 预暂停时间
|
self.pre_pause_time = 0 # 预暂停时间
|
||||||
self.last_recording_state = False # 上次录音状态
|
self.last_recording_state = False # 上次录音状态
|
||||||
self.say_hello_completed = False # say hello 是否已完成
|
self.say_hello_completed = False # say hello 是否已完成
|
||||||
|
|
||||||
|
# 新增:音频输入流控制
|
||||||
|
self.input_stream_paused = False # 输入流是否被暂停
|
||||||
|
self.force_silence_mode = False # 强制静音模式
|
||||||
|
self.echo_suppression_start_time = 0 # 回声抑制开始时间
|
||||||
|
|
||||||
signal.signal(signal.SIGINT, self._keyboard_signal)
|
signal.signal(signal.SIGINT, self._keyboard_signal)
|
||||||
self.audio_queue = queue.Queue()
|
self.audio_queue = queue.Queue()
|
||||||
@ -134,7 +139,8 @@ class DialogSession:
|
|||||||
if audio_data is not None:
|
if audio_data is not None:
|
||||||
with self.audio_queue_lock:
|
with self.audio_queue_lock:
|
||||||
# 第三重保险:播放开始时最终确认暂停状态
|
# 第三重保险:播放开始时最终确认暂停状态
|
||||||
if not hasattr(self, 'last_audio_time') or not self.is_playing_audio:
|
was_not_playing = not self.is_playing_audio
|
||||||
|
if not hasattr(self, 'last_audio_time') or was_not_playing:
|
||||||
# 从非播放状态进入播放状态
|
# 从非播放状态进入播放状态
|
||||||
self.is_playing_audio = True
|
self.is_playing_audio = True
|
||||||
# 确保录音已暂停
|
# 确保录音已暂停
|
||||||
@ -145,6 +151,13 @@ class DialogSession:
|
|||||||
# 更新最后音频时间
|
# 更新最后音频时间
|
||||||
self.last_audio_time = time.time()
|
self.last_audio_time = time.time()
|
||||||
|
|
||||||
|
# 播放前额外发送静音数据清理管道
|
||||||
|
if was_not_playing:
|
||||||
|
print("播放开始前,额外发送静音数据清理管道")
|
||||||
|
for _ in range(3):
|
||||||
|
self.output_stream.write(b'\x00' * len(audio_data))
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
# 播放音频数据
|
# 播放音频数据
|
||||||
self.output_stream.write(audio_data)
|
self.output_stream.write(audio_data)
|
||||||
|
|
||||||
@ -157,6 +170,8 @@ class DialogSession:
|
|||||||
# 超过1秒没有新音频,认为播放结束
|
# 超过1秒没有新音频,认为播放结束
|
||||||
self.is_playing_audio = False
|
self.is_playing_audio = False
|
||||||
self.is_recording_paused = False
|
self.is_recording_paused = False
|
||||||
|
self.force_silence_mode = False # 关闭强制静音模式
|
||||||
|
self.input_stream_paused = False # 恢复输入流
|
||||||
# 标记 say hello 完成
|
# 标记 say hello 完成
|
||||||
if hasattr(self, 'say_hello_completed') and not self.say_hello_completed:
|
if hasattr(self, 'say_hello_completed') and not self.say_hello_completed:
|
||||||
self.say_hello_completed = True
|
self.say_hello_completed = True
|
||||||
@ -256,17 +271,20 @@ class DialogSession:
|
|||||||
if not self.is_recording_paused:
|
if not self.is_recording_paused:
|
||||||
self.is_recording_paused = True
|
self.is_recording_paused = True
|
||||||
self.is_playing_audio = True # 同时设置播放状态,双重保险
|
self.is_playing_audio = True # 同时设置播放状态,双重保险
|
||||||
self.pre_pause_time = time.time()
|
self.pre_pause_time = time.time() - 2.0 # 提前2秒预暂停
|
||||||
|
self.force_silence_mode = True # 启用强制静音模式
|
||||||
|
self.echo_suppression_start_time = time.time() # 记录回声抑制开始时间
|
||||||
print("服务器开始响应,预暂停录音防止回声")
|
print("服务器开始响应,预暂停录音防止回声")
|
||||||
|
|
||||||
# 立即发送静音数据清理管道,防止前1-2秒回声
|
# 立即发送静音数据清理管道,防止前1-2秒回声
|
||||||
print("预暂停期间立即发送静音数据清理管道")
|
print("预暂停期间立即发送静音数据清理管道")
|
||||||
# 设置批量静音发送,确保管道完全清理
|
# 设置批量静音发送,确保管道完全清理
|
||||||
self.silence_send_count = 8 # 增加到8组,确保彻底清理
|
self.silence_send_count = 20 # 增加到20组,确保彻底清理
|
||||||
self.should_send_silence = True
|
self.should_send_silence = True
|
||||||
|
|
||||||
# 强制重置录音状态
|
# 强制重置录音状态
|
||||||
self.last_recording_state = True # 标记为已暂停
|
self.last_recording_state = True # 标记为已暂停
|
||||||
|
self.input_stream_paused = True # 暂停输入流
|
||||||
|
|
||||||
if event == 350 and self.is_sending_chat_tts_text and payload_msg.get("tts_type") in ["chat_tts_text", "external_rag"]:
|
if event == 350 and self.is_sending_chat_tts_text and payload_msg.get("tts_type") in ["chat_tts_text", "external_rag"]:
|
||||||
while not self.audio_queue.empty():
|
while not self.audio_queue.empty():
|
||||||
@ -283,6 +301,8 @@ class DialogSession:
|
|||||||
was_paused = self.is_recording_paused
|
was_paused = self.is_recording_paused
|
||||||
self.is_recording_paused = False
|
self.is_recording_paused = False
|
||||||
self.is_playing_audio = False
|
self.is_playing_audio = False
|
||||||
|
self.force_silence_mode = False # 关闭强制静音模式
|
||||||
|
self.input_stream_paused = False # 恢复输入流
|
||||||
if was_paused:
|
if was_paused:
|
||||||
print("服务器响应完成,立即恢复录音")
|
print("服务器响应完成,立即恢复录音")
|
||||||
# 设置标志发送静音数据
|
# 设置标志发送静音数据
|
||||||
@ -516,17 +536,35 @@ class DialogSession:
|
|||||||
try:
|
try:
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
|
|
||||||
# say hello 期间强制静音处理
|
# 强制静音模式检查:包括回声抑制窗口期
|
||||||
with self.audio_queue_lock:
|
with self.audio_queue_lock:
|
||||||
is_currently_playing = self.is_playing_audio
|
should_force_silence = (self.force_silence_mode or
|
||||||
|
(self.echo_suppression_start_time > 0 and
|
||||||
|
current_time - self.echo_suppression_start_time < 3.0) or # 3秒回声抑制窗口
|
||||||
|
self.is_playing_audio or
|
||||||
|
not self.say_hello_completed)
|
||||||
|
|
||||||
if is_currently_playing or not self.say_hello_completed:
|
if should_force_silence:
|
||||||
# 如果正在播放或者 say hello 未完成,发送静音数据
|
# 强制静音模式:完全停止任何音频录制
|
||||||
if current_time - last_silence_time > 0.05: # 每50ms发送一次
|
if current_time - last_silence_time > 0.05: # 每50ms发送一次
|
||||||
await self.client.task_request(silence_data)
|
await self.client.task_request(silence_data)
|
||||||
last_silence_time = current_time
|
last_silence_time = current_time
|
||||||
if not self.say_hello_completed and not is_currently_playing:
|
|
||||||
print("say hello 期间发送静音数据")
|
# 调试信息
|
||||||
|
if not hasattr(self, 'last_silence_debug_time') or current_time - self.last_silence_debug_time > 2:
|
||||||
|
mode_desc = []
|
||||||
|
if self.force_silence_mode:
|
||||||
|
mode_desc.append("强制静音")
|
||||||
|
if self.is_playing_audio:
|
||||||
|
mode_desc.append("播放中")
|
||||||
|
if not self.say_hello_completed:
|
||||||
|
mode_desc.append("say_hello")
|
||||||
|
if self.echo_suppression_start_time > 0 and current_time - self.echo_suppression_start_time < 3.0:
|
||||||
|
mode_desc.append("回声抑制")
|
||||||
|
|
||||||
|
print(f"强制静音模式: {', '.join(mode_desc)}")
|
||||||
|
self.last_silence_debug_time = current_time
|
||||||
|
|
||||||
await asyncio.sleep(0.01)
|
await asyncio.sleep(0.01)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|||||||
@ -11,7 +11,7 @@ ws_connect_config = {
|
|||||||
"X-Api-Resource-Id": "volc.speech.dialog", # 固定值
|
"X-Api-Resource-Id": "volc.speech.dialog", # 固定值
|
||||||
"X-Api-App-Key": "PlgvMymc7f3tQnJ6", # 固定值
|
"X-Api-App-Key": "PlgvMymc7f3tQnJ6", # 固定值
|
||||||
"X-Api-Connect-Id": str(uuid.uuid4()),
|
"X-Api-Connect-Id": str(uuid.uuid4()),
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
start_session_req = {
|
start_session_req = {
|
||||||
@ -21,14 +21,10 @@ start_session_req = {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
"tts": {
|
"tts": {
|
||||||
"speaker": "zh_male_yunzhou_jupiter_bigtts",
|
"speaker": "zh_female_vv_jupiter_bigtts",
|
||||||
# "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest
|
# "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest
|
||||||
# "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色,不需要填character_manifest
|
# "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色,不需要填character_manifest
|
||||||
"audio_config": {
|
"audio_config": {"channel": 1, "format": "pcm", "sample_rate": 24000},
|
||||||
"channel": 1,
|
|
||||||
"format": "pcm",
|
|
||||||
"sample_rate": 24000
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
"dialog": {
|
"dialog": {
|
||||||
"bot_name": "豆包",
|
"bot_name": "豆包",
|
||||||
@ -36,15 +32,15 @@ start_session_req = {
|
|||||||
"speaking_style": "你的说话风格简洁明了,语速适中,语调自然。",
|
"speaking_style": "你的说话风格简洁明了,语速适中,语调自然。",
|
||||||
# "character_manifest": "外貌与穿着\n26岁,短发干净利落,眉眼分明,笑起来露出整齐有力的牙齿。体态挺拔,肌肉线条不夸张但明显。常穿简单的衬衫或夹克,看似随意,但每件衣服都干净整洁,给人一种干练可靠的感觉。平时冷峻,眼神锐利,专注时让人不自觉紧张。\n\n性格特点\n平时话不多,不喜欢多说废话,通常用“嗯”或者短句带过。但内心极为细腻,特别在意身边人的感受,只是不轻易表露。嘴硬是常态,“少管我”是他的常用台词,但会悄悄做些体贴的事情,比如把对方喜欢的饮料放在手边。战斗或训练后常说“没事”,但动作中透露出疲惫,习惯用小动作缓解身体酸痛。\n性格上坚毅果断,但不会冲动,做事有条理且有原则。\n\n常用表达方式与口头禅\n\t•\t认可对方时:\n“行吧,这次算你靠谱。”(声音稳重,手却不自觉放松一下,心里松口气)\n\t•\t关心对方时:\n“快点回去,别磨蹭。”(语气干脆,但眼神一直追着对方的背影)\n\t•\t想了解情况时:\n“刚刚……你看到那道光了吗?”(话语随意,手指敲着桌面,但内心紧张,小心隐藏身份)",
|
# "character_manifest": "外貌与穿着\n26岁,短发干净利落,眉眼分明,笑起来露出整齐有力的牙齿。体态挺拔,肌肉线条不夸张但明显。常穿简单的衬衫或夹克,看似随意,但每件衣服都干净整洁,给人一种干练可靠的感觉。平时冷峻,眼神锐利,专注时让人不自觉紧张。\n\n性格特点\n平时话不多,不喜欢多说废话,通常用“嗯”或者短句带过。但内心极为细腻,特别在意身边人的感受,只是不轻易表露。嘴硬是常态,“少管我”是他的常用台词,但会悄悄做些体贴的事情,比如把对方喜欢的饮料放在手边。战斗或训练后常说“没事”,但动作中透露出疲惫,习惯用小动作缓解身体酸痛。\n性格上坚毅果断,但不会冲动,做事有条理且有原则。\n\n常用表达方式与口头禅\n\t•\t认可对方时:\n“行吧,这次算你靠谱。”(声音稳重,手却不自觉放松一下,心里松口气)\n\t•\t关心对方时:\n“快点回去,别磨蹭。”(语气干脆,但眼神一直追着对方的背影)\n\t•\t想了解情况时:\n“刚刚……你看到那道光了吗?”(话语随意,手指敲着桌面,但内心紧张,小心隐藏身份)",
|
||||||
"location": {
|
"location": {
|
||||||
"city": "北京",
|
"city": "北京",
|
||||||
},
|
},
|
||||||
"extra": {
|
"extra": {
|
||||||
"strict_audit": False,
|
"strict_audit": False,
|
||||||
"audit_response": "支持客户自定义安全审核回复话术。",
|
"audit_response": "支持客户自定义安全审核回复话术。",
|
||||||
"recv_timeout": 10,
|
"recv_timeout": 10,
|
||||||
"input_mod": "audio"
|
"input_mod": "audio",
|
||||||
}
|
},
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
input_audio_config = {
|
input_audio_config = {
|
||||||
@ -52,7 +48,7 @@ input_audio_config = {
|
|||||||
"format": "pcm",
|
"format": "pcm",
|
||||||
"channels": 1,
|
"channels": 1,
|
||||||
"sample_rate": 16000,
|
"sample_rate": 16000,
|
||||||
"bit_size": pyaudio.paInt16
|
"bit_size": pyaudio.paInt16,
|
||||||
}
|
}
|
||||||
|
|
||||||
output_audio_config = {
|
output_audio_config = {
|
||||||
@ -60,5 +56,5 @@ output_audio_config = {
|
|||||||
"format": "pcm",
|
"format": "pcm",
|
||||||
"channels": 1,
|
"channels": 1,
|
||||||
"sample_rate": 24000,
|
"sample_rate": 24000,
|
||||||
"bit_size": pyaudio.paFloat32
|
"bit_size": pyaudio.paFloat32,
|
||||||
}
|
}
|
||||||
|
|||||||
BIN
doubao/input.pcm
BIN
doubao/input.pcm
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user