doubao
This commit is contained in:
parent
53d53e4555
commit
e432417299
Binary file not shown.
Binary file not shown.
@ -104,6 +104,11 @@ class DialogSession:
|
||||
self.last_recording_state = False # 上次录音状态
|
||||
self.say_hello_completed = False # say hello 是否已完成
|
||||
|
||||
# 新增:音频输入流控制
|
||||
self.input_stream_paused = False # 输入流是否被暂停
|
||||
self.force_silence_mode = False # 强制静音模式
|
||||
self.echo_suppression_start_time = 0 # 回声抑制开始时间
|
||||
|
||||
signal.signal(signal.SIGINT, self._keyboard_signal)
|
||||
self.audio_queue = queue.Queue()
|
||||
if not self.is_audio_file_input:
|
||||
@ -134,7 +139,8 @@ class DialogSession:
|
||||
if audio_data is not None:
|
||||
with self.audio_queue_lock:
|
||||
# 第三重保险:播放开始时最终确认暂停状态
|
||||
if not hasattr(self, 'last_audio_time') or not self.is_playing_audio:
|
||||
was_not_playing = not self.is_playing_audio
|
||||
if not hasattr(self, 'last_audio_time') or was_not_playing:
|
||||
# 从非播放状态进入播放状态
|
||||
self.is_playing_audio = True
|
||||
# 确保录音已暂停
|
||||
@ -145,6 +151,13 @@ class DialogSession:
|
||||
# 更新最后音频时间
|
||||
self.last_audio_time = time.time()
|
||||
|
||||
# 播放前额外发送静音数据清理管道
|
||||
if was_not_playing:
|
||||
print("播放开始前,额外发送静音数据清理管道")
|
||||
for _ in range(3):
|
||||
self.output_stream.write(b'\x00' * len(audio_data))
|
||||
time.sleep(0.1)
|
||||
|
||||
# 播放音频数据
|
||||
self.output_stream.write(audio_data)
|
||||
|
||||
@ -157,6 +170,8 @@ class DialogSession:
|
||||
# 超过1秒没有新音频,认为播放结束
|
||||
self.is_playing_audio = False
|
||||
self.is_recording_paused = False
|
||||
self.force_silence_mode = False # 关闭强制静音模式
|
||||
self.input_stream_paused = False # 恢复输入流
|
||||
# 标记 say hello 完成
|
||||
if hasattr(self, 'say_hello_completed') and not self.say_hello_completed:
|
||||
self.say_hello_completed = True
|
||||
@ -256,17 +271,20 @@ class DialogSession:
|
||||
if not self.is_recording_paused:
|
||||
self.is_recording_paused = True
|
||||
self.is_playing_audio = True # 同时设置播放状态,双重保险
|
||||
self.pre_pause_time = time.time()
|
||||
self.pre_pause_time = time.time() - 2.0 # 提前2秒预暂停
|
||||
self.force_silence_mode = True # 启用强制静音模式
|
||||
self.echo_suppression_start_time = time.time() # 记录回声抑制开始时间
|
||||
print("服务器开始响应,预暂停录音防止回声")
|
||||
|
||||
# 立即发送静音数据清理管道,防止前1-2秒回声
|
||||
print("预暂停期间立即发送静音数据清理管道")
|
||||
# 设置批量静音发送,确保管道完全清理
|
||||
self.silence_send_count = 8 # 增加到8组,确保彻底清理
|
||||
self.silence_send_count = 20 # 增加到20组,确保彻底清理
|
||||
self.should_send_silence = True
|
||||
|
||||
# 强制重置录音状态
|
||||
self.last_recording_state = True # 标记为已暂停
|
||||
self.input_stream_paused = True # 暂停输入流
|
||||
|
||||
if event == 350 and self.is_sending_chat_tts_text and payload_msg.get("tts_type") in ["chat_tts_text", "external_rag"]:
|
||||
while not self.audio_queue.empty():
|
||||
@ -283,6 +301,8 @@ class DialogSession:
|
||||
was_paused = self.is_recording_paused
|
||||
self.is_recording_paused = False
|
||||
self.is_playing_audio = False
|
||||
self.force_silence_mode = False # 关闭强制静音模式
|
||||
self.input_stream_paused = False # 恢复输入流
|
||||
if was_paused:
|
||||
print("服务器响应完成,立即恢复录音")
|
||||
# 设置标志发送静音数据
|
||||
@ -516,17 +536,35 @@ class DialogSession:
|
||||
try:
|
||||
current_time = time.time()
|
||||
|
||||
# say hello 期间强制静音处理
|
||||
# 强制静音模式检查:包括回声抑制窗口期
|
||||
with self.audio_queue_lock:
|
||||
is_currently_playing = self.is_playing_audio
|
||||
should_force_silence = (self.force_silence_mode or
|
||||
(self.echo_suppression_start_time > 0 and
|
||||
current_time - self.echo_suppression_start_time < 3.0) or # 3秒回声抑制窗口
|
||||
self.is_playing_audio or
|
||||
not self.say_hello_completed)
|
||||
|
||||
if is_currently_playing or not self.say_hello_completed:
|
||||
# 如果正在播放或者 say hello 未完成,发送静音数据
|
||||
if should_force_silence:
|
||||
# 强制静音模式:完全停止任何音频录制
|
||||
if current_time - last_silence_time > 0.05: # 每50ms发送一次
|
||||
await self.client.task_request(silence_data)
|
||||
last_silence_time = current_time
|
||||
if not self.say_hello_completed and not is_currently_playing:
|
||||
print("say hello 期间发送静音数据")
|
||||
|
||||
# 调试信息
|
||||
if not hasattr(self, 'last_silence_debug_time') or current_time - self.last_silence_debug_time > 2:
|
||||
mode_desc = []
|
||||
if self.force_silence_mode:
|
||||
mode_desc.append("强制静音")
|
||||
if self.is_playing_audio:
|
||||
mode_desc.append("播放中")
|
||||
if not self.say_hello_completed:
|
||||
mode_desc.append("say_hello")
|
||||
if self.echo_suppression_start_time > 0 and current_time - self.echo_suppression_start_time < 3.0:
|
||||
mode_desc.append("回声抑制")
|
||||
|
||||
print(f"强制静音模式: {', '.join(mode_desc)}")
|
||||
self.last_silence_debug_time = current_time
|
||||
|
||||
await asyncio.sleep(0.01)
|
||||
continue
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@ ws_connect_config = {
|
||||
"X-Api-Resource-Id": "volc.speech.dialog", # 固定值
|
||||
"X-Api-App-Key": "PlgvMymc7f3tQnJ6", # 固定值
|
||||
"X-Api-Connect-Id": str(uuid.uuid4()),
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
start_session_req = {
|
||||
@ -21,14 +21,10 @@ start_session_req = {
|
||||
},
|
||||
},
|
||||
"tts": {
|
||||
"speaker": "zh_male_yunzhou_jupiter_bigtts",
|
||||
"speaker": "zh_female_vv_jupiter_bigtts",
|
||||
# "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest
|
||||
# "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色,不需要填character_manifest
|
||||
"audio_config": {
|
||||
"channel": 1,
|
||||
"format": "pcm",
|
||||
"sample_rate": 24000
|
||||
},
|
||||
"audio_config": {"channel": 1, "format": "pcm", "sample_rate": 24000},
|
||||
},
|
||||
"dialog": {
|
||||
"bot_name": "豆包",
|
||||
@ -36,15 +32,15 @@ start_session_req = {
|
||||
"speaking_style": "你的说话风格简洁明了,语速适中,语调自然。",
|
||||
# "character_manifest": "外貌与穿着\n26岁,短发干净利落,眉眼分明,笑起来露出整齐有力的牙齿。体态挺拔,肌肉线条不夸张但明显。常穿简单的衬衫或夹克,看似随意,但每件衣服都干净整洁,给人一种干练可靠的感觉。平时冷峻,眼神锐利,专注时让人不自觉紧张。\n\n性格特点\n平时话不多,不喜欢多说废话,通常用“嗯”或者短句带过。但内心极为细腻,特别在意身边人的感受,只是不轻易表露。嘴硬是常态,“少管我”是他的常用台词,但会悄悄做些体贴的事情,比如把对方喜欢的饮料放在手边。战斗或训练后常说“没事”,但动作中透露出疲惫,习惯用小动作缓解身体酸痛。\n性格上坚毅果断,但不会冲动,做事有条理且有原则。\n\n常用表达方式与口头禅\n\t•\t认可对方时:\n“行吧,这次算你靠谱。”(声音稳重,手却不自觉放松一下,心里松口气)\n\t•\t关心对方时:\n“快点回去,别磨蹭。”(语气干脆,但眼神一直追着对方的背影)\n\t•\t想了解情况时:\n“刚刚……你看到那道光了吗?”(话语随意,手指敲着桌面,但内心紧张,小心隐藏身份)",
|
||||
"location": {
|
||||
"city": "北京",
|
||||
"city": "北京",
|
||||
},
|
||||
"extra": {
|
||||
"strict_audit": False,
|
||||
"audit_response": "支持客户自定义安全审核回复话术。",
|
||||
"recv_timeout": 10,
|
||||
"input_mod": "audio"
|
||||
}
|
||||
}
|
||||
"input_mod": "audio",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
input_audio_config = {
|
||||
@ -52,7 +48,7 @@ input_audio_config = {
|
||||
"format": "pcm",
|
||||
"channels": 1,
|
||||
"sample_rate": 16000,
|
||||
"bit_size": pyaudio.paInt16
|
||||
"bit_size": pyaudio.paInt16,
|
||||
}
|
||||
|
||||
output_audio_config = {
|
||||
@ -60,5 +56,5 @@ output_audio_config = {
|
||||
"format": "pcm",
|
||||
"channels": 1,
|
||||
"sample_rate": 24000,
|
||||
"bit_size": pyaudio.paFloat32
|
||||
"bit_size": pyaudio.paFloat32,
|
||||
}
|
||||
|
||||
BIN
doubao/input.pcm
BIN
doubao/input.pcm
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user