feat: Text to speech support streaming playback (#2661)

This commit is contained in:
shaohuzhang1 2025-03-24 14:21:29 +08:00 committed by GitHub
parent 0ce6dd0795
commit dcee1b6d55
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 343 additions and 136 deletions

View File

@ -1,5 +1,6 @@
import { type Dict } from '@/api/type/common' import { type Dict } from '@/api/type/common'
import { type Ref } from 'vue' import { type Ref } from 'vue'
import bus from '@/bus'
interface ApplicationFormType { interface ApplicationFormType {
name?: string name?: string
desc?: string desc?: string
@ -144,8 +145,8 @@ export class ChatRecordManage {
}) })
} }
} }
this.chat.answer_text = this.chat.answer_text + chunk_answer this.chat.answer_text = this.chat.answer_text + chunk_answer
bus.emit('change:answer', { record_id: this.chat.record_id, is_end: false })
} }
get_current_up_node(run_node: any) { get_current_up_node(run_node: any) {
const index = this.node_list.findIndex((item) => item == run_node) const index = this.node_list.findIndex((item) => item == run_node)
@ -232,6 +233,7 @@ export class ChatRecordManage {
if (this.loading) { if (this.loading) {
this.loading.value = false this.loading.value = false
} }
bus.emit('change:answer', { record_id: this.chat.record_id, is_end: true })
if (this.id) { if (this.id) {
clearInterval(this.id) clearInterval(this.id)
} }

View File

@ -8,20 +8,35 @@
<!-- 语音播放 --> <!-- 语音播放 -->
<span v-if="tts"> <span v-if="tts">
<el-tooltip <el-tooltip
v-if="audioManage?.isPlaying()"
effect="dark" effect="dark"
:content="$t('chat.operation.play')" :content="$t('chat.operation.pause')"
placement="top" placement="top"
v-if="!audioPlayerStatus"
> >
<el-button text :disabled="!data?.write_ed" @click="playAnswerText(data?.answer_text)"> <el-button
<AppIcon iconName="app-video-play"></AppIcon> type="primary"
</el-button> text
</el-tooltip> :disabled="!data?.write_ed"
<el-tooltip v-else effect="dark" :content="$t('chat.operation.pause')" placement="top"> @click="audioManage?.pause(true)"
<el-button type="primary" text :disabled="!data?.write_ed" @click="pausePlayAnswerText()"> >
<AppIcon iconName="app-video-pause"></AppIcon> <AppIcon iconName="app-video-pause"></AppIcon>
</el-button> </el-button>
</el-tooltip> </el-tooltip>
<el-tooltip effect="dark" :content="$t('chat.operation.play')" placement="top" v-else>
<el-button
text
:disabled="!data?.write_ed"
@click="
() => {
bus.emit('play:pause', props.data.record_id)
audioManage?.play(props.data.answer_text, true)
}
"
>
<AppIcon iconName="app-video-play"></AppIcon>
</el-button>
</el-tooltip>
<el-divider direction="vertical" /> <el-divider direction="vertical" />
</span> </span>
<span v-if="type == 'ai-chat' || type == 'log'"> <span v-if="type == 'ai-chat' || type == 'log'">
@ -82,6 +97,7 @@
</div> </div>
<!-- 先渲染不然不能播放 --> <!-- 先渲染不然不能播放 -->
<audio ref="audioPlayer" v-for="item in audioList" :key="item" controls hidden="hidden"></audio> <audio ref="audioPlayer" v-for="item in audioList" :key="item" controls hidden="hidden"></audio>
<div ref="audioCiontainer"></div>
</div> </div>
</template> </template>
<script setup lang="ts"> <script setup lang="ts">
@ -91,8 +107,9 @@ import { copyClick } from '@/utils/clipboard'
import applicationApi from '@/api/application' import applicationApi from '@/api/application'
import { datetimeFormat } from '@/utils/time' import { datetimeFormat } from '@/utils/time'
import { MsgError } from '@/utils/message' import { MsgError } from '@/utils/message'
import { t } from '@/locales'
import bus from '@/bus' import bus from '@/bus'
import { da } from 'element-plus/es/locale'
const route = useRoute() const route = useRoute()
const { const {
params: { id } params: { id }
@ -118,12 +135,12 @@ const props = withDefaults(
const emit = defineEmits(['update:data', 'regeneration']) const emit = defineEmits(['update:data', 'regeneration'])
const audioPlayer = ref<HTMLAudioElement[] | null>([]) const audioPlayer = ref<HTMLAudioElement[] | null>([])
const audioCiontainer = ref<HTMLDivElement>()
const audioPlayerStatus = ref(false) const audioPlayerStatus = ref(false)
const buttonData = ref(props.data) const buttonData = ref(props.data)
const loading = ref(false) const loading = ref(false)
const utterance = ref<SpeechSynthesisUtterance | null>(null)
const audioList = ref<string[]>([]) const audioList = ref<string[]>([])
const currentAudioIndex = ref(0)
function regeneration() { function regeneration() {
emit('regeneration') emit('regeneration')
@ -166,144 +183,331 @@ function markdownToPlainText(md: string) {
function removeFormRander(text: string) { function removeFormRander(text: string) {
return text.replace(/<form_rander>[\s\S]*?<\/form_rander>/g, '').trim() return text.replace(/<form_rander>[\s\S]*?<\/form_rander>/g, '').trim()
} }
function getKey(keys: Array<number>, index: number) {
const playAnswerText = (text: string) => { // index
if (!text) { for (let i = keys.length - 1; i >= 0; i--) {
text = t('chat.tip.answerMessage') if (keys[i] <= index) {
return keys[i]
}
} }
// return 0
text = removeFormRander(text) }
// text function smartSplit(
text = markdownToPlainText(text) str: string,
// console.log(text) minLengthConfig: any = {
audioPlayerStatus.value = true 0: 10,
// 1: 25,
audioList.value = text.split(/(<audio[^>]*><\/audio>)/).filter((item) => item.trim().length > 0) 3: 50,
nextTick(() => { 5: 100
// console.log(audioList.value, audioPlayer.value) },
playAnswerTextPart() is_end = false
}) ) {
// /20
const regex = /([。?\n])|(<audio[^>]*><\/audio>)/g
//
const parts = str.split(regex)
const result = []
const keys = Object.keys(minLengthConfig).map(Number)
let minLength = minLengthConfig[0]
let temp_str = ''
for (let i = 0; i < parts.length; i++) {
const content = parts[i]
if (content == undefined) {
continue
}
if (/^<audio[^>]*><\/audio>$/.test(content)) {
if (temp_str.length > 0) {
result.push(temp_str)
temp_str = ''
}
result.push(content)
continue
}
temp_str += parts[i]
if (temp_str.length > minLength && /[。?\n]$/.test(temp_str)) {
minLength = minLengthConfig[getKey(keys, i)]
result.push(temp_str)
temp_str = ''
}
}
if (temp_str.length > 0 && is_end) {
result.push(temp_str)
}
return result
} }
const playAnswerTextPart = () => { enum AudioStatus {
// console.log(audioList.value, currentAudioIndex.value) /**
if (currentAudioIndex.value === audioList.value.length) { * 结束
audioPlayerStatus.value = false */
currentAudioIndex.value = 0 END = 'END',
return /**
* 播放中
*/
PLAY_INT = 'PLAY_INT',
/**
* 刚挂载
*/
MOUNTED = 'MOUNTED',
/**
* 就绪
*/
READY = 'READY',
/**
* 错误
*/
ERROR = 'ERROR'
}
class AudioManage {
textList: Array<string>
statusList: Array<AudioStatus>
audioList: Array<HTMLAudioElement | SpeechSynthesisUtterance>
ttsType: string
root: Element
/**
 * Create an audio manager with no segments queued yet.
 *
 * @param ttsType Playback mode. Elsewhere in this class the value 'TTS'
 *   selects <audio> elements; any other value selects SpeechSynthesisUtterance.
 * @param root Container element that generated <audio> elements are appended to.
 */
constructor(ttsType: string, root: HTMLDivElement) {
  // Configuration supplied by the caller.
  this.root = root
  this.ttsType = ttsType
  // Parallel per-segment lists: text, playback status and playable object.
  this.statusList = []
  this.audioList = []
  this.textList = []
}
if (audioList.value[currentAudioIndex.value].includes('<audio')) { appendTextList(textList: Array<string>) {
if (audioPlayer.value) { const newTextList = textList.slice(this.textList.length)
audioPlayer.value[currentAudioIndex.value].src = //
audioList.value[currentAudioIndex.value].match(/src="([^"]*)"/)?.[1] || '' if (newTextList.length <= 0) {
audioPlayer.value[currentAudioIndex.value].play() // return
audioPlayer.value[currentAudioIndex.value].onended = () => { }
currentAudioIndex.value += 1 newTextList.forEach((text, index) => {
playAnswerTextPart() this.textList.push(text)
this.statusList.push(AudioStatus.MOUNTED)
index = this.textList.length - 1
if (this.ttsType === 'TTS') {
const audioElement: HTMLAudioElement = document.createElement('audio')
audioElement.controls = true
audioElement.hidden = true
/**
* 播放结束事件
*/
audioElement.onended = () => {
this.statusList[index] = AudioStatus.END
//
if (this.statusList.every((item) => item === AudioStatus.END)) {
this.statusList = this.statusList.map((item) => AudioStatus.READY)
} else {
// next
this.play()
}
}
this.root.appendChild(audioElement)
if (/^<audio[^>]*><\/audio>$/.test(text)) {
audioElement.src = text.match(/src="([^"]*)"/)?.[1] || ''
this.statusList[index] = AudioStatus.READY
} else {
applicationApi
.postTextToSpeech(
(props.applicationId as string) || (id as string),
{ text: text },
loading
)
.then(async (res: any) => {
if (res.type === 'application/json') {
const text = await res.text()
MsgError(text)
this.statusList[index] = AudioStatus.ERROR
this.play()
return
}
// MP3
// Blob
const blob = new Blob([res], { type: 'audio/mp3' })
// URL
const url = URL.createObjectURL(blob)
audioElement.src = url
this.statusList[index] = AudioStatus.READY
this.play()
})
.catch((err) => {
console.log('err: ', err)
this.statusList[index] = AudioStatus.ERROR
this.play()
})
}
this.audioList.push(audioElement)
} else {
const speechSynthesisUtterance: SpeechSynthesisUtterance = new SpeechSynthesisUtterance(
text
)
speechSynthesisUtterance.onpause = () => {
console.log('onpause')
}
speechSynthesisUtterance.onend = () => {
this.statusList[index] = AudioStatus.END
//
if (this.statusList.every((item) => item === AudioStatus.END)) {
this.statusList = this.statusList.map((item) => AudioStatus.READY)
} else {
// next
this.play()
}
}
speechSynthesisUtterance.onerror = (e) => {
this.statusList[index] = AudioStatus.READY
}
this.statusList[index] = AudioStatus.READY
this.audioList.push(speechSynthesisUtterance)
this.play()
}
})
}
/**
 * Re-request synthesized audio for every segment whose status is ERROR.
 *
 * Only segments backed by an HTMLAudioElement are retried; other entries
 * are left untouched. On success the element receives a fresh blob URL,
 * the segment becomes READY and playback is resumed via play(). On failure
 * the segment returns to ERROR so a later call can try again.
 */
reTryError() {
  this.statusList.forEach((status, index) => {
    if (status !== AudioStatus.ERROR) {
      return
    }
    const audioElement = this.audioList[index]
    if (!(audioElement instanceof HTMLAudioElement)) {
      return
    }
    // play() invokes reTryError() on every call, and play() runs for every
    // streamed answer chunk. Leaving the status at ERROR while the request is
    // in flight would therefore fire a duplicate request per chunk. Move the
    // segment out of ERROR (MOUNTED is the status a segment has while its
    // first request is pending) until this attempt settles.
    this.statusList[index] = AudioStatus.MOUNTED
    applicationApi
      .postTextToSpeech(
        (props.applicationId as string) || (id as string),
        { text: this.textList[index] },
        loading
      )
      .then(async (res: any) => {
        // A JSON-typed response carries an error message instead of audio.
        if (res.type === 'application/json') {
          const message = await res.text()
          MsgError(message)
          this.statusList[index] = AudioStatus.ERROR
          return
        }
        // Wrap the returned audio data in a Blob and expose it through an object URL.
        const blob = new Blob([res], { type: 'audio/mp3' })
        audioElement.src = URL.createObjectURL(blob)
        this.statusList[index] = AudioStatus.READY
        this.play()
      })
      .catch((err) => {
        console.log('err: ', err)
        this.statusList[index] = AudioStatus.ERROR
      })
  })
}
/**
 * Report whether any segment is currently marked as playing.
 *
 * @returns true when at least one entry of statusList is PLAY_INT.
 */
isPlaying() {
  return this.statusList.includes(AudioStatus.PLAY_INT)
}
play(text?: string, is_end?: boolean) {
if (text) {
const textList = this.getTextList(text, is_end ? true : false)
this.appendTextList(textList)
}
//
if (this.statusList.some((item) => [AudioStatus.PLAY_INT].includes(item))) {
return
}
this.reTryError()
//
const index = this.statusList.findIndex((status) => [AudioStatus.READY].includes(status))
if (index < 0 || this.statusList[index] === AudioStatus.MOUNTED) {
return
}
const audioElement = this.audioList[index]
if (audioElement instanceof SpeechSynthesisUtterance) {
if (window.speechSynthesis.paused) {
window.speechSynthesis.resume()
} else {
if (window.speechSynthesis.pending) {
window.speechSynthesis.cancel()
}
speechSynthesis.speak(audioElement)
this.statusList[index] = AudioStatus.PLAY_INT
}
} else {
//
try {
audioElement.play()
this.statusList[index] = AudioStatus.PLAY_INT
} catch (e) {
this.statusList[index] = AudioStatus.ERROR
} }
} }
} else if (props.tts_type === 'BROWSER') { }
if (audioList.value[currentAudioIndex.value] !== utterance.value?.text) { pause(self?: boolean) {
window.speechSynthesis.cancel() const index = this.statusList.findIndex((status) => status === AudioStatus.PLAY_INT)
} if (index < 0) {
if (
window.speechSynthesis.paused &&
audioList.value[currentAudioIndex.value] === utterance.value?.text
) {
window.speechSynthesis.resume()
return return
} }
// SpeechSynthesisUtterance const audioElement = this.audioList[index]
utterance.value = new SpeechSynthesisUtterance(audioList.value[currentAudioIndex.value]) if (audioElement instanceof SpeechSynthesisUtterance) {
utterance.value.onend = () => { this.statusList[index] = AudioStatus.READY
utterance.value = null if (self) {
currentAudioIndex.value += 1 window.speechSynthesis.pause()
playAnswerTextPart() nextTick(() => {
} if (!window.speechSynthesis.paused) {
utterance.value.onerror = () => { window.speechSynthesis.cancel()
audioPlayerStatus.value = false
utterance.value = null
}
//
window.speechSynthesis.speak(utterance.value)
} else if (props.tts_type === 'TTS') {
//
if (audioPlayer.value && audioPlayer.value[currentAudioIndex.value]?.src) {
audioPlayer.value[currentAudioIndex.value].play()
return
}
applicationApi
.postTextToSpeech(
(props.applicationId as string) || (id as string),
{ text: audioList.value[currentAudioIndex.value] },
loading
)
.then(async (res: any) => {
if (res.type === 'application/json') {
const text = await res.text()
MsgError(text)
return
}
// MP3
// Blob
const blob = new Blob([res], { type: 'audio/mp3' })
// URL
const url = URL.createObjectURL(blob)
// blob
// const link = document.createElement('a')
// link.href = window.URL.createObjectURL(blob)
// link.download = "abc.mp3"
// link.click()
// audioPlayer DOM
if (audioPlayer.value) {
audioPlayer.value[currentAudioIndex.value].src = url
audioPlayer.value[currentAudioIndex.value].play() //
audioPlayer.value[currentAudioIndex.value].onended = () => {
currentAudioIndex.value += 1
playAnswerTextPart()
} }
} else { })
console.error('audioPlayer.value is not an instance of HTMLAudioElement') } else {
} window.speechSynthesis.cancel()
}) }
.catch((err) => { } else {
console.log('err: ', err) if (this.statusList[index] === AudioStatus.PLAY_INT) {
}) //
} this.statusList[index] = AudioStatus.READY
} audioElement.pause()
}
const pausePlayAnswerText = () => {
audioPlayerStatus.value = false
if (props.tts_type === 'TTS') {
if (audioPlayer.value) {
audioPlayer.value?.forEach((item) => {
item.pause()
})
} }
} }
if (props.tts_type === 'BROWSER') { getTextList(text: string, is_end: boolean) {
window.speechSynthesis.pause() //
text = removeFormRander(text)
// text
text = markdownToPlainText(text)
const split = smartSplit(
props.data.answer_text,
{
0: 20,
1: 50,
5: 100
},
is_end
)
return split
} }
} }
const audioManage = ref<AudioManage>()
onMounted(() => { onMounted(() => {
bus.on('pause-autoplay', () => { if (audioCiontainer.value) {
pausePlayAnswerText() audioManage.value = new AudioManage(props.tts_type, audioCiontainer.value)
// console.log(1234)
})
bus.emit('pause-autoplay')
//
if (
props.tts &&
props.tts_autoplay &&
buttonData.value.write_ed &&
!buttonData.value.update_time
) {
playAnswerText(buttonData.value.answer_text)
} }
bus.on('play:pause', (record_id: string) => {
if (record_id !== props.data.record_id) {
if (audioManage.value) {
audioManage.value?.pause()
}
}
})
bus.on('change:answer', (data: any) => {
const record_id = data.record_id
bus.emit('play:pause', record_id)
if (props.data.record_id == record_id) {
if (props.tts && props.tts_autoplay) {
if (audioManage.value) {
audioManage.value.play(props.data.answer_text, data.is_end)
}
}
}
})
}) })
</script> </script>
<style lang="scss" scoped> <style lang="scss" scoped>

View File

@ -26,7 +26,7 @@
</div> </div>
<ChatOperationButton <ChatOperationButton
v-if="chatRecord.write_ed && 500 != chatRecord.status" v-show="chatRecord.write_ed && 500 != chatRecord.status"
:tts="application.tts_model_enable" :tts="application.tts_model_enable"
:tts_type="application.tts_type" :tts_type="application.tts_type"
:tts_autoplay="application.tts_autoplay" :tts_autoplay="application.tts_autoplay"

View File

@ -490,6 +490,7 @@ const handleScroll = () => {
} }
onMounted(() => { onMounted(() => {
window.speechSynthesis.cancel()
window.sendMessage = sendMessage window.sendMessage = sendMessage
}) })