commit 44b43a7e07 (parent e70dbf5f86)
Author: 朱潮
Date: 2025-09-18 19:00:36 +08:00

5 changed files with 33 additions and 17 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

BIN
piper_arm64.tar.gz Normal file

Binary file not shown.


@@ -20,21 +20,23 @@ Copyright: M15.ai
 License: MIT
 """
-import os
+import io
 import json
+import os
 import queue
+import re
+import subprocess
 import threading
 import time
 import wave
-import io
-import re
-import subprocess
-from vosk import Model, KaldiRecognizer
+
+import numpy as np
 import pyaudio
 import requests
-from pydub import AudioSegment
 import soxr
-import numpy as np
+from pydub import AudioSegment
+from vosk import KaldiRecognizer, Model

 # ------------------- TIMING UTILITY -------------------
 class Timer:
@@ -128,7 +130,8 @@ def set_output_volume(volume_level, card_id=3):
 CONFIG_PATH = os.path.expanduser("va_config.json")
 BASE_DIR = os.path.dirname(__file__)
 MODEL_PATH = os.path.join(BASE_DIR, 'vosk-model')
-CHAT_URL = 'http://localhost:11434/api/chat'
+CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
+AUTH_TOKEN = '0c9cbaca9d2bbf864990f1e1decdf340.dXRMsZCHTUbPQ0rm'  # Replace with your actual token

 # ------------------- CONFIG FILE LOADING -------------------
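Note that AUTH_TOKEN is hardcoded in the source above. A common alternative (not part of this commit) is to read the key from the environment at startup; the variable name GLM_API_KEY below is illustrative only.

import os

# Hypothetical alternative to the hardcoded AUTH_TOKEN above: read the key
# from an environment variable (the name GLM_API_KEY is an assumption) and
# fail fast if it is missing.
AUTH_TOKEN = os.environ.get('GLM_API_KEY', '')
if not AUTH_TOKEN:
    raise RuntimeError('Set GLM_API_KEY before starting the voice assistant')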
@@ -236,27 +239,40 @@ def start_stream():
 # ------------------- QUERY OLLAMA CHAT ENDPOINT -------------------
-def query_ollama():
+def query_glm():
+    headers = {
+        'Authorization': f'Bearer {AUTH_TOKEN}',
+        'Content-Type': 'application/json'
+    }
     payload = {
-        "model": MODEL_NAME,
+        "model": "glm-4.5",
         "messages": [messages[0]] + messages[-HISTORY_LENGTH:],  # force system prompt at top
-        "stream": False}
+        "temperature": 0.6,
+        "max_tokens": 1024,
+        "stream": False
+    }
     with Timer("Inference"):  # measure inference latency
-        resp = requests.post(CHAT_URL, json=payload)
-    #print(f'[Debug] Ollama status: {resp.status_code}')
+        resp = requests.post(CHAT_URL, json=payload, headers=headers)
+    if resp.status_code != 200:
+        print(f'[Error] GLM API failed with status {resp.status_code}: {resp.text}')
+        return ''
     data = resp.json()

     # Extract assistant message
     reply = ''
-    if 'message' in data and 'content' in data['message']:
-        reply = data['message']['content'].strip()
-    #print('[Debug] Reply:', reply)
+    if 'choices' in data and len(data['choices']) > 0:
+        choice = data['choices'][0]
+        if 'message' in choice and 'content' in choice['message']:
+            reply = choice['message']['content'].strip()
     return reply

 # ------------------- TTS & DEGRADATION -------------------
+import tempfile
+
 def play_response(text):
-    import io
-    import tempfile
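The new query_glm() targets the OpenAI-style chat-completions shape of the GLM endpoint: a bearer-token header, a messages array, and the reply under choices[0].message.content. Below is a minimal standalone sketch of that request/response round trip, mirroring the values in the hunk above; the system-prompt text, the 30-second timeout, and the token placeholder are assumptions, not values from the commit.

import requests

CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
AUTH_TOKEN = '<your GLM API token>'  # placeholder, not the real key

def ask_glm(user_text: str) -> str:
    """Send a single-turn chat request and return the assistant reply text."""
    headers = {
        'Authorization': f'Bearer {AUTH_TOKEN}',
        'Content-Type': 'application/json',
    }
    payload = {
        'model': 'glm-4.5',
        'messages': [
            {'role': 'system', 'content': 'You are a concise voice assistant.'},  # assumed prompt
            {'role': 'user', 'content': user_text},
        ],
        'temperature': 0.6,
        'max_tokens': 1024,
        'stream': False,
    }
    resp = requests.post(CHAT_URL, json=payload, headers=headers, timeout=30)
    if resp.status_code != 200:
        return ''
    data = resp.json()
    # Same parsing path as the diff: choices[0].message.content
    choices = data.get('choices', [])
    if choices and 'content' in choices[0].get('message', {}):
        return choices[0]['message']['content'].strip()
    return ''

if __name__ == '__main__':
    print(ask_glm('Say hello in one short sentence.'))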
@@ -422,7 +438,7 @@ def processing_loop():
     messages.append({"role": "user", "content": user})

     # Generate assistant response
-    resp_text = query_ollama()
+    resp_text = query_glm()
     if resp_text:
         # Clean debug print (remove newlines and carriage returns)
         clean_debug_text = resp_text.replace('\n', ' ').replace('\r', ' ')
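processing_loop() appends the user turn to messages before calling query_glm(), which sends [messages[0]] + messages[-HISTORY_LENGTH:]: the system prompt pinned at the top plus a sliding window of recent turns. The sketch below illustrates that history pattern; HISTORY_LENGTH = 6 and the system-prompt text are illustrative, the script defines its own values elsewhere.

# Conversation-history pattern implied by the diff: messages[0] is the system
# prompt; only the last HISTORY_LENGTH entries are sent, so older turns fall
# out of the window while the prompt stays pinned.
HISTORY_LENGTH = 6  # illustrative value, not taken from the commit
messages = [{"role": "system", "content": "You are a concise voice assistant."}]

def build_request_messages():
    # Mirrors the payload construction in query_glm()
    return [messages[0]] + messages[-HISTORY_LENGTH:]

def record_turn(user_text, assistant_text):
    messages.append({"role": "user", "content": user_text})
    messages.append({"role": "assistant", "content": assistant_text})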

BIN
vosk-model/.DS_Store vendored Normal file

Binary file not shown.

Binary file not shown.