朱潮 2025-09-18 19:00:36 +08:00
parent e70dbf5f86
commit 44b43a7e07
5 changed files with 33 additions and 17 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

BIN
piper_arm64.tar.gz Normal file

Binary file not shown.

@@ -20,21 +20,23 @@ Copyright: M15.ai
 License: MIT
 """
-import os
+import io
 import json
+import os
 import queue
+import re
+import subprocess
 import threading
 import time
 import wave
-import io
-import re
-import subprocess
-from vosk import Model, KaldiRecognizer
+import numpy as np
 import pyaudio
 import requests
-from pydub import AudioSegment
 import soxr
-import numpy as np
+from pydub import AudioSegment
+from vosk import KaldiRecognizer, Model
 
 # ------------------- TIMING UTILITY -------------------
 class Timer:
@@ -128,7 +130,8 @@ def set_output_volume(volume_level, card_id=3):
 CONFIG_PATH = os.path.expanduser("va_config.json")
 BASE_DIR = os.path.dirname(__file__)
 MODEL_PATH = os.path.join(BASE_DIR, 'vosk-model')
-CHAT_URL = 'http://localhost:11434/api/chat'
+CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
+AUTH_TOKEN = '0c9cbaca9d2bbf864990f1e1decdf340.dXRMsZCHTUbPQ0rm'  # Replace with your actual token
 
 # ------------------- CONFIG FILE LOADING -------------------
@@ -236,27 +239,40 @@ def start_stream():
 # ------------------- QUERY OLLAMA CHAT ENDPOINT -------------------
-def query_ollama():
+def query_glm():
+    headers = {
+        'Authorization': f'Bearer {AUTH_TOKEN}',
+        'Content-Type': 'application/json'
+    }
     payload = {
-        "model": MODEL_NAME,
+        "model": "glm-4.5",
         "messages": [messages[0]] + messages[-HISTORY_LENGTH:],  # force system prompt at top
-        "stream": False}
+        "temperature": 0.6,
+        "max_tokens": 1024,
+        "stream": False
+    }
     with Timer("Inference"):  # measure inference latency
-        resp = requests.post(CHAT_URL, json=payload)
-        #print(f'[Debug] Ollama status: {resp.status_code}')
+        resp = requests.post(CHAT_URL, json=payload, headers=headers)
+    if resp.status_code != 200:
+        print(f'[Error] GLM API failed with status {resp.status_code}: {resp.text}')
+        return ''
     data = resp.json()
     # Extract assistant message
     reply = ''
-    if 'message' in data and 'content' in data['message']:
-        reply = data['message']['content'].strip()
-        #print('[Debug] Reply:', reply)
+    if 'choices' in data and len(data['choices']) > 0:
+        choice = data['choices'][0]
+        if 'message' in choice and 'content' in choice['message']:
+            reply = choice['message']['content'].strip()
     return reply
 
 # ------------------- TTS & DEGRADATION -------------------
-import tempfile
 def play_response(text):
+    import io
+    import tempfile
@@ -422,7 +438,7 @@ def processing_loop():
         messages.append({"role": "user", "content": user})
 
         # Generate assistant response
-        resp_text = query_ollama()
+        resp_text = query_glm()
         if resp_text:
             # Clean debug print (remove newlines and carriage returns)
             clean_debug_text = resp_text.replace('\n', ' ').replace('\r', ' ')
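For reference, a minimal standalone sketch of the request/response flow this commit switches to: an OpenAI-style chat-completions call against the bigmodel.cn endpoint. The URL, model name, payload fields, and `choices[0].message.content` parsing are taken from the diff above; the `ask_glm` helper, the system prompt, and the example invocation are illustrative only.

```python
# Minimal sketch of the GLM chat-completions call introduced above.
# CHAT_URL, model name, and response shape mirror the diff; the
# ask_glm() helper and the example prompt are hypothetical.
import requests

CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
AUTH_TOKEN = '<your bigmodel.cn API key>'  # replace with your actual token

def ask_glm(user_text):
    resp = requests.post(
        CHAT_URL,
        headers={
            'Authorization': f'Bearer {AUTH_TOKEN}',
            'Content-Type': 'application/json',
        },
        json={
            'model': 'glm-4.5',
            'messages': [
                {'role': 'system', 'content': 'You are a helpful voice assistant.'},
                {'role': 'user', 'content': user_text},
            ],
            'temperature': 0.6,
            'max_tokens': 1024,
            'stream': False,
        },
        timeout=30,
    )
    if resp.status_code != 200:
        return ''
    data = resp.json()
    # OpenAI-style response: choices[0].message.content
    if data.get('choices'):
        return data['choices'][0]['message']['content'].strip()
    return ''

if __name__ == '__main__':
    print(ask_glm('Hello!'))
```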

BIN
vosk-model/.DS_Store vendored Normal file

Binary file not shown.

Binary file not shown.