diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..5da4b23
Binary files /dev/null and b/.DS_Store differ
diff --git a/piper_arm64.tar.gz b/piper_arm64.tar.gz
new file mode 100644
index 0000000..1634a85
Binary files /dev/null and b/piper_arm64.tar.gz differ
diff --git a/voice_assistant.py b/voice_assistant.py
index 9cd8fda..fbd844f 100644
--- a/voice_assistant.py
+++ b/voice_assistant.py
@@ -20,21 +20,23 @@
 Copyright: M15.ai
 License: MIT
 """
-import os
+import io
 import json
+import os
 import queue
+import re
+import subprocess
 import threading
 import time
 import wave
-import io
-import re
-import subprocess
-from vosk import Model, KaldiRecognizer
+
+import numpy as np
 import pyaudio
 import requests
-from pydub import AudioSegment
 import soxr
-import numpy as np
+from pydub import AudioSegment
+from vosk import KaldiRecognizer, Model
+
 
 # ------------------- TIMING UTILITY -------------------
 class Timer:
@@ -128,7 +130,8 @@ def set_output_volume(volume_level, card_id=3):
 CONFIG_PATH = os.path.expanduser("va_config.json")
 BASE_DIR = os.path.dirname(__file__)
 MODEL_PATH = os.path.join(BASE_DIR, 'vosk-model')
-CHAT_URL = 'http://localhost:11434/api/chat'
+CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
+AUTH_TOKEN = '0c9cbaca9d2bbf864990f1e1decdf340.dXRMsZCHTUbPQ0rm'  # Replace with your actual token
 
 
 # ------------------- CONFIG FILE LOADING -------------------
@@ -236,27 +239,40 @@ def start_stream():
 
 
 # ------------------- QUERY OLLAMA CHAT ENDPOINT -------------------
-def query_ollama():
+def query_glm():
+    headers = {
+        'Authorization': f'Bearer {AUTH_TOKEN}',
+        'Content-Type': 'application/json'
+    }
     payload = {
-        "model": MODEL_NAME,
+        "model": "glm-4.5",
         "messages": [messages[0]] + messages[-HISTORY_LENGTH:],  # force system prompt at top
-        "stream": False}
+        "temperature": 0.6,
+        "max_tokens": 1024,
+        "stream": False
+    }
 
     with Timer("Inference"):  # measure inference latency
-        resp = requests.post(CHAT_URL, json=payload)
-        #print(f'[Debug] Ollama status: {resp.status_code}')
+        resp = requests.post(CHAT_URL, json=payload, headers=headers)
+
+    if resp.status_code != 200:
+        print(f'[Error] GLM API failed with status {resp.status_code}: {resp.text}')
+        return ''
+
     data = resp.json()
     # Extract assistant message
     reply = ''
-    if 'message' in data and 'content' in data['message']:
-        reply = data['message']['content'].strip()
-        #print('[Debug] Reply:', reply)
+    if 'choices' in data and len(data['choices']) > 0:
+        choice = data['choices'][0]
+        if 'message' in choice and 'content' in choice['message']:
+            reply = choice['message']['content'].strip()
     return reply
 
 
 # ------------------- TTS & DEGRADATION -------------------
 import tempfile
+
 def play_response(text):
     import io
     import tempfile
@@ -422,7 +438,7 @@ def processing_loop():
         messages.append({"role": "user", "content": user})
 
         # Generate assistant response
-        resp_text = query_ollama()
+        resp_text = query_glm()
         if resp_text:
             # Clean debug print (remove newlines and carriage returns)
             clean_debug_text = resp_text.replace('\n', ' ').replace('\r', ' ')
diff --git a/vosk-model/.DS_Store b/vosk-model/.DS_Store
new file mode 100644
index 0000000..96d2d27
Binary files /dev/null and b/vosk-model/.DS_Store differ
diff --git a/vosk-model/vosk-model-small-cn-0.22.zip b/vosk-model/vosk-model-small-cn-0.22.zip
new file mode 100644
index 0000000..b465498
Binary files /dev/null and b/vosk-model/vosk-model-small-cn-0.22.zip differ