commit 44b43a7e07 (parent e70dbf5f86)
Author: 朱潮
Date: 2025-09-18 19:00:36 +08:00

5 changed files with 33 additions and 17 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

BIN
piper_arm64.tar.gz Normal file

Binary file not shown.


@@ -20,21 +20,23 @@ Copyright: M15.ai
 License: MIT
 """
-import os
+import io
 import json
+import os
 import queue
+import re
+import subprocess
 import threading
 import time
 import wave
-import io
-import re
-import subprocess
-from vosk import Model, KaldiRecognizer
+
+import numpy as np
 import pyaudio
 import requests
-from pydub import AudioSegment
 import soxr
-import numpy as np
+from pydub import AudioSegment
+from vosk import KaldiRecognizer, Model

 # ------------------- TIMING UTILITY -------------------
 class Timer:
@@ -128,7 +130,8 @@ def set_output_volume(volume_level, card_id=3):
 CONFIG_PATH = os.path.expanduser("va_config.json")
 BASE_DIR = os.path.dirname(__file__)
 MODEL_PATH = os.path.join(BASE_DIR, 'vosk-model')
-CHAT_URL = 'http://localhost:11434/api/chat'
+CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
+AUTH_TOKEN = '0c9cbaca9d2bbf864990f1e1decdf340.dXRMsZCHTUbPQ0rm'  # Replace with your actual token

 # ------------------- CONFIG FILE LOADING -------------------
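Note that AUTH_TOKEN is hardcoded in the source above. A common alternative (not part of this commit) is to read the key from the environment at startup; the variable name GLM_API_KEY below is illustrative only.

import os

# Hypothetical alternative to the hardcoded AUTH_TOKEN above: read the key
# from an environment variable (the name GLM_API_KEY is an assumption) and
# fail fast if it is missing.
AUTH_TOKEN = os.environ.get('GLM_API_KEY', '')
if not AUTH_TOKEN:
    raise RuntimeError('Set GLM_API_KEY before starting the voice assistant')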
@@ -236,27 +239,40 @@ def start_stream():
 # ------------------- QUERY OLLAMA CHAT ENDPOINT -------------------
-def query_ollama():
+def query_glm():
+    headers = {
+        'Authorization': f'Bearer {AUTH_TOKEN}',
+        'Content-Type': 'application/json'
+    }
     payload = {
-        "model": MODEL_NAME,
+        "model": "glm-4.5",
         "messages": [messages[0]] + messages[-HISTORY_LENGTH:],  # force system prompt at top
-        "stream": False}
+        "temperature": 0.6,
+        "max_tokens": 1024,
+        "stream": False
+    }
     with Timer("Inference"):  # measure inference latency
-        resp = requests.post(CHAT_URL, json=payload)
-    #print(f'[Debug] Ollama status: {resp.status_code}')
+        resp = requests.post(CHAT_URL, json=payload, headers=headers)
+    if resp.status_code != 200:
+        print(f'[Error] GLM API failed with status {resp.status_code}: {resp.text}')
+        return ''
     data = resp.json()

     # Extract assistant message
     reply = ''
-    if 'message' in data and 'content' in data['message']:
-        reply = data['message']['content'].strip()
-    #print('[Debug] Reply:', reply)
+    if 'choices' in data and len(data['choices']) > 0:
+        choice = data['choices'][0]
+        if 'message' in choice and 'content' in choice['message']:
+            reply = choice['message']['content'].strip()
     return reply

 # ------------------- TTS & DEGRADATION -------------------
+import tempfile
+
 def play_response(text):
-    import io
-    import tempfile
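The new query_glm() targets the OpenAI-style chat-completions shape of the GLM endpoint: a bearer-token header, a messages array, and the reply under choices[0].message.content. Below is a minimal standalone sketch of that request/response round trip, mirroring the values in the hunk above; the system-prompt text, the 30-second timeout, and the token placeholder are assumptions, not values from the commit.

import requests

CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
AUTH_TOKEN = '<your GLM API token>'  # placeholder, not the real key

def ask_glm(user_text: str) -> str:
    """Send a single-turn chat request and return the assistant reply text."""
    headers = {
        'Authorization': f'Bearer {AUTH_TOKEN}',
        'Content-Type': 'application/json',
    }
    payload = {
        'model': 'glm-4.5',
        'messages': [
            {'role': 'system', 'content': 'You are a concise voice assistant.'},  # assumed prompt
            {'role': 'user', 'content': user_text},
        ],
        'temperature': 0.6,
        'max_tokens': 1024,
        'stream': False,
    }
    resp = requests.post(CHAT_URL, json=payload, headers=headers, timeout=30)
    if resp.status_code != 200:
        return ''
    data = resp.json()
    # Same parsing path as the diff: choices[0].message.content
    choices = data.get('choices', [])
    if choices and 'content' in choices[0].get('message', {}):
        return choices[0]['message']['content'].strip()
    return ''

if __name__ == '__main__':
    print(ask_glm('Say hello in one short sentence.'))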
@@ -422,7 +438,7 @@ def processing_loop():
     messages.append({"role": "user", "content": user})

     # Generate assistant response
-    resp_text = query_ollama()
+    resp_text = query_glm()
     if resp_text:
         # Clean debug print (remove newlines and carriage returns)
         clean_debug_text = resp_text.replace('\n', ' ').replace('\r', ' ')
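processing_loop() appends the user turn to messages before calling query_glm(), which sends [messages[0]] + messages[-HISTORY_LENGTH:]: the system prompt pinned at the top plus a sliding window of recent turns. The sketch below illustrates that history pattern; HISTORY_LENGTH = 6 and the system-prompt text are illustrative, the script defines its own values elsewhere.

# Conversation-history pattern implied by the diff: messages[0] is the system
# prompt; only the last HISTORY_LENGTH entries are sent, so older turns fall
# out of the window while the prompt stays pinned.
HISTORY_LENGTH = 6  # illustrative value, not taken from the commit
messages = [{"role": "system", "content": "You are a concise voice assistant."}]

def build_request_messages():
    # Mirrors the payload construction in query_glm()
    return [messages[0]] + messages[-HISTORY_LENGTH:]

def record_turn(user_text, assistant_text):
    messages.append({"role": "user", "content": user_text})
    messages.append({"role": "assistant", "content": assistant_text})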

BIN
vosk-model/.DS_Store vendored Normal file

Binary file not shown.

Binary file not shown.