diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..5da4b23
Binary files /dev/null and b/.DS_Store differ
diff --git a/piper_arm64.tar.gz b/piper_arm64.tar.gz
new file mode 100644
index 0000000..1634a85
Binary files /dev/null and b/piper_arm64.tar.gz differ
diff --git a/voice_assistant.py b/voice_assistant.py
index 9cd8fda..fbd844f 100644
--- a/voice_assistant.py
+++ b/voice_assistant.py
@@ -20,21 +20,23 @@
 Copyright: M15.ai
 License: MIT
 """
-import os
+import io
 import json
+import os
 import queue
+import re
+import subprocess
 import threading
 import time
 import wave
-import io
-import re
-import subprocess
-from vosk import Model, KaldiRecognizer
+
+import numpy as np
 import pyaudio
 import requests
-from pydub import AudioSegment
 import soxr
-import numpy as np
+from pydub import AudioSegment
+from vosk import KaldiRecognizer, Model
+
 
 # ------------------- TIMING UTILITY -------------------
 class Timer:
@@ -128,7 +130,8 @@ def set_output_volume(volume_level, card_id=3):
 CONFIG_PATH = os.path.expanduser("va_config.json")
 BASE_DIR = os.path.dirname(__file__)
 MODEL_PATH = os.path.join(BASE_DIR, 'vosk-model')
-CHAT_URL = 'http://localhost:11434/api/chat'
+CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
+AUTH_TOKEN = '0c9cbaca9d2bbf864990f1e1decdf340.dXRMsZCHTUbPQ0rm'  # Replace with your actual token
 
 
 # ------------------- CONFIG FILE LOADING -------------------
@@ -236,27 +239,40 @@ def start_stream():
 
 
 # ------------------- QUERY OLLAMA CHAT ENDPOINT -------------------
-def query_ollama():
+def query_glm():
+    headers = {
+        'Authorization': f'Bearer {AUTH_TOKEN}',
+        'Content-Type': 'application/json'
+    }
     payload = {
-        "model": MODEL_NAME,
+        "model": "glm-4.5",
         "messages": [messages[0]] + messages[-HISTORY_LENGTH:],  # force system prompt at top
-        "stream": False}
+        "temperature": 0.6,
+        "max_tokens": 1024,
+        "stream": False
+    }
 
     with Timer("Inference"):  # measure inference latency
-        resp = requests.post(CHAT_URL, json=payload)
-        #print(f'[Debug] Ollama status: {resp.status_code}')
+        resp = requests.post(CHAT_URL, json=payload, headers=headers)
+
+    if resp.status_code != 200:
+        print(f'[Error] GLM API failed with status {resp.status_code}: {resp.text}')
+        return ''
+
     data = resp.json()
     # Extract assistant message
     reply = ''
-    if 'message' in data and 'content' in data['message']:
-        reply = data['message']['content'].strip()
-        #print('[Debug] Reply:', reply)
+    if 'choices' in data and len(data['choices']) > 0:
+        choice = data['choices'][0]
+        if 'message' in choice and 'content' in choice['message']:
+            reply = choice['message']['content'].strip()
     return reply
 
 
 # ------------------- TTS & DEGRADATION -------------------
 import tempfile
+
 def play_response(text):
     import io
     import tempfile
@@ -422,7 +438,7 @@ def processing_loop():
         messages.append({"role": "user", "content": user})
 
         # Generate assistant response
-        resp_text = query_ollama()
+        resp_text = query_glm()
         if resp_text:
             # Clean debug print (remove newlines and carriage returns)
             clean_debug_text = resp_text.replace('\n', ' ').replace('\r', ' ')
diff --git a/vosk-model/.DS_Store b/vosk-model/.DS_Store
new file mode 100644
index 0000000..96d2d27
Binary files /dev/null and b/vosk-model/.DS_Store differ
diff --git a/vosk-model/vosk-model-small-cn-0.22.zip b/vosk-model/vosk-model-small-cn-0.22.zip
new file mode 100644
index 0000000..b465498
Binary files /dev/null and b/vosk-model/vosk-model-small-cn-0.22.zip differ