bigmodel

parent e70dbf5f86
commit 44b43a7e07
BIN
piper_arm64.tar.gz
Normal file
Binary file not shown.
@@ -20,21 +20,23 @@ Copyright: M15.ai
License: MIT
"""

import os
import io
import json
import os
import queue
import re
import subprocess
import threading
import time
import wave
import io
import re
import subprocess
from vosk import Model, KaldiRecognizer

import numpy as np
import pyaudio
import requests
from pydub import AudioSegment
import soxr
import numpy as np
from pydub import AudioSegment
from vosk import KaldiRecognizer, Model


# ------------------- TIMING UTILITY -------------------
class Timer:
@@ -128,7 +130,8 @@ def set_output_volume(volume_level, card_id=3):
CONFIG_PATH = os.path.expanduser("va_config.json")
BASE_DIR = os.path.dirname(__file__)
MODEL_PATH = os.path.join(BASE_DIR, 'vosk-model')
CHAT_URL = 'http://localhost:11434/api/chat'
CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
AUTH_TOKEN = '0c9cbaca9d2bbf864990f1e1decdf340.dXRMsZCHTUbPQ0rm'  # Replace with your actual token

# ------------------- CONFIG FILE LOADING -------------------

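Not part of the diff: since the commit hard-codes the bearer token next to a "Replace with your actual token" note, here is a minimal sketch of supplying it from the environment instead. BIGMODEL_API_KEY is a hypothetical variable name, not something this commit defines.

# Sketch (assumption): read the GLM token from the environment rather than
# committing it; only CHAT_URL below comes from the diff itself.
import os

CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
AUTH_TOKEN = os.environ.get('BIGMODEL_API_KEY', '')  # hypothetical env var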
@@ -236,27 +239,40 @@ def start_stream():

# ------------------- QUERY OLLAMA CHAT ENDPOINT -------------------

def query_ollama():
def query_glm():
    headers = {
        'Authorization': f'Bearer {AUTH_TOKEN}',
        'Content-Type': 'application/json'
    }
    payload = {
        "model": MODEL_NAME,
        "model": "glm-4.5",
        "messages": [messages[0]] + messages[-HISTORY_LENGTH:],  # force system prompt at top
        "stream": False}
        "temperature": 0.6,
        "max_tokens": 1024,
        "stream": False
    }

    with Timer("Inference"):  # measure inference latency
        resp = requests.post(CHAT_URL, json=payload)
        #print(f'[Debug] Ollama status: {resp.status_code}')
        resp = requests.post(CHAT_URL, json=payload, headers=headers)

    if resp.status_code != 200:
        print(f'[Error] GLM API failed with status {resp.status_code}: {resp.text}')
        return ''

    data = resp.json()
    # Extract assistant message
    reply = ''
    if 'message' in data and 'content' in data['message']:
        reply = data['message']['content'].strip()
        #print('[Debug] Reply:', reply)
    if 'choices' in data and len(data['choices']) > 0:
        choice = data['choices'][0]
        if 'message' in choice and 'content' in choice['message']:
            reply = choice['message']['content'].strip()
    return reply


# ------------------- TTS & DEGRADATION -------------------

import tempfile


def play_response(text):
    import io
    import tempfile
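Not part of the diff: a minimal standalone sketch of the request/response shape query_glm now relies on, assuming the OpenAI-style chat completions schema visible in the hunk above; the glm_chat helper and its parameter names are illustrative only.

import requests

def glm_chat(messages, token, url='https://open.bigmodel.cn/api/paas/v4/chat/completions'):
    # Illustrative helper mirroring query_glm: POST the chat payload with a
    # bearer token and read the reply from choices[0].message.content.
    resp = requests.post(
        url,
        headers={'Authorization': f'Bearer {token}',
                 'Content-Type': 'application/json'},
        json={'model': 'glm-4.5', 'messages': messages,
              'temperature': 0.6, 'max_tokens': 1024, 'stream': False},
        timeout=30,
    )
    if resp.status_code != 200:
        return ''
    choices = resp.json().get('choices', [])
    return choices[0]['message']['content'].strip() if choices else ''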
@@ -422,7 +438,7 @@ def processing_loop():

    messages.append({"role": "user", "content": user})
    # Generate assistant response
    resp_text = query_ollama()
    resp_text = query_glm()
    if resp_text:
        # Clean debug print (remove newlines and carriage returns)
        clean_debug_text = resp_text.replace('\n', ' ').replace('\r', ' ')

BIN
vosk-model/.DS_Store
vendored
Normal file
Binary file not shown.
BIN
vosk-model/vosk-model-small-cn-0.22.zip
Normal file
Binary file not shown.