bigmodel
This commit is contained in:
parent e70dbf5f86
commit 44b43a7e07
BIN
piper_arm64.tar.gz
Normal file
Binary file not shown.
@@ -20,21 +20,23 @@ Copyright: M15.ai
 License: MIT
 """
 
-import os
+import io
 import json
+import os
 import queue
+import re
+import subprocess
 import threading
 import time
 import wave
-import io
-import re
-import subprocess
-from vosk import Model, KaldiRecognizer
+
+import numpy as np
 import pyaudio
 import requests
-from pydub import AudioSegment
 import soxr
-import numpy as np
+from pydub import AudioSegment
+from vosk import KaldiRecognizer, Model
 
+
 # ------------------- TIMING UTILITY -------------------
 class Timer:
@@ -128,7 +130,8 @@ def set_output_volume(volume_level, card_id=3):
 CONFIG_PATH = os.path.expanduser("va_config.json")
 BASE_DIR = os.path.dirname(__file__)
 MODEL_PATH = os.path.join(BASE_DIR, 'vosk-model')
-CHAT_URL = 'http://localhost:11434/api/chat'
+CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
+AUTH_TOKEN = '0c9cbaca9d2bbf864990f1e1decdf340.dXRMsZCHTUbPQ0rm'  # Replace with your actual token
 
 # ------------------- CONFIG FILE LOADING -------------------
 
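The new CHAT_URL is the bigmodel.cn OpenAI-style chat/completions endpoint, so the token can be exercised on its own before it is wired into the assistant. A minimal sketch, assuming only what this diff itself relies on (the Bearer AUTH_TOKEN header, the glm-4.5 model name, and a choices[0].message.content reply field):

# Standalone check of the GLM endpoint and token configured above (illustrative sketch).
import requests

CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
AUTH_TOKEN = '...'  # the token configured above

resp = requests.post(
    CHAT_URL,
    headers={'Authorization': f'Bearer {AUTH_TOKEN}',
             'Content-Type': 'application/json'},
    json={
        "model": "glm-4.5",
        "messages": [{"role": "user", "content": "ping"}],
        "max_tokens": 16,
        "stream": False,
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json()['choices'][0]['message']['content'])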
@@ -236,27 +239,40 @@ def start_stream():
 
 # ------------------- QUERY OLLAMA CHAT ENDPOINT -------------------
 
-def query_ollama():
+def query_glm():
+    headers = {
+        'Authorization': f'Bearer {AUTH_TOKEN}',
+        'Content-Type': 'application/json'
+    }
     payload = {
-        "model": MODEL_NAME,
+        "model": "glm-4.5",
         "messages": [messages[0]] + messages[-HISTORY_LENGTH:],  # force system prompt at top
-        "stream": False}
+        "temperature": 0.6,
+        "max_tokens": 1024,
+        "stream": False
+    }
+
     with Timer("Inference"):  # measure inference latency
-        resp = requests.post(CHAT_URL, json=payload)
-    #print(f'[Debug] Ollama status: {resp.status_code}')
+        resp = requests.post(CHAT_URL, json=payload, headers=headers)
+
+    if resp.status_code != 200:
+        print(f'[Error] GLM API failed with status {resp.status_code}: {resp.text}')
+        return ''
 
     data = resp.json()
     # Extract assistant message
     reply = ''
-    if 'message' in data and 'content' in data['message']:
-        reply = data['message']['content'].strip()
-    #print('[Debug] Reply:', reply)
+    if 'choices' in data and len(data['choices']) > 0:
+        choice = data['choices'][0]
+        if 'message' in choice and 'content' in choice['message']:
+            reply = choice['message']['content'].strip()
     return reply
 
 # ------------------- TTS & DEGRADATION -------------------
 
 import tempfile
 
+
 def play_response(text):
     import io
     import tempfile
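The reply extraction changes because the two services wrap the assistant message differently: Ollama's /api/chat returns it at the top level as data['message']['content'], while the GLM chat/completions response nests it under data['choices'][0]['message']['content']. A short sketch of the new parsing against a hand-written response dict (shape taken from the code above, values invented):

# Illustrative GLM-style response body (shape only; content is made up).
glm_response = {
    "choices": [
        {"message": {"role": "assistant", "content": "你好！有什么可以帮你？"}}
    ]
}

reply = ''
if 'choices' in glm_response and len(glm_response['choices']) > 0:
    choice = glm_response['choices'][0]
    if 'message' in choice and 'content' in choice['message']:
        reply = choice['message']['content'].strip()
print(reply)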
@@ -422,7 +438,7 @@ def processing_loop():
 
         messages.append({"role": "user", "content": user})
         # Generate assistant response
-        resp_text = query_ollama()
+        resp_text = query_glm()
         if resp_text:
             # Clean debug print (remove newlines and carriage returns)
             clean_debug_text = resp_text.replace('\n', ' ').replace('\r', ' ')
BIN
vosk-model/.DS_Store
vendored
Normal file
Binary file not shown.
BIN
vosk-model/vosk-model-small-cn-0.22.zip
Normal file
Binary file not shown.
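The vendored vosk-model files above are what MODEL_PATH in the script points at. A minimal sketch of how a directory like this is typically loaded with the Vosk classes already imported in the file; the 16 kHz sample rate is an assumption, not something this commit specifies:

# Load the vendored Vosk model and build a recognizer (sketch; 16 kHz assumed).
import os

from vosk import KaldiRecognizer, Model

BASE_DIR = os.path.dirname(__file__)
MODEL_PATH = os.path.join(BASE_DIR, 'vosk-model')

model = Model(MODEL_PATH)
recognizer = KaldiRecognizer(model, 16000)  # sample rate must match the audio stream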