2025-09-29 09:19:40 +08:00
|
|
|
|
import sounddevice as sd
|
|
|
|
|
|
import pvporcupine
|
|
|
|
|
|
import struct
|
|
|
|
|
|
import websocket
|
|
|
|
|
|
import threading
|
|
|
|
|
|
import hmac
|
|
|
|
|
|
import hashlib
|
|
|
|
|
|
import base64
|
|
|
|
|
|
import json
|
|
|
|
|
|
import time
|
|
|
|
|
|
import urllib.parse
|
|
|
|
|
|
import uuid
|
|
|
|
|
|
import queue
|
|
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
|
|
# 原代码9. 音频采集与WebSocket + 10. 唤醒词监听完整逻辑
|
|
|
|
|
|
class VoiceRecogController:
|
|
|
|
|
|
def __init__(self, access_key, wakeup_word_path, model_path, appid, access_key_id, access_key_secret, tts_controller, feedback_text):
|
|
|
|
|
|
# 接收调度脚本传入的参数,保持原逻辑
|
|
|
|
|
|
self.ACCESS_KEY = access_key
|
|
|
|
|
|
self.WAKEUP_WORD_PATH = wakeup_word_path
|
|
|
|
|
|
self.MODEL_PATH = model_path
|
|
|
|
|
|
self.APPID = appid
|
|
|
|
|
|
self.ACCESS_KEY_ID = access_key_id
|
|
|
|
|
|
self.ACCESS_KEY_SECRET = access_key_secret
|
|
|
|
|
|
self.tts_controller = tts_controller
|
|
|
|
|
|
self.FEEDBACK_TEXT = feedback_text
|
|
|
|
|
|
self.SAMPLE_RATE = 16000
|
|
|
|
|
|
self.CHANNELS = 1
|
|
|
|
|
|
self.SAMPLE_FORMAT = "int16"
|
|
|
|
|
|
self.INTERACTION_TIMEOUT = 30
|
|
|
|
|
|
self.audio_q = queue.Queue()
|
|
|
|
|
|
self.stream = None # 麦克风流后续初始化
|
|
|
|
|
|
|
|
|
|
|
|
def wakeup_listener(self):
|
|
|
|
|
|
"""原代码10. 唤醒词监听"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
porcupine = pvporcupine.create(
|
|
|
|
|
|
access_key=self.ACCESS_KEY,
|
|
|
|
|
|
keyword_paths=[self.WAKEUP_WORD_PATH],
|
|
|
|
|
|
model_path=self.MODEL_PATH
|
|
|
|
|
|
)
|
|
|
|
|
|
print(f"\n🎯 唤醒词引擎就绪(采样率:{porcupine.sample_rate})")
|
|
|
|
|
|
|
|
|
|
|
|
wakeup_mic = sd.RawInputStream(
|
|
|
|
|
|
samplerate=porcupine.sample_rate,
|
|
|
|
|
|
blocksize=porcupine.frame_length,
|
|
|
|
|
|
dtype="int16",
|
|
|
|
|
|
channels=1
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
print("📢 等待唤醒词「小黄鸭」(按Ctrl+C退出)")
|
|
|
|
|
|
with wakeup_mic:
|
|
|
|
|
|
while True:
|
|
|
|
|
|
pcm_data, _ = wakeup_mic.read(porcupine.frame_length)
|
|
|
|
|
|
pcm_unpacked = struct.unpack_from("h" * porcupine.frame_length, pcm_data)
|
|
|
|
|
|
if porcupine.process(pcm_unpacked) >= 0:
|
|
|
|
|
|
print("🚀 检测到唤醒词「小黄鸭」!")
|
|
|
|
|
|
# 播放唤醒反馈(同步执行)
|
|
|
|
|
|
self.tts_controller.speak(self.FEEDBACK_TEXT["wakeup"])
|
|
|
|
|
|
porcupine.delete()
|
|
|
|
|
|
return True
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"\n❌ 唤醒词监听失败:{str(e)}")
|
|
|
|
|
|
print(" 排查:1. 唤醒词文件路径 2. 麦克风连接 3. PicoVoice Key有效性")
|
|
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
def _audio_callback(self, indata, frames, t, status):
|
|
|
|
|
|
"""原代码9. 音频采集回调"""
|
|
|
|
|
|
if status:
|
|
|
|
|
|
print(f"⚠️ 音频异常:{status}")
|
|
|
|
|
|
self.audio_q.put(bytes(indata))
|
|
|
|
|
|
|
|
|
|
|
|
def _create_ws_url(self):
|
|
|
|
|
|
"""原代码9. 创建WebSocket URL"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
host = "office-api-ast-dx.iflyaisol.com"
|
|
|
|
|
|
path = "/ast/communicate/v1"
|
|
|
|
|
|
utc = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()) + "+0000"
|
|
|
|
|
|
session_uuid = str(uuid.uuid4())
|
|
|
|
|
|
|
|
|
|
|
|
params = {
|
|
|
|
|
|
"accessKeyId": self.ACCESS_KEY_ID,
|
|
|
|
|
|
"appId": self.APPID,
|
|
|
|
|
|
"samplerate": self.SAMPLE_RATE,
|
|
|
|
|
|
"audio_encode": "pcm_s16le",
|
|
|
|
|
|
"lang": "autodialect",
|
|
|
|
|
|
"uuid": session_uuid,
|
|
|
|
|
|
"utc": utc,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
sorted_params = sorted(params.items(), key=lambda x: x[0])
|
|
|
|
|
|
base_string = "&".join(
|
|
|
|
|
|
f"{urllib.parse.quote_plus(str(k))}={urllib.parse.quote_plus(str(v))}"
|
|
|
|
|
|
for k, v in sorted_params
|
|
|
|
|
|
)
|
|
|
|
|
|
signature = hmac.new(
|
|
|
|
|
|
self.ACCESS_KEY_SECRET.encode("utf-8"),
|
|
|
|
|
|
base_string.encode("utf-8"),
|
|
|
|
|
|
hashlib.sha1
|
|
|
|
|
|
).digest()
|
|
|
|
|
|
signature = base64.b64encode(signature).decode("utf-8")
|
|
|
|
|
|
|
|
|
|
|
|
query = base_string + "&signature=" + urllib.parse.quote_plus(signature)
|
|
|
|
|
|
return f"wss://{host}{path}?{query}", session_uuid
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"❌ WebSocket URL生成失败:{str(e)}")
|
|
|
|
|
|
return None, None
|
|
|
|
|
|
|
|
|
|
|
|
def _on_message(self, ws, message, current_text, last_audio_time):
|
|
|
|
|
|
"""原代码9. WebSocket消息处理(接收全局变量引用)"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
data = json.loads(message)
|
|
|
|
|
|
if data.get("msg_type") == "result" and "cn" in data.get("data", {}):
|
|
|
|
|
|
words = [
|
|
|
|
|
|
cw.get("w", "")
|
|
|
|
|
|
for rt in data["data"]["cn"].get("st", {}).get("rt", [])
|
|
|
|
|
|
for ws_item in rt.get("ws", [])
|
|
|
|
|
|
for cw in ws_item.get("cw", [])
|
|
|
|
|
|
]
|
|
|
|
|
|
if words:
|
|
|
|
|
|
current_text[0] = "".join(words) # 用列表传引用,修改全局变量
|
|
|
|
|
|
last_audio_time[0] = time.time()
|
|
|
|
|
|
print(f"🎧 识别中:{current_text[0]}", end="\r")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"\n❌ 语音识别消息处理错误:{str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
def _on_error(self, ws, error, is_processing):
|
|
|
|
|
|
"""原代码9. WebSocket错误处理"""
|
|
|
|
|
|
if not is_processing[0]:
|
|
|
|
|
|
print(f"\n❌ WebSocket连接错误:{str(error)}")
|
|
|
|
|
|
|
|
|
|
|
|
def _on_close(self, ws, close_status_code, close_msg, current_text, final_result, stream):
|
|
|
|
|
|
"""原代码9. WebSocket关闭处理"""
|
|
|
|
|
|
print(f"\n🔌 WebSocket连接关闭 | 状态码:{close_status_code}")
|
|
|
|
|
|
if stream and stream.active:
|
|
|
|
|
|
stream.stop()
|
|
|
|
|
|
current_text[0] = ""
|
|
|
|
|
|
final_result[0] = ""
|
|
|
|
|
|
|
|
|
|
|
|
def _on_open(self, ws, stream, current_text, final_result, last_audio_time, is_processing, last_command_time, execute_callback):
|
|
|
|
|
|
"""新增 execute_callback 参数,用于接收指令执行函数"""
|
|
|
|
|
|
def send_audio_and_handle():
|
|
|
|
|
|
print("\n🎤 指令已就绪!支持:")
|
|
|
|
|
|
print(" - 运动:前进3秒、左转2秒 | - 图像识别:这是什么")
|
|
|
|
|
|
print(" - 闲聊:今天天气怎么样 | - 音量:增大音量、减小音量\n")
|
|
|
|
|
|
stream.start()
|
|
|
|
|
|
current_text[0] = ""
|
|
|
|
|
|
final_result[0] = ""
|
|
|
|
|
|
last_command_time[0] = time.time()
|
|
|
|
|
|
|
|
|
|
|
|
while True:
|
|
|
|
|
|
try:
|
|
|
|
|
|
# 1. 处理音频队列(避免堆积)
|
|
|
|
|
|
while self.audio_q.qsize() > 5:
|
|
|
|
|
|
self.audio_q.get_nowait()
|
|
|
|
|
|
# 2. 发送音频数据(若队列有数据)
|
|
|
|
|
|
audio_data = self.audio_q.get(timeout=0.5)
|
|
|
|
|
|
ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
|
|
|
|
|
|
|
|
|
|
|
|
# 3. 指令识别与执行:有文本且2秒内无新音频时执行
|
|
|
|
|
|
if current_text[0] and (time.time() - last_audio_time[0]) > 2:
|
|
|
|
|
|
final_result[0] = current_text[0].strip()
|
|
|
|
|
|
if len(final_result[0]) > 0: # 确保指令有效
|
|
|
|
|
|
print(f"\n⏹ 最终指令:{final_result[0]}")
|
|
|
|
|
|
# 调用回调函数执行指令(关键修复:直接在这里执行)
|
|
|
|
|
|
execute_callback(final_result[0])
|
|
|
|
|
|
last_command_time[0] = time.time() # 更新最后操作时间
|
|
|
|
|
|
current_text[0] = "" # 执行后清空,避免重复识别
|
|
|
|
|
|
final_result[0] = ""
|
|
|
|
|
|
time.sleep(1) # 等待指令执行完成
|
|
|
|
|
|
|
|
|
|
|
|
# 4. 超时检测:30秒无操作则关闭连接
|
|
|
|
|
|
if time.time() - last_command_time[0] > self.INTERACTION_TIMEOUT:
|
|
|
|
|
|
print(f"\n⌛ {self.INTERACTION_TIMEOUT}秒无操作,关闭连接")
|
|
|
|
|
|
self.tts_controller.speak(self.FEEDBACK_TEXT.get("wakeup_timeout", "长时间没操作,我先休息啦"))
|
|
|
|
|
|
time.sleep(1)
|
|
|
|
|
|
ws.send("close", websocket.ABNF.OPCODE_TEXT)
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
except queue.Empty:
|
|
|
|
|
|
# 队列为空时,检测超时
|
|
|
|
|
|
if time.time() - last_command_time[0] > self.INTERACTION_TIMEOUT:
|
|
|
|
|
|
print(f"\n⌛ {self.INTERACTION_TIMEOUT}秒无操作,关闭连接")
|
|
|
|
|
|
self.tts_controller.speak(self.FEEDBACK_TEXT.get("wakeup_timeout", "长时间没操作,我先休息啦"))
|
|
|
|
|
|
time.sleep(1)
|
|
|
|
|
|
ws.send("close", websocket.ABNF.OPCODE_TEXT)
|
|
|
|
|
|
break
|
|
|
|
|
|
continue # 继续循环等待音频
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"\n❌ 音频发送错误:{str(e)}")
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
audio_thread = threading.Thread(target=send_audio_and_handle, daemon=True)
|
|
|
|
|
|
audio_thread.start()
|
|
|
|
|
|
|
|
|
|
|
|
def start_websocket(self, current_text, final_result, last_audio_time, is_processing, last_command_time, execute_callback):
|
|
|
|
|
|
"""新增 execute_callback 参数,用于传递指令执行函数"""
|
|
|
|
|
|
self.stream = sd.RawInputStream(
|
|
|
|
|
|
samplerate=self.SAMPLE_RATE,
|
|
|
|
|
|
channels=self.CHANNELS,
|
|
|
|
|
|
dtype=self.SAMPLE_FORMAT,
|
|
|
|
|
|
callback=self._audio_callback,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
ws_url, session_id = self._create_ws_url()
|
|
|
|
|
|
if not ws_url:
|
|
|
|
|
|
print("⚠️ 无法生成语音识别连接,3秒后重新监听...")
|
|
|
|
|
|
time.sleep(3)
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
print(f"🔄 连接语音识别服务(会话ID:{session_id[:8]}...)")
|
|
|
|
|
|
# 绑定WebSocket回调时传入 execute_callback
|
|
|
|
|
|
ws = websocket.WebSocketApp(
|
|
|
|
|
|
ws_url,
|
|
|
|
|
|
on_open=lambda ws: self._on_open(ws, self.stream, current_text, final_result, last_audio_time, is_processing, last_command_time, execute_callback),
|
|
|
|
|
|
on_message=lambda ws, msg: self._on_message(ws, msg, current_text, last_audio_time),
|
|
|
|
|
|
on_error=lambda ws, err: self._on_error(ws, err, is_processing),
|
|
|
|
|
|
on_close=lambda ws, status, msg: self._on_close(ws, status, msg, current_text, final_result, self.stream)
|
|
|
|
|
|
)
|
|
|
|
|
|
ws.run_forever(ping_interval=10, ping_timeout=5)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"❌ 语音识别连接失败:{str(e)}")
|
|
|
|
|
|
print("⚠️ 3秒后重新监听唤醒词...")
|
|
|
|
|
|
time.sleep(3)
|