代码补充

This commit is contained in:
“jiawei” 2025-09-29 09:19:40 +08:00
parent cb2eb13b6e
commit ab42cc6de5
7 changed files with 973 additions and 0 deletions

View File

@ -0,0 +1,101 @@
from openai import OpenAI
import time
import sys
import queue # 新增:用于缓存实时文本片段
import threading # 新增:用于并行处理语音播放
# 原代码7. 火山方舟API调用完整逻辑
class ArkAPIController:
def __init__(self, ark_api_key, ark_model_id, tts_controller, feedback_text):
# 接收调度脚本传入的TTS实例和反馈文本保持原逻辑
self.ARK_API_KEY = ark_api_key
self.ARK_MODEL_ID = ark_model_id
self.tts_controller = tts_controller
self.FEEDBACK_TEXT = feedback_text
self.chat_context = [] # 聊天上下文由模块内部维护(与原逻辑一致)
self.MAX_CONTEXT_LEN = 10
# 新增:实时语音播放队列与线程
self.speech_queue = queue.Queue() # 缓存待播放的文本片段
self.speech_thread = threading.Thread(target=self._process_speech_queue, daemon=True)
self.speech_thread.start() # 启动语音播放线程
# 新增:处理语音队列的函数(循环从队列取片段并播放)
def _process_speech_queue(self):
"""持续从队列中获取文本片段并调用TTS播放"""
while True:
text = self.speech_queue.get() # 阻塞等待队列消息
if text is None: # 退出信号
break
self.tts_controller.speak(text) # 播放片段
self.speech_queue.task_done() # 标记任务完成
def call_ark_api(self, content_type: str, content: dict):
# 播放操作反馈(同步执行)
self.tts_controller.speak(self.FEEDBACK_TEXT[content_type])
client = OpenAI(
base_url="https://ark.cn-beijing.volces.com/api/v3",
api_key=self.ARK_API_KEY
)
try:
messages = []
if content_type == "chat":
messages.extend(self.chat_context[-self.MAX_CONTEXT_LEN*2:])
messages.append({"role": "user", "content": [{"type": "text", "text": content["prompt"]}]})
elif content_type == "image_recog":
messages.append({
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{content['image_base64']}"}},
{"type": "text", "text": content["prompt"]}
]
})
response = client.chat.completions.create(
model=self.ARK_MODEL_ID,
messages=messages,
max_tokens=300,
temperature=0.7 if content_type == "chat" else 0.3,
stream=True
)
full_response = ""
current_speech_chunk = "" # 缓存当前待播放的片段
print("\n" + "="*50)
print("🤖 回应:", end="", flush=True)
for chunk in response:
if chunk.choices and chunk.choices[0].delta.content:
char = chunk.choices[0].delta.content
full_response += char
current_speech_chunk += char # 累加片段
print(char, end="", flush=True)
time.sleep(0.05)
# 关键逻辑:当片段包含标点或达到一定长度时,推送到语音队列
if any(punct in current_speech_chunk for punct in [".", "", "!", "", "?", "", ",", "", ";", ""]):
self.speech_queue.put(current_speech_chunk) # 推送片段到队列
current_speech_chunk = "" # 重置片段缓存
# 处理最后剩余的片段(如果有)
if current_speech_chunk:
self.speech_queue.put(current_speech_chunk)
print("\n" + "="*50 + "\n")
# 等待所有语音片段播放完成
self.speech_queue.join()
# 维护聊天上下文(原有逻辑)
if content_type == "chat" and full_response.strip():
self.chat_context.append({"role": "user", "content": [{"type": "text", "text": content["prompt"]}]})
self.chat_context.append({"role": "assistant", "content": [{"type": "text", "text": full_response}]})
return full_response
except Exception as e:
error_msg = f"❌ API调用失败{str(e)}"
print(f"\n" + "="*50)
print(error_msg)
print("="*50 + "\n")
self.tts_controller.speak(self.FEEDBACK_TEXT["api_error"])
return error_msg

View File

@ -0,0 +1,30 @@
import io
import base64
from PIL import Image
from picamera2 import Picamera2
import sys
# 原代码6. 摄像头模块完整逻辑
class CameraModule:
def __init__(self):
try:
self.camera = Picamera2()
cam_config = self.camera.create_still_configuration(main={"size": (320, 240)})
self.camera.configure(cam_config)
self.camera.start()
print("📷 摄像头模块初始化成功")
except Exception as e:
print(f"❌ 摄像头失败:{str(e)}")
self.camera = None
def capture_base64(self):
if not self.camera:
return None
try:
img_array = self.camera.capture_array()
img_byte = io.BytesIO()
Image.fromarray(img_array).save(img_byte, format="JPEG", quality=80)
return base64.b64encode(img_byte.getvalue()).decode("utf-8")
except Exception as e:
print(f"❌ 拍摄失败:{str(e)}")
return None

View File

@ -0,0 +1,291 @@
import signal
import sys
import time
import re
import subprocess
import queue
# 导入所有模块
from tts_module import BaiduOnlineTTS
from volume_module import VolumeController, detect_audio_control
from motion_module import RobotMotionController
from camera_module import CameraModule
from ark_api_module import ArkAPIController
from voice_recog_module import VoiceRecogController
# -------------------- 1. 基础配置完全保留原代码1. 基础配置) --------------------
# 1.1 项目路径与运动模型
PROJECT_ROOT = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini_Runtime-2"
sys.path.append(PROJECT_ROOT)
ONNX_MODEL_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini-2/BEST_WALK_ONNX_2.onnx"
# 1.2 火山方舟API配置
ARK_API_KEY = "390d517c-129a-41c1-bf3d-458048007b69"
ARK_MODEL_ID = "doubao-seed-1-6-250615"
# 1.3 语音识别与唤醒词配置
APPID = "1ff50710"
ACCESS_KEY_ID = "a4f43e95ee0a9518d11befac8d31f1d4"
ACCESS_KEY_SECRET = "YzQ4NTRhZjc2ZTM4MDA1YjM2MmIyNDEy"
ACCESS_KEY = "e0EQQBoH0HIVU9KrXsmB7CMlVci+GAs2x0Ejtrdp8CTtZmf25rCLaQ=="
WAKEUP_WORD_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/resources/xiaohuangya_zh_raspberry-pi_v3_0_0.ppn"
MODEL_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/resources/porcupine_params_zh.pv"
# 1.4 百度在线TTS配置
BAIDU_TTS_API_KEY = "TnwYZPPvElNushOzfL6vBlUI"
BAIDU_TTS_SECRET_KEY = "55HeI8VNUMNlkW3t2QRwVtrjumpxjfxk"
# 1.5 语音反馈文本配置
FEEDBACK_TEXT = {
"wakeup": "你好呀,有什么吩咐",
"move_forward": "好的,我正在前进",
"move_backward": "好的,我正在后退",
"turn_left": "好的,我正在左转",
"turn_right": "好的,我正在右转",
"image_recog": "好的,我来识别一下",
"chat": "好的,我来想想",
"volume_increase": "音量已增大",
"volume_decrease": "音量已减小",
"volume_max": "已调至最大音量",
"volume_min": "已调至最小音量",
"unknown": "抱歉,没听懂,请再说一次",
"api_error": "抱歉,处理请求时出错了"
}
# 1.6 音频参数
VOLUME_STEP = 10
MIN_VOLUME = 0
MAX_VOLUME = 100
CURRENT_VOLUME = 40
AUDIO_CONTROL_NAME = None
# 1.7 麦克风与扬声器参数(模块内部已定义,此处保留用于一致性)
SAMPLE_RATE = 16000
CHANNELS = 1
SAMPLE_FORMAT = "int16"
AUDIO_ENCODE = "pcm_s16le"
LANG = "autodialect"
INTERACTION_TIMEOUT = 30
# -------------------- 2. 全局状态变量完全保留原代码2. 全局状态变量,用列表传引用) --------------------
audio_q = queue.Queue()
last_audio_time = [time.time()] # 列表传引用,供模块修改
current_text = [""] # 列表传引用,供模块修改
final_result = [""] # 列表传引用,供模块修改
is_processing = [False] # 列表传引用,供模块修改
last_command_time = [time.time()]# 列表传引用,供模块修改
feedback_playing = False # TTS模块使用的全局变量
# -------------------- 8. 指令解析与执行完全保留原代码8. 指令解析与执行) --------------------
def parse_voice_command(command_text: str):
command_text = command_text.strip().lower()
if not command_text:
return ("unknown", {})
# 运动指令
motion_rules = [
{"keywords": ["前进", "往前走", "向前走"], "action": "move_forward"},
{"keywords": ["后退", "往后走", "向后退"], "action": "move_backward"},
{"keywords": ["左转", "向左转", "往左转"], "action": "turn_left"},
{"keywords": ["右转", "向右转", "往右转"], "action": "turn_right"},
]
for rule in motion_rules:
if any(keyword in command_text for keyword in rule["keywords"]):
number_match = re.search(r"(\d{1,2})", command_text)
seconds = int(number_match.group(1)) if number_match else 2
return ("motion", {"action": rule["action"], "seconds": seconds})
# 图像识别指令
image_keywords = ["是什么", "这是什么", "识别", "看这个", "这东西", "这物体", "辨认"]
if any(keyword in command_text for keyword in image_keywords):
prompt = f"请简洁描述图片中的物体1-2句话说明{command_text}"
return ("image_recog", {"prompt": prompt})
# 闲聊指令
chat_keywords = [
"什么", "怎么", "为什么", "哪里", "多少", "如何", "", "", "",
"你好", "哈喽", "", "今天", "天气", "时间", "故事", "笑话", "知识"
]
exclude_keywords = ["前进", "后退", "左转", "右转", "识别", "音量", "增大", "减小"]
if len(command_text) >= 2 and any(k in command_text for k in chat_keywords) and not any(k in command_text for k in exclude_keywords):
return ("chat", {"prompt": command_text})
# 音量控制指令
if any(keyword in command_text for keyword in ["增大音量", "声音大一点", "调大音量"]):
return ("volume", {"action": "increase"})
elif any(keyword in command_text for keyword in ["减小音量", "声音小一点", "调小音量"]):
return ("volume", {"action": "decrease"})
elif any(keyword in command_text for keyword in ["最大音量", "声音最大"]):
return ("volume", {"action": "max"})
elif any(keyword in command_text for keyword in ["最小音量", "声音最小", "静音"]):
return ("volume", {"action": "min"})
# 未知指令
return ("unknown", {})
def execute_command(command_type: str, params: dict, motion_controller, ark_api_controller, volume_controller):
global is_processing, feedback_playing
if is_processing[0]:
tts_controller.speak(FEEDBACK_TEXT["unknown"])
print("⚠️ 已有指令处理中,请稍后再说")
return
is_processing[0] = True
try:
if command_type == "motion":
motion_controller.execute_motion(params["action"], params["seconds"])
elif command_type == "image_recog":
print(f"\n🔍 触发图像识别,正在拍摄...")
image_base64 = camera_module.capture_base64()
if not image_base64:
tts_controller.speak(FEEDBACK_TEXT["unknown"])
print("\n" + "="*50)
print("❌ 图像采集失败,无法识别")
print("="*50 + "\n")
return
ark_api_controller.call_ark_api("image_recog", {"image_base64": image_base64, "prompt": params["prompt"]})
elif command_type == "chat":
print(f"\n💬 触发闲聊,正在思考...")
ark_api_controller.call_ark_api("chat", {"prompt": params["prompt"]})
elif command_type == "volume":
volume_action = params["action"]
if volume_action == "increase":
success = volume_controller.adjust_volume(is_increase=True)
if success:
tts_controller.speak(FEEDBACK_TEXT["volume_increase"])
elif volume_action == "decrease":
success = volume_controller.adjust_volume(is_increase=False)
if success:
tts_controller.speak(FEEDBACK_TEXT["volume_decrease"])
elif volume_action == "max":
success = volume_controller.set_system_volume(MAX_VOLUME)
if success:
tts_controller.speak(FEEDBACK_TEXT["volume_max"])
elif volume_action == "min":
success = volume_controller.set_system_volume(MIN_VOLUME)
if success:
tts_controller.speak(FEEDBACK_TEXT["volume_min"])
elif command_type == "unknown":
tts_controller.speak(FEEDBACK_TEXT["unknown"])
print("\n" + "="*50)
print(f"❌ 未识别到有效指令,支持:")
print(f" - 运动前进3秒、左转2秒 | - 图像识别:这是什么")
print(f" - 闲聊:今天天气怎么样 | - 音量:增大音量、减小音量")
print("="*50 + "\n")
finally:
is_processing[0] = False
# -------------------- 11. 主循环完全保留原代码11. 主循环逻辑) --------------------
def main():
global tts_controller, camera_module, AUDIO_CONTROL_NAME, feedback_playing
# 初始化各模块(按原代码顺序)
# 1. 初始化TTS
try:
tts_controller = BaiduOnlineTTS(BAIDU_TTS_API_KEY, BAIDU_TTS_SECRET_KEY)
except Exception as e:
print(f"❌ TTS初始化失败: {str(e)}")
sys.exit(1)
# 2. 初始化音量控制
AUDIO_CONTROL_NAME = detect_audio_control()
volume_controller = VolumeController(
audio_control_name=AUDIO_CONTROL_NAME,
current_volume=CURRENT_VOLUME,
volume_step=VOLUME_STEP,
min_volume=MIN_VOLUME,
max_volume=MAX_VOLUME
)
# 3. 初始化运动控制
motion_controller = RobotMotionController(
onnx_model_path=ONNX_MODEL_PATH,
tts_controller=tts_controller,
feedback_text=FEEDBACK_TEXT
)
# 4. 初始化摄像头
camera_module = CameraModule()
# 5. 初始化API控制器
ark_api_controller = ArkAPIController(
ark_api_key=ARK_API_KEY,
ark_model_id=ARK_MODEL_ID,
tts_controller=tts_controller,
feedback_text=FEEDBACK_TEXT
)
# 6. 初始化语音识别
voice_recog_controller = VoiceRecogController(
access_key=ACCESS_KEY,
wakeup_word_path=WAKEUP_WORD_PATH,
model_path=MODEL_PATH,
appid=APPID,
access_key_id=ACCESS_KEY_ID,
access_key_secret=ACCESS_KEY_SECRET,
tts_controller=tts_controller,
feedback_text=FEEDBACK_TEXT
)
# 中断处理(完全保留原逻辑)
def handle_interrupt(signum, frame):
print("\n🛑 收到退出信号,正在清理资源...")
# 停止机器人运动
if 'motion_controller' in globals() and hasattr(motion_controller, 'rl_walk'):
motion_controller.rl_walk.last_commands = [0.0, 0.0, 0.0]
# 停止TTS播放
global feedback_playing
feedback_playing = False
# 停止摄像头与麦克风
if 'camera_module' in globals() and camera_module.camera:
camera_module.camera.stop()
if hasattr(voice_recog_controller, 'stream') and voice_recog_controller.stream and voice_recog_controller.stream.active:
voice_recog_controller.stream.stop()
# 关闭TTS资源
tts_controller.close()
print("✅ 所有资源清理完成,程序退出")
sys.exit(0)
signal.signal(signal.SIGINT, handle_interrupt)
# 强制测试一次语音输出(原逻辑)
print("\n🔍 正在测试语音输出...")
tts_controller.speak("系统初始化完成,等待语音唤醒")
# 主循环(原逻辑)
while True:
if voice_recog_controller.wakeup_listener():
# 定义指令执行回调函数(关键修复)
def execute_callback(command_text):
command_type, params = parse_voice_command(command_text)
execute_command(command_type, params, motion_controller, ark_api_controller, volume_controller)
# 启动WebSocket时传入回调函数
voice_recog_controller.start_websocket(
current_text=current_text,
final_result=final_result,
last_audio_time=last_audio_time,
is_processing=is_processing,
last_command_time=last_command_time,
execute_callback=execute_callback # 传入回调
)
# 重置状态
last_audio_time[0] = time.time()
last_command_time[0] = time.time()
if __name__ == "__main__":
# # 确保ffmpeg已安装原逻辑
# try:
# subprocess.run(["ffmpeg", "--version"], capture_output=True, check=True)
# except:
# print("⚠️ 未检测到ffmpeg正在尝试安装...")
# subprocess.run(["sudo", "apt-get", "install", "-y", "ffmpeg"], check=True)
main()

View File

@ -0,0 +1,59 @@
import sys
import time
import threading
# 原代码5. 运动控制模块完整逻辑(保留原路径导入)
PROJECT_ROOT = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini_Runtime-2"
sys.path.append(PROJECT_ROOT)
from v2_rl_walk_mujoco import RLWalk
class RobotMotionController:
def __init__(self, onnx_model_path, tts_controller, feedback_text):
# 接收调度脚本传入的TTS实例和反馈文本保持原逻辑调用
self.tts_controller = tts_controller
self.FEEDBACK_TEXT = feedback_text
try:
self.rl_walk = RLWalk(
onnx_model_path=onnx_model_path,
cutoff_frequency=40,
pid=[30, 0, 0]
)
self.walk_thread = threading.Thread(target=self.rl_walk.run, daemon=True)
self.walk_thread.start()
time.sleep(1)
print("🤖 运动控制模块初始化成功")
except Exception as e:
print(f"❌ 运动控制失败:{str(e)}")
sys.exit(1)
def execute_motion(self, action_name: str, seconds: int):
# 全局变量由调度脚本传入,保留原逻辑
global is_processing
is_processing = True
try:
# 播放反馈(同步执行确保声音输出)
self.tts_controller.speak(self.FEEDBACK_TEXT[action_name])
# 执行运动
seconds = max(2, min(seconds, 5))
if action_name == "move_forward":
print(f"\n🚶 前进{seconds}秒...")
self.rl_walk.last_commands[0] = 0.17
elif action_name == "move_backward":
print(f"\n🚶 后退{seconds}秒...")
self.rl_walk.last_commands[0] = -0.17
elif action_name == "turn_left":
print(f"\n🔄 左转{seconds}秒...")
self.rl_walk.last_commands[2] = 1.1
elif action_name == "turn_right":
print(f"\n🔄 右转{seconds}秒...")
self.rl_walk.last_commands[2] = -1.1
time.sleep(seconds)
self.rl_walk.last_commands = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
print(f"✅ 运动完成")
except Exception as e:
print(f"❌ 运动执行失败:{str(e)}")
self.rl_walk.last_commands = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
finally:
is_processing = False

View File

@ -0,0 +1,173 @@
import pyaudio
import wave
import tempfile
import os
import requests
import time
import sys
# 原代码3. 百度在线TTS模块完整逻辑
class BaiduOnlineTTS:
def __init__(self, api_key, secret_key):
"""初始化百度在线TTS"""
self.api_key = api_key
self.secret_key = secret_key
self.access_token = None
self.token_expires = 0
# 初始化音频播放器
self.audio_player = pyaudio.PyAudio()
# TTS配置参数
self.default_options = {
'vol': 5, # 音量(0-15)
'spd': 5, # 语速(0-9)
'pit': 5, # 音调(0-9)
'per': 0 # 发音人(0:女,1:男,3:情感女,4:情感男)
}
# 获取初始访问令牌
if not self._get_access_token():
raise Exception("无法获取百度API访问令牌请检查密钥是否正确")
def _get_access_token(self):
"""获取百度API访问令牌"""
# 检查令牌是否仍然有效
if self.access_token and time.time() < self.token_expires - 300:
return True
try:
url = f"https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={self.api_key}&client_secret={self.secret_key}"
response = requests.get(url)
result = response.json()
if "access_token" in result:
self.access_token = result["access_token"]
self.token_expires = time.time() + result["expires_in"]
print("✅ 成功获取百度API访问令牌")
return True
else:
print(f"❌ 获取令牌失败: {result}")
return False
except Exception as e:
print(f"❌ 获取令牌时发生错误: {str(e)}")
return False
def text_to_speech(self, text, options=None, save_path=None):
"""将文本转换为语音"""
# 确保令牌有效
if not self._get_access_token():
return None
# 合并配置参数
params = self.default_options.copy()
if options:
params.update(options)
try:
# 对文本进行URL编码
encoded_text = requests.utils.quote(text)
url = f"https://tsn.baidu.com/text2audio?tex={encoded_text}&lan=zh&cuid=baidu-tts-python&ctp=1&tok={self.access_token}"
# 添加合成参数
for key, value in params.items():
url += f"&{key}={value}"
# 发送请求
response = requests.get(url)
# 检查响应是否为音频数据
if response.headers.get("Content-Type", "").startswith("audio/"):
# 保存文件(如果需要)
if save_path:
with open(save_path, "wb") as f:
f.write(response.content)
print(f"✅ 音频已保存至: {save_path}")
return response.content
else:
# 解析错误信息
try:
error = response.json()
print(f"❌ 语音合成失败: {error.get('err_msg', '未知错误')}")
except:
print(f"❌ 语音合成失败,响应内容: {response.text}")
return None
except Exception as e:
print(f"❌ 语音合成时发生错误: {str(e)}")
return None
def speak(self, text, options=None):
"""直接播放文本转换的语音"""
# 全局变量由调度脚本传入,此处保留原逻辑调用
from main_scheduler import feedback_playing
if feedback_playing:
return False
feedback_playing = True
# 限制文本长度(百度API有长度限制)
if len(text) > 1024:
print("⚠️ 文本过长将截断为1024字符")
text = text[:1024]
# 获取音频数据
audio_data = self.text_to_speech(text, options)
if not audio_data:
feedback_playing = False
return False
try:
# 创建临时MP3文件
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
temp_file.write(audio_data)
temp_filename = temp_file.name
# 转换为WAV格式(适配pyaudio)
from pydub import AudioSegment
audio = AudioSegment.from_mp3(temp_filename)
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
audio.export(wav_file.name, format="wav")
wav_filename = wav_file.name
# 播放WAV文件
wf = wave.open(wav_filename, 'rb')
stream = self.audio_player.open(
format=self.audio_player.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True
)
# 播放音频
chunk = 1024
data = wf.readframes(chunk)
while data and feedback_playing:
stream.write(data)
data = wf.readframes(chunk)
# 清理资源
stream.stop_stream()
stream.close()
wf.close()
print(f"✅ 语音播放完成: {text[:20]}...")
return True
except Exception as e:
print(f"❌ 播放语音时发生错误: {str(e)}")
return False
finally:
# 删除临时文件
if 'temp_filename' in locals() and os.path.exists(temp_filename):
os.remove(temp_filename)
if 'wav_filename' in locals() and os.path.exists(wav_filename):
os.remove(wav_filename)
feedback_playing = False
def close(self):
"""释放资源"""
self.audio_player.terminate()
print("✅ TTS资源已释放")

View File

@ -0,0 +1,226 @@
import sounddevice as sd
import pvporcupine
import struct
import websocket
import threading
import hmac
import hashlib
import base64
import json
import time
import urllib.parse
import uuid
import queue
import sys
# 原代码9. 音频采集与WebSocket + 10. 唤醒词监听完整逻辑
class VoiceRecogController:
def __init__(self, access_key, wakeup_word_path, model_path, appid, access_key_id, access_key_secret, tts_controller, feedback_text):
# 接收调度脚本传入的参数,保持原逻辑
self.ACCESS_KEY = access_key
self.WAKEUP_WORD_PATH = wakeup_word_path
self.MODEL_PATH = model_path
self.APPID = appid
self.ACCESS_KEY_ID = access_key_id
self.ACCESS_KEY_SECRET = access_key_secret
self.tts_controller = tts_controller
self.FEEDBACK_TEXT = feedback_text
self.SAMPLE_RATE = 16000
self.CHANNELS = 1
self.SAMPLE_FORMAT = "int16"
self.INTERACTION_TIMEOUT = 30
self.audio_q = queue.Queue()
self.stream = None # 麦克风流后续初始化
def wakeup_listener(self):
"""原代码10. 唤醒词监听"""
try:
porcupine = pvporcupine.create(
access_key=self.ACCESS_KEY,
keyword_paths=[self.WAKEUP_WORD_PATH],
model_path=self.MODEL_PATH
)
print(f"\n🎯 唤醒词引擎就绪(采样率:{porcupine.sample_rate}")
wakeup_mic = sd.RawInputStream(
samplerate=porcupine.sample_rate,
blocksize=porcupine.frame_length,
dtype="int16",
channels=1
)
print("📢 等待唤醒词「小黄鸭」按Ctrl+C退出")
with wakeup_mic:
while True:
pcm_data, _ = wakeup_mic.read(porcupine.frame_length)
pcm_unpacked = struct.unpack_from("h" * porcupine.frame_length, pcm_data)
if porcupine.process(pcm_unpacked) >= 0:
print("🚀 检测到唤醒词「小黄鸭」!")
# 播放唤醒反馈(同步执行)
self.tts_controller.speak(self.FEEDBACK_TEXT["wakeup"])
porcupine.delete()
return True
except Exception as e:
print(f"\n❌ 唤醒词监听失败:{str(e)}")
print(" 排查1. 唤醒词文件路径 2. 麦克风连接 3. PicoVoice Key有效性")
sys.exit(1)
def _audio_callback(self, indata, frames, t, status):
"""原代码9. 音频采集回调"""
if status:
print(f"⚠️ 音频异常:{status}")
self.audio_q.put(bytes(indata))
def _create_ws_url(self):
"""原代码9. 创建WebSocket URL"""
try:
host = "office-api-ast-dx.iflyaisol.com"
path = "/ast/communicate/v1"
utc = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()) + "+0000"
session_uuid = str(uuid.uuid4())
params = {
"accessKeyId": self.ACCESS_KEY_ID,
"appId": self.APPID,
"samplerate": self.SAMPLE_RATE,
"audio_encode": "pcm_s16le",
"lang": "autodialect",
"uuid": session_uuid,
"utc": utc,
}
sorted_params = sorted(params.items(), key=lambda x: x[0])
base_string = "&".join(
f"{urllib.parse.quote_plus(str(k))}={urllib.parse.quote_plus(str(v))}"
for k, v in sorted_params
)
signature = hmac.new(
self.ACCESS_KEY_SECRET.encode("utf-8"),
base_string.encode("utf-8"),
hashlib.sha1
).digest()
signature = base64.b64encode(signature).decode("utf-8")
query = base_string + "&signature=" + urllib.parse.quote_plus(signature)
return f"wss://{host}{path}?{query}", session_uuid
except Exception as e:
print(f"❌ WebSocket URL生成失败{str(e)}")
return None, None
def _on_message(self, ws, message, current_text, last_audio_time):
"""原代码9. WebSocket消息处理接收全局变量引用"""
try:
data = json.loads(message)
if data.get("msg_type") == "result" and "cn" in data.get("data", {}):
words = [
cw.get("w", "")
for rt in data["data"]["cn"].get("st", {}).get("rt", [])
for ws_item in rt.get("ws", [])
for cw in ws_item.get("cw", [])
]
if words:
current_text[0] = "".join(words) # 用列表传引用,修改全局变量
last_audio_time[0] = time.time()
print(f"🎧 识别中:{current_text[0]}", end="\r")
except Exception as e:
print(f"\n❌ 语音识别消息处理错误:{str(e)}")
def _on_error(self, ws, error, is_processing):
"""原代码9. WebSocket错误处理"""
if not is_processing[0]:
print(f"\n❌ WebSocket连接错误{str(error)}")
def _on_close(self, ws, close_status_code, close_msg, current_text, final_result, stream):
"""原代码9. WebSocket关闭处理"""
print(f"\n🔌 WebSocket连接关闭 | 状态码:{close_status_code}")
if stream and stream.active:
stream.stop()
current_text[0] = ""
final_result[0] = ""
def _on_open(self, ws, stream, current_text, final_result, last_audio_time, is_processing, last_command_time, execute_callback):
"""新增 execute_callback 参数,用于接收指令执行函数"""
def send_audio_and_handle():
print("\n🎤 指令已就绪!支持:")
print(" - 运动前进3秒、左转2秒 | - 图像识别:这是什么")
print(" - 闲聊:今天天气怎么样 | - 音量:增大音量、减小音量\n")
stream.start()
current_text[0] = ""
final_result[0] = ""
last_command_time[0] = time.time()
while True:
try:
# 1. 处理音频队列(避免堆积)
while self.audio_q.qsize() > 5:
self.audio_q.get_nowait()
# 2. 发送音频数据(若队列有数据)
audio_data = self.audio_q.get(timeout=0.5)
ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
# 3. 指令识别与执行有文本且2秒内无新音频时执行
if current_text[0] and (time.time() - last_audio_time[0]) > 2:
final_result[0] = current_text[0].strip()
if len(final_result[0]) > 0: # 确保指令有效
print(f"\n⏹ 最终指令:{final_result[0]}")
# 调用回调函数执行指令(关键修复:直接在这里执行)
execute_callback(final_result[0])
last_command_time[0] = time.time() # 更新最后操作时间
current_text[0] = "" # 执行后清空,避免重复识别
final_result[0] = ""
time.sleep(1) # 等待指令执行完成
# 4. 超时检测30秒无操作则关闭连接
if time.time() - last_command_time[0] > self.INTERACTION_TIMEOUT:
print(f"\n{self.INTERACTION_TIMEOUT}秒无操作,关闭连接")
self.tts_controller.speak(self.FEEDBACK_TEXT.get("wakeup_timeout", "长时间没操作,我先休息啦"))
time.sleep(1)
ws.send("close", websocket.ABNF.OPCODE_TEXT)
break
except queue.Empty:
# 队列为空时,检测超时
if time.time() - last_command_time[0] > self.INTERACTION_TIMEOUT:
print(f"\n{self.INTERACTION_TIMEOUT}秒无操作,关闭连接")
self.tts_controller.speak(self.FEEDBACK_TEXT.get("wakeup_timeout", "长时间没操作,我先休息啦"))
time.sleep(1)
ws.send("close", websocket.ABNF.OPCODE_TEXT)
break
continue # 继续循环等待音频
except Exception as e:
print(f"\n❌ 音频发送错误:{str(e)}")
break
audio_thread = threading.Thread(target=send_audio_and_handle, daemon=True)
audio_thread.start()
def start_websocket(self, current_text, final_result, last_audio_time, is_processing, last_command_time, execute_callback):
"""新增 execute_callback 参数,用于传递指令执行函数"""
self.stream = sd.RawInputStream(
samplerate=self.SAMPLE_RATE,
channels=self.CHANNELS,
dtype=self.SAMPLE_FORMAT,
callback=self._audio_callback,
)
ws_url, session_id = self._create_ws_url()
if not ws_url:
print("⚠️ 无法生成语音识别连接3秒后重新监听...")
time.sleep(3)
return
try:
print(f"🔄 连接语音识别服务会话ID{session_id[:8]}...")
# 绑定WebSocket回调时传入 execute_callback
ws = websocket.WebSocketApp(
ws_url,
on_open=lambda ws: self._on_open(ws, self.stream, current_text, final_result, last_audio_time, is_processing, last_command_time, execute_callback),
on_message=lambda ws, msg: self._on_message(ws, msg, current_text, last_audio_time),
on_error=lambda ws, err: self._on_error(ws, err, is_processing),
on_close=lambda ws, status, msg: self._on_close(ws, status, msg, current_text, final_result, self.stream)
)
ws.run_forever(ping_interval=10, ping_timeout=5)
except Exception as e:
print(f"❌ 语音识别连接失败:{str(e)}")
print("⚠️ 3秒后重新监听唤醒词...")
time.sleep(3)

View File

@ -0,0 +1,93 @@
import subprocess
import re
import sys
# 原代码4. 音频控制项自动检测与音量控制完整逻辑
def detect_audio_control():
"""自动检测可用的音频播放控制项"""
try:
result = subprocess.run(["amixer", "controls"], capture_output=True, text=True)
playback_controls = []
# 优先查找常见的音频控制项
priority_names = ["Master", "Speaker", "Headphone", "Audio", "Sound"]
for name in priority_names:
if name in result.stdout:
print(f"✅ 自动检测到音频控制项:{name}")
return name
# 如果没有找到优先项,从所有控制项中提取
for line in result.stdout.splitlines():
if "Playback" in line:
match = re.search(r"'([^']+)'", line)
if match:
playback_controls.append(match.group(1))
if playback_controls:
print(f"✅ 找到音频控制项:{playback_controls[0]}")
return playback_controls[0]
else:
print("⚠️ 未检测到音频控制项,将尝试不指定控制项调节音量")
return None
except Exception as e:
print(f"❌ 音频控制检测失败:{str(e)}")
return None
class VolumeController:
def __init__(self, audio_control_name, current_volume, volume_step, min_volume, max_volume):
# 接收调度脚本传入的全局参数,保持原逻辑
self.available = True
self.AUDIO_CONTROL_NAME = audio_control_name
self.CURRENT_VOLUME = current_volume
self.VOLUME_STEP = volume_step
self.MIN_VOLUME = min_volume
self.MAX_VOLUME = max_volume
try:
# 强制取消静音并设置初始音量
self.set_system_volume(self.CURRENT_VOLUME)
current_volume = self.get_system_volume()
if current_volume is not None:
self.CURRENT_VOLUME = current_volume
print(f"🔊 音量控制初始化成功(当前音量:{self.CURRENT_VOLUME}%")
else:
print(f"⚠️ 无法获取当前音量,但已尝试设置为{self.CURRENT_VOLUME}%")
except Exception as e:
self.available = False
print(f"❌ 音量控制失败:{str(e)}")
def get_system_volume(self):
try:
# 根据检测到的控制项获取音量
cmd = ["amixer", "get"]
if self.AUDIO_CONTROL_NAME:
cmd.append(self.AUDIO_CONTROL_NAME)
result = subprocess.run(cmd, capture_output=True, text=True)
volume_match = re.search(r"(\d+)%", result.stdout)
return int(volume_match.group(1)) if volume_match else None
except Exception as e:
print(f"❌ 获取音量失败:{str(e)}")
return None
def set_system_volume(self, target_volume: int):
target_volume = max(self.MIN_VOLUME, min(self.MAX_VOLUME, target_volume))
try:
# 根据检测到的控制项设置音量
cmd = ["amixer", "set"]
if self.AUDIO_CONTROL_NAME:
cmd.append(self.AUDIO_CONTROL_NAME)
cmd.extend([f"{target_volume}%", "unmute"]) # 强制取消静音
subprocess.run(cmd, capture_output=True)
self.CURRENT_VOLUME = target_volume
print(f"🔊 音量已调整至:{self.CURRENT_VOLUME}%")
return True
except Exception as e:
print(f"❌ 调整音量失败:{str(e)}")
return False
def adjust_volume(self, is_increase: bool):
target_volume = self.CURRENT_VOLUME + self.VOLUME_STEP if is_increase else self.CURRENT_VOLUME - self.VOLUME_STEP
return self.set_system_volume(target_volume)