2025-09-29 09:19:40 +08:00
|
|
|
|
import signal
|
|
|
|
|
|
import sys
|
|
|
|
|
|
import time
|
|
|
|
|
|
import re
|
|
|
|
|
|
import subprocess
|
|
|
|
|
|
import queue
|
|
|
|
|
|
# Import all project modules
|
|
|
|
|
|
from tts_module import BaiduOnlineTTS
|
|
|
|
|
|
from volume_module import VolumeController, detect_audio_control
|
|
|
|
|
|
from motion_module import RobotMotionController
|
|
|
|
|
|
from camera_module import CameraModule
|
|
|
|
|
|
from ark_api_module import ArkAPIController
|
|
|
|
|
|
from voice_recog_module import VoiceRecogController
|
|
|
|
|
|
|
|
|
|
|
|
# -------------------- 1. Base configuration (carried over from section 1 of the original code) --------------------
# NOTE(review): the API keys and secrets below are hard-coded in source;
# consider loading them from the environment or an untracked config file.

# 1.1 Project path and motion model
PROJECT_ROOT = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini_Runtime-2"
# Make the runtime project importable from this script.
sys.path.append(PROJECT_ROOT)
ONNX_MODEL_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini-2/BEST_WALK_ONNX_2.onnx"

# 1.2 Volcano Engine Ark API configuration
ARK_API_KEY = "390d517c-129a-41c1-bf3d-458048007b69"
ARK_MODEL_ID = "doubao-seed-1-6-250615"

# 1.3 Speech recognition and wake-word configuration
APPID = "1ff50710"
ACCESS_KEY_ID = "a4f43e95ee0a9518d11befac8d31f1d4"
ACCESS_KEY_SECRET = "YzQ4NTRhZjc2ZTM4MDA1YjM2MmIyNDEy"
ACCESS_KEY = "e0EQQBoH0HIVU9KrXsmB7CMlVci+GAs2x0Ejtrdp8CTtZmf25rCLaQ=="
WAKEUP_WORD_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/resources/xiaohuangya_zh_raspberry-pi_v3_0_0.ppn"
MODEL_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/resources/porcupine_params_zh.pv"

# 1.4 Baidu online TTS configuration
BAIDU_TTS_API_KEY = "TnwYZPPvElNushOzfL6vBlUI"
BAIDU_TTS_SECRET_KEY = "55HeI8VNUMNlkW3t2QRwVtrjumpxjfxk"
|
|
|
|
|
|
|
|
|
|
|
|
# 1.5 Spoken feedback texts, keyed by event / action name
FEEDBACK_TEXT = dict(
    wakeup="你好呀,有什么吩咐",
    move_forward="好的,我正在前进",
    move_backward="好的,我正在后退",
    turn_left="好的,我正在左转",
    turn_right="好的,我正在右转",
    image_recog="好的,我来识别一下",
    chat="好的,我来想想",
    volume_increase="音量已增大",
    volume_decrease="音量已减小",
    volume_max="已调至最大音量",
    volume_min="已调至最小音量",
    unknown="抱歉,没听懂,请再说一次",
    api_error="抱歉,处理请求时出错了",
)
|
|
|
|
|
|
|
|
|
|
|
|
# 1.6 Audio (volume) parameters
VOLUME_STEP = 10
MIN_VOLUME, MAX_VOLUME = 0, 100
CURRENT_VOLUME = 40
# Filled in by main() with the result of detect_audio_control().
AUDIO_CONTROL_NAME = None

# 1.7 Microphone and speaker parameters (already defined inside the modules;
# kept here for consistency)
SAMPLE_RATE, CHANNELS = 16000, 1
SAMPLE_FORMAT = "int16"
AUDIO_ENCODE = "pcm_s16le"
LANG = "autodialect"
INTERACTION_TIMEOUT = 30  # presumably seconds -- TODO confirm against the consumer
|
|
|
|
|
|
|
|
|
|
|
|
# -------------------- 2. Global state (carried over from section 2 of the original code) --------------------
audio_q = queue.Queue()

# Each value below is wrapped in a one-element list so it can be handed to
# other modules by reference and updated in place through index 0.
last_audio_time = [time.time()]
current_text = [""]
final_result = [""]
is_processing = [False]
last_command_time = [time.time()]

# Plain module-level flag; the original comment says it is used by the TTS module.
feedback_playing = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# -------------------- 8. Command parsing and execution (carried over from section 8 of the original code) --------------------
|
|
|
|
|
|
# Keyword tables are built once at import time instead of on every call.
# Each rule is (keywords, action); the first rule with a matching keyword wins.
_MOTION_RULES = (
    (("前进", "往前走", "向前走"), "move_forward"),
    (("后退", "往后走", "向后退"), "move_backward"),
    (("左转", "向左转", "往左转"), "turn_left"),
    (("右转", "向右转", "往右转"), "turn_right"),
)
_IMAGE_KEYWORDS = ("是什么", "这是什么", "识别", "看这个", "这东西", "这物体", "辨认")
_VOLUME_RULES = (
    (("增大音量", "声音大一点", "调大音量"), "increase"),
    (("减小音量", "声音小一点", "调小音量"), "decrease"),
    (("最大音量", "声音最大"), "max"),
    (("最小音量", "声音最小", "静音"), "min"),
)
_CHAT_KEYWORDS = (
    "什么", "怎么", "为什么", "哪里", "多少", "如何", "吗", "呢", "吧",
    "你好", "哈喽", "嗨", "今天", "天气", "时间", "故事", "笑话", "知识",
)
_CHAT_EXCLUDE_KEYWORDS = ("前进", "后退", "左转", "右转", "识别", "音量", "增大", "减小")
_DEFAULT_MOTION_SECONDS = 2
# NOTE(review): only the first one or two digits are read, so "100" parses
# as 10 -- confirm whether that cap is intended.
_SECONDS_PATTERN = re.compile(r"(\d{1,2})")


def _contains_any(text, keywords):
    """Return True when at least one of *keywords* occurs in *text*."""
    return any(keyword in text for keyword in keywords)


def parse_voice_command(command_text: str):
    """Classify recognised speech into a command type and its parameters.

    The text is stripped and lower-cased, then matched by substring against
    the rule tables in this order: motion, image recognition, volume, chat.

    Volume is checked before chat so that a volume phrase followed by a
    question particle (for example "声音大一点吧") is no longer swallowed by
    the chat rule.

    Args:
        command_text: Recognised utterance. ``None`` or blank text is
            treated as an unknown command.

    Returns:
        A ``(command_type, params)`` tuple, one of:
        ``("motion", {"action": str, "seconds": int})``,
        ``("image_recog", {"prompt": str})``,
        ``("volume", {"action": "increase" | "decrease" | "max" | "min"})``,
        ``("chat", {"prompt": str})`` or ``("unknown", {})``.
    """
    command_text = (command_text or "").strip().lower()
    if not command_text:
        return ("unknown", {})

    # Motion commands: the duration comes from the first number in the text.
    for keywords, action in _MOTION_RULES:
        if _contains_any(command_text, keywords):
            number_match = _SECONDS_PATTERN.search(command_text)
            seconds = int(number_match.group(1)) if number_match else _DEFAULT_MOTION_SECONDS
            return ("motion", {"action": action, "seconds": seconds})

    # Image-recognition commands.
    if _contains_any(command_text, _IMAGE_KEYWORDS):
        prompt = f"请简洁描述图片中的物体,1-2句话说明:{command_text}"
        return ("image_recog", {"prompt": prompt})

    # Volume commands (must run before the chat rule, see docstring).
    for keywords, action in _VOLUME_RULES:
        if _contains_any(command_text, keywords):
            return ("volume", {"action": action})

    # Chat: needs a chat keyword and none of the control-command keywords.
    if (
        len(command_text) >= 2
        and _contains_any(command_text, _CHAT_KEYWORDS)
        and not _contains_any(command_text, _CHAT_EXCLUDE_KEYWORDS)
    ):
        return ("chat", {"prompt": command_text})

    # Nothing matched.
    return ("unknown", {})
|
|
|
|
|
|
|
|
|
|
|
|
def execute_command(command_type: str, params: dict, motion_controller, ark_api_controller, volume_controller):
    """Carry out one parsed voice command.

    Relies on the module-level ``tts_controller`` and ``camera_module``
    globals (assigned in ``main``) and on the shared ``is_processing`` cell,
    which is set for the duration of the call and always cleared afterwards.

    Args:
        command_type: "motion", "image_recog", "chat", "volume" or "unknown";
            any other value is a no-op.
        params: Parameters for the command: "action"/"seconds" for motion,
            "prompt" for image recognition and chat, "action" for volume.
        motion_controller: Object providing ``execute_motion(action, seconds)``.
        ark_api_controller: Object providing ``call_ark_api(kind, payload)``.
        volume_controller: Object providing ``adjust_volume(is_increase=...)``
            and ``set_system_volume(level)``.

    Returns:
        None.
    """
    # Refuse to start while another command is still being handled.
    if is_processing[0]:
        tts_controller.speak(FEEDBACK_TEXT["unknown"])
        print("⚠️ 已有指令处理中,请稍后再说")
        return

    divider = "=" * 50
    is_processing[0] = True
    try:
        if command_type == "motion":
            motion_controller.execute_motion(params["action"], params["seconds"])

        elif command_type == "image_recog":
            print("\n🔍 触发图像识别,正在拍摄...")
            snapshot = camera_module.capture_base64()
            if snapshot:
                payload = {"image_base64": snapshot, "prompt": params["prompt"]}
                ark_api_controller.call_ark_api("image_recog", payload)
            else:
                # Capture failed: tell the user and skip the API call.
                tts_controller.speak(FEEDBACK_TEXT["unknown"])
                print("\n" + divider)
                print("❌ 图像采集失败,无法识别")
                print(divider + "\n")

        elif command_type == "chat":
            print("\n💬 触发闲聊,正在思考...")
            ark_api_controller.call_ark_api("chat", {"prompt": params["prompt"]})

        elif command_type == "volume":
            # action -> (operation to run, feedback key spoken on success)
            volume_ops = {
                "increase": (lambda: volume_controller.adjust_volume(is_increase=True), "volume_increase"),
                "decrease": (lambda: volume_controller.adjust_volume(is_increase=False), "volume_decrease"),
                "max": (lambda: volume_controller.set_system_volume(MAX_VOLUME), "volume_max"),
                "min": (lambda: volume_controller.set_system_volume(MIN_VOLUME), "volume_min"),
            }
            selected = volume_ops.get(params["action"])
            if selected is not None:
                apply_change, feedback_key = selected
                if apply_change():
                    tts_controller.speak(FEEDBACK_TEXT[feedback_key])

        elif command_type == "unknown":
            tts_controller.speak(FEEDBACK_TEXT["unknown"])
            print("\n" + divider)
            print("❌ 未识别到有效指令,支持:")
            print(" - 运动:前进3秒、左转2秒 | - 图像识别:这是什么")
            print(" - 闲聊:今天天气怎么样 | - 音量:增大音量、减小音量")
            print(divider + "\n")
    finally:
        # Always release the busy flag, even if a handler raised.
        is_processing[0] = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# -------------------- 11. Main loop (carried over from section 11 of the original code) --------------------
|
|
|
|
|
|
def main():
    """Initialise all modules, install the Ctrl-C handler and serve wake-ups.

    Sets the module-level ``tts_controller``, ``camera_module`` and
    ``AUDIO_CONTROL_NAME`` globals, speaks a start-up message, then loops
    forever: each time the wake-word listener reports a wake-up, a
    recognition session is started with a callback that parses and executes
    the recognised text.

    Exits the process with status 1 if the TTS controller cannot be created,
    and with status 0 from the SIGINT handler.
    """
    global tts_controller, camera_module, AUDIO_CONTROL_NAME, feedback_playing

    # Module initialisation, in the original order.
    # 1. TTS
    try:
        tts_controller = BaiduOnlineTTS(BAIDU_TTS_API_KEY, BAIDU_TTS_SECRET_KEY)
    except Exception as e:
        print(f"❌ TTS初始化失败: {str(e)}")
        sys.exit(1)

    # 2. Volume control
    AUDIO_CONTROL_NAME = detect_audio_control()
    volume_controller = VolumeController(
        audio_control_name=AUDIO_CONTROL_NAME,
        current_volume=CURRENT_VOLUME,
        volume_step=VOLUME_STEP,
        min_volume=MIN_VOLUME,
        max_volume=MAX_VOLUME
    )

    # 3. Motion control
    motion_controller = RobotMotionController(
        onnx_model_path=ONNX_MODEL_PATH,
        tts_controller=tts_controller,
        feedback_text=FEEDBACK_TEXT
    )

    # 4. Camera
    camera_module = CameraModule()

    # 5. Ark API controller
    ark_api_controller = ArkAPIController(
        ark_api_key=ARK_API_KEY,
        ark_model_id=ARK_MODEL_ID,
        tts_controller=tts_controller,
        feedback_text=FEEDBACK_TEXT
    )

    # 6. Speech recognition
    voice_recog_controller = VoiceRecogController(
        access_key=ACCESS_KEY,
        wakeup_word_path=WAKEUP_WORD_PATH,
        model_path=MODEL_PATH,
        appid=APPID,
        access_key_id=ACCESS_KEY_ID,
        access_key_secret=ACCESS_KEY_SECRET,
        tts_controller=tts_controller,
        feedback_text=FEEDBACK_TEXT
    )

    def handle_interrupt(signum, frame):
        """SIGINT handler: stop motion, playback, camera and microphone, then exit."""
        global feedback_playing
        print("\n🛑 收到退出信号,正在清理资源...")

        # Stop the robot. motion_controller is a local of main() reached
        # through the closure; the earlier `'motion_controller' in globals()`
        # guard was false for this file as shown (no module-level name), so
        # the stop command was never sent.
        if hasattr(motion_controller, 'rl_walk'):
            motion_controller.rl_walk.last_commands = [0.0, 0.0, 0.0]

        # Stop TTS playback.
        feedback_playing = False

        # Stop the camera and the microphone stream. camera_module is
        # assigned above, before this handler is installed.
        camera = getattr(camera_module, 'camera', None)
        if camera:
            camera.stop()
        stream = getattr(voice_recog_controller, 'stream', None)
        if stream and stream.active:
            stream.stop()

        # Release TTS resources.
        tts_controller.close()
        print("✅ 所有资源清理完成,程序退出")
        sys.exit(0)

    signal.signal(signal.SIGINT, handle_interrupt)

    # Speak once at start-up to verify audio output.
    print("\n🔍 正在测试语音输出...")
    tts_controller.speak("系统初始化完成,等待语音唤醒")

    def execute_callback(command_text):
        """Parse recognised text and run the resulting command."""
        command_type, params = parse_voice_command(command_text)
        execute_command(command_type, params, motion_controller, ark_api_controller, volume_controller)

    # Main loop: the callback never changes, so it is defined once above
    # rather than on every wake-up.
    while True:
        if voice_recog_controller.wakeup_listener():
            voice_recog_controller.start_websocket(
                current_text=current_text,
                final_result=final_result,
                last_audio_time=last_audio_time,
                is_processing=is_processing,
                last_command_time=last_command_time,
                execute_callback=execute_callback
            )
            # Reset the timing state for the next session.
            last_audio_time[0] = time.time()
            last_command_time[0] = time.time()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Disabled start-up check, kept for reference: make sure ffmpeg is
    # installed and try to install it when it is missing.
    # try:
    #     subprocess.run(["ffmpeg", "--version"], capture_output=True, check=True)
    # except:
    #     print("⚠️ 未检测到ffmpeg,正在尝试安装...")
    #     subprocess.run(["sudo", "apt-get", "install", "-y", "ffmpeg"], check=True)

    main()
|