Open_Duck_Mini_Interact/main_scheduler.py
2025-09-29 09:19:40 +08:00

291 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import queue
import re
import signal
import subprocess
import sys
import time

# Project modules (original import order preserved)
from tts_module import BaiduOnlineTTS
from volume_module import VolumeController, detect_audio_control
from motion_module import RobotMotionController
from camera_module import CameraModule
from ark_api_module import ArkAPIController
from voice_recog_module import VoiceRecogController
# -------------------- 1. Base configuration --------------------
# Every credential and filesystem path below can be overridden with a
# DUCK_-prefixed environment variable (for example DUCK_ARK_API_KEY).
# The literals are kept only as backward-compatible fallbacks.
# NOTE(review): the fallback credentials are committed in plain text; rotate
# them and supply the real values through the environment instead.
def _env_or_default(name, default):
    """Return environment variable DUCK_<name>, or *default* if unset or empty."""
    return os.environ.get("DUCK_" + name) or default


# 1.1 Project path and locomotion model
PROJECT_ROOT = _env_or_default(
    "PROJECT_ROOT",
    "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini_Runtime-2",
)
# NOTE(review): this runs after the project modules were imported at the top
# of the file, so it cannot help those imports resolve — confirm the intent.
sys.path.append(PROJECT_ROOT)
ONNX_MODEL_PATH = _env_or_default(
    "ONNX_MODEL_PATH",
    "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini-2/BEST_WALK_ONNX_2.onnx",
)

# 1.2 Volcano Ark API configuration
ARK_API_KEY = _env_or_default("ARK_API_KEY", "390d517c-129a-41c1-bf3d-458048007b69")
ARK_MODEL_ID = "doubao-seed-1-6-250615"

# 1.3 Speech recognition and wake-word configuration
APPID = _env_or_default("APPID", "1ff50710")
ACCESS_KEY_ID = _env_or_default("ACCESS_KEY_ID", "a4f43e95ee0a9518d11befac8d31f1d4")
ACCESS_KEY_SECRET = _env_or_default("ACCESS_KEY_SECRET", "YzQ4NTRhZjc2ZTM4MDA1YjM2MmIyNDEy")
ACCESS_KEY = _env_or_default(
    "ACCESS_KEY",
    "e0EQQBoH0HIVU9KrXsmB7CMlVci+GAs2x0Ejtrdp8CTtZmf25rCLaQ==",
)
WAKEUP_WORD_PATH = _env_or_default(
    "WAKEUP_WORD_PATH",
    "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/resources/xiaohuangya_zh_raspberry-pi_v3_0_0.ppn",
)
MODEL_PATH = _env_or_default(
    "MODEL_PATH",
    "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/resources/porcupine_params_zh.pv",
)

# 1.4 Baidu online TTS configuration
BAIDU_TTS_API_KEY = _env_or_default("BAIDU_TTS_API_KEY", "TnwYZPPvElNushOzfL6vBlUI")
BAIDU_TTS_SECRET_KEY = _env_or_default(
    "BAIDU_TTS_SECRET_KEY", "55HeI8VNUMNlkW3t2QRwVtrjumpxjfxk"
)

# 1.5 Spoken feedback texts, keyed by event name
FEEDBACK_TEXT = {
    "wakeup": "你好呀,有什么吩咐",
    "move_forward": "好的,我正在前进",
    "move_backward": "好的,我正在后退",
    "turn_left": "好的,我正在左转",
    "turn_right": "好的,我正在右转",
    "image_recog": "好的,我来识别一下",
    "chat": "好的,我来想想",
    "volume_increase": "音量已增大",
    "volume_decrease": "音量已减小",
    "volume_max": "已调至最大音量",
    "volume_min": "已调至最小音量",
    "unknown": "抱歉,没听懂,请再说一次",
    "api_error": "抱歉,处理请求时出错了"
}

# 1.6 Audio volume parameters
VOLUME_STEP = 10
MIN_VOLUME = 0
MAX_VOLUME = 100
CURRENT_VOLUME = 40
AUDIO_CONTROL_NAME = None  # filled in by main() from detect_audio_control()

# 1.7 Microphone and speaker parameters (already defined inside the modules;
# kept here for consistency)
SAMPLE_RATE = 16000
CHANNELS = 1
SAMPLE_FORMAT = "int16"
AUDIO_ENCODE = "pcm_s16le"
LANG = "autodialect"
INTERACTION_TIMEOUT = 30

# -------------------- 2. Global state --------------------
# Single-element lists are used so that other modules can mutate the value
# through the shared reference.
audio_q = queue.Queue()
last_audio_time = [time.time()]
current_text = [""]
final_result = [""]
is_processing = [False]
last_command_time = [time.time()]
feedback_playing = False  # global flag used by the TTS module (per the original comment)
# -------------------- 8. Command parsing and execution (kept from section 8 of the original code) --------------------
def parse_voice_command(command_text: str):
    """Classify a recognized utterance into a command type and its parameters.

    Rules are tried in this order: motion, image recognition, chat, volume.
    The first rule whose keyword occurs in the text wins.

    Args:
        command_text: Text produced by speech recognition.

    Returns:
        A ``(command_type, params)`` tuple:
          * ``("motion", {"action": <str>, "seconds": <int>})``
          * ``("image_recog", {"prompt": <str>})``
          * ``("chat", {"prompt": <str>})``
          * ``("volume", {"action": "increase" | "decrease" | "max" | "min"})``
          * ``("unknown", {})`` for empty or unmatched text
    """
    command_text = command_text.strip().lower()
    if not command_text:
        return ("unknown", {})

    def mentions(keywords):
        # Empty keywords are ignored: "" is a substring of every string, so a
        # single empty entry would make the rule match any utterance.
        return any(kw and kw in command_text for kw in keywords)

    # Motion commands
    motion_rules = [
        {"keywords": ["前进", "往前走", "向前走"], "action": "move_forward"},
        {"keywords": ["后退", "往后走", "向后退"], "action": "move_backward"},
        {"keywords": ["左转", "向左转", "往左转"], "action": "turn_left"},
        {"keywords": ["右转", "向右转", "往右转"], "action": "turn_right"},
    ]
    for rule in motion_rules:
        if mentions(rule["keywords"]):
            # The first run of one or two digits is the duration; default 2 s.
            number_match = re.search(r"(\d{1,2})", command_text)
            seconds = int(number_match.group(1)) if number_match else 2
            return ("motion", {"action": rule["action"], "seconds": seconds})

    # Image recognition commands
    image_keywords = ["是什么", "这是什么", "识别", "看这个", "这东西", "这物体", "辨认"]
    if mentions(image_keywords):
        prompt = f"请简洁描述图片中的物体1-2句话说明{command_text}"
        return ("image_recog", {"prompt": prompt})

    # Chat commands.
    # NOTE(review): this list used to contain four empty-string entries
    # (possibly characters lost to an encoding problem). They made every
    # utterance count as chat, so they are removed; restore the intended
    # keywords here if they are known.
    chat_keywords = [
        "什么", "怎么", "为什么", "哪里", "多少", "如何",
        "你好", "哈喽", "今天", "天气", "时间", "故事", "笑话", "知识"
    ]
    exclude_keywords = ["前进", "后退", "左转", "右转", "识别", "音量", "增大", "减小"]
    if len(command_text) >= 2 and mentions(chat_keywords) and not mentions(exclude_keywords):
        return ("chat", {"prompt": command_text})

    # Volume control commands
    volume_rules = [
        (["增大音量", "声音大一点", "调大音量"], "increase"),
        (["减小音量", "声音小一点", "调小音量"], "decrease"),
        (["最大音量", "声音最大"], "max"),
        (["最小音量", "声音最小", "静音"], "min"),
    ]
    for keywords, action in volume_rules:
        if mentions(keywords):
            return ("volume", {"action": action})

    # Nothing matched
    return ("unknown", {})
def execute_command(command_type: str, params: dict, motion_controller, ark_api_controller, volume_controller):
    """Run one parsed command unless another command is still being processed.

    Args:
        command_type: "motion", "image_recog", "chat", "volume" or "unknown".
        params: Parameters for that command type; "action" and "seconds" are
            read for motion, "prompt" for image recognition and chat, and
            "action" for volume.
        motion_controller: Object whose ``execute_motion(action, seconds)`` is called.
        ark_api_controller: Object whose ``call_ark_api(kind, payload)`` is called.
        volume_controller: Object providing ``adjust_volume`` and ``set_system_volume``.

    Reads the module-level ``tts_controller`` and ``camera_module`` set up by
    ``main()``. The shared ``is_processing`` flag is set for the duration of
    the command and always cleared afterwards.
    """
    # Busy guard: speak feedback and drop the new command.
    if is_processing[0]:
        tts_controller.speak(FEEDBACK_TEXT["unknown"])
        print("⚠️ 已有指令处理中,请稍后再说")
        return

    banner = "=" * 50
    is_processing[0] = True
    try:
        if command_type == "motion":
            motion_controller.execute_motion(params["action"], params["seconds"])

        elif command_type == "image_recog":
            print("\n🔍 触发图像识别,正在拍摄...")
            snapshot = camera_module.capture_base64()
            if not snapshot:
                # No image was captured: report it and give up on this command.
                tts_controller.speak(FEEDBACK_TEXT["unknown"])
                print("\n" + banner)
                print("❌ 图像采集失败,无法识别")
                print(banner + "\n")
                return
            payload = {"image_base64": snapshot, "prompt": params["prompt"]}
            ark_api_controller.call_ark_api("image_recog", payload)

        elif command_type == "chat":
            print("\n💬 触发闲聊,正在思考...")
            ark_api_controller.call_ark_api("chat", {"prompt": params["prompt"]})

        elif command_type == "volume":
            requested = params["action"]
            # action -> (operation to perform, feedback key spoken on success)
            volume_ops = {
                "increase": (lambda: volume_controller.adjust_volume(is_increase=True), "volume_increase"),
                "decrease": (lambda: volume_controller.adjust_volume(is_increase=False), "volume_decrease"),
                "max": (lambda: volume_controller.set_system_volume(MAX_VOLUME), "volume_max"),
                "min": (lambda: volume_controller.set_system_volume(MIN_VOLUME), "volume_min"),
            }
            selected = volume_ops.get(requested)
            if selected is not None:
                apply_change, feedback_key = selected
                if apply_change():
                    tts_controller.speak(FEEDBACK_TEXT[feedback_key])

        elif command_type == "unknown":
            tts_controller.speak(FEEDBACK_TEXT["unknown"])
            print("\n" + banner)
            print("❌ 未识别到有效指令,支持:")
            print(" - 运动前进3秒、左转2秒 | - 图像识别:这是什么")
            print(" - 闲聊:今天天气怎么样 | - 音量:增大音量、减小音量")
            print(banner + "\n")
    finally:
        # Always release the busy flag, including on the early return above.
        is_processing[0] = False
# -------------------- 11. Main loop (kept from section 11 of the original code) --------------------
def main():
    """Initialise every module, install the SIGINT handler and run the wake-word loop.

    Publishes ``tts_controller``, ``camera_module`` and ``AUDIO_CONTROL_NAME``
    as module globals because ``execute_command`` reads the first two.
    Exits with status 1 if the TTS controller cannot be created. Otherwise it
    loops until interrupted; the SIGINT handler exits with status 0.
    """
    global tts_controller, camera_module, AUDIO_CONTROL_NAME, feedback_playing

    # Initialise the modules in the original order.
    # 1. TTS
    try:
        tts_controller = BaiduOnlineTTS(BAIDU_TTS_API_KEY, BAIDU_TTS_SECRET_KEY)
    except Exception as e:
        print(f"❌ TTS初始化失败: {str(e)}")
        sys.exit(1)

    # 2. Volume control
    AUDIO_CONTROL_NAME = detect_audio_control()
    volume_controller = VolumeController(
        audio_control_name=AUDIO_CONTROL_NAME,
        current_volume=CURRENT_VOLUME,
        volume_step=VOLUME_STEP,
        min_volume=MIN_VOLUME,
        max_volume=MAX_VOLUME
    )

    # 3. Motion control
    motion_controller = RobotMotionController(
        onnx_model_path=ONNX_MODEL_PATH,
        tts_controller=tts_controller,
        feedback_text=FEEDBACK_TEXT
    )

    # 4. Camera
    camera_module = CameraModule()

    # 5. Ark API controller
    ark_api_controller = ArkAPIController(
        ark_api_key=ARK_API_KEY,
        ark_model_id=ARK_MODEL_ID,
        tts_controller=tts_controller,
        feedback_text=FEEDBACK_TEXT
    )

    # 6. Speech recognition
    voice_recog_controller = VoiceRecogController(
        access_key=ACCESS_KEY,
        wakeup_word_path=WAKEUP_WORD_PATH,
        model_path=MODEL_PATH,
        appid=APPID,
        access_key_id=ACCESS_KEY_ID,
        access_key_secret=ACCESS_KEY_SECRET,
        tts_controller=tts_controller,
        feedback_text=FEEDBACK_TEXT
    )

    def handle_interrupt(signum, frame):
        """SIGINT handler: stop motion, audio and camera, then exit with status 0."""
        global feedback_playing
        print("\n🛑 收到退出信号,正在清理资源...")

        # Stop robot motion. motion_controller is a local of main(), so it is
        # reached through the closure; looking it up in globals() never finds
        # it and would skip this step.
        rl_walk = getattr(motion_controller, "rl_walk", None)
        if rl_walk is not None:
            rl_walk.last_commands = [0.0, 0.0, 0.0]

        # Stop TTS playback
        feedback_playing = False

        # Stop the camera and the microphone stream
        camera = getattr(camera_module, "camera", None)
        if camera:
            camera.stop()
        stream = getattr(voice_recog_controller, "stream", None)
        if stream and stream.active:
            stream.stop()

        # Release TTS resources
        tts_controller.close()
        print("✅ 所有资源清理完成,程序退出")
        sys.exit(0)

    signal.signal(signal.SIGINT, handle_interrupt)

    # Speak once at start-up to test audio output.
    print("\n🔍 正在测试语音输出...")
    tts_controller.speak("系统初始化完成,等待语音唤醒")

    def execute_callback(command_text):
        """Parse the recognized text and execute the resulting command."""
        command_type, params = parse_voice_command(command_text)
        execute_command(command_type, params, motion_controller, ark_api_controller, volume_controller)

    # Main loop: wait for the wake word, then run one recognition session.
    while True:
        if voice_recog_controller.wakeup_listener():
            voice_recog_controller.start_websocket(
                current_text=current_text,
                final_result=final_result,
                last_audio_time=last_audio_time,
                is_processing=is_processing,
                last_command_time=last_command_time,
                execute_callback=execute_callback
            )
            # Reset the timers for the next session.
            last_audio_time[0] = time.time()
            last_command_time[0] = time.time()
if __name__ == "__main__":
    # The ffmpeg availability check / apt-get auto-install that used to run
    # here is disabled; ffmpeg must already be installed if it is needed.
    main()