# 2025-09-29 09:19:40 +08:00
from openai import OpenAI
import time
import sys
import queue      # Added: buffers streamed text fragments for playback
import threading  # Added: plays speech in parallel with streaming


# Original code section 7: complete Volcano Engine Ark API call logic
class ArkAPIController:
    def __init__(self, ark_api_key, ark_model_id, tts_controller, feedback_text):
        # Receive the TTS instance and feedback text passed in by the dispatcher script (original logic unchanged)
        self.ARK_API_KEY = ark_api_key
        self.ARK_MODEL_ID = ark_model_id
        self.tts_controller = tts_controller
        self.FEEDBACK_TEXT = feedback_text
        self.chat_context = []  # Chat context is maintained inside this module (same as the original logic)
        self.MAX_CONTEXT_LEN = 10

        # Added: queue and worker thread for real-time speech playback
        self.speech_queue = queue.Queue()  # Buffers text fragments awaiting playback
        self.speech_thread = threading.Thread(target=self._process_speech_queue, daemon=True)
        self.speech_thread.start()  # Start the speech playback thread

    # Added: speech-queue worker (loops, pulling fragments off the queue and playing them)
    def _process_speech_queue(self):
        """Continuously take text fragments from the queue and play them via TTS."""
        while True:
            text = self.speech_queue.get()  # Block until a message arrives
            if text is None:  # Shutdown signal
                break
            self.tts_controller.speak(text)  # Play the fragment
            self.speech_queue.task_done()  # Mark the task as done

    def call_ark_api(self, content_type: str, content: dict):
        # Play the operation feedback prompt (synchronously)
        self.tts_controller.speak(self.FEEDBACK_TEXT[content_type])

        client = OpenAI(
            base_url="https://ark.cn-beijing.volces.com/api/v3",
            api_key=self.ARK_API_KEY
        )

        try:
            messages = []
            if content_type == "chat":
                # Include the most recent context turns (user + assistant pairs)
                messages.extend(self.chat_context[-self.MAX_CONTEXT_LEN*2:])
                messages.append({"role": "user", "content": [{"type": "text", "text": content["prompt"]}]})
            elif content_type == "image_recog":
                messages.append({
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{content['image_base64']}"}},
                        {"type": "text", "text": content["prompt"]}
                    ]
                })

            response = client.chat.completions.create(
                model=self.ARK_MODEL_ID,
                messages=messages,
                max_tokens=300,
                temperature=0.7 if content_type == "chat" else 0.3,
                stream=True
            )

            full_response = ""
            current_speech_chunk = ""  # Buffer for the fragment currently awaiting playback
            print("\n" + "="*50)
            print("🤖 Response: ", end="", flush=True)

            for chunk in response:
                if chunk.choices and chunk.choices[0].delta.content:
                    char = chunk.choices[0].delta.content
                    full_response += char
                    current_speech_chunk += char  # Accumulate the fragment
                    print(char, end="", flush=True)
                    time.sleep(0.05)  # Throttle console output for a typewriter effect

                    # Key logic: once the fragment contains sentence/clause punctuation, push it to the speech queue
                    if any(punct in current_speech_chunk for punct in [".", "。", "!", "!", "?", "?", ",", ",", ";", ";"]):
                        self.speech_queue.put(current_speech_chunk)  # Push the fragment to the queue
                        current_speech_chunk = ""  # Reset the fragment buffer

            # Push any remaining fragment (if present)
            if current_speech_chunk:
                self.speech_queue.put(current_speech_chunk)

            print("\n" + "="*50 + "\n")

            # Wait until every queued speech fragment has been played
            self.speech_queue.join()

            # Maintain the chat context (original logic)
            if content_type == "chat" and full_response.strip():
                self.chat_context.append({"role": "user", "content": [{"type": "text", "text": content["prompt"]}]})
                self.chat_context.append({"role": "assistant", "content": [{"type": "text", "text": full_response}]})

            return full_response
        except Exception as e:
            error_msg = f"❌ API call failed: {str(e)}"
            print("\n" + "="*50)
            print(error_msg)
            print("="*50 + "\n")
            self.tts_controller.speak(self.FEEDBACK_TEXT["api_error"])
            return error_msg
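

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how this controller might be wired up. The DummyTTS class,
# the placeholder API key/model ID, and the feedback strings below are assumptions
# for demonstration only; the real dispatcher script supplies its own TTS controller
# (anything exposing a blocking speak(text) method) and a feedback_text dict whose
# keys match the content_type values used above ("chat", "image_recog", "api_error").
if __name__ == "__main__":
    class DummyTTS:
        def speak(self, text):
            # Stand-in for a real TTS engine: just print what would be spoken.
            print(f"\n[TTS] {text}")

    feedback = {
        "chat": "Let me think about that.",
        "image_recog": "Let me take a look at the image.",
        "api_error": "Sorry, the request failed.",
    }

    controller = ArkAPIController(
        ark_api_key="YOUR_ARK_API_KEY",    # hypothetical placeholder
        ark_model_id="YOUR_ARK_MODEL_ID",  # hypothetical placeholder
        tts_controller=DummyTTS(),
        feedback_text=feedback,
    )
    reply = controller.call_ark_api("chat", {"prompt": "Hello, who are you?"})
    print("Full reply:", reply)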