TTS and the CLI can now run in a single environment
cli.py: 125 changed lines
@@ -6,22 +6,22 @@ from llama_index.llms.ollama import Ollama
 from llama_index.core.chat_engine import SimpleChatEngine
 from llama_index.core.chat_engine.types import StreamingAgentChatResponse
 from llama_index.core.llms import ChatMessage
-try:
-    from llama_index.core.llms.types import ImageBlock, TextBlock
-except ImportError:
-    try:
-        # Try other possible import paths
-        from llama_index.core import ImageBlock, TextBlock
-    except ImportError:
-        # If both fail, define simple placeholder classes
-        class ImageBlock:
-            def __init__(self, base64_str=None, path=None):
-                self.base64_str = base64_str
-                self.path = path
-
-        class TextBlock:
-            def __init__(self, text=""):
-                self.text = text
+# try:
+#     from llama_index.core.llms.types import ImageBlock, TextBlock
+# except ImportError:
+#     try:
+#         # Try other possible import paths
+#         from llama_index.core import ImageBlock, TextBlock
+#     except ImportError:
+#         # If both fail, define simple placeholder classes
+#         class ImageBlock:
+#             def __init__(self, base64_str=None, path=None):
+#                 self.base64_str = base64_str
+#                 self.path = path
+#
+#         class TextBlock:
+#             def __init__(self, text=""):
+#                 self.text = text
 from llama_index.core import Settings
 from Convention.Runtime.File import ToolFile
 import requests
@@ -358,7 +358,7 @@ def image_file_to_base64(image_path: str) -> str:
         PrintColorful(ConsoleFrontColor.LIGHTRED_EX, f"读取图片文件失败 {image_path}: {e}")
         return None

-async def _ollama_stream_chat_with_image(llm: Ollama, messages: list, image_base64: str, end_symbol: list) -> None:
+async def _ollama_stream_chat_with_image(llm: Ollama, messages: list, image_base64: str, end_symbol: list) -> str:
     """
     Call the Ollama API directly for streaming chat with an image
@@ -382,11 +382,15 @@ async def _ollama_stream_chat_with_image(llm: Ollama, messages: list, image_base
                 "role": msg.role if hasattr(msg, 'role') else "user",
                 "content": msg.content if hasattr(msg, 'content') else str(msg)
             }
-            # If this is the first user message and there is an image, attach the image
-            if (hasattr(msg, 'role') and msg.role == "user") and image_base64 and len(api_messages) == 0:
-                api_msg["images"] = [image_base64]
             api_messages.append(api_msg)

+        # If there is an image, attach it to the last user message (the current user message)
+        if image_base64:
+            for i in range(len(api_messages) - 1, -1, -1):
+                if api_messages[i].get("role") == "user":
+                    api_messages[i]["images"] = [image_base64]
+                    break
+
         payload = {
             "model": model,
             "messages": api_messages,
@@ -441,6 +445,8 @@ async def _ollama_stream_chat_with_image(llm: Ollama, messages: list, image_base
         if len(buffer_response) > 0:
             if TTS_ENABLE:
                 await play_vocal(buffer_response)
+
+        return buffer_response
     except Exception as e:
         PrintColorful(ConsoleFrontColor.LIGHTRED_EX, f"Ollama API 调用错误: {e}")
         if VERBOSE:
@@ -467,7 +473,7 @@ def capture_screenshot() -> str:
         PrintColorful(ConsoleFrontColor.LIGHTRED_EX, f"截图失败: {e}")
         return None

-async def achat(llm: Ollama, message: str, image_base64: Optional[str] = None, auto_screenshot: bool = False) -> None:
+async def achat(llm: Ollama, message: str, image_base64: Optional[str] = None, auto_screenshot: bool = False, conversation_history: Optional[list] = None) -> list:
     """
     Multimodal chat with the Ollama LLM
@@ -501,16 +507,22 @@ async def achat(llm: Ollama, message: str, image_base64: Optional[str] = None, a
     # The image will be passed through Ollama's underlying API
     chat_message = ChatMessage(role="user", content=user_message)

-    # Build the message list
-    messages = [chat_message]
+    # Build the message list - use the conversation history (if provided)
+    if conversation_history is None:
+        conversation_history = []

-    # If there is a system prompt, add it to the beginning of the message list
-    if SYSTEM_PROMPT_PATH is not None:
+    # Build the full message list (including the history)
+    messages = conversation_history.copy()
+
+    # If the conversation history is empty and a system prompt is required, add a system message (only once)
+    if len(messages) == 0 and SYSTEM_PROMPT_PATH is not None:
         system_prompt = ToolFile(SYSTEM_PROMPT_PATH).LoadAsText()
         if system_prompt:
-            # Prepend the system prompt to the user message, since Ollama may not support the system role
-            user_message = f"{system_prompt}\n\n{user_message}"
-            messages[0] = ChatMessage(role="user", content=user_message)
+            system_msg = ChatMessage(role="system", content=system_prompt)
+            messages.append(system_msg)
+
+    # Append the current user message
+    messages.append(chat_message)

     buffer_response = ""
     end_symbol = ['。', '?', '!']
@@ -519,33 +531,41 @@ async def achat(llm: Ollama, message: str, image_base64: Optional[str] = None, a
         # If there is an image, call the Ollama API directly, since the llama-index wrapper may not support images
         if image_base64:
             # Call Ollama's streaming API directly
-            await _ollama_stream_chat_with_image(llm, messages, image_base64, end_symbol)
-            return
+            assistant_response = await _ollama_stream_chat_with_image(llm, messages, image_base64, end_symbol)
+        else:
+            # Use streaming chat (no image)
+            streaming_response = await llm.astream_chat(messages)
+
+            # Print the streamed text in real time
+            async for chunk in streaming_response.async_response_gen():
+                await asyncio.sleep(0.01)
+                print(chunk, end='', flush=True)
+                for ch in chunk:
+                    buffer_response += ch
+                    if len(buffer_response) > 20:
+                        if ch in end_symbol:
+                            if TTS_ENABLE:
+                                await play_vocal(buffer_response.strip())
+                            buffer_response = ""
+
+            assistant_response = buffer_response.strip()
+            if len(assistant_response) > 0:
+                if TTS_ENABLE:
+                    await play_vocal(assistant_response)

-        # Use streaming chat (no image)
-        streaming_response = await llm.astream_chat(messages)
+        # Update the conversation history: append the user message and the assistant response
+        updated_history = messages.copy()
+        if assistant_response:
+            assistant_msg = ChatMessage(role="assistant", content=assistant_response)
+            updated_history.append(assistant_msg)

-        # Print the streamed text in real time
-        async for chunk in streaming_response.async_response_gen():
-            await asyncio.sleep(0.01)
-            print(chunk, end='', flush=True)
-            for ch in chunk:
-                buffer_response += ch
-                if len(buffer_response) > 20:
-                    if ch in end_symbol:
-                        if TTS_ENABLE:
-                            await play_vocal(buffer_response.strip())
-                        buffer_response = ""
-
-        buffer_response = buffer_response.strip()
-        if len(buffer_response) > 0:
-            if TTS_ENABLE:
-                await play_vocal(buffer_response)
+        return updated_history
     except Exception as e:
         PrintColorful(ConsoleFrontColor.LIGHTRED_EX, f"聊天错误: {e}")
         if VERBOSE:
             import traceback
             traceback.print_exc()
+        return conversation_history if conversation_history else []


 def add_speaker() -> None:
@@ -588,6 +608,9 @@ async def event_loop(llm: Ollama) -> None:
     PrintColorful(ConsoleFrontColor.LIGHTYELLOW_EX, "支持的图片格式: .png, .jpg, .jpeg")
     PrintColorful(ConsoleFrontColor.LIGHTYELLOW_EX, "输入 'quit' 或 'exit' 退出\n")

+    # Keep the conversation history so the system prompt is not sent repeatedly
+    conversation_history = []
+
     message = input("请开始对话: ")
     wait_second = AUTO_SPEAK_WAIT_SECOND
     try:
@@ -614,19 +637,19 @@ async def event_loop(llm: Ollama) -> None:
                 PrintColorful(ConsoleFrontColor.LIGHTGREEN_EX, "截图成功")

             PrintColorful(ConsoleFrontColor.GREEN, "AI: ", is_reset=False, end='')
-            await achat(llm, message, image_base64)
+            conversation_history = await achat(llm, message, image_base64, False, conversation_history)
             PrintColorful(ConsoleFrontColor.RESET,"")

             # Wait for user input
             message = await ainput(wait_second)
             if not message:
-                # No user input: trigger the AI to speak on its own (a screenshot is taken automatically)
-                wait_second = max(wait_second*1.5, 3600)
+                wait_second = min(wait_second*1.5, 3600)
                 if VERBOSE:
                     PrintColorful(ConsoleFrontColor.LIGHTYELLOW_EX, f"用户无输入,等待 {wait_second} 秒后 AI 自主发言...")
+                # Trigger the AI to speak on its own (a screenshot is taken automatically)
                 PrintColorful(ConsoleFrontColor.GREEN, "AI: ", is_reset=False, end='')
-                await achat(llm, "", None, auto_screenshot=True)
+                conversation_history = await achat(llm, "", None, auto_screenshot=True, conversation_history=conversation_history)
                 PrintColorful(ConsoleFrontColor.RESET,"")
             else:
                 wait_second = AUTO_SPEAK_WAIT_SECOND