TTS and the CLI can now run in a single environment
Submodule .cursor updated: 67480b7ec2...66e8c67fc0
Submodule Convention updated: efd826f677...058975c37d
cli.py
@@ -6,22 +6,22 @@ from llama_index.llms.ollama import Ollama
 from llama_index.core.chat_engine import SimpleChatEngine
 from llama_index.core.chat_engine.types import StreamingAgentChatResponse
 from llama_index.core.llms import ChatMessage
-try:
-    from llama_index.core.llms.types import ImageBlock, TextBlock
-except ImportError:
-    try:
-        # Try another possible import path
-        from llama_index.core import ImageBlock, TextBlock
-    except ImportError:
-        # If both fail, define simple placeholder classes
-        class ImageBlock:
-            def __init__(self, base64_str=None, path=None):
-                self.base64_str = base64_str
-                self.path = path
-
-        class TextBlock:
-            def __init__(self, text=""):
-                self.text = text
+# try:
+#     from llama_index.core.llms.types import ImageBlock, TextBlock
+# except ImportError:
+#     try:
+#         # Try another possible import path
+#         from llama_index.core import ImageBlock, TextBlock
+#     except ImportError:
+#         # If both fail, define simple placeholder classes
+#         class ImageBlock:
+#             def __init__(self, base64_str=None, path=None):
+#                 self.base64_str = base64_str
+#                 self.path = path
+#
+#         class TextBlock:
+#             def __init__(self, text=""):
+#                 self.text = text
 from llama_index.core import Settings
 from Convention.Runtime.File import ToolFile
 import requests
@@ -358,7 +358,7 @@ def image_file_to_base64(image_path: str) -> str:
         PrintColorful(ConsoleFrontColor.LIGHTRED_EX, f"读取图片文件失败 {image_path}: {e}")
         return None

-async def _ollama_stream_chat_with_image(llm: Ollama, messages: list, image_base64: str, end_symbol: list) -> None:
+async def _ollama_stream_chat_with_image(llm: Ollama, messages: list, image_base64: str, end_symbol: list) -> str:
     """
     Call the Ollama API directly for streaming chat with an image

@@ -382,11 +382,15 @@ async def _ollama_stream_chat_with_image(llm: Ollama, messages: list, image_base
             "role": msg.role if hasattr(msg, 'role') else "user",
             "content": msg.content if hasattr(msg, 'content') else str(msg)
         }
-        # If this is the first user message and there is an image, attach the image
-        if (hasattr(msg, 'role') and msg.role == "user") and image_base64 and len(api_messages) == 0:
-            api_msg["images"] = [image_base64]
         api_messages.append(api_msg)

+    # If there is an image, attach it to the last user message (the current one)
+    if image_base64:
+        for i in range(len(api_messages) - 1, -1, -1):
+            if api_messages[i].get("role") == "user":
+                api_messages[i]["images"] = [image_base64]
+                break
+
     payload = {
         "model": model,
         "messages": api_messages,
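For reference, a minimal standalone sketch of the request shape this hunk ends up sending, assuming the standard Ollama /api/chat endpoint on localhost:11434 and a base64-encoded image; the helper name and variables are illustrative and not taken from cli.py:

import json
import requests

def stream_chat_with_image(model: str, history: list, image_base64: str) -> str:
    # Mirror the new logic: attach the image to the most recent user message.
    messages = [dict(m) for m in history]
    for msg in reversed(messages):
        if msg.get("role") == "user":
            msg["images"] = [image_base64]
            break

    payload = {"model": model, "messages": messages, "stream": True}
    reply = ""
    with requests.post("http://localhost:11434/api/chat", json=payload, stream=True) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():
            if not line:
                continue
            chunk = json.loads(line)
            reply += chunk.get("message", {}).get("content", "")
            if chunk.get("done"):
                break
    return reply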
@@ -441,6 +445,8 @@ async def _ollama_stream_chat_with_image(llm: Ollama, messages: list, image_base
         if len(buffer_response) > 0:
             if TTS_ENABLE:
                 await play_vocal(buffer_response)
+
+        return buffer_response
     except Exception as e:
         PrintColorful(ConsoleFrontColor.LIGHTRED_EX, f"Ollama API 调用错误: {e}")
         if VERBOSE:
@@ -467,7 +473,7 @@ def capture_screenshot() -> str:
         PrintColorful(ConsoleFrontColor.LIGHTRED_EX, f"截图失败: {e}")
         return None

-async def achat(llm: Ollama, message: str, image_base64: Optional[str] = None, auto_screenshot: bool = False) -> None:
+async def achat(llm: Ollama, message: str, image_base64: Optional[str] = None, auto_screenshot: bool = False, conversation_history: Optional[list] = None) -> list:
     """
     Multimodal chat with the Ollama LLM

@@ -501,16 +507,22 @@ async def achat(llm: Ollama, message: str, image_base64: Optional[str] = None, a
         # The image is passed through Ollama's underlying API
     chat_message = ChatMessage(role="user", content=user_message)

-    # Build the message list
-    messages = [chat_message]
-
-    # If there is a system prompt, add it to the start of the message list
-    if SYSTEM_PROMPT_PATH is not None:
+    # Build the message list - use the conversation history if provided
+    if conversation_history is None:
+        conversation_history = []
+
+    # Build the full message list (including the history)
+    messages = conversation_history.copy()
+
+    # If the history is empty and a system prompt is configured, add the system message (only once)
+    if len(messages) == 0 and SYSTEM_PROMPT_PATH is not None:
         system_prompt = ToolFile(SYSTEM_PROMPT_PATH).LoadAsText()
         if system_prompt:
-            # Prepend the system prompt to the user message, since Ollama may not support the system role
-            user_message = f"{system_prompt}\n\n{user_message}"
-            messages[0] = ChatMessage(role="user", content=user_message)
+            system_msg = ChatMessage(role="system", content=system_prompt)
+            messages.append(system_msg)
+
+    # Append the current user message
+    messages.append(chat_message)

     buffer_response = ""
     end_symbol = ['。', '?', '!']
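With this change the first turn sends [system, user] and later turns send the accumulated [system, user, assistant, user, ...] list instead of re-prepending the prompt to every user message. A minimal sketch of that construction, assuming the same ChatMessage type imported above; the helper name is illustrative and not part of cli.py:

from typing import Optional

from llama_index.core.llms import ChatMessage

def build_messages(history: Optional[list], user_text: str, system_prompt: Optional[str]) -> list:
    # Start from the accumulated history so the system prompt is only ever added once.
    messages = list(history) if history else []
    if not messages and system_prompt:
        messages.append(ChatMessage(role="system", content=system_prompt))
    messages.append(ChatMessage(role="user", content=user_text))
    return messages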
@@ -519,33 +531,41 @@ async def achat(llm: Ollama, message: str, image_base64: Optional[str] = None, a
         # If there is an image, call the Ollama API directly, since the llama-index wrapper may not support images
         if image_base64:
             # Call Ollama's streaming API directly
-            await _ollama_stream_chat_with_image(llm, messages, image_base64, end_symbol)
-            return
-
-        # Use streaming chat (no image)
-        streaming_response = await llm.astream_chat(messages)
-
-        # Print the streamed text in real time
-        async for chunk in streaming_response.async_response_gen():
-            await asyncio.sleep(0.01)
-            print(chunk, end='', flush=True)
-            for ch in chunk:
-                buffer_response += ch
-                if len(buffer_response) > 20:
-                    if ch in end_symbol:
-                        if TTS_ENABLE:
-                            await play_vocal(buffer_response.strip())
-                        buffer_response = ""
-
-        buffer_response = buffer_response.strip()
-        if len(buffer_response) > 0:
-            if TTS_ENABLE:
-                await play_vocal(buffer_response)
+            assistant_response = await _ollama_stream_chat_with_image(llm, messages, image_base64, end_symbol)
+        else:
+            # Use streaming chat (no image)
+            streaming_response = await llm.astream_chat(messages)
+
+            # Print the streamed text in real time
+            async for chunk in streaming_response.async_response_gen():
+                await asyncio.sleep(0.01)
+                print(chunk, end='', flush=True)
+                for ch in chunk:
+                    buffer_response += ch
+                    if len(buffer_response) > 20:
+                        if ch in end_symbol:
+                            if TTS_ENABLE:
+                                await play_vocal(buffer_response.strip())
+                            buffer_response = ""
+
+            assistant_response = buffer_response.strip()
+            if len(assistant_response) > 0:
+                if TTS_ENABLE:
+                    await play_vocal(assistant_response)
+
+        # Update the conversation history: append the user message and the assistant response
+        updated_history = messages.copy()
+        if assistant_response:
+            assistant_msg = ChatMessage(role="assistant", content=assistant_response)
+            updated_history.append(assistant_msg)
+
+        return updated_history
     except Exception as e:
         PrintColorful(ConsoleFrontColor.LIGHTRED_EX, f"聊天错误: {e}")
         if VERBOSE:
             import traceback
             traceback.print_exc()
+        return conversation_history if conversation_history else []


 def add_speaker() -> None:
@@ -588,6 +608,9 @@ async def event_loop(llm: Ollama) -> None:
     PrintColorful(ConsoleFrontColor.LIGHTYELLOW_EX, "支持的图片格式: .png, .jpg, .jpeg")
     PrintColorful(ConsoleFrontColor.LIGHTYELLOW_EX, "输入 'quit' 或 'exit' 退出\n")

+    # Keep the conversation history so the system prompt is not sent repeatedly
+    conversation_history = []
+
     message = input("请开始对话: ")
     wait_second = AUTO_SPEAK_WAIT_SECOND
     try:
@@ -614,19 +637,19 @@ async def event_loop(llm: Ollama) -> None:
                     PrintColorful(ConsoleFrontColor.LIGHTGREEN_EX, "截图成功")

             PrintColorful(ConsoleFrontColor.GREEN, "AI: ", is_reset=False, end='')
-            await achat(llm, message, image_base64)
+            conversation_history = await achat(llm, message, image_base64, False, conversation_history)
             PrintColorful(ConsoleFrontColor.RESET,"")

             # Wait for user input
             message = await ainput(wait_second)
             if not message:
                 # No user input: trigger autonomous AI speech (takes a screenshot automatically)
-                wait_second = max(wait_second*1.5, 3600)
+                wait_second = min(wait_second*1.5, 3600)
                 if VERBOSE:
                     PrintColorful(ConsoleFrontColor.LIGHTYELLOW_EX, f"用户无输入,等待 {wait_second} 秒后 AI 自主发言...")
                 # Trigger autonomous AI speech (takes a screenshot automatically)
                 PrintColorful(ConsoleFrontColor.GREEN, "AI: ", is_reset=False, end='')
-                await achat(llm, "", None, auto_screenshot=True)
+                conversation_history = await achat(llm, "", None, auto_screenshot=True, conversation_history=conversation_history)
                 PrintColorful(ConsoleFrontColor.RESET,"")
             else:
                 wait_second = AUTO_SPEAK_WAIT_SECOND
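Taken together, event_loop now threads the history returned by achat into the next call, and the idle wait grows by 1.5x per silent turn but is capped at one hour (the previous max() forced every wait to at least 3600 seconds). A rough sketch of that flow under those assumptions, with chat and get_input as illustrative stand-ins for achat and ainput, which this extract does not show in full:

AUTO_SPEAK_WAIT_SECOND = 30.0  # illustrative default; the real value is configured in cli.py

async def demo_loop(chat, get_input) -> None:
    # chat(message, history) stands in for achat() and must return the updated history.
    conversation_history: list = []
    wait_second = AUTO_SPEAK_WAIT_SECOND
    message = "hello"
    while message not in ("quit", "exit"):
        # Every call returns the updated history, which is fed into the next call.
        conversation_history = await chat(message, conversation_history)
        message = await get_input(wait_second)
        if not message:
            # No input: back off 30 -> 45 -> 67.5 -> ... seconds, capped at 3600,
            # and let the next iteration run an autonomous turn (empty message).
            wait_second = min(wait_second * 1.5, 3600)
        else:
            wait_second = AUTO_SPEAK_WAIT_SECOND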
@@ -3,12 +3,14 @@ aiofiles==23.2.1
 aiohappyeyeballs==2.6.1
 aiohttp==3.13.2
 aiosignal==1.4.0
+aiosqlite==0.22.0
 annotated-types==0.7.0
 antlr4-python3-runtime==4.9.3
 anyio==4.11.0
 async-timeout==5.0.1
 attrs==25.4.0
 audioread==3.1.0
+banks==2.2.0
 beautifulsoup4==4.14.2
 cachetools==6.2.2
 certifi==2025.11.12
@@ -21,14 +23,20 @@ conformer==0.3.2
 contourpy==1.3.2
 cycler==0.12.1
 Cython==3.2.1
+dataclasses-json==0.6.7
 decorator==5.2.1
+defusedxml==0.7.1
+Deprecated==1.2.18
 diffusers==0.29.0
+dirtyjson==1.0.8
+distro==1.9.0
 einops==0.8.1
 exceptiongroup==1.3.0
 fastapi==0.115.6
 fastapi-cli==0.0.4
 ffmpy==1.0.0
 filelock==3.20.0
+filetype==1.2.0
 flatbuffers==25.9.23
 fonttools==4.60.1
 frozenlist==1.8.0
@@ -38,6 +46,8 @@ google-auth==2.43.0
 google-auth-oauthlib==1.0.0
 gradio==5.4.0
 gradio_client==1.4.2
+greenlet==3.3.0
+griffe==1.15.0
 grpcio==1.57.0
 grpcio-tools==1.57.0
 h11==0.16.0
@@ -52,6 +62,7 @@ importlib_metadata==8.7.0
 inflect==7.3.1
 intel-openmp==2021.4.0
 Jinja2==3.1.6
+jiter==0.12.0
 joblib==1.5.2
 kaldifst==1.7.17
 kiwisolver==1.4.9
@@ -59,10 +70,13 @@ lazy_loader==0.4
 librosa==0.10.2
 lightning==2.2.4
 lightning-utilities==0.15.2
+llama-index
+llama-index-llms-ollama
 llvmlite==0.45.1
 Markdown==3.10
 markdown-it-py==4.0.0
 MarkupSafe==2.1.5
+marshmallow==3.26.1
 matplotlib==3.7.5
 mdurl==0.1.2
 mkl==2021.4.0
@@ -71,17 +85,22 @@ more-itertools==10.8.0
 mpmath==1.3.0
 msgpack==1.1.2
 multidict==6.7.0
+mypy_extensions==1.1.0
+nest-asyncio==1.6.0
 networkx==3.1
+nltk==3.9.2
 numba==0.62.1
 numpy==1.26.4
 oauthlib==3.3.1
+ollama==0.6.1
 omegaconf==2.3.0
 onnx==1.16.0
 onnxruntime==1.23.2
+openai==2.14.0
 openai-whisper==20231117
 orjson==3.11.4
 packaging==24.2
-pandas==2.3.3
+pandas==2.2.3
 pillow==11.3.0
 platformdirs==4.5.0
 pooch==1.8.2
@@ -90,15 +109,18 @@ protobuf==4.25.0
 pyarrow==18.1.0
 pyasn1==0.6.1
 pyasn1_modules==0.4.2
+PyAudio==0.2.14
 pycparser==2.23
-pydantic==2.7.0
-pydantic_core==2.18.1
+pydantic==2.12.5
+pydantic_core==2.41.5
 pydub==0.25.1
 Pygments==2.19.2
 pyparsing==3.2.5
+pypdf==6.4.2
 pyreadline3==3.5.4
 PySocks==1.7.1
 python-dateutil==2.9.0.post0
+python-dotenv==1.2.1
 python-multipart==0.0.12
 pytorch-lightning==2.5.6
 pytz==2025.2
@@ -123,9 +145,12 @@ sniffio==1.3.1
 soundfile==0.12.1
 soupsieve==2.8
 soxr==1.0.0
+SQLAlchemy==2.0.45
 starlette==0.41.3
+striprtf==0.0.26
 sympy==1.14.0
 tbb==2021.13.1
+tenacity==9.1.2
 tensorboard==2.14.0
 tensorboard-data-server==0.7.2
 threadpoolctl==3.6.0
@@ -139,6 +164,8 @@ tqdm==4.67.1
 transformers==4.51.3
 typeguard==4.4.4
 typer==0.20.0
+typing-inspect==0.9.0
+typing-inspection==0.4.2
 typing_extensions==4.15.0
 tzdata==2025.2
 urllib3==2.5.0
@@ -147,5 +174,6 @@ websockets==12.0
 Werkzeug==3.1.3
 wetext==0.0.4
 wget==3.2
+wrapt==1.17.3
 yarl==1.22.0
 zipp==3.23.0