crew AI笔记[5] - knowledge和memory特性详解
crew AI的knowledge和memory特性讲解
功能简述
knowledge模块实现了类似RAG知识库功能(不止于此),memory从物理层面实现了knowledge模块的功能,两者缺一不可。刚开始调试的时候单独搞knowledge,怎么也弄不好。
应该算是最难的一篇输出了,花了我好多时间。。。
memory
crew AI框架能够把信息记录下来,具体记忆行为定义为以下四种:
- 短期记忆:使用 OpenAI Embeddings 创建 ChromaDB 向量存储,用于存储agent执行历史记录。
- 最近的记忆:SQLite3 db 用于存储最近的任务执行结果。
- 长期记忆:SQLite3 db 用于存储任务结果,请注意,任务描述必须完全匹配(相当严格)才能检索长期记忆。
- 实体记忆:提取关键实体并将实体关系存储到另一个 ChromaDB 向量存储中。
默认存储位置(Windows)
C:\Users\{username}\AppData\Local\CrewAI\{project_name}\
├── knowledge\
├── short_term_memory\
├── long_term_memory\
├── entities\
└── long_term_memory_storage.db
存储路径检查
from crewai.utilities.paths import db_storage_path
import os
# Get the base storage path.
storage_path = db_storage_path()
print(f"CrewAI storage location: {storage_path}")

# List everything stored under that path, one level deep.
if os.path.exists(storage_path):
    print("\nStored files and directories:")
    for item in os.listdir(storage_path):
        item_path = os.path.join(storage_path, item)
        if os.path.isdir(item_path):
            print(f"📁 {item}/")
            # Show the directory's direct children (e.g. ChromaDB collections).
            # isdir() already implies the path exists, so no extra check here.
            for subitem in os.listdir(item_path):
                print(f" └── {subitem}")
        else:
            print(f"📄 {item}")
else:
    print("No CrewAI storage directory found yet.")
存储路径自定义
import os
from crewai import Crew
# Use a "storage" folder located next to this script as the storage location.
script_dir = os.path.dirname(os.path.abspath(__file__))
STORAGE_DIR = script_dir + "/storage"
os.environ["CREWAI_STORAGE_DIR"] = STORAGE_DIR

# All memory and knowledge will now be stored in ./storage/
crew = Crew(agents=[...], tasks=[...], memory=True)
存储重置
命令行(失败)
出处,官网文档
好像这种方式必须是工程通过CLI方式构建成的才行
crewai reset-memories --knowledge
代码段内重置(失败)
官网解决方案也不能用,太扯了。
from crewai import Crew
# Build a crew with memory enabled; the resets below act on this crew.
crew = Crew(agents=[...], tasks=[...], memory=True)

# Reset specific memory types, one call per type.
for storage_kind in (
    "short",      # Short-term memory
    "long",       # Long-term memory
    "entity",     # Entity memory
    "knowledge",  # Knowledge storage
):
    crew.reset_memories(command_type=storage_kind)
运行重置脚本(成功)
# 请保存为: reset_crew_memories.py
# 调用: python reset_crew_memories.py [file1.py] [file2.py]
import os
import sys
import importlib.util
from inspect import isfunction, ismethod
from pathlib import Path
from typing import Any, List, Optional, get_type_hints
from crewai.crew import Crew
from crewai.flow import Flow
def get_crew_instance(module_attr: Any) -> Optional[Crew]:
    """Return the Crew that ``module_attr`` is or produces, or ``None``.

    Three shapes are recognised, checked in this order:

    * an object that is already a ``Crew`` instance;
    * a callable with a truthy ``is_crew_class`` attribute, which is
      instantiated and whose ``crew()`` method supplies the result;
    * a function or method whose ``return`` type hint is exactly ``Crew``,
      which is called with no arguments.
    """
    if isinstance(module_attr, Crew):
        return module_attr

    is_crew_class = (
        callable(module_attr)
        and hasattr(module_attr, "is_crew_class")
        and module_attr.is_crew_class
    )
    if is_crew_class:
        return module_attr().crew()  # type: ignore

    try:
        is_plain_callable = ismethod(module_attr) or isfunction(module_attr)
        if is_plain_callable and get_type_hints(module_attr).get("return") is Crew:
            return module_attr()
    except Exception:
        # Best effort: errors while reading the hints or calling are ignored.
        pass

    return None
def fetch_crews_from_module_attr(module_attr: Any) -> List[Crew]:
    """Collect the Crew objects reachable from one module attribute.

    The attribute itself is tried first via ``get_crew_instance``. If it is
    also a ``Flow`` subclass, it is instantiated with no arguments and each
    public attribute of that instance is tried the same way.
    """
    collected: List[Crew] = []

    direct = get_crew_instance(module_attr)
    if direct:
        collected.append(direct)

    if not (isinstance(module_attr, type) and issubclass(module_attr, Flow)):
        return collected

    try:
        flow = module_attr()
        public_names = (name for name in dir(flow) if not name.startswith("_"))
        for name in public_names:
            candidate = get_crew_instance(getattr(flow, name))
            if candidate:
                collected.append(candidate)
    except Exception:
        # Best effort: stop scanning this flow but keep what was found so far.
        pass

    return collected
def find_crews_in_file(filepath: str) -> List[Crew]:
    """Import a Python file and return every Crew found among its attributes.

    The file is loaded as a module named after its stem, with its directory
    temporarily placed at the front of ``sys.path``. Every public module
    attribute is passed to ``fetch_crews_from_module_attr``.

    Args:
        filepath: Path to the Python file to inspect.

    Returns:
        The crews found, or an empty list when the file is missing, cannot be
        loaded, or raises while being processed (the error is printed to
        stderr in those cases).
    """
    if not os.path.exists(filepath):
        print(f"Error: File not found at '{filepath}'", file=sys.stderr)
        return []

    absolute_path = os.path.abspath(filepath)
    module_name = Path(absolute_path).stem
    file_dir = os.path.dirname(absolute_path)

    # Record what this call changes so cleanup undoes only that: a directory
    # that was already on sys.path, or a module already registered under the
    # same name, must still be there afterwards.
    path_added = file_dir not in sys.path
    if path_added:
        sys.path.insert(0, file_dir)
    not_loaded = object()
    previous_module = sys.modules.get(module_name, not_loaded)

    try:
        spec = importlib.util.spec_from_file_location(module_name, absolute_path)
        if not spec or not spec.loader:
            print(
                f"Error: Could not create module spec for '{filepath}'",
                file=sys.stderr,
            )
            return []

        module = importlib.util.module_from_spec(spec)
        # Registered before execution so the module is visible in sys.modules
        # while its code runs.
        sys.modules[module_name] = module
        spec.loader.exec_module(module)

        found_crews: List[Crew] = []
        for attr_name in dir(module):
            if not attr_name.startswith("_"):
                module_attr = getattr(module, attr_name)
                found_crews.extend(fetch_crews_from_module_attr(module_attr))
        return found_crews
    except Exception as e:
        print(f"Error processing file '{filepath}': {e}", file=sys.stderr)
        return []
    finally:
        if path_added and file_dir in sys.path:
            sys.path.remove(file_dir)
        if previous_module is not_loaded:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous_module
def main():
    """Reset all memories of every Crew found in the files named on argv.

    Each command-line argument is treated as a Python file and searched with
    ``find_crews_in_file``. For every crew found, ``reset_memories`` is called
    with ``command_type="all"``. A failure on one crew is reported to stderr
    and does not stop the remaining crews. Exits with status 1 when no file
    is given.
    """
    if len(sys.argv) < 2:
        print("Usage: python reset_crew_memories.py <file1.py> <file2.py> ...")
        sys.exit(1)

    total_crews_reset = 0
    for filename in sys.argv[1:]:
        print(f"\nProcessing '{filename}'...")
        crews = find_crews_in_file(filename)
        if not crews:
            print(f"No crews found in '{filename}'.")
            continue

        for crew in crews:
            # Prefer the crew's name for messages; fall back to its id.
            crew_id = crew.name if crew.name else crew.id
            try:
                print(f" - Resetting all memories for Crew '{crew_id}'...")
                # command_type = long, short, entity, knowledge,
                # agent_knowledge, kickoff_outputs or all
                crew.reset_memories(command_type="all")
                total_crews_reset += 1
                print(f" - Memories for crew '{crew_id}' have been reset.")
            except Exception as e:
                print(
                    f" - Error resetting memories for crew '{crew_id}': {e}",
                    file=sys.stderr,
                )

    print(
        f"\nOperation complete. Reset memories for {total_crews_reset} Crew(s)."
    )


if __name__ == "__main__":
    main()
knowledge
knowledge模块利用上下文信息、本地存储的资料、主动提取的实体信息,可以为AI agent提供额外的参考,以此增强回答的准确性和效率。
knowledge支持的格式
非结构化: string、text、pdf、html、docx
结构化:csv、xlsx、json
文档配置(特别注意)
1、创建ROOT_DIR/knowledge文件夹,把所有资料放到knowledge文件夹中
2、代码中使用相对路径
参考代码
请保存为:knowledge_samples.py
import os
from crewai import Agent, Task, Crew, LLM, Process
from crewai_tools import TXTSearchTool
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
from crewai.knowledge.source.pdf_knowledge_source import PDFKnowledgeSource
from crewai.knowledge.source.excel_knowledge_source import ExcelKnowledgeSource
from crewai.knowledge.source.json_knowledge_source import JSONKnowledgeSource
from crewai.knowledge.source.csv_knowledge_source import CSVKnowledgeSource
# Optional: custom storage location. Uncomment to keep all data in ./storage.
# current_dir = os.path.dirname(os.path.abspath(__file__))
# STORAGE_DIR = current_dir + "/storage"
# os.environ["CREWAI_STORAGE_DIR"] = STORAGE_DIR

# Set a dummy OpenAI API key to avoid errors.
os.environ["OPENAI_API_KEY"] = "dummy-key"

# Configure the Qwen Plus model as the LLM.
# The key is read from DASHSCOPE_API_KEY when set, so a real key does not
# have to be written into this file; the placeholder is kept as a fallback.
LLM_QW = LLM(
    model='dashscope/qwen-plus',
    base_url='https://dashscope.aliyuncs.com/compatible-mode/v1',
    api_key=os.environ.get("DASHSCOPE_API_KEY", 'sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'),
    stream=False,
    temperature=0.7,
    request_timeout=120,
)

# Create one knowledge source per file type. Paths are relative; the files
# are expected in the project's knowledge/ folder.
# Only json_source is attached to the agent and the crew below.
excel_source = ExcelKnowledgeSource(
    file_paths=["excel_example.xlsx"]
)
text_source = TextFileKnowledgeSource(
    file_paths=["text_example.txt"]
)
pdf_source = PDFKnowledgeSource(
    file_paths=["pdf_example.pdf"]
)
csv_source = CSVKnowledgeSource(
    file_paths=["csv_example.csv"]
)
json_source = JSONKnowledgeSource(
    file_paths=["json_example.json"]
)

# Create the research analyst agent.
researcher = Agent(
    role="Research Analyst",
    goal="Find and analyze information from the provided documents",
    backstory="You are an expert researcher who can extract and analyze information from various documents.",
    verbose=True,
    allow_delegation=False,
    llm=LLM_QW,
    knowledge_sources=[json_source]
)

research_task = Task(
    description="Search for information about users in the knowledge base and provide a summary of what you find",
    expected_output="请输出json文件中的基本信息",
    agent=researcher
)

# Create the crew with memory enabled and a local Ollama embedding model.
# `crew` stays at module level: reset_crew_memories.py finds crews by
# scanning module attributes.
crew = Crew(
    agents=[researcher],
    tasks=[research_task],
    verbose=True,
    memory=True,
    process=Process.sequential,
    knowledge_sources=[json_source],
    embedder={
        "provider": "ollama",
        "config": {
            "model": "all-minilm:l6-v2"
        }
    }
)

# Run the task only when executed as a script, not when imported.
if __name__ == "__main__":
    print("开始执行CrewAI任务...")
    result = crew.kickoff()
    print("任务执行结果:")
    print(result)
实现详解(精华)
这段代码看起来没几句话,花了我大量试错时间【crew AI框架还有许多bug和使用不便的地方,而且embedding不支持国内大模型,让我们很难玩起来】
综合尝试下来,好像只有使用本地ollama的向量模型才能用,其余的都玩不了。关于ollama本地配置,网上一搜一大把,就不赘述了。主要特点如下:
- 配置Qwen为LLM
- 配置本地ollama向量模型为embedder
- 严格运用官网给出的使用方法存储和使用knowledge
- 重定向存储地点
自己还尝试绕过默认的chromaDB和sqlite3配置,使用qwen的向量模型,也捣鼓成功了,但是看着满屏的自定义实现,完全脱离了crew AI框架,何苦呢?(源码就不贴出来祸害大家了)
其他参考
实战演练
1、crew AI框架构建
虚拟环境创建,省略。。。
对应模块安装
pip install crewai crewai-tools langchain langchain-community langchain-ollama
2、配置knowledge资源
把knowledge文件夹放到根目录下
把knowledge_samples.py、reset_crew_memories.py拷贝到根目录下
执行knowledge_samples.py即可
3、清除memory
执行 reset_crew_memories 模块即可
python reset_crew_memories.py knowledge_samples.py
拓展
1、后续要尝试把memory放到根目录文件夹下,这样互不冲突(取消头部的注释即可,已经实现了)
2、memory的重置仍然有问题,真的是服了~
更多推荐
所有评论(0)