crew AI笔记[5] - knowledge和memory特性详解

crew AI的knowledge和memory特性讲解

yinchao163

987人浏览 · 2025-09-08 16:50:12

yinchao163 · 2025-09-08 16:50:12 发布

功能简述

knowledge模块实现了类似RAG知识库功能（不止于此），memory从物理层面实现了knowledge模块的功能，两者缺一不可。刚开始调试的时候单独搞knowledge，怎么也弄不好。

应该算是最难的一篇输出了，花了我好多时间。。。

memory

官网描述

crew AI框架能够把信息记录下来，具体记忆行为定义为以下四种：

短期记忆：使用 OpenAI Embeddings 创建 ChromaDB 向量存储，用于存储agent执行历史记录。
最近的记忆：SQLite3 db 用于存储最近的任务执行结果。
长期记忆：SQLite3 db 用于存储任务结果，请注意，任务描述必须完全匹配（相当严格）才能检索长期记忆
实体记忆：提取关键实体并将实体关系存储到另一个 ChromaDB 向量存储中。

默认存储位置（Windows）

C:\Users\{username}\AppData\Local\CrewAI\{project_name}\
├── knowledge\
├── short_term_memory\
├── long_term_memory\
├── entities\
└── long_term_memory_storage.db

存储路径检查

from crewai.utilities.paths import db_storage_path
import os

# Resolve the base directory CrewAI uses for its storage.
storage_path = db_storage_path()
print(f"CrewAI storage location: {storage_path}")

# List the storage directory one level deep. isdir() (rather than exists())
# keeps os.listdir() from failing when the path exists but is a regular file.
if os.path.isdir(storage_path):
    print("\nStored files and directories:")
    # Sorted so the listing is stable between runs; os.listdir() returns
    # entries in arbitrary order.
    for item in sorted(os.listdir(storage_path)):
        item_path = os.path.join(storage_path, item)
        if os.path.isdir(item_path):
            print(f"📁 {item}/")
            # Show the directory's immediate children. No extra existence
            # check is needed: isdir() above already implies the path exists.
            for subitem in sorted(os.listdir(item_path)):
                print(f"   └── {subitem}")
        else:
            print(f"📄 {item}")
else:
    print("No CrewAI storage directory found yet.")

存储路径自定义

import os
from crewai import Crew

# Set a custom storage location: a "storage" folder next to this script.
current_dir = os.path.dirname(os.path.abspath(__file__))
# os.path.join uses the platform's separator instead of a hard-coded "/".
STORAGE_DIR = os.path.join(current_dir, "storage")
os.environ["CREWAI_STORAGE_DIR"] = STORAGE_DIR

# All memory and knowledge will now be stored in ./storage/
crew = Crew(
    agents=[...],
    tasks=[...],
    memory=True,
)

存储重置

命令行（失败）

出处，官网文档

好像这种方式必须是工程通过CLI方式构建成的才行

crewai reset-memories --knowledge

代码段内重置（失败）

官网解决方案也不能用，太扯了。

from crewai import Crew

# Crew with memory enabled; its storages are reset below.
crew = Crew(agents=[...], tasks=[...], memory=True)

# Reset specific memory types, one reset_memories() call per type.
for memory_kind in (
    'short',      # Short-term memory
    'long',       # Long-term memory
    'entity',     # Entity memory
    'knowledge',  # Knowledge storage
):
    crew.reset_memories(command_type=memory_kind)

运行重置脚本（成功）

参考

# 请保存为： reset_crew_memories.py
# 调用： python reset_crew_memories.py [file1.py] [file2.py]

import os
import sys
import importlib.util
from inspect import isfunction, ismethod
from pathlib import Path
from typing import Any, List, Optional, get_type_hints

from crewai.crew import Crew
from crewai.flow import Flow

def get_crew_instance(module_attr: Any) -> Optional[Crew]:
    """Try to obtain a ``Crew`` from an arbitrary module attribute.

    Three shapes are recognised, checked in this order:

    1. ``module_attr`` is already a ``Crew`` instance: returned as is.
    2. ``module_attr`` is callable and has a truthy ``is_crew_class``
       attribute: it is called with no arguments and ``.crew()`` is called
       on the result.
    3. ``module_attr`` is a plain function or bound method whose return
       type hint is exactly ``Crew``: it is called with no arguments.

    Args:
        module_attr: Any object taken from a module or instance namespace.

    Returns:
        The resolved crew, or ``None`` when no shape matches or when the
        third probe raises.
    """
    if isinstance(module_attr, Crew):
        return module_attr

    marked_as_crew_class = callable(module_attr) and getattr(
        module_attr, "is_crew_class", False
    )
    if marked_as_crew_class:
        return module_attr().crew() # type: ignore

    # Best-effort probe: resolving type hints or calling the factory may fail
    # for arbitrary callables, in which case the attribute is simply skipped.
    try:
        is_plain_callable = isfunction(module_attr) or ismethod(module_attr)
        if is_plain_callable and get_type_hints(module_attr).get("return") is Crew:
            return module_attr()
    except Exception:
        return None

    return None

def fetch_crews_from_module_attr(module_attr: Any) -> List[Crew]:
    """Collect every crew reachable from a single module attribute.

    The attribute itself is probed with ``get_crew_instance``. If it is
    also a ``Flow`` subclass, the class is instantiated with no arguments
    and each public attribute (name not starting with ``_``) of that
    instance is probed the same way.

    Args:
        module_attr: Any object taken from a module namespace.

    Returns:
        The crews found, possibly empty. If instantiating or inspecting the
        flow raises, the crews gathered up to that point are still returned.
    """
    collected: List[Crew] = []

    direct_hit = get_crew_instance(module_attr)
    if direct_hit:
        collected.append(direct_hit)

    is_flow_class = isinstance(module_attr, type) and issubclass(module_attr, Flow)
    if not is_flow_class:
        return collected

    # Best-effort: a flow that cannot be built or inspected is skipped, and
    # results are appended one by one so earlier finds survive a later error.
    try:
        flow = module_attr()
        for member_name in dir(flow):
            if member_name.startswith("_"):
                continue
            nested_hit = get_crew_instance(getattr(flow, member_name))
            if nested_hit:
                collected.append(nested_hit)
    except Exception:
        pass

    return collected

def find_crews_in_file(filepath: str) -> List[Crew]:
    """Import a Python file and return the crews found in its namespace.

    The file is loaded as a module named after its stem, with its directory
    temporarily placed at the front of ``sys.path``. Every public
    module-level attribute is passed to ``fetch_crews_from_module_attr``.

    Note that importing executes the file's top-level code.

    ``sys.path`` and ``sys.modules`` are restored on exit: the directory is
    removed only if this call inserted it, and a module previously
    registered under the same name is put back rather than deleted.

    Args:
        filepath: Path of the Python file to inspect.

    Returns:
        The crews found. Empty if the file is missing, no module spec can
        be created, or loading/inspection raises; each of those cases is
        reported on stderr.
    """
    if not os.path.exists(filepath):
        print(f"Error: File not found at '{filepath}'", file=sys.stderr)
        return []

    absolute_path = os.path.abspath(filepath)
    module_name = Path(absolute_path).stem
    file_dir = os.path.dirname(absolute_path)

    # Record the prior state so cleanup undoes only what this call changed.
    # The directory may already be on sys.path, e.g. when the target file
    # sits next to the running script.
    inserted_into_sys_path = file_dir not in sys.path
    if inserted_into_sys_path:
        sys.path.insert(0, file_dir)

    not_registered = object()
    previous_module = sys.modules.get(module_name, not_registered)

    try:
        spec = importlib.util.spec_from_file_location(
            module_name, absolute_path
        )
        if not spec or not spec.loader:
            print(
                f"Error: Could not create module spec for '{filepath}'",
                file=sys.stderr,
            )
            return []

        module = importlib.util.module_from_spec(spec)
        # Register before executing so the module can be resolved by name
        # while its own code runs.
        sys.modules[module_name] = module
        spec.loader.exec_module(module)

        found_crews: List[Crew] = []
        for attr_name in dir(module):
            if not attr_name.startswith("_"):
                module_attr = getattr(module, attr_name)
                found_crews.extend(fetch_crews_from_module_attr(module_attr))
        return found_crews
    except Exception as e:
        print(f"Error processing file '{filepath}': {e}", file=sys.stderr)
        return []
    finally:
        if inserted_into_sys_path and file_dir in sys.path:
            sys.path.remove(file_dir)
        if previous_module is not_registered:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous_module

def main():
    """Reset all memories of every crew found in the files given on the CLI.

    File paths are read from ``sys.argv[1:]``. For each file, every crew
    returned by ``find_crews_in_file`` has ``reset_memories`` called with
    ``command_type="all"``. A failure on one crew is reported on stderr and
    does not stop the remaining crews from being processed.

    Raises:
        SystemExit: With status 1 when no file argument is supplied.
    """
    if len(sys.argv) < 2:
        # Use the file name given in this script's header comment.
        print("Usage: python reset_crew_memories.py <file1.py> <file2.py> ...")
        sys.exit(1)

    filenames = sys.argv[1:]
    total_crews_reset = 0
    total_crews_failed = 0

    for filename in filenames:
        print(f"\nProcessing '{filename}'...")

        crews = find_crews_in_file(filename)
        if not crews:
            print(f"No crews found in '{filename}'.")
            continue

        for crew in crews:
            # Resolve the label once, before the try block, so the error
            # handler does not have to read attributes from the crew again.
            crew_id = getattr(crew, "name", None) or getattr(
                crew, "id", "<unknown>"
            )
            try:
                print(f"  - Resetting all memories for Crew '{crew_id}'...")
                # command_type = long, short, entity, knowledge, agent_knowledge, kickoff_outputs or all
                crew.reset_memories(command_type="all")
                total_crews_reset += 1
                print(f"  - Memories for crew '{crew_id}' have been reset.")
            except Exception as e:
                total_crews_failed += 1
                print(
                    f"  - Error resetting memories for crew '{crew_id}': {e}",
                    file=sys.stderr,
                )

    print(
        f"\nOperation complete. Reset memories for {total_crews_reset} Crew(s)."
    )
    if total_crews_failed:
        print(
            f"Failed to reset memories for {total_crews_failed} Crew(s).",
            file=sys.stderr,
        )

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

knowledge

官网参考

knowledge模块利用上下文信息、本地存储的资料、主动提取的实体信息，可以为AI agent提供额外的参考，以此增强回答的准确性和效率。

knowledge支持的格式

非结构化： string、text、pdf、html、docx

结构化：csv、xlsx、json

文档配置（特别注意）

1、创建ROOT_DIR/knowledge文件夹，把所有资料放到knowledge文件夹中

2、代码中使用相对路径

参考代码

请保存为：knowledge_samples.py

import os
from crewai import Agent, Task, Crew, LLM, Process
from crewai_tools import TXTSearchTool
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
from crewai.knowledge.source.pdf_knowledge_source import PDFKnowledgeSource
from crewai.knowledge.source.excel_knowledge_source import ExcelKnowledgeSource
from crewai.knowledge.source.json_knowledge_source import JSONKnowledgeSource
from crewai.knowledge.source.csv_knowledge_source import CSVKnowledgeSource

# # Custom storage location (uncomment to store memory/knowledge under ./storage)
# current_dir = os.path.dirname(os.path.abspath(__file__))
# STORAGE_DIR = current_dir + "/storage"
# os.environ["CREWAI_STORAGE_DIR"] = STORAGE_DIR

# Set a dummy OpenAI API key to avoid errors.
os.environ["OPENAI_API_KEY"] = "dummy-key"

# Configure the Qwen Plus LLM. The key is read from the DASHSCOPE_API_KEY
# environment variable so a real credential does not have to be written into
# the source file; the placeholder is kept as the fallback.
LLM_QW = LLM(
    model='dashscope/qwen-plus',
    base_url='https://dashscope.aliyuncs.com/compatible-mode/v1',
    api_key=os.environ.get(
        "DASHSCOPE_API_KEY", 'sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
    ),
    stream=False,
    temperature=0.7,
    request_timeout=120,
)

# Knowledge sources, one per file type. The paths are relative; the article
# puts the example files in the project's knowledge/ folder.
excel_source = ExcelKnowledgeSource(file_paths=["excel_example.xlsx"])
text_source = TextFileKnowledgeSource(file_paths=["text_example.txt"])
pdf_source = PDFKnowledgeSource(file_paths=["pdf_example.pdf"])
csv_source = CSVKnowledgeSource(file_paths=["csv_example.csv"])
json_source = JSONKnowledgeSource(file_paths=["json_example.json"])

# Research analyst agent: uses the Qwen LLM and is given the JSON knowledge
# source directly.
researcher = Agent(
    llm=LLM_QW,
    knowledge_sources=[json_source],
    role="Research Analyst",
    goal="Find and analyze information from the provided documents",
    backstory="You are an expert researcher who can extract and analyze information from various documents.",
    allow_delegation=False,
    verbose=True,
)

# The single task of this example, assigned to the researcher.
research_task = Task(
    agent=researcher,
    description="Search for information about users in the knowledge base and provide a summary of what you find",
    expected_output="请输出json文件中的基本信息",
)

# Create the Crew with memory enabled. The embedder is configured to use the
# Ollama provider with the all-minilm:l6-v2 model.
crew = Crew(
    agents=[researcher],
    tasks=[research_task],
    process=Process.sequential,
    memory=True,
    knowledge_sources=[json_source],
    embedder={"provider": "ollama", "config": {"model": "all-minilm:l6-v2"}},
    verbose=True,
)

# Run the task only when executed as a script, so importing this module
# builds the crew without kicking it off.
if __name__ == "__main__":
    print("开始执行CrewAI任务...")
    result = crew.kickoff()
    print("任务执行结果:", result, sep="\n")

实现详解（精华）

这段代码看起来没几句话，花了我大量试错时间【crew AI框架还有许多bug和使用不便的地方，而且embedding不支持国内大模型，让我们很难玩起来】

综合尝试下来，好像只有使用本地ollama的向量模型才能用，其余的都玩不了。关于ollama本地配置，网上一搜一大把，就不赘述了。主要特点如下：

配置Qwen为LLM
配置本地ollama向量模型为embedder
严格运用官网给出的使用方法存储和使用knowledge
重定向存储地点

自己还尝试绕过默认的chromaDB和sqlite3配置，使用qwen的向量模型，也捣鼓成功了，但是看着满屏的自定义实现，完全脱离了crew AI框架，何苦呢？（源码还是不贴出来祸害大家了）

其他参考

学习视频

实战演练

1、crew AI框架构建

虚拟环境创建，省略。。。

对应模块安装

pip install crewai crewai-tools langchain langchain-community langchain-ollama

2、配置knowledge资源

把knowledge文件夹放到根目录下

把knowledge_samples.py，reset_crew_memories.py拷贝

执行knowledge_samples.py即可

3、清除memory

执行 reset_crew_memories 模块即可

python reset_crew_memories.py knowledge_samples.py

拓展

1、后续要尝试把memory放到根目录文件夹下，这样互不冲突（取消头部的注释即可，已经实现了）

2、memory的重置仍然有问题，真的是服了~

深圳城市开发者社区

一座年轻的奋斗人之城，一个温馨的开发者之家。在这里，代码改变人生，开发创造未来！

更多推荐

谷歌 AI Agent 全攻略：从技术原理到企业落地的实战指南（建议收藏）

深圳城市开发者社区

SpringBoot+微信小程序助眠(平台完整项目源码+SQL脚本+接口文档【Java Web毕设】

深圳城市开发者社区

《Compose Multiplatform：跨平台UI框架实战》

基于JetBrains推出的跨平台解决方案，允许开发者使用Kotlin和Jetpack Compose构建共享UI的Android、iOS、桌面（Windows/macOS/Linux）及Web应用。通过上述方法，开发者可快速掌握Compose Multiplatform的核心开发流程，高效构建跨平台应用。实际开发中建议结合平台特性灵活调整UI设计，以平衡一致性与原生体验。通过IntelliJ I