autogpt 还原,开新分支编写

This commit is contained in:
w_xiaolizu
2023-05-30 16:38:01 +08:00
parent 548532e522
commit 519df19f43
65 changed files with 3352 additions and 1868 deletions

View File

@ -0,0 +1,99 @@
from autogpt.memory.local import LocalCache
from autogpt.memory.no_memory import NoMemory
# List of supported memory backends
# Add a backend to this list if the import attempt is successful
supported_memory = ["local", "no_memory"]
try:
from autogpt.memory.redismem import RedisMemory
supported_memory.append("redis")
except ImportError:
# print("Redis not installed. Skipping import.")
RedisMemory = None
try:
from autogpt.memory.pinecone import PineconeMemory
supported_memory.append("pinecone")
except ImportError:
# print("Pinecone not installed. Skipping import.")
PineconeMemory = None
try:
from autogpt.memory.weaviate import WeaviateMemory
supported_memory.append("weaviate")
except ImportError:
# print("Weaviate not installed. Skipping import.")
WeaviateMemory = None
try:
from autogpt.memory.milvus import MilvusMemory
supported_memory.append("milvus")
except ImportError:
# print("pymilvus not installed. Skipping import.")
MilvusMemory = None
def get_memory(cfg, init=False):
memory = None
if cfg.memory_backend == "pinecone":
if not PineconeMemory:
print(
"Error: Pinecone is not installed. Please install pinecone"
" to use Pinecone as a memory backend."
)
else:
memory = PineconeMemory(cfg)
if init:
memory.clear()
elif cfg.memory_backend == "redis":
if not RedisMemory:
print(
"Error: Redis is not installed. Please install redis-py to"
" use Redis as a memory backend."
)
else:
memory = RedisMemory(cfg)
elif cfg.memory_backend == "weaviate":
if not WeaviateMemory:
print(
"Error: Weaviate is not installed. Please install weaviate-client to"
" use Weaviate as a memory backend."
)
else:
memory = WeaviateMemory(cfg)
elif cfg.memory_backend == "milvus":
if not MilvusMemory:
print(
"Error: pymilvus sdk is not installed."
"Please install pymilvus to use Milvus or Zilliz Cloud as memory backend."
)
else:
memory = MilvusMemory(cfg)
elif cfg.memory_backend == "no_memory":
memory = NoMemory(cfg)
if memory is None:
memory = LocalCache(cfg)
if init:
memory.clear()
return memory
def get_supported_memory_backends():
return supported_memory
__all__ = [
"get_memory",
"LocalCache",
"RedisMemory",
"PineconeMemory",
"NoMemory",
"MilvusMemory",
"WeaviateMemory",
]

28
autogpt/memory/base.py Normal file
View File

@ -0,0 +1,28 @@
"""Base class for memory providers."""
import abc
from autogpt.config import AbstractSingleton, Config
cfg = Config()
class MemoryProviderSingleton(AbstractSingleton):
@abc.abstractmethod
def add(self, data):
pass
@abc.abstractmethod
def get(self, data):
pass
@abc.abstractmethod
def clear(self):
pass
@abc.abstractmethod
def get_relevant(self, data, num_relevant=5):
pass
@abc.abstractmethod
def get_stats(self):
pass

126
autogpt/memory/local.py Normal file
View File

@ -0,0 +1,126 @@
from __future__ import annotations
import dataclasses
from pathlib import Path
from typing import Any, List
import numpy as np
import orjson
from autogpt.llm_utils import create_embedding_with_ada
from autogpt.memory.base import MemoryProviderSingleton
EMBED_DIM = 1536
SAVE_OPTIONS = orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS
def create_default_embeddings():
return np.zeros((0, EMBED_DIM)).astype(np.float32)
@dataclasses.dataclass
class CacheContent:
texts: List[str] = dataclasses.field(default_factory=list)
embeddings: np.ndarray = dataclasses.field(
default_factory=create_default_embeddings
)
class LocalCache(MemoryProviderSingleton):
"""A class that stores the memory in a local file"""
def __init__(self, cfg) -> None:
"""Initialize a class instance
Args:
cfg: Config object
Returns:
None
"""
workspace_path = Path(cfg.workspace_path)
self.filename = workspace_path / f"{cfg.memory_index}.json"
self.filename.touch(exist_ok=True)
file_content = b"{}"
with self.filename.open("w+b") as f:
f.write(file_content)
self.data = CacheContent()
def add(self, text: str):
"""
Add text to our list of texts, add embedding as row to our
embeddings-matrix
Args:
text: str
Returns: None
"""
if "Command Error:" in text:
return ""
self.data.texts.append(text)
embedding = create_embedding_with_ada(text)
vector = np.array(embedding).astype(np.float32)
vector = vector[np.newaxis, :]
self.data.embeddings = np.concatenate(
[
self.data.embeddings,
vector,
],
axis=0,
)
with open(self.filename, "wb") as f:
out = orjson.dumps(self.data, option=SAVE_OPTIONS)
f.write(out)
return text
def clear(self) -> str:
"""
Clears the redis server.
Returns: A message indicating that the memory has been cleared.
"""
self.data = CacheContent()
return "Obliviated"
def get(self, data: str) -> list[Any] | None:
"""
Gets the data from the memory that is most relevant to the given data.
Args:
data: The data to compare to.
Returns: The most relevant data.
"""
return self.get_relevant(data, 1)
def get_relevant(self, text: str, k: int) -> list[Any]:
""" "
matrix-vector mult to find score-for-each-row-of-matrix
get indices for top-k winning scores
return texts for those indices
Args:
text: str
k: int
Returns: List[str]
"""
embedding = create_embedding_with_ada(text)
scores = np.dot(self.data.embeddings, embedding)
top_k_indices = np.argsort(scores)[-k:][::-1]
return [self.data.texts[i] for i in top_k_indices]
def get_stats(self) -> tuple[int, tuple[int, ...]]:
"""
Returns: The stats of the local cache.
"""
return len(self.data.texts), self.data.embeddings.shape

162
autogpt/memory/milvus.py Normal file
View File

@ -0,0 +1,162 @@
""" Milvus memory storage provider."""
import re
from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, connections
from autogpt.config import Config
from autogpt.llm_utils import get_ada_embedding
from autogpt.memory.base import MemoryProviderSingleton
class MilvusMemory(MemoryProviderSingleton):
"""Milvus memory storage provider."""
def __init__(self, cfg: Config) -> None:
"""Construct a milvus memory storage connection.
Args:
cfg (Config): Auto-GPT global config.
"""
self.configure(cfg)
connect_kwargs = {}
if self.username:
connect_kwargs["user"] = self.username
connect_kwargs["password"] = self.password
connections.connect(
**connect_kwargs,
uri=self.uri or "",
address=self.address or "",
secure=self.secure,
)
self.init_collection()
def configure(self, cfg: Config) -> None:
# init with configuration.
self.uri = None
self.address = cfg.milvus_addr
self.secure = cfg.milvus_secure
self.username = cfg.milvus_username
self.password = cfg.milvus_password
self.collection_name = cfg.milvus_collection
# use HNSW by default.
self.index_params = {
"metric_type": "IP",
"index_type": "HNSW",
"params": {"M": 8, "efConstruction": 64},
}
if (self.username is None) != (self.password is None):
raise ValueError(
"Both username and password must be set to use authentication for Milvus"
)
# configured address may be a full URL.
if re.match(r"^(https?|tcp)://", self.address) is not None:
self.uri = self.address
self.address = None
if self.uri.startswith("https"):
self.secure = True
# Zilliz Cloud requires AutoIndex.
if re.match(r"^https://(.*)\.zillizcloud\.(com|cn)", self.address) is not None:
self.index_params = {
"metric_type": "IP",
"index_type": "AUTOINDEX",
"params": {},
}
def init_collection(self) -> None:
"""Initialize collection in vector database."""
fields = [
FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=1536),
FieldSchema(name="raw_text", dtype=DataType.VARCHAR, max_length=65535),
]
# create collection if not exist and load it.
self.schema = CollectionSchema(fields, "auto-gpt memory storage")
self.collection = Collection(self.collection_name, self.schema)
# create index if not exist.
if not self.collection.has_index():
self.collection.release()
self.collection.create_index(
"embeddings",
self.index_params,
index_name="embeddings",
)
self.collection.load()
def add(self, data) -> str:
"""Add an embedding of data into memory.
Args:
data (str): The raw text to construct embedding index.
Returns:
str: log.
"""
embedding = get_ada_embedding(data)
result = self.collection.insert([[embedding], [data]])
_text = (
"Inserting data into memory at primary key: "
f"{result.primary_keys[0]}:\n data: {data}"
)
return _text
def get(self, data):
"""Return the most relevant data in memory.
Args:
data: The data to compare to.
"""
return self.get_relevant(data, 1)
def clear(self) -> str:
"""Drop the index in memory.
Returns:
str: log.
"""
self.collection.drop()
self.collection = Collection(self.collection_name, self.schema)
self.collection.create_index(
"embeddings",
self.index_params,
index_name="embeddings",
)
self.collection.load()
return "Obliviated"
def get_relevant(self, data: str, num_relevant: int = 5):
"""Return the top-k relevant data in memory.
Args:
data: The data to compare to.
num_relevant (int, optional): The max number of relevant data.
Defaults to 5.
Returns:
list: The top-k relevant data.
"""
# search the embedding and return the most relevant text.
embedding = get_ada_embedding(data)
search_params = {
"metrics_type": "IP",
"params": {"nprobe": 8},
}
result = self.collection.search(
[embedding],
"embeddings",
search_params,
num_relevant,
output_fields=["raw_text"],
)
return [item.entity.value_of_field("raw_text") for item in result[0]]
def get_stats(self) -> str:
"""
Returns: The stats of the milvus cache.
"""
return f"Entities num: {self.collection.num_entities}"

View File

@ -0,0 +1,73 @@
"""A class that does not store any data. This is the default memory provider."""
from __future__ import annotations
from typing import Any
from autogpt.memory.base import MemoryProviderSingleton
class NoMemory(MemoryProviderSingleton):
"""
A class that does not store any data. This is the default memory provider.
"""
def __init__(self, cfg):
"""
Initializes the NoMemory provider.
Args:
cfg: The config object.
Returns: None
"""
pass
def add(self, data: str) -> str:
"""
Adds a data point to the memory. No action is taken in NoMemory.
Args:
data: The data to add.
Returns: An empty string.
"""
return ""
def get(self, data: str) -> list[Any] | None:
"""
Gets the data from the memory that is most relevant to the given data.
NoMemory always returns None.
Args:
data: The data to compare to.
Returns: None
"""
return None
def clear(self) -> str:
"""
Clears the memory. No action is taken in NoMemory.
Returns: An empty string.
"""
return ""
def get_relevant(self, data: str, num_relevant: int = 5) -> list[Any] | None:
"""
Returns all the data in the memory that is relevant to the given data.
NoMemory always returns None.
Args:
data: The data to compare to.
num_relevant: The number of relevant data to return.
Returns: None
"""
return None
def get_stats(self):
"""
Returns: An empty dictionary as there are no stats in NoMemory.
"""
return {}

View File

@ -0,0 +1,75 @@
import pinecone
from colorama import Fore, Style
from autogpt.llm_utils import create_embedding_with_ada
from autogpt.logs import logger
from autogpt.memory.base import MemoryProviderSingleton
class PineconeMemory(MemoryProviderSingleton):
def __init__(self, cfg):
pinecone_api_key = cfg.pinecone_api_key
pinecone_region = cfg.pinecone_region
pinecone.init(api_key=pinecone_api_key, environment=pinecone_region)
dimension = 1536
metric = "cosine"
pod_type = "p1"
table_name = "auto-gpt"
# this assumes we don't start with memory.
# for now this works.
# we'll need a more complicated and robust system if we want to start with
# memory.
self.vec_num = 0
try:
pinecone.whoami()
except Exception as e:
logger.typewriter_log(
"FAILED TO CONNECT TO PINECONE",
Fore.RED,
Style.BRIGHT + str(e) + Style.RESET_ALL,
)
logger.double_check(
"Please ensure you have setup and configured Pinecone properly for use."
+ f"You can check out {Fore.CYAN + Style.BRIGHT}"
"https://github.com/Torantulino/Auto-GPT#-pinecone-api-key-setup"
f"{Style.RESET_ALL} to ensure you've set up everything correctly."
)
exit(1)
if table_name not in pinecone.list_indexes():
pinecone.create_index(
table_name, dimension=dimension, metric=metric, pod_type=pod_type
)
self.index = pinecone.Index(table_name)
def add(self, data):
vector = create_embedding_with_ada(data)
# no metadata here. We may wish to change that long term.
self.index.upsert([(str(self.vec_num), vector, {"raw_text": data})])
_text = f"Inserting data into memory at index: {self.vec_num}:\n data: {data}"
self.vec_num += 1
return _text
def get(self, data):
return self.get_relevant(data, 1)
def clear(self):
self.index.delete(deleteAll=True)
return "Obliviated"
def get_relevant(self, data, num_relevant=5):
"""
Returns all the data in the memory that is relevant to the given data.
:param data: The data to compare to.
:param num_relevant: The number of relevant data to return. Defaults to 5
"""
query_embedding = create_embedding_with_ada(data)
results = self.index.query(
query_embedding, top_k=num_relevant, include_metadata=True
)
sorted_results = sorted(results.matches, key=lambda x: x.score)
return [str(item["metadata"]["raw_text"]) for item in sorted_results]
def get_stats(self):
return self.index.describe_index_stats()

156
autogpt/memory/redismem.py Normal file
View File

@ -0,0 +1,156 @@
"""Redis memory provider."""
from __future__ import annotations
from typing import Any
import numpy as np
import redis
from colorama import Fore, Style
from redis.commands.search.field import TextField, VectorField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query
from autogpt.llm_utils import create_embedding_with_ada
from autogpt.logs import logger
from autogpt.memory.base import MemoryProviderSingleton
SCHEMA = [
TextField("data"),
VectorField(
"embedding",
"HNSW",
{"TYPE": "FLOAT32", "DIM": 1536, "DISTANCE_METRIC": "COSINE"},
),
]
class RedisMemory(MemoryProviderSingleton):
def __init__(self, cfg):
"""
Initializes the Redis memory provider.
Args:
cfg: The config object.
Returns: None
"""
redis_host = cfg.redis_host
redis_port = cfg.redis_port
redis_password = cfg.redis_password
self.dimension = 1536
self.redis = redis.Redis(
host=redis_host,
port=redis_port,
password=redis_password,
db=0, # Cannot be changed
)
self.cfg = cfg
# Check redis connection
try:
self.redis.ping()
except redis.ConnectionError as e:
logger.typewriter_log(
"FAILED TO CONNECT TO REDIS",
Fore.RED,
Style.BRIGHT + str(e) + Style.RESET_ALL,
)
logger.double_check(
"Please ensure you have setup and configured Redis properly for use. "
+ f"You can check out {Fore.CYAN + Style.BRIGHT}"
f"https://github.com/Torantulino/Auto-GPT#redis-setup{Style.RESET_ALL}"
" to ensure you've set up everything correctly."
)
exit(1)
if cfg.wipe_redis_on_start:
self.redis.flushall()
try:
self.redis.ft(f"{cfg.memory_index}").create_index(
fields=SCHEMA,
definition=IndexDefinition(
prefix=[f"{cfg.memory_index}:"], index_type=IndexType.HASH
),
)
except Exception as e:
print("Error creating Redis search index: ", e)
existing_vec_num = self.redis.get(f"{cfg.memory_index}-vec_num")
self.vec_num = int(existing_vec_num.decode("utf-8")) if existing_vec_num else 0
def add(self, data: str) -> str:
"""
Adds a data point to the memory.
Args:
data: The data to add.
Returns: Message indicating that the data has been added.
"""
if "Command Error:" in data:
return ""
vector = create_embedding_with_ada(data)
vector = np.array(vector).astype(np.float32).tobytes()
data_dict = {b"data": data, "embedding": vector}
pipe = self.redis.pipeline()
pipe.hset(f"{self.cfg.memory_index}:{self.vec_num}", mapping=data_dict)
_text = (
f"Inserting data into memory at index: {self.vec_num}:\n" f"data: {data}"
)
self.vec_num += 1
pipe.set(f"{self.cfg.memory_index}-vec_num", self.vec_num)
pipe.execute()
return _text
def get(self, data: str) -> list[Any] | None:
"""
Gets the data from the memory that is most relevant to the given data.
Args:
data: The data to compare to.
Returns: The most relevant data.
"""
return self.get_relevant(data, 1)
def clear(self) -> str:
"""
Clears the redis server.
Returns: A message indicating that the memory has been cleared.
"""
self.redis.flushall()
return "Obliviated"
def get_relevant(self, data: str, num_relevant: int = 5) -> list[Any] | None:
"""
Returns all the data in the memory that is relevant to the given data.
Args:
data: The data to compare to.
num_relevant: The number of relevant data to return.
Returns: A list of the most relevant data.
"""
query_embedding = create_embedding_with_ada(data)
base_query = f"*=>[KNN {num_relevant} @embedding $vector AS vector_score]"
query = (
Query(base_query)
.return_fields("data", "vector_score")
.sort_by("vector_score")
.dialect(2)
)
query_vector = np.array(query_embedding).astype(np.float32).tobytes()
try:
results = self.redis.ft(f"{self.cfg.memory_index}").search(
query, query_params={"vector": query_vector}
)
except Exception as e:
print("Error calling Redis search: ", e)
return None
return [result.data for result in results.docs]
def get_stats(self):
"""
Returns: The stats of the memory index.
"""
return self.redis.ft(f"{self.cfg.memory_index}").info()

126
autogpt/memory/weaviate.py Normal file
View File

@ -0,0 +1,126 @@
import weaviate
from weaviate import Client
from weaviate.embedded import EmbeddedOptions
from weaviate.util import generate_uuid5
from autogpt.llm_utils import get_ada_embedding
from autogpt.memory.base import MemoryProviderSingleton
def default_schema(weaviate_index):
return {
"class": weaviate_index,
"properties": [
{
"name": "raw_text",
"dataType": ["text"],
"description": "original text for the embedding",
}
],
}
class WeaviateMemory(MemoryProviderSingleton):
def __init__(self, cfg):
auth_credentials = self._build_auth_credentials(cfg)
url = f"{cfg.weaviate_protocol}://{cfg.weaviate_host}:{cfg.weaviate_port}"
if cfg.use_weaviate_embedded:
self.client = Client(
embedded_options=EmbeddedOptions(
hostname=cfg.weaviate_host,
port=int(cfg.weaviate_port),
persistence_data_path=cfg.weaviate_embedded_path,
)
)
print(
f"Weaviate Embedded running on: {url} with persistence path: {cfg.weaviate_embedded_path}"
)
else:
self.client = Client(url, auth_client_secret=auth_credentials)
self.index = WeaviateMemory.format_classname(cfg.memory_index)
self._create_schema()
@staticmethod
def format_classname(index):
# weaviate uses capitalised index names
# The python client uses the following code to format
# index names before the corresponding class is created
index = index.replace("-", "_")
if len(index) == 1:
return index.capitalize()
return index[0].capitalize() + index[1:]
def _create_schema(self):
schema = default_schema(self.index)
if not self.client.schema.contains(schema):
self.client.schema.create_class(schema)
def _build_auth_credentials(self, cfg):
if cfg.weaviate_username and cfg.weaviate_password:
return weaviate.AuthClientPassword(
cfg.weaviate_username, cfg.weaviate_password
)
if cfg.weaviate_api_key:
return weaviate.AuthApiKey(api_key=cfg.weaviate_api_key)
else:
return None
def add(self, data):
vector = get_ada_embedding(data)
doc_uuid = generate_uuid5(data, self.index)
data_object = {"raw_text": data}
with self.client.batch as batch:
batch.add_data_object(
uuid=doc_uuid,
data_object=data_object,
class_name=self.index,
vector=vector,
)
return f"Inserting data into memory at uuid: {doc_uuid}:\n data: {data}"
def get(self, data):
return self.get_relevant(data, 1)
def clear(self):
self.client.schema.delete_all()
# weaviate does not yet have a neat way to just remove the items in an index
# without removing the entire schema, therefore we need to re-create it
# after a call to delete_all
self._create_schema()
return "Obliterated"
def get_relevant(self, data, num_relevant=5):
query_embedding = get_ada_embedding(data)
try:
results = (
self.client.query.get(self.index, ["raw_text"])
.with_near_vector({"vector": query_embedding, "certainty": 0.7})
.with_limit(num_relevant)
.do()
)
if len(results["data"]["Get"][self.index]) > 0:
return [
str(item["raw_text"]) for item in results["data"]["Get"][self.index]
]
else:
return []
except Exception as err:
print(f"Unexpected error {err=}, {type(err)=}")
return []
def get_stats(self):
result = self.client.query.aggregate(self.index).with_meta_count().do()
class_data = result["data"]["Aggregate"][self.index]
return class_data[0]["meta"] if class_data else {}