autogpt 还原,开新分支编写
This commit is contained in:
99
autogpt/memory/__init__.py
Normal file
99
autogpt/memory/__init__.py
Normal file
@ -0,0 +1,99 @@
|
||||
from autogpt.memory.local import LocalCache
|
||||
from autogpt.memory.no_memory import NoMemory
|
||||
|
||||
# List of supported memory backends
|
||||
# Add a backend to this list if the import attempt is successful
|
||||
supported_memory = ["local", "no_memory"]
|
||||
|
||||
try:
|
||||
from autogpt.memory.redismem import RedisMemory
|
||||
|
||||
supported_memory.append("redis")
|
||||
except ImportError:
|
||||
# print("Redis not installed. Skipping import.")
|
||||
RedisMemory = None
|
||||
|
||||
try:
|
||||
from autogpt.memory.pinecone import PineconeMemory
|
||||
|
||||
supported_memory.append("pinecone")
|
||||
except ImportError:
|
||||
# print("Pinecone not installed. Skipping import.")
|
||||
PineconeMemory = None
|
||||
|
||||
try:
|
||||
from autogpt.memory.weaviate import WeaviateMemory
|
||||
|
||||
supported_memory.append("weaviate")
|
||||
except ImportError:
|
||||
# print("Weaviate not installed. Skipping import.")
|
||||
WeaviateMemory = None
|
||||
|
||||
try:
|
||||
from autogpt.memory.milvus import MilvusMemory
|
||||
|
||||
supported_memory.append("milvus")
|
||||
except ImportError:
|
||||
# print("pymilvus not installed. Skipping import.")
|
||||
MilvusMemory = None
|
||||
|
||||
|
||||
def get_memory(cfg, init=False):
|
||||
memory = None
|
||||
if cfg.memory_backend == "pinecone":
|
||||
if not PineconeMemory:
|
||||
print(
|
||||
"Error: Pinecone is not installed. Please install pinecone"
|
||||
" to use Pinecone as a memory backend."
|
||||
)
|
||||
else:
|
||||
memory = PineconeMemory(cfg)
|
||||
if init:
|
||||
memory.clear()
|
||||
elif cfg.memory_backend == "redis":
|
||||
if not RedisMemory:
|
||||
print(
|
||||
"Error: Redis is not installed. Please install redis-py to"
|
||||
" use Redis as a memory backend."
|
||||
)
|
||||
else:
|
||||
memory = RedisMemory(cfg)
|
||||
elif cfg.memory_backend == "weaviate":
|
||||
if not WeaviateMemory:
|
||||
print(
|
||||
"Error: Weaviate is not installed. Please install weaviate-client to"
|
||||
" use Weaviate as a memory backend."
|
||||
)
|
||||
else:
|
||||
memory = WeaviateMemory(cfg)
|
||||
elif cfg.memory_backend == "milvus":
|
||||
if not MilvusMemory:
|
||||
print(
|
||||
"Error: pymilvus sdk is not installed."
|
||||
"Please install pymilvus to use Milvus or Zilliz Cloud as memory backend."
|
||||
)
|
||||
else:
|
||||
memory = MilvusMemory(cfg)
|
||||
elif cfg.memory_backend == "no_memory":
|
||||
memory = NoMemory(cfg)
|
||||
|
||||
if memory is None:
|
||||
memory = LocalCache(cfg)
|
||||
if init:
|
||||
memory.clear()
|
||||
return memory
|
||||
|
||||
|
||||
def get_supported_memory_backends():
|
||||
return supported_memory
|
||||
|
||||
|
||||
__all__ = [
|
||||
"get_memory",
|
||||
"LocalCache",
|
||||
"RedisMemory",
|
||||
"PineconeMemory",
|
||||
"NoMemory",
|
||||
"MilvusMemory",
|
||||
"WeaviateMemory",
|
||||
]
|
||||
28
autogpt/memory/base.py
Normal file
28
autogpt/memory/base.py
Normal file
@ -0,0 +1,28 @@
|
||||
"""Base class for memory providers."""
|
||||
import abc
|
||||
|
||||
from autogpt.config import AbstractSingleton, Config
|
||||
|
||||
cfg = Config()
|
||||
|
||||
|
||||
class MemoryProviderSingleton(AbstractSingleton):
|
||||
@abc.abstractmethod
|
||||
def add(self, data):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def get(self, data):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def clear(self):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_relevant(self, data, num_relevant=5):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_stats(self):
|
||||
pass
|
||||
126
autogpt/memory/local.py
Normal file
126
autogpt/memory/local.py
Normal file
@ -0,0 +1,126 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
from pathlib import Path
|
||||
from typing import Any, List
|
||||
|
||||
import numpy as np
|
||||
import orjson
|
||||
|
||||
from autogpt.llm_utils import create_embedding_with_ada
|
||||
from autogpt.memory.base import MemoryProviderSingleton
|
||||
|
||||
EMBED_DIM = 1536
|
||||
SAVE_OPTIONS = orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS
|
||||
|
||||
|
||||
def create_default_embeddings():
|
||||
return np.zeros((0, EMBED_DIM)).astype(np.float32)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class CacheContent:
|
||||
texts: List[str] = dataclasses.field(default_factory=list)
|
||||
embeddings: np.ndarray = dataclasses.field(
|
||||
default_factory=create_default_embeddings
|
||||
)
|
||||
|
||||
|
||||
class LocalCache(MemoryProviderSingleton):
|
||||
"""A class that stores the memory in a local file"""
|
||||
|
||||
def __init__(self, cfg) -> None:
|
||||
"""Initialize a class instance
|
||||
|
||||
Args:
|
||||
cfg: Config object
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
workspace_path = Path(cfg.workspace_path)
|
||||
self.filename = workspace_path / f"{cfg.memory_index}.json"
|
||||
|
||||
self.filename.touch(exist_ok=True)
|
||||
|
||||
file_content = b"{}"
|
||||
with self.filename.open("w+b") as f:
|
||||
f.write(file_content)
|
||||
|
||||
self.data = CacheContent()
|
||||
|
||||
def add(self, text: str):
|
||||
"""
|
||||
Add text to our list of texts, add embedding as row to our
|
||||
embeddings-matrix
|
||||
|
||||
Args:
|
||||
text: str
|
||||
|
||||
Returns: None
|
||||
"""
|
||||
if "Command Error:" in text:
|
||||
return ""
|
||||
self.data.texts.append(text)
|
||||
|
||||
embedding = create_embedding_with_ada(text)
|
||||
|
||||
vector = np.array(embedding).astype(np.float32)
|
||||
vector = vector[np.newaxis, :]
|
||||
self.data.embeddings = np.concatenate(
|
||||
[
|
||||
self.data.embeddings,
|
||||
vector,
|
||||
],
|
||||
axis=0,
|
||||
)
|
||||
|
||||
with open(self.filename, "wb") as f:
|
||||
out = orjson.dumps(self.data, option=SAVE_OPTIONS)
|
||||
f.write(out)
|
||||
return text
|
||||
|
||||
def clear(self) -> str:
|
||||
"""
|
||||
Clears the redis server.
|
||||
|
||||
Returns: A message indicating that the memory has been cleared.
|
||||
"""
|
||||
self.data = CacheContent()
|
||||
return "Obliviated"
|
||||
|
||||
def get(self, data: str) -> list[Any] | None:
|
||||
"""
|
||||
Gets the data from the memory that is most relevant to the given data.
|
||||
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
|
||||
Returns: The most relevant data.
|
||||
"""
|
||||
return self.get_relevant(data, 1)
|
||||
|
||||
def get_relevant(self, text: str, k: int) -> list[Any]:
|
||||
""" "
|
||||
matrix-vector mult to find score-for-each-row-of-matrix
|
||||
get indices for top-k winning scores
|
||||
return texts for those indices
|
||||
Args:
|
||||
text: str
|
||||
k: int
|
||||
|
||||
Returns: List[str]
|
||||
"""
|
||||
embedding = create_embedding_with_ada(text)
|
||||
|
||||
scores = np.dot(self.data.embeddings, embedding)
|
||||
|
||||
top_k_indices = np.argsort(scores)[-k:][::-1]
|
||||
|
||||
return [self.data.texts[i] for i in top_k_indices]
|
||||
|
||||
def get_stats(self) -> tuple[int, tuple[int, ...]]:
|
||||
"""
|
||||
Returns: The stats of the local cache.
|
||||
"""
|
||||
return len(self.data.texts), self.data.embeddings.shape
|
||||
162
autogpt/memory/milvus.py
Normal file
162
autogpt/memory/milvus.py
Normal file
@ -0,0 +1,162 @@
|
||||
""" Milvus memory storage provider."""
|
||||
import re
|
||||
|
||||
from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, connections
|
||||
|
||||
from autogpt.config import Config
|
||||
from autogpt.llm_utils import get_ada_embedding
|
||||
from autogpt.memory.base import MemoryProviderSingleton
|
||||
|
||||
|
||||
class MilvusMemory(MemoryProviderSingleton):
|
||||
"""Milvus memory storage provider."""
|
||||
|
||||
def __init__(self, cfg: Config) -> None:
|
||||
"""Construct a milvus memory storage connection.
|
||||
|
||||
Args:
|
||||
cfg (Config): Auto-GPT global config.
|
||||
"""
|
||||
self.configure(cfg)
|
||||
|
||||
connect_kwargs = {}
|
||||
if self.username:
|
||||
connect_kwargs["user"] = self.username
|
||||
connect_kwargs["password"] = self.password
|
||||
|
||||
connections.connect(
|
||||
**connect_kwargs,
|
||||
uri=self.uri or "",
|
||||
address=self.address or "",
|
||||
secure=self.secure,
|
||||
)
|
||||
|
||||
self.init_collection()
|
||||
|
||||
def configure(self, cfg: Config) -> None:
|
||||
# init with configuration.
|
||||
self.uri = None
|
||||
self.address = cfg.milvus_addr
|
||||
self.secure = cfg.milvus_secure
|
||||
self.username = cfg.milvus_username
|
||||
self.password = cfg.milvus_password
|
||||
self.collection_name = cfg.milvus_collection
|
||||
# use HNSW by default.
|
||||
self.index_params = {
|
||||
"metric_type": "IP",
|
||||
"index_type": "HNSW",
|
||||
"params": {"M": 8, "efConstruction": 64},
|
||||
}
|
||||
|
||||
if (self.username is None) != (self.password is None):
|
||||
raise ValueError(
|
||||
"Both username and password must be set to use authentication for Milvus"
|
||||
)
|
||||
|
||||
# configured address may be a full URL.
|
||||
if re.match(r"^(https?|tcp)://", self.address) is not None:
|
||||
self.uri = self.address
|
||||
self.address = None
|
||||
|
||||
if self.uri.startswith("https"):
|
||||
self.secure = True
|
||||
|
||||
# Zilliz Cloud requires AutoIndex.
|
||||
if re.match(r"^https://(.*)\.zillizcloud\.(com|cn)", self.address) is not None:
|
||||
self.index_params = {
|
||||
"metric_type": "IP",
|
||||
"index_type": "AUTOINDEX",
|
||||
"params": {},
|
||||
}
|
||||
|
||||
def init_collection(self) -> None:
|
||||
"""Initialize collection in vector database."""
|
||||
fields = [
|
||||
FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
|
||||
FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=1536),
|
||||
FieldSchema(name="raw_text", dtype=DataType.VARCHAR, max_length=65535),
|
||||
]
|
||||
|
||||
# create collection if not exist and load it.
|
||||
self.schema = CollectionSchema(fields, "auto-gpt memory storage")
|
||||
self.collection = Collection(self.collection_name, self.schema)
|
||||
# create index if not exist.
|
||||
if not self.collection.has_index():
|
||||
self.collection.release()
|
||||
self.collection.create_index(
|
||||
"embeddings",
|
||||
self.index_params,
|
||||
index_name="embeddings",
|
||||
)
|
||||
self.collection.load()
|
||||
|
||||
def add(self, data) -> str:
|
||||
"""Add an embedding of data into memory.
|
||||
|
||||
Args:
|
||||
data (str): The raw text to construct embedding index.
|
||||
|
||||
Returns:
|
||||
str: log.
|
||||
"""
|
||||
embedding = get_ada_embedding(data)
|
||||
result = self.collection.insert([[embedding], [data]])
|
||||
_text = (
|
||||
"Inserting data into memory at primary key: "
|
||||
f"{result.primary_keys[0]}:\n data: {data}"
|
||||
)
|
||||
return _text
|
||||
|
||||
def get(self, data):
|
||||
"""Return the most relevant data in memory.
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
"""
|
||||
return self.get_relevant(data, 1)
|
||||
|
||||
def clear(self) -> str:
|
||||
"""Drop the index in memory.
|
||||
|
||||
Returns:
|
||||
str: log.
|
||||
"""
|
||||
self.collection.drop()
|
||||
self.collection = Collection(self.collection_name, self.schema)
|
||||
self.collection.create_index(
|
||||
"embeddings",
|
||||
self.index_params,
|
||||
index_name="embeddings",
|
||||
)
|
||||
self.collection.load()
|
||||
return "Obliviated"
|
||||
|
||||
def get_relevant(self, data: str, num_relevant: int = 5):
|
||||
"""Return the top-k relevant data in memory.
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
num_relevant (int, optional): The max number of relevant data.
|
||||
Defaults to 5.
|
||||
|
||||
Returns:
|
||||
list: The top-k relevant data.
|
||||
"""
|
||||
# search the embedding and return the most relevant text.
|
||||
embedding = get_ada_embedding(data)
|
||||
search_params = {
|
||||
"metrics_type": "IP",
|
||||
"params": {"nprobe": 8},
|
||||
}
|
||||
result = self.collection.search(
|
||||
[embedding],
|
||||
"embeddings",
|
||||
search_params,
|
||||
num_relevant,
|
||||
output_fields=["raw_text"],
|
||||
)
|
||||
return [item.entity.value_of_field("raw_text") for item in result[0]]
|
||||
|
||||
def get_stats(self) -> str:
|
||||
"""
|
||||
Returns: The stats of the milvus cache.
|
||||
"""
|
||||
return f"Entities num: {self.collection.num_entities}"
|
||||
73
autogpt/memory/no_memory.py
Normal file
73
autogpt/memory/no_memory.py
Normal file
@ -0,0 +1,73 @@
|
||||
"""A class that does not store any data. This is the default memory provider."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from autogpt.memory.base import MemoryProviderSingleton
|
||||
|
||||
|
||||
class NoMemory(MemoryProviderSingleton):
|
||||
"""
|
||||
A class that does not store any data. This is the default memory provider.
|
||||
"""
|
||||
|
||||
def __init__(self, cfg):
|
||||
"""
|
||||
Initializes the NoMemory provider.
|
||||
|
||||
Args:
|
||||
cfg: The config object.
|
||||
|
||||
Returns: None
|
||||
"""
|
||||
pass
|
||||
|
||||
def add(self, data: str) -> str:
|
||||
"""
|
||||
Adds a data point to the memory. No action is taken in NoMemory.
|
||||
|
||||
Args:
|
||||
data: The data to add.
|
||||
|
||||
Returns: An empty string.
|
||||
"""
|
||||
return ""
|
||||
|
||||
def get(self, data: str) -> list[Any] | None:
|
||||
"""
|
||||
Gets the data from the memory that is most relevant to the given data.
|
||||
NoMemory always returns None.
|
||||
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
|
||||
Returns: None
|
||||
"""
|
||||
return None
|
||||
|
||||
def clear(self) -> str:
|
||||
"""
|
||||
Clears the memory. No action is taken in NoMemory.
|
||||
|
||||
Returns: An empty string.
|
||||
"""
|
||||
return ""
|
||||
|
||||
def get_relevant(self, data: str, num_relevant: int = 5) -> list[Any] | None:
|
||||
"""
|
||||
Returns all the data in the memory that is relevant to the given data.
|
||||
NoMemory always returns None.
|
||||
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
num_relevant: The number of relevant data to return.
|
||||
|
||||
Returns: None
|
||||
"""
|
||||
return None
|
||||
|
||||
def get_stats(self):
|
||||
"""
|
||||
Returns: An empty dictionary as there are no stats in NoMemory.
|
||||
"""
|
||||
return {}
|
||||
75
autogpt/memory/pinecone.py
Normal file
75
autogpt/memory/pinecone.py
Normal file
@ -0,0 +1,75 @@
|
||||
import pinecone
|
||||
from colorama import Fore, Style
|
||||
|
||||
from autogpt.llm_utils import create_embedding_with_ada
|
||||
from autogpt.logs import logger
|
||||
from autogpt.memory.base import MemoryProviderSingleton
|
||||
|
||||
|
||||
class PineconeMemory(MemoryProviderSingleton):
|
||||
def __init__(self, cfg):
|
||||
pinecone_api_key = cfg.pinecone_api_key
|
||||
pinecone_region = cfg.pinecone_region
|
||||
pinecone.init(api_key=pinecone_api_key, environment=pinecone_region)
|
||||
dimension = 1536
|
||||
metric = "cosine"
|
||||
pod_type = "p1"
|
||||
table_name = "auto-gpt"
|
||||
# this assumes we don't start with memory.
|
||||
# for now this works.
|
||||
# we'll need a more complicated and robust system if we want to start with
|
||||
# memory.
|
||||
self.vec_num = 0
|
||||
|
||||
try:
|
||||
pinecone.whoami()
|
||||
except Exception as e:
|
||||
logger.typewriter_log(
|
||||
"FAILED TO CONNECT TO PINECONE",
|
||||
Fore.RED,
|
||||
Style.BRIGHT + str(e) + Style.RESET_ALL,
|
||||
)
|
||||
logger.double_check(
|
||||
"Please ensure you have setup and configured Pinecone properly for use."
|
||||
+ f"You can check out {Fore.CYAN + Style.BRIGHT}"
|
||||
"https://github.com/Torantulino/Auto-GPT#-pinecone-api-key-setup"
|
||||
f"{Style.RESET_ALL} to ensure you've set up everything correctly."
|
||||
)
|
||||
exit(1)
|
||||
|
||||
if table_name not in pinecone.list_indexes():
|
||||
pinecone.create_index(
|
||||
table_name, dimension=dimension, metric=metric, pod_type=pod_type
|
||||
)
|
||||
self.index = pinecone.Index(table_name)
|
||||
|
||||
def add(self, data):
|
||||
vector = create_embedding_with_ada(data)
|
||||
# no metadata here. We may wish to change that long term.
|
||||
self.index.upsert([(str(self.vec_num), vector, {"raw_text": data})])
|
||||
_text = f"Inserting data into memory at index: {self.vec_num}:\n data: {data}"
|
||||
self.vec_num += 1
|
||||
return _text
|
||||
|
||||
def get(self, data):
|
||||
return self.get_relevant(data, 1)
|
||||
|
||||
def clear(self):
|
||||
self.index.delete(deleteAll=True)
|
||||
return "Obliviated"
|
||||
|
||||
def get_relevant(self, data, num_relevant=5):
|
||||
"""
|
||||
Returns all the data in the memory that is relevant to the given data.
|
||||
:param data: The data to compare to.
|
||||
:param num_relevant: The number of relevant data to return. Defaults to 5
|
||||
"""
|
||||
query_embedding = create_embedding_with_ada(data)
|
||||
results = self.index.query(
|
||||
query_embedding, top_k=num_relevant, include_metadata=True
|
||||
)
|
||||
sorted_results = sorted(results.matches, key=lambda x: x.score)
|
||||
return [str(item["metadata"]["raw_text"]) for item in sorted_results]
|
||||
|
||||
def get_stats(self):
|
||||
return self.index.describe_index_stats()
|
||||
156
autogpt/memory/redismem.py
Normal file
156
autogpt/memory/redismem.py
Normal file
@ -0,0 +1,156 @@
|
||||
"""Redis memory provider."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import redis
|
||||
from colorama import Fore, Style
|
||||
from redis.commands.search.field import TextField, VectorField
|
||||
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
|
||||
from redis.commands.search.query import Query
|
||||
|
||||
from autogpt.llm_utils import create_embedding_with_ada
|
||||
from autogpt.logs import logger
|
||||
from autogpt.memory.base import MemoryProviderSingleton
|
||||
|
||||
SCHEMA = [
|
||||
TextField("data"),
|
||||
VectorField(
|
||||
"embedding",
|
||||
"HNSW",
|
||||
{"TYPE": "FLOAT32", "DIM": 1536, "DISTANCE_METRIC": "COSINE"},
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
class RedisMemory(MemoryProviderSingleton):
|
||||
def __init__(self, cfg):
|
||||
"""
|
||||
Initializes the Redis memory provider.
|
||||
|
||||
Args:
|
||||
cfg: The config object.
|
||||
|
||||
Returns: None
|
||||
"""
|
||||
redis_host = cfg.redis_host
|
||||
redis_port = cfg.redis_port
|
||||
redis_password = cfg.redis_password
|
||||
self.dimension = 1536
|
||||
self.redis = redis.Redis(
|
||||
host=redis_host,
|
||||
port=redis_port,
|
||||
password=redis_password,
|
||||
db=0, # Cannot be changed
|
||||
)
|
||||
self.cfg = cfg
|
||||
|
||||
# Check redis connection
|
||||
try:
|
||||
self.redis.ping()
|
||||
except redis.ConnectionError as e:
|
||||
logger.typewriter_log(
|
||||
"FAILED TO CONNECT TO REDIS",
|
||||
Fore.RED,
|
||||
Style.BRIGHT + str(e) + Style.RESET_ALL,
|
||||
)
|
||||
logger.double_check(
|
||||
"Please ensure you have setup and configured Redis properly for use. "
|
||||
+ f"You can check out {Fore.CYAN + Style.BRIGHT}"
|
||||
f"https://github.com/Torantulino/Auto-GPT#redis-setup{Style.RESET_ALL}"
|
||||
" to ensure you've set up everything correctly."
|
||||
)
|
||||
exit(1)
|
||||
|
||||
if cfg.wipe_redis_on_start:
|
||||
self.redis.flushall()
|
||||
try:
|
||||
self.redis.ft(f"{cfg.memory_index}").create_index(
|
||||
fields=SCHEMA,
|
||||
definition=IndexDefinition(
|
||||
prefix=[f"{cfg.memory_index}:"], index_type=IndexType.HASH
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
print("Error creating Redis search index: ", e)
|
||||
existing_vec_num = self.redis.get(f"{cfg.memory_index}-vec_num")
|
||||
self.vec_num = int(existing_vec_num.decode("utf-8")) if existing_vec_num else 0
|
||||
|
||||
def add(self, data: str) -> str:
|
||||
"""
|
||||
Adds a data point to the memory.
|
||||
|
||||
Args:
|
||||
data: The data to add.
|
||||
|
||||
Returns: Message indicating that the data has been added.
|
||||
"""
|
||||
if "Command Error:" in data:
|
||||
return ""
|
||||
vector = create_embedding_with_ada(data)
|
||||
vector = np.array(vector).astype(np.float32).tobytes()
|
||||
data_dict = {b"data": data, "embedding": vector}
|
||||
pipe = self.redis.pipeline()
|
||||
pipe.hset(f"{self.cfg.memory_index}:{self.vec_num}", mapping=data_dict)
|
||||
_text = (
|
||||
f"Inserting data into memory at index: {self.vec_num}:\n" f"data: {data}"
|
||||
)
|
||||
self.vec_num += 1
|
||||
pipe.set(f"{self.cfg.memory_index}-vec_num", self.vec_num)
|
||||
pipe.execute()
|
||||
return _text
|
||||
|
||||
def get(self, data: str) -> list[Any] | None:
|
||||
"""
|
||||
Gets the data from the memory that is most relevant to the given data.
|
||||
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
|
||||
Returns: The most relevant data.
|
||||
"""
|
||||
return self.get_relevant(data, 1)
|
||||
|
||||
def clear(self) -> str:
|
||||
"""
|
||||
Clears the redis server.
|
||||
|
||||
Returns: A message indicating that the memory has been cleared.
|
||||
"""
|
||||
self.redis.flushall()
|
||||
return "Obliviated"
|
||||
|
||||
def get_relevant(self, data: str, num_relevant: int = 5) -> list[Any] | None:
|
||||
"""
|
||||
Returns all the data in the memory that is relevant to the given data.
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
num_relevant: The number of relevant data to return.
|
||||
|
||||
Returns: A list of the most relevant data.
|
||||
"""
|
||||
query_embedding = create_embedding_with_ada(data)
|
||||
base_query = f"*=>[KNN {num_relevant} @embedding $vector AS vector_score]"
|
||||
query = (
|
||||
Query(base_query)
|
||||
.return_fields("data", "vector_score")
|
||||
.sort_by("vector_score")
|
||||
.dialect(2)
|
||||
)
|
||||
query_vector = np.array(query_embedding).astype(np.float32).tobytes()
|
||||
|
||||
try:
|
||||
results = self.redis.ft(f"{self.cfg.memory_index}").search(
|
||||
query, query_params={"vector": query_vector}
|
||||
)
|
||||
except Exception as e:
|
||||
print("Error calling Redis search: ", e)
|
||||
return None
|
||||
return [result.data for result in results.docs]
|
||||
|
||||
def get_stats(self):
|
||||
"""
|
||||
Returns: The stats of the memory index.
|
||||
"""
|
||||
return self.redis.ft(f"{self.cfg.memory_index}").info()
|
||||
126
autogpt/memory/weaviate.py
Normal file
126
autogpt/memory/weaviate.py
Normal file
@ -0,0 +1,126 @@
|
||||
import weaviate
|
||||
from weaviate import Client
|
||||
from weaviate.embedded import EmbeddedOptions
|
||||
from weaviate.util import generate_uuid5
|
||||
|
||||
from autogpt.llm_utils import get_ada_embedding
|
||||
from autogpt.memory.base import MemoryProviderSingleton
|
||||
|
||||
|
||||
def default_schema(weaviate_index):
|
||||
return {
|
||||
"class": weaviate_index,
|
||||
"properties": [
|
||||
{
|
||||
"name": "raw_text",
|
||||
"dataType": ["text"],
|
||||
"description": "original text for the embedding",
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
class WeaviateMemory(MemoryProviderSingleton):
|
||||
def __init__(self, cfg):
|
||||
auth_credentials = self._build_auth_credentials(cfg)
|
||||
|
||||
url = f"{cfg.weaviate_protocol}://{cfg.weaviate_host}:{cfg.weaviate_port}"
|
||||
|
||||
if cfg.use_weaviate_embedded:
|
||||
self.client = Client(
|
||||
embedded_options=EmbeddedOptions(
|
||||
hostname=cfg.weaviate_host,
|
||||
port=int(cfg.weaviate_port),
|
||||
persistence_data_path=cfg.weaviate_embedded_path,
|
||||
)
|
||||
)
|
||||
|
||||
print(
|
||||
f"Weaviate Embedded running on: {url} with persistence path: {cfg.weaviate_embedded_path}"
|
||||
)
|
||||
else:
|
||||
self.client = Client(url, auth_client_secret=auth_credentials)
|
||||
|
||||
self.index = WeaviateMemory.format_classname(cfg.memory_index)
|
||||
self._create_schema()
|
||||
|
||||
@staticmethod
|
||||
def format_classname(index):
|
||||
# weaviate uses capitalised index names
|
||||
# The python client uses the following code to format
|
||||
# index names before the corresponding class is created
|
||||
index = index.replace("-", "_")
|
||||
if len(index) == 1:
|
||||
return index.capitalize()
|
||||
return index[0].capitalize() + index[1:]
|
||||
|
||||
def _create_schema(self):
|
||||
schema = default_schema(self.index)
|
||||
if not self.client.schema.contains(schema):
|
||||
self.client.schema.create_class(schema)
|
||||
|
||||
def _build_auth_credentials(self, cfg):
|
||||
if cfg.weaviate_username and cfg.weaviate_password:
|
||||
return weaviate.AuthClientPassword(
|
||||
cfg.weaviate_username, cfg.weaviate_password
|
||||
)
|
||||
if cfg.weaviate_api_key:
|
||||
return weaviate.AuthApiKey(api_key=cfg.weaviate_api_key)
|
||||
else:
|
||||
return None
|
||||
|
||||
def add(self, data):
|
||||
vector = get_ada_embedding(data)
|
||||
|
||||
doc_uuid = generate_uuid5(data, self.index)
|
||||
data_object = {"raw_text": data}
|
||||
|
||||
with self.client.batch as batch:
|
||||
batch.add_data_object(
|
||||
uuid=doc_uuid,
|
||||
data_object=data_object,
|
||||
class_name=self.index,
|
||||
vector=vector,
|
||||
)
|
||||
|
||||
return f"Inserting data into memory at uuid: {doc_uuid}:\n data: {data}"
|
||||
|
||||
def get(self, data):
|
||||
return self.get_relevant(data, 1)
|
||||
|
||||
def clear(self):
|
||||
self.client.schema.delete_all()
|
||||
|
||||
# weaviate does not yet have a neat way to just remove the items in an index
|
||||
# without removing the entire schema, therefore we need to re-create it
|
||||
# after a call to delete_all
|
||||
self._create_schema()
|
||||
|
||||
return "Obliterated"
|
||||
|
||||
def get_relevant(self, data, num_relevant=5):
|
||||
query_embedding = get_ada_embedding(data)
|
||||
try:
|
||||
results = (
|
||||
self.client.query.get(self.index, ["raw_text"])
|
||||
.with_near_vector({"vector": query_embedding, "certainty": 0.7})
|
||||
.with_limit(num_relevant)
|
||||
.do()
|
||||
)
|
||||
|
||||
if len(results["data"]["Get"][self.index]) > 0:
|
||||
return [
|
||||
str(item["raw_text"]) for item in results["data"]["Get"][self.index]
|
||||
]
|
||||
else:
|
||||
return []
|
||||
|
||||
except Exception as err:
|
||||
print(f"Unexpected error {err=}, {type(err)=}")
|
||||
return []
|
||||
|
||||
def get_stats(self):
|
||||
result = self.client.query.aggregate(self.index).with_meta_count().do()
|
||||
class_data = result["data"]["Aggregate"][self.index]
|
||||
|
||||
return class_data[0]["meta"] if class_data else {}
|
||||
Reference in New Issue
Block a user