autogpt 还原,开新分支编写

This commit is contained in:
w_xiaolizu
2023-05-30 16:38:01 +08:00
parent 548532e522
commit 519df19f43
65 changed files with 3352 additions and 1868 deletions

View File

@ -1,121 +1,46 @@
"""File operations for AutoGPT"""
from __future__ import annotations
import hashlib
import os
import os.path
from typing import TYPE_CHECKING, Generator, Literal
from typing import Generator
import requests
from colorama import Back, Fore
from requests.adapters import HTTPAdapter, Retry
from autogpt.commands.command import command
from autogpt.commands.file_operations_utils import read_textual_file
from autogpt.logs import logger
from autogpt.memory.vector import MemoryItem, VectorMemory
from autogpt.config import Config
from autogpt.spinner import Spinner
from autogpt.utils import readable_file_size
if TYPE_CHECKING:
from autogpt.config import Config
CFG = Config()
Operation = Literal["write", "append", "delete"]
def text_checksum(text: str) -> str:
"""Get the hex checksum for the given text."""
return hashlib.md5(text.encode("utf-8")).hexdigest()
def operations_from_log(
log_path: str,
) -> Generator[tuple[Operation, str, str | None], None, None]:
"""Parse the file operations log and return a tuple containing the log entries"""
try:
log = open(log_path, "r", encoding="utf-8")
except FileNotFoundError:
return
for line in log:
line = line.replace("File Operation Logger", "").strip()
if not line:
continue
operation, tail = line.split(": ", maxsplit=1)
operation = operation.strip()
if operation in ("write", "append"):
try:
path, checksum = (x.strip() for x in tail.rsplit(" #", maxsplit=1))
except ValueError:
logger.warn(f"File log entry lacks checksum: '{line}'")
path, checksum = tail.strip(), None
yield (operation, path, checksum)
elif operation == "delete":
yield (operation, tail.strip(), None)
log.close()
def file_operations_state(log_path: str) -> dict[str, str]:
"""Iterates over the operations log and returns the expected state.
Parses a log file at config.file_logger_path to construct a dictionary that maps
each file path written or appended to its checksum. Deleted files are removed
from the dictionary.
Returns:
A dictionary mapping file paths to their checksums.
Raises:
FileNotFoundError: If config.file_logger_path is not found.
ValueError: If the log file content is not in the expected format.
"""
state = {}
for operation, path, checksum in operations_from_log(log_path):
if operation in ("write", "append"):
state[path] = checksum
elif operation == "delete":
del state[path]
return state
def is_duplicate_operation(
operation: Operation, filename: str, config: Config, checksum: str | None = None
) -> bool:
"""Check if the operation has already been performed
def check_duplicate_operation(operation: str, filename: str) -> bool:
"""Check if the operation has already been performed on the given file
Args:
operation: The operation to check for
filename: The name of the file to check for
checksum: The checksum of the contents to be written
operation (str): The operation to check for
filename (str): The name of the file to check for
Returns:
True if the operation has already been performed on the file
bool: True if the operation has already been performed on the file
"""
state = file_operations_state(config.file_logger_path)
if operation == "delete" and filename not in state:
return True
if operation == "write" and state.get(filename) == checksum:
return True
return False
log_content = read_file(CFG.file_logger_path)
log_entry = f"{operation}: {filename}\n"
return log_entry in log_content
def log_operation(
operation: str, filename: str, config: Config, checksum: str | None = None
) -> None:
def log_operation(operation: str, filename: str) -> None:
"""Log the file operation to the file_logger.txt
Args:
operation: The operation to log
filename: The name of the file the operation was performed on
checksum: The checksum of the contents to be written
operation (str): The operation to log
filename (str): The name of the file the operation was performed on
"""
log_entry = f"{operation}: {filename}"
if checksum is not None:
log_entry += f" #{checksum}"
logger.debug(f"Logging file operation: {log_entry}")
append_to_file(config.file_logger_path, f"{log_entry}\n", config, should_log=False)
log_entry = f"{operation}: {filename}\n"
append_to_file(CFG.file_logger_path, log_entry, should_log=False)
def split_file(
@ -138,7 +63,7 @@ def split_file(
while start < content_length:
end = start + max_length
if end + overlap < content_length:
chunk = content[start : end + max(overlap - 1, 0)]
chunk = content[start : end + overlap - 1]
else:
chunk = content[start:content_length]
@ -150,8 +75,8 @@ def split_file(
start += max_length - overlap
@command("read_file", "Read a file", '"filename": "<filename>"')
def read_file(filename: str, config: Config) -> str:
@command("read_file", "Read file", '"filename": "<filename>"')
def read_file(filename: str) -> str:
"""Read a file and return the contents
Args:
@ -161,46 +86,49 @@ def read_file(filename: str, config: Config) -> str:
str: The contents of the file
"""
try:
content = read_textual_file(filename, logger)
# TODO: invalidate/update memory when file is edited
file_memory = MemoryItem.from_text_file(content, filename)
if len(file_memory.chunks) > 1:
return file_memory.summary
with open(filename, "r", encoding="utf-8") as f:
content = f.read()
return content
except Exception as e:
return f"Error: {str(e)}"
def ingest_file(
filename: str,
memory: VectorMemory,
filename: str, memory, max_length: int = 4000, overlap: int = 200
) -> None:
"""
Ingest a file by reading its content, splitting it into chunks with a specified
maximum length and overlap, and adding the chunks to the memory storage.
Args:
filename: The name of the file to ingest
memory: An object with an add() method to store the chunks in memory
:param filename: The name of the file to ingest
:param memory: An object with an add() method to store the chunks in memory
:param max_length: The maximum length of each chunk, default is 4000
:param overlap: The number of overlapping characters between chunks, default is 200
"""
try:
logger.info(f"Ingesting file {filename}")
print(f"Working with file {filename}")
content = read_file(filename)
content_length = len(content)
print(f"File length: {content_length} characters")
# TODO: differentiate between different types of files
file_memory = MemoryItem.from_text_file(content, filename)
logger.debug(f"Created memory: {file_memory.dump()}")
memory.add(file_memory)
chunks = list(split_file(content, max_length=max_length, overlap=overlap))
logger.info(f"Ingested {len(file_memory.e_chunks)} chunks from {filename}")
except Exception as err:
logger.warn(f"Error while ingesting file '{filename}': {err}")
num_chunks = len(chunks)
for i, chunk in enumerate(chunks):
print(f"Ingesting chunk {i + 1} / {num_chunks} into memory")
memory_to_add = (
f"Filename: {filename}\n" f"Content part#{i + 1}/{num_chunks}: {chunk}"
)
memory.add(memory_to_add)
print(f"Done ingesting {num_chunks} chunks from {filename}.")
except Exception as e:
print(f"Error while ingesting file '{filename}': {str(e)}")
@command("write_to_file", "Write to file", '"filename": "<filename>", "text": "<text>"')
def write_to_file(filename: str, text: str, config: Config) -> str:
def write_to_file(filename: str, text: str) -> str:
"""Write text to a file
Args:
@ -210,26 +138,24 @@ def write_to_file(filename: str, text: str, config: Config) -> str:
Returns:
str: A message indicating success or failure
"""
checksum = text_checksum(text)
if is_duplicate_operation("write", filename, config, checksum):
if check_duplicate_operation("write", filename):
return "Error: File has already been updated."
try:
directory = os.path.dirname(filename)
os.makedirs(directory, exist_ok=True)
if not os.path.exists(directory):
os.makedirs(directory)
with open(filename, "w", encoding="utf-8") as f:
f.write(text)
log_operation("write", filename, config, checksum)
log_operation("write", filename)
return "File written to successfully."
except Exception as err:
return f"Error: {err}"
except Exception as e:
return f"Error: {str(e)}"
@command(
"append_to_file", "Append to file", '"filename": "<filename>", "text": "<text>"'
)
def append_to_file(
filename: str, text: str, config: Config, should_log: bool = True
) -> str:
def append_to_file(filename: str, text: str, should_log: bool = True) -> str:
"""Append text to a file
Args:
@ -241,23 +167,19 @@ def append_to_file(
str: A message indicating success or failure
"""
try:
directory = os.path.dirname(filename)
os.makedirs(directory, exist_ok=True)
with open(filename, "a", encoding="utf-8") as f:
with open(filename, "a") as f:
f.write(text)
if should_log:
with open(filename, "r", encoding="utf-8") as f:
checksum = text_checksum(f.read())
log_operation("append", filename, config, checksum=checksum)
log_operation("append", filename)
return "Text appended successfully."
except Exception as err:
return f"Error: {err}"
except Exception as e:
return f"Error: {str(e)}"
@command("delete_file", "Delete file", '"filename": "<filename>"')
def delete_file(filename: str, config: Config) -> str:
def delete_file(filename: str) -> str:
"""Delete a file
Args:
@ -266,19 +188,19 @@ def delete_file(filename: str, config: Config) -> str:
Returns:
str: A message indicating success or failure
"""
if is_duplicate_operation("delete", filename, config):
if check_duplicate_operation("delete", filename):
return "Error: File has already been deleted."
try:
os.remove(filename)
log_operation("delete", filename, config)
log_operation("delete", filename)
return "File deleted successfully."
except Exception as err:
return f"Error: {err}"
except Exception as e:
return f"Error: {str(e)}"
@command("list_files", "List Files in Directory", '"directory": "<directory>"')
def list_files(directory: str, config: Config) -> list[str]:
"""lists files in a directory recursively
@command("search_files", "Search Files", '"directory": "<directory>"')
def search_files(directory: str) -> list[str]:
"""Search for files in a directory
Args:
directory (str): The directory to search in
@ -293,7 +215,7 @@ def list_files(directory: str, config: Config) -> list[str]:
if file.startswith("."):
continue
relative_path = os.path.relpath(
os.path.join(root, file), config.workspace_path
os.path.join(root, file), CFG.workspace_path
)
found_files.append(relative_path)
@ -304,20 +226,18 @@ def list_files(directory: str, config: Config) -> list[str]:
"download_file",
"Download File",
'"url": "<url>", "filename": "<filename>"',
lambda config: config.allow_downloads,
CFG.allow_downloads,
"Error: You do not have user authorization to download files locally.",
)
def download_file(url, filename, config: Config):
def download_file(url, filename):
"""Downloads a file
Args:
url (str): URL of the file to download
filename (str): Filename to save the file as
"""
try:
directory = os.path.dirname(filename)
os.makedirs(directory, exist_ok=True)
message = f"{Fore.YELLOW}Downloading file from {Back.LIGHTBLUE_EX}{url}{Back.RESET}{Fore.RESET}"
with Spinner(message, plain_output=config.plain_output) as spinner:
message = f"{Fore.YELLOW}Downloading file from {Back.MAGENTA}{url}{Back.RESET}{Fore.RESET}"
with Spinner(message) as spinner:
session = requests.Session()
retry = Retry(total=3, backoff_factor=1, status_forcelist=[502, 503, 504])
adapter = HTTPAdapter(max_retries=retry)
@ -341,8 +261,8 @@ def download_file(url, filename, config: Config):
progress = f"{readable_file_size(downloaded_size)} / {readable_file_size(total_size)}"
spinner.update_message(f"{message} {progress}")
return f'Successfully downloaded and locally stored file: "{filename}"! (Size: {readable_file_size(downloaded_size)})'
except requests.HTTPError as err:
return f"Got an HTTP Error whilst trying to download file: {err}"
except Exception as err:
return f"Error: {err}"
return f'Successfully downloaded and locally stored file: "{filename}"! (Size: {readable_file_size(total_size)})'
except requests.HTTPError as e:
return f"Got an HTTP Error whilst trying to download file: {e}"
except Exception as e:
return "Error: " + str(e)