# gpt_academic/autogpt/llm/chat.py
from __future__ import annotations

import time
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from autogpt.agent.agent import Agent

from autogpt.config import Config
from autogpt.llm.api_manager import ApiManager
from autogpt.llm.base import ChatSequence, Message
from autogpt.llm.utils import count_message_tokens, create_chat_completion
from autogpt.log_cycle.log_cycle import CURRENT_CONTEXT_FILE_NAME
from autogpt.logs import logger


# TODO: Change debug from hardcode to argument
def chat_with_ai(
    config: Config,
    agent: Agent,
    system_prompt: str,
    user_input: str,
    token_limit: int,
    model: str | None = None,
):
"""
Interact with the OpenAI API, sending the prompt, user input,
message history, and permanent memory.
Args:
config (Config): The config to use.
agent (Agent): The agent to use.
system_prompt (str): The prompt explaining the rules to the AI.
user_input (str): The input from the user.
token_limit (int): The maximum number of tokens allowed in the API call.
model (str, optional): The model to use. If None, the config.fast_llm_model will be used. Defaults to None.
Returns:
str: The AI's response.
"""
    if model is None:
        model = config.fast_llm_model

    # Reserve 1000 tokens for the response
    logger.debug(f"Token limit: {token_limit}")
    send_token_limit = token_limit - 1000
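    # Illustrative arithmetic (example numbers, not from the original code):
    # with token_limit=4000 the prompt is capped at 3000 tokens, leaving at
    # least 1000 for the reply, which is passed as max_tokens further below.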

    # if len(agent.history) == 0:
    #     relevant_memory = ""
    # else:
    #     recent_history = agent.history[-5:]
    #     shuffle(recent_history)
    #     relevant_memories = agent.memory.get_relevant(
    #         str(recent_history), 5
    #     )
    #     if relevant_memories:
    #         shuffle(relevant_memories)
    #     relevant_memory = str(relevant_memories)
    # logger.debug(f"Memory Stats: {agent.memory.get_stats()}")
    relevant_memory = []

    message_sequence = ChatSequence.for_model(
        model,
        [
            Message("system", system_prompt),
            Message("system", f"The current time and date is {time.strftime('%c')}"),
            # Message(
            #     "system",
            #     f"This reminds you of these events from your past:\n{relevant_memory}\n\n",
            # ),
        ],
    )

    # Add messages from the full message history until we reach the token limit
    next_message_to_add_index = len(agent.history) - 1
    insertion_index = len(message_sequence)
    # Count the currently used tokens
    current_tokens_used = message_sequence.token_length
    # while current_tokens_used > 2500:
    #     # remove memories until we are under 2500 tokens
    #     relevant_memory = relevant_memory[:-1]
    #     (
    #         next_message_to_add_index,
    #         current_tokens_used,
    #         insertion_index,
    #         current_context,
    #     ) = generate_context(
    #         prompt, relevant_memory, agent.history, model
    #     )

    # Account for user input (appended later)
    user_input_msg = Message("user", user_input)
    current_tokens_used += count_message_tokens([user_input_msg], model)

    current_tokens_used += 500  # Reserve space for new_summary_message

    # Add messages until the token limit is reached or there are no more messages to add.
    for cycle in reversed(list(agent.history.per_cycle())):
        messages_to_add = [msg for msg in cycle if msg is not None]
        tokens_to_add = count_message_tokens(messages_to_add, model)
        if current_tokens_used + tokens_to_add > send_token_limit:
            break

        # Add the most recent message to the start of the chain,
        # after the system prompts.
        message_sequence.insert(insertion_index, *messages_to_add)
        current_tokens_used += tokens_to_add
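    # Because every cycle is inserted at the same insertion_index while the
    # history is walked newest-to-oldest, earlier cycles land above later
    # ones, so the final prompt stays in chronological order.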

    # Update & add summary of trimmed messages
    if len(agent.history) > 0:
        new_summary_message, trimmed_messages = agent.history.trim_messages(
            current_message_chain=list(message_sequence),
        )
        tokens_to_add = count_message_tokens([new_summary_message], model)
        message_sequence.insert(insertion_index, new_summary_message)
        current_tokens_used += tokens_to_add - 500
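        # The 500 tokens reserved above for new_summary_message are released
        # here: only the summary's actual token count stays in the running total.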

        # FIXME: uncomment when memory is back in use
        # memory_store = get_memory(cfg)
        # for _, ai_msg, result_msg in agent.history.per_cycle(trimmed_messages):
        #     memory_to_add = MemoryItem.from_ai_action(ai_msg, result_msg)
        #     logger.debug(f"Storing the following memory:\n{memory_to_add.dump()}")
        #     memory_store.add(memory_to_add)

    api_manager = ApiManager()
    # inform the AI about its remaining budget (if it has one)
    if api_manager.get_total_budget() > 0.0:
        remaining_budget = (
            api_manager.get_total_budget() - api_manager.get_total_cost()
        )
        if remaining_budget < 0:
            remaining_budget = 0
        budget_message = f"Your remaining API budget is ${remaining_budget:.3f}" + (
            " BUDGET EXCEEDED! SHUT DOWN!\n\n"
            if remaining_budget == 0
            else " Budget very nearly exceeded! Shut down gracefully!\n\n"
            if remaining_budget < 0.005
            else " Budget nearly exceeded. Finish up.\n\n"
            if remaining_budget < 0.01
            else "\n\n"
        )
        logger.debug(budget_message)
        message_sequence.add("system", budget_message)
        current_tokens_used += count_message_tokens([message_sequence[-1]], model)
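        # Because remaining_budget is clamped at zero above, a negative balance
        # is reported as $0.000 together with the shutdown instruction.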

    # Append user input; the length of this is accounted for above
    message_sequence.append(user_input_msg)

    plugin_count = len(config.plugins)
    for i, plugin in enumerate(config.plugins):
        if not plugin.can_handle_on_planning():
            continue
        plugin_response = plugin.on_planning(
            agent.config.prompt_generator, message_sequence.raw()
        )
        if not plugin_response or plugin_response == "":
            continue
        tokens_to_add = count_message_tokens(
            [Message("system", plugin_response)], model
        )
        if current_tokens_used + tokens_to_add > send_token_limit:
            logger.debug(f"Plugin response too long, skipping: {plugin_response}")
            logger.debug(f"Plugins remaining at stop: {plugin_count - i}")
            break
        message_sequence.add("system", plugin_response)

    # Calculate remaining tokens
    tokens_remaining = token_limit - current_tokens_used
    # assert tokens_remaining >= 0, (
    #     "Tokens remaining is negative. This should never happen, please "
    #     "submit a bug report at https://www.github.com/Torantulino/Auto-GPT"
    # )

    # Debug print the current context
    logger.debug(f"Token limit: {token_limit}")
    logger.debug(f"Send Token Count: {current_tokens_used}")
    logger.debug(f"Tokens remaining for response: {tokens_remaining}")
logger.debug("------------ CONTEXT SENT TO AI ---------------")
for message in message_sequence:
# Skip printing the prompt
if message.role == "system" and message.content == system_prompt:
continue
logger.debug(f"{message.role.capitalize()}: {message.content}")
logger.debug("")
logger.debug("----------- END OF CONTEXT ----------------")
agent.log_cycle_handler.log_cycle(
agent.config.ai_name,
agent.created_at,
agent.cycle_count,
message_sequence.raw(),
CURRENT_CONTEXT_FILE_NAME,
)

    # TODO: use a model defined elsewhere, so that model can contain
    # temperature and other settings we care about
    assistant_reply = create_chat_completion(
        prompt=message_sequence,
        max_tokens=tokens_remaining,
    )

    # Update full message history
    agent.history.append(user_input_msg)
    agent.history.add("assistant", assistant_reply, "ai_response")

    return assistant_reply
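

# Usage sketch (illustrative, not part of the original module): how a caller
# might drive chat_with_ai. `config` and `agent` are assumed to be an already
# initialized Config and Agent; `agent.system_prompt` is an assumed attribute
# holding the prompt the agent was built with; token_limit covers both the
# prompt and the model's reply.
#
#     assistant_reply = chat_with_ai(
#         config=config,
#         agent=agent,
#         system_prompt=agent.system_prompt,  # assumed attribute
#         user_input="Determine which next command to use.",
#         token_limit=4000,
#     )
#     logger.info(assistant_reply)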