From c180a199b3e8d525c9d48117a7870fb068ea86f6 Mon Sep 17 00:00:00 2001 From: Max Whitton Date: Fri, 25 Oct 2024 13:23:37 -0400 Subject: [PATCH 1/5] new file --- .gitignore | 3 +- BRANCH | 0 agent/agents.py | 95 +++++++++++++++++++++ agent/run_agent.py | 178 ++++++++++++++++++++++++++++++++++++++-- agent/team.py | 127 ++++++++++++++++++++++++++++ processing_summary.json | 14 ++++ run | 0 7 files changed, 409 insertions(+), 8 deletions(-) create mode 100644 BRANCH create mode 100644 agent/team.py create mode 100644 processing_summary.json create mode 100644 run diff --git a/.gitignore b/.gitignore index 9062184..b347b86 100644 --- a/.gitignore +++ b/.gitignore @@ -167,4 +167,5 @@ config.yml hydra_outputs/ .commit0* .agent* -docs/analysis*.md \ No newline at end of file +docs/analysis*.md +.aider* diff --git a/BRANCH b/BRANCH new file mode 100644 index 0000000..e69de29 diff --git a/agent/agents.py b/agent/agents.py index 6e7d9d8..d14c656 100644 --- a/agent/agents.py +++ b/agent/agents.py @@ -49,6 +49,101 @@ def run(self) -> AgentReturn: """Start agent""" raise NotImplementedError +class AgentTeams(Agents): + def __init__(self, max_iteration: int, model_name: str): + super().__init__(max_iteration) + self.model = Model(model_name) + def run( + self, + message: str, + test_cmd: str, + lint_cmd: str, + fnames: list[str], + log_dir: Path, + test_first: bool = False, + ) -> AgentReturn: + """Start agent team""" + if test_cmd: + auto_test = True + else: + auto_test = False + if lint_cmd: + auto_lint = True + else: + auto_lint = False + log_dir = log_dir.resolve() + log_dir.mkdir(parents=True, exist_ok=True) + input_history_file = log_dir / ".team.input.history" + chat_history_file = log_dir / ".team.chat.history.md" + + # Set up logging + log_file = log_dir / "team.log" + logging.basicConfig( + filename=log_file, + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + + # Redirect print statements to the log file + sys.stdout = open(log_file, "a") + sys.stderr = open(log_file, "a") + + # Log the message + agent_message_log_file = log_dir / "agent_message.log" + with open(agent_message_log_file, "a") as f: + f.write(f"Message Sent: {message}\n\n") + + # Configure httpx and backoff logging + handle_logging("httpx", log_file) + handle_logging("backoff", log_file) + + io = InputOutput( + yes=True, + input_history_file=input_history_file, + chat_history_file=chat_history_file, + ) + manager = Coder.create( + main_model=self.model, + fnames=fnames, + auto_lint=auto_lint, + auto_test=auto_test, + lint_cmds={"python": lint_cmd}, + test_cmd=test_cmd, + io=io, + ) + manager.max_reflection = 1 + manager.stream = True + + # Run the agent + manager_message = "First, add every file in the repo to your conversation. Second, write a plan of attack to implement the entire repo and return this plan." + manager.run(manager_message) + + + coder = Coder.create( + main_model=self.model, + fnames=fnames, + auto_lint=auto_lint, + auto_test=auto_test, + lint_cmds={"python": lint_cmd}, + test_cmd=test_cmd, + io=io, + ) + coder.max_reflection = self.max_iteration + coder.stream = True + + # Run the agent + with open(chat_history_file, 'r', encoding='utf-8') as file: + plan = file.read() + coder_message = "follow this implementation plan: "+plan + coder.run(coder_message) + + sys.stdout.close() + sys.stderr.close() + # Restore original stdout and stderr + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + + return AgentReturn(log_file) class AiderAgents(Agents): def __init__(self, max_iteration: int, model_name: str): diff --git a/agent/run_agent.py b/agent/run_agent.py index 5315086..b214ffa 100644 --- a/agent/run_agent.py +++ b/agent/run_agent.py @@ -1,4 +1,5 @@ import os +from typing import Any import yaml import multiprocessing from datasets import load_dataset @@ -12,6 +13,7 @@ read_yaml_config, ) from agent.agents import AiderAgents +from agent.agents import AgentTeams from typing import Optional, Type, cast from types import TracebackType from agent.class_types import AgentConfig @@ -42,8 +44,44 @@ def __exit__( ) -> None: os.chdir(self.cwd) - def run_agent_for_repo( + commit0_config: Any, + repo_base_dir: str, + agent_config: AgentConfig, + example: RepoInstance, + update_queue: multiprocessing.Queue, + branch: str, + override_previous_changes: bool = False, + backend: str = "modal", + log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()), +) -> None: + if agent_config.agent_name == "aider": + run_aider_for_repo( + commit0_config["base_dir"], + agent_config, + cast(RepoInstance, example), + update_queue, + branch, + override_previous_changes, + backend, + log_dir, + ) + elif agent_config.agent_name == "aider_team": + run_team_for_repo( + commit0_config["base_dir"], + agent_config, + cast(RepoInstance, example), + update_queue, + branch, + override_previous_changes, + backend, + log_dir, + ) + else: + raise NotImplementedError( + f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py." + ) +def run_aider_for_repo( repo_base_dir: str, agent_config: AgentConfig, example: RepoInstance, @@ -53,6 +91,8 @@ def run_agent_for_repo( backend: str = "modal", log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()), ) -> None: + + agent = AiderAgents(agent_config.max_iteration, agent_config.model_name) """Run Aider for a given repository.""" # get repo info _, repo_name = example["repo"].split("/") @@ -74,12 +114,7 @@ def run_agent_for_repo( f"{repo_path} is not a git repo. Check if base_dir is correctly specified." ) - if agent_config.agent_name == "aider": - agent = AiderAgents(agent_config.max_iteration, agent_config.model_name) - else: - raise NotImplementedError( - f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py." - ) + # # if branch_name is not provided, create a new branch name based on agent_config # if branch is None: @@ -160,6 +195,7 @@ def run_agent_for_repo( update_queue.put( ("start_repo", (original_repo_name, len(target_edit_files))) ) + for f in target_edit_files: update_queue.put(("set_current_file", (repo_name, f))) dependencies = import_dependencies[f] @@ -175,6 +211,132 @@ def run_agent_for_repo( ) ) update_queue.put(("finish_repo", original_repo_name)) +def run_team_for_repo( + repo_base_dir: str, + agent_config: AgentConfig, + example: RepoInstance, + update_queue: multiprocessing.Queue, + branch: str, + override_previous_changes: bool = False, + backend: str = "modal", + log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()), +) -> None: + """Run Aider for a given repository.""" + # get repo info + _, repo_name = example["repo"].split("/") + + # before starting, display all information to terminal + original_repo_name = repo_name + update_queue.put(("start_repo", (original_repo_name, 0))) + + # repo_name = repo_name.lower() + # repo_name = repo_name.replace(".", "-") + + repo_path = os.path.join(repo_base_dir, repo_name) + repo_path = os.path.abspath(repo_path) + + try: + local_repo = Repo(repo_path) + except Exception: + raise Exception( + f"{repo_path} is not a git repo. Check if base_dir is correctly specified." + ) + + manager = AiderAgents(1, agent_config.model_name) + coder = AiderAgents(agent_config.max_iteration, agent_config.model_name) + + + # # if branch_name is not provided, create a new branch name based on agent_config + # if branch is None: + # branch = args2string(agent_config) + create_branch(local_repo, branch, example["base_commit"]) + + # in cases where the latest commit of branch is not commit 0 + # set it back to commit 0 + latest_commit = local_repo.commit(branch) + if latest_commit.hexsha != example["base_commit"] and override_previous_changes: + local_repo.git.reset("--hard", example["base_commit"]) + + target_edit_files, import_dependencies = get_target_edit_files( + local_repo, + example["src_dir"], + example["test"]["test_dir"], + str(latest_commit), + example["reference_commit"], + ) + + + # Call the commit0 get-tests command to retrieve test files + test_files_str = get_tests(repo_name, verbose=0) + test_files = sorted(list(set([i.split(":")[0] for i in test_files_str]))) + + # prepare the log dir + experiment_log_dir = ( + Path(log_dir) + / repo_name + / branch + / datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ) + experiment_log_dir.mkdir(parents=True, exist_ok=True) + + # write agent_config to .agent.yaml in the log_dir for record + agent_config_log_file = experiment_log_dir / ".agent.yaml" + with open(agent_config_log_file, "w") as agent_config_file: + yaml.dump(agent_config, agent_config_file) + + # TODO: make this path more general + commit0_dot_file_path = str(Path(repo_path).parent.parent / ".commit0.yaml") + manager_message = "Write a concise plan of attack to implement the entire repo, but don't actually do any coding. The plan should not include any reccommendations to add files and should be a maximum of 500 words." + + with DirContext(repo_path): + if agent_config is None: + raise ValueError("Invalid input") + else: + # when unit test feedback is not available, iterate over target files to edit + + update_queue.put( + ("start_repo", (original_repo_name, len(target_edit_files))) + ) + + for f in target_edit_files: + update_queue.put(("set_current_file", (repo_name, f))) + dependencies = import_dependencies[f] + file_name = "all" + file_log_dir = experiment_log_dir / file_name + lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info) + + + agent_return = manager.run(manager_message, "", lint_cmd, target_edit_files, file_log_dir) + with open(agent_return.log_file, 'r', encoding='utf-8') as file: + plan = file.read() + coder_message = "follow this implementation plan: "+plan + + agent_return = coder.run(coder_message, "", lint_cmd, target_edit_files, file_log_dir) + + # for f in target_edit_files: + # update_queue.put(("set_current_file", (repo_name, f))) + # dependencies = import_dependencies[f] + # message = update_message_with_dependencies(coder_message, dependencies) + # file_name = f.replace(".py", "").replace("/", "__") + # file_log_dir = experiment_log_dir / file_name + # lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info) + # agent_return = coder.run(message, "", lint_cmd, [f], file_log_dir) + # update_queue.put( + # ( + # "update_money_display", + # (repo_name, file_name, agent_return.last_cost), + # ) + # ) + update_queue.put( + ( + "update_money_display", + (repo_name, file_name, agent_return.last_cost), + ) + ) + + + + update_queue.put(("finish_repo", original_repo_name)) def run_agent( @@ -248,6 +410,7 @@ def run_agent( result = pool.apply_async( run_agent_for_repo, args=( + commit0_config, commit0_config["base_dir"], agent_config, cast(RepoInstance, example), @@ -256,6 +419,7 @@ def run_agent( override_previous_changes, backend, log_dir, + ), ) results.append(result) diff --git a/agent/team.py b/agent/team.py new file mode 100644 index 0000000..83ff693 --- /dev/null +++ b/agent/team.py @@ -0,0 +1,127 @@ +def run_team_for_repo( + repo_base_dir: str, + agent_config: AgentConfig, + example: RepoInstance, + update_queue: multiprocessing.Queue, + branch: str, + override_previous_changes: bool = False, + backend: str = "modal", + log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()), +) -> None: + """Run Aider for a given repository.""" + # get repo info + _, repo_name = example["repo"].split("/") + + # before starting, display all information to terminal + original_repo_name = repo_name + update_queue.put(("start_repo", (original_repo_name, 0))) + + # repo_name = repo_name.lower() + # repo_name = repo_name.replace(".", "-") + + repo_path = os.path.join(repo_base_dir, repo_name) + repo_path = os.path.abspath(repo_path) + + try: + local_repo = Repo(repo_path) + except Exception: + raise Exception( + f"{repo_path} is not a git repo. Check if base_dir is correctly specified." + ) + + manager = AiderAgents(1, agent_config.model_name) + coder = AiderAgents(agent_config.max_iteration, agent_config.model_name) + + + # # if branch_name is not provided, create a new branch name based on agent_config + # if branch is None: + # branch = args2string(agent_config) + create_branch(local_repo, branch, example["base_commit"]) + + # in cases where the latest commit of branch is not commit 0 + # set it back to commit 0 + latest_commit = local_repo.commit(branch) + if latest_commit.hexsha != example["base_commit"] and override_previous_changes: + local_repo.git.reset("--hard", example["base_commit"]) + + target_edit_files, import_dependencies = get_target_edit_files( + local_repo, + example["src_dir"], + example["test"]["test_dir"], + str(latest_commit), + example["reference_commit"], + ) + + + # Call the commit0 get-tests command to retrieve test files + test_files_str = get_tests(repo_name, verbose=0) + test_files = sorted(list(set([i.split(":")[0] for i in test_files_str]))) + + # prepare the log dir + experiment_log_dir = ( + Path(log_dir) + / repo_name + / branch + / datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ) + experiment_log_dir.mkdir(parents=True, exist_ok=True) + + # write agent_config to .agent.yaml in the log_dir for record + agent_config_log_file = experiment_log_dir / ".agent.yaml" + with open(agent_config_log_file, "w") as agent_config_file: + yaml.dump(agent_config, agent_config_file) + + # TODO: make this path more general + commit0_dot_file_path = str(Path(repo_path).parent.parent / ".commit0.yaml") + manager_message = "Write a concise plan of attack to implement the entire repo, but don't actually do any coding. The plan should not include any reccommendations to add files and should be a maximum of 500 words." + + with DirContext(repo_path): + if agent_config is None: + raise ValueError("Invalid input") + else: + # when unit test feedback is not available, iterate over target files to edit + + update_queue.put( + ("start_repo", (original_repo_name, len(target_edit_files))) + ) + + for f in target_edit_files: + update_queue.put(("set_current_file", (repo_name, f))) + dependencies = import_dependencies[f] + file_name = "all" + file_log_dir = experiment_log_dir / file_name + lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info) + + + agent_return = manager.run(manager_message, "", lint_cmd, target_edit_files, file_log_dir) + with open(agent_return.log_file, 'r', encoding='utf-8') as file: + plan = file.read() + coder_message = "follow this implementation plan: "+plan + + agent_return = coder.run(coder_message, "", lint_cmd, target_edit_files, file_log_dir) + + # for f in target_edit_files: + # update_queue.put(("set_current_file", (repo_name, f))) + # dependencies = import_dependencies[f] + # message = update_message_with_dependencies(coder_message, dependencies) + # file_name = f.replace(".py", "").replace("/", "__") + # file_log_dir = experiment_log_dir / file_name + # lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info) + # agent_return = coder.run(message, "", lint_cmd, [f], file_log_dir) + # update_queue.put( + # ( + # "update_money_display", + # (repo_name, file_name, agent_return.last_cost), + # ) + # ) + update_queue.put( + ( + "update_money_display", + (repo_name, file_name, agent_return.last_cost), + ) + ) + + + + update_queue.put(("finish_repo", original_repo_name)) + diff --git a/processing_summary.json b/processing_summary.json new file mode 100644 index 0000000..cad21cf --- /dev/null +++ b/processing_summary.json @@ -0,0 +1,14 @@ +{ + "timestamp": "2024-10-21T11:56:57.187746", + "total_time_spent": 53, + "total_files_processed": 4, + "total_money_spent": 0.15000000000000002, + "repositories": [ + { + "name": "parsel", + "time_spent": 52.766640186309814, + "files_processed": 4, + "money_spent": 0.15000000000000002 + } + ] +} \ No newline at end of file diff --git a/run b/run new file mode 100644 index 0000000..e69de29 From 51ed26c56d5401fa30781490e0dfe33914a35d9e Mon Sep 17 00:00:00 2001 From: Max Whitton Date: Fri, 25 Oct 2024 13:23:37 -0400 Subject: [PATCH 2/5] new file --- .gitignore | 3 +- BRANCH | 0 agent/agents.py | 95 +++++++++++++++++++ agent/run_agent.py | 199 ++++++++++++++++++++++++++++++++++++++-- agent/team.py | 127 +++++++++++++++++++++++++ processing_summary.json | 14 +++ run | 0 7 files changed, 428 insertions(+), 10 deletions(-) create mode 100644 BRANCH create mode 100644 agent/team.py create mode 100644 processing_summary.json create mode 100644 run diff --git a/.gitignore b/.gitignore index 9062184..b347b86 100644 --- a/.gitignore +++ b/.gitignore @@ -167,4 +167,5 @@ config.yml hydra_outputs/ .commit0* .agent* -docs/analysis*.md \ No newline at end of file +docs/analysis*.md +.aider* diff --git a/BRANCH b/BRANCH new file mode 100644 index 0000000..e69de29 diff --git a/agent/agents.py b/agent/agents.py index e908090..e0168a9 100644 --- a/agent/agents.py +++ b/agent/agents.py @@ -57,6 +57,101 @@ def get_money_cost(self) -> float: last_cost = float(match.group(1)) return last_cost +class AgentTeams(Agents): + def __init__(self, max_iteration: int, model_name: str): + super().__init__(max_iteration) + self.model = Model(model_name) + def run( + self, + message: str, + test_cmd: str, + lint_cmd: str, + fnames: list[str], + log_dir: Path, + test_first: bool = False, + ) -> AgentReturn: + """Start agent team""" + if test_cmd: + auto_test = True + else: + auto_test = False + if lint_cmd: + auto_lint = True + else: + auto_lint = False + log_dir = log_dir.resolve() + log_dir.mkdir(parents=True, exist_ok=True) + input_history_file = log_dir / ".team.input.history" + chat_history_file = log_dir / ".team.chat.history.md" + + # Set up logging + log_file = log_dir / "team.log" + logging.basicConfig( + filename=log_file, + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + + # Redirect print statements to the log file + sys.stdout = open(log_file, "a") + sys.stderr = open(log_file, "a") + + # Log the message + agent_message_log_file = log_dir / "agent_message.log" + with open(agent_message_log_file, "a") as f: + f.write(f"Message Sent: {message}\n\n") + + # Configure httpx and backoff logging + handle_logging("httpx", log_file) + handle_logging("backoff", log_file) + + io = InputOutput( + yes=True, + input_history_file=input_history_file, + chat_history_file=chat_history_file, + ) + manager = Coder.create( + main_model=self.model, + fnames=fnames, + auto_lint=auto_lint, + auto_test=auto_test, + lint_cmds={"python": lint_cmd}, + test_cmd=test_cmd, + io=io, + ) + manager.max_reflection = 1 + manager.stream = True + + # Run the agent + manager_message = "First, add every file in the repo to your conversation. Second, write a plan of attack to implement the entire repo and return this plan." + manager.run(manager_message) + + + coder = Coder.create( + main_model=self.model, + fnames=fnames, + auto_lint=auto_lint, + auto_test=auto_test, + lint_cmds={"python": lint_cmd}, + test_cmd=test_cmd, + io=io, + ) + coder.max_reflection = self.max_iteration + coder.stream = True + + # Run the agent + with open(chat_history_file, 'r', encoding='utf-8') as file: + plan = file.read() + coder_message = "follow this implementation plan: "+plan + coder.run(coder_message) + + sys.stdout.close() + sys.stderr.close() + # Restore original stdout and stderr + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + + return AgentReturn(log_file) class AiderAgents(Agents): def __init__(self, max_iteration: int, model_name: str): diff --git a/agent/run_agent.py b/agent/run_agent.py index a140a5a..41369b3 100644 --- a/agent/run_agent.py +++ b/agent/run_agent.py @@ -1,4 +1,5 @@ import os +from typing import Any import yaml import multiprocessing from datasets import load_dataset @@ -14,6 +15,7 @@ ) import subprocess from agent.agents import AiderAgents +from agent.agents import AgentTeams from typing import Optional, Type, cast from types import TracebackType from agent.class_types import AgentConfig @@ -44,7 +46,6 @@ def __exit__( ) -> None: os.chdir(self.cwd) - def run_agent_for_repo( repo_base_dir: str, agent_config: AgentConfig, @@ -56,9 +57,53 @@ def run_agent_for_repo( log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()), commit0_config_file: str = "", ) -> None: - """Run Aider for a given repository.""" - # get repo info + commit0_config = read_commit0_dot_file(commit0_config_file) + if agent_config.agent_name == "aider": + run_aider_for_repo( + commit0_config, + commit0_config["base_dir"], + agent_config, + cast(RepoInstance, example), + update_queue, + branch, + override_previous_changes, + backend, + log_dir, + commit0_config_file, + ) + elif agent_config.agent_name == "aider_team": + run_team_for_repo( + commit0_config, + commit0_config["base_dir"], + agent_config, + cast(RepoInstance, example), + update_queue, + branch, + override_previous_changes, + backend, + log_dir, + commit0_config_file, + ) + else: + raise NotImplementedError( + f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py." + ) + +def run_aider_for_repo( + commit0_config: Any, + repo_base_dir: str, + agent_config: AgentConfig, + example: RepoInstance, + update_queue: multiprocessing.Queue, + branch: str, + override_previous_changes: bool = False, + backend: str = "modal", + log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()), + commit0_config_file: str = "", +) -> None: + + """Run Aider for a given repository.""" assert "commit0" in commit0_config["dataset_name"] _, repo_name = example["repo"].split("/") @@ -79,12 +124,7 @@ def run_agent_for_repo( f"{repo_path} is not a git repo. Check if base_dir is correctly specified." ) - if agent_config.agent_name == "aider": - agent = AiderAgents(agent_config.max_iteration, agent_config.model_name) - else: - raise NotImplementedError( - f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py." - ) + agent = AiderAgents(agent_config.max_iteration, agent_config.model_name) # Check if there are changes in the current branch if local_repo.is_dirty(): @@ -219,6 +259,146 @@ def run_agent_for_repo( ) update_queue.put(("finish_repo", repo_name)) +def run_team_for_repo( + commit0_config: Any, + repo_base_dir: str, + agent_config: AgentConfig, + example: RepoInstance, + update_queue: multiprocessing.Queue, + branch: str, + override_previous_changes: bool = False, + backend: str = "modal", + log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()), + commit0_config_file: str = "", +) -> None: + """Run Aider Team for a given repository.""" + # get repo info + assert "commit0" in commit0_config["dataset_name"] + _, repo_name = example["repo"].split("/") + + # before starting, display all information to terminal + update_queue.put(("start_repo", (repo_name, 0))) + + # repo_name = repo_name.lower() + # repo_name = repo_name.replace(".", "-") + + repo_path = os.path.join(repo_base_dir, repo_name) + repo_path = os.path.abspath(repo_path) + + try: + local_repo = Repo(repo_path) + except Exception: + raise Exception( + f"{repo_path} is not a git repo. Check if base_dir is correctly specified." + ) + + manager = AiderAgents(1, agent_config.model_name) + coder = AiderAgents(agent_config.max_iteration, agent_config.model_name) + + # Check if there are changes in the current branch + if local_repo.is_dirty(): + # Stage all changes + local_repo.git.add(A=True) + # Commit changes with the message "left from last change" + local_repo.index.commit("left from last change") + + # # if branch_name is not provided, create a new branch name based on agent_config + # if branch is None: + # branch = args2string(agent_config) + create_branch(local_repo, branch, example["base_commit"]) + + # in cases where the latest commit of branch is not commit 0 + # set it back to commit 0 + latest_commit = local_repo.commit(branch) + if latest_commit.hexsha != example["base_commit"] and override_previous_changes: + local_repo.git.reset("--hard", example["base_commit"]) + + # get target files to edit and test files to run + target_edit_files, import_dependencies = get_target_edit_files( + local_repo, + example["src_dir"], + example["test"]["test_dir"], + branch, + example["reference_commit"], + agent_config.use_topo_sort_dependencies, + ) + + lint_files = get_changed_files_from_commits( + local_repo, "HEAD", example["base_commit"] + ) + # Call the commit0 get-tests command to retrieve test files + test_files_str = get_tests(repo_name, verbose=0) + test_files = sorted(list(set([i.split(":")[0] for i in test_files_str]))) + + # prepare the log dir + experiment_log_dir = ( + Path(log_dir) + / repo_name + / branch + / datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ) + experiment_log_dir.mkdir(parents=True, exist_ok=True) + + # write agent_config to .agent.yaml in the log_dir for record + agent_config_log_file = experiment_log_dir / ".agent.yaml" + with open(agent_config_log_file, "w") as agent_config_file: + yaml.dump(agent_config, agent_config_file) + + manager_message = "Write a concise plan of attack to implement the entire repo, but don't actually do any coding. The plan should not include any reccommendations to add files and should be a maximum of 500 words." + + with DirContext(repo_path): + if agent_config is None: + raise ValueError("Invalid input") + else: + # when unit test feedback is not available, iterate over target files to edit + + update_queue.put( + ("start_repo", (repo_name, len(target_edit_files))) + ) + + #TODO: add support for unit test / lint feedback + + for f in target_edit_files: + update_queue.put(("set_current_file", (repo_name, f))) + dependencies = import_dependencies[f] + file_name = "all" + file_log_dir = experiment_log_dir / file_name + lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info) + + + agent_return = manager.run(manager_message, "", lint_cmd, target_edit_files, file_log_dir) + with open(agent_return.log_file, 'r', encoding='utf-8') as file: + plan = file.read() + coder_message = "follow this implementation plan: "+plan + + agent_return = coder.run(coder_message, "", lint_cmd, target_edit_files, file_log_dir) + + # for f in target_edit_files: + # update_queue.put(("set_current_file", (repo_name, f))) + # dependencies = import_dependencies[f] + # message = update_message_with_dependencies(coder_message, dependencies) + # file_name = f.replace(".py", "").replace("/", "__") + # file_log_dir = experiment_log_dir / file_name + # lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info) + # agent_return = coder.run(message, "", lint_cmd, [f], file_log_dir) + # update_queue.put( + # ( + # "update_money_display", + # (repo_name, file_name, agent_return.last_cost), + # ) + # ) + update_queue.put( + ( + "update_money_display", + (repo_name, file_name, agent_return.last_cost), + ) + ) + + + + update_queue.put(("finish_repo", repo_name)) + + def run_agent( branch: str, @@ -303,6 +483,7 @@ def run_agent( result = pool.apply_async( run_agent_for_repo, args=( + commit0_config, commit0_config["base_dir"], agent_config, cast(RepoInstance, example), diff --git a/agent/team.py b/agent/team.py new file mode 100644 index 0000000..83ff693 --- /dev/null +++ b/agent/team.py @@ -0,0 +1,127 @@ +def run_team_for_repo( + repo_base_dir: str, + agent_config: AgentConfig, + example: RepoInstance, + update_queue: multiprocessing.Queue, + branch: str, + override_previous_changes: bool = False, + backend: str = "modal", + log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()), +) -> None: + """Run Aider for a given repository.""" + # get repo info + _, repo_name = example["repo"].split("/") + + # before starting, display all information to terminal + original_repo_name = repo_name + update_queue.put(("start_repo", (original_repo_name, 0))) + + # repo_name = repo_name.lower() + # repo_name = repo_name.replace(".", "-") + + repo_path = os.path.join(repo_base_dir, repo_name) + repo_path = os.path.abspath(repo_path) + + try: + local_repo = Repo(repo_path) + except Exception: + raise Exception( + f"{repo_path} is not a git repo. Check if base_dir is correctly specified." + ) + + manager = AiderAgents(1, agent_config.model_name) + coder = AiderAgents(agent_config.max_iteration, agent_config.model_name) + + + # # if branch_name is not provided, create a new branch name based on agent_config + # if branch is None: + # branch = args2string(agent_config) + create_branch(local_repo, branch, example["base_commit"]) + + # in cases where the latest commit of branch is not commit 0 + # set it back to commit 0 + latest_commit = local_repo.commit(branch) + if latest_commit.hexsha != example["base_commit"] and override_previous_changes: + local_repo.git.reset("--hard", example["base_commit"]) + + target_edit_files, import_dependencies = get_target_edit_files( + local_repo, + example["src_dir"], + example["test"]["test_dir"], + str(latest_commit), + example["reference_commit"], + ) + + + # Call the commit0 get-tests command to retrieve test files + test_files_str = get_tests(repo_name, verbose=0) + test_files = sorted(list(set([i.split(":")[0] for i in test_files_str]))) + + # prepare the log dir + experiment_log_dir = ( + Path(log_dir) + / repo_name + / branch + / datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ) + experiment_log_dir.mkdir(parents=True, exist_ok=True) + + # write agent_config to .agent.yaml in the log_dir for record + agent_config_log_file = experiment_log_dir / ".agent.yaml" + with open(agent_config_log_file, "w") as agent_config_file: + yaml.dump(agent_config, agent_config_file) + + # TODO: make this path more general + commit0_dot_file_path = str(Path(repo_path).parent.parent / ".commit0.yaml") + manager_message = "Write a concise plan of attack to implement the entire repo, but don't actually do any coding. The plan should not include any reccommendations to add files and should be a maximum of 500 words." + + with DirContext(repo_path): + if agent_config is None: + raise ValueError("Invalid input") + else: + # when unit test feedback is not available, iterate over target files to edit + + update_queue.put( + ("start_repo", (original_repo_name, len(target_edit_files))) + ) + + for f in target_edit_files: + update_queue.put(("set_current_file", (repo_name, f))) + dependencies = import_dependencies[f] + file_name = "all" + file_log_dir = experiment_log_dir / file_name + lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info) + + + agent_return = manager.run(manager_message, "", lint_cmd, target_edit_files, file_log_dir) + with open(agent_return.log_file, 'r', encoding='utf-8') as file: + plan = file.read() + coder_message = "follow this implementation plan: "+plan + + agent_return = coder.run(coder_message, "", lint_cmd, target_edit_files, file_log_dir) + + # for f in target_edit_files: + # update_queue.put(("set_current_file", (repo_name, f))) + # dependencies = import_dependencies[f] + # message = update_message_with_dependencies(coder_message, dependencies) + # file_name = f.replace(".py", "").replace("/", "__") + # file_log_dir = experiment_log_dir / file_name + # lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info) + # agent_return = coder.run(message, "", lint_cmd, [f], file_log_dir) + # update_queue.put( + # ( + # "update_money_display", + # (repo_name, file_name, agent_return.last_cost), + # ) + # ) + update_queue.put( + ( + "update_money_display", + (repo_name, file_name, agent_return.last_cost), + ) + ) + + + + update_queue.put(("finish_repo", original_repo_name)) + diff --git a/processing_summary.json b/processing_summary.json new file mode 100644 index 0000000..cad21cf --- /dev/null +++ b/processing_summary.json @@ -0,0 +1,14 @@ +{ + "timestamp": "2024-10-21T11:56:57.187746", + "total_time_spent": 53, + "total_files_processed": 4, + "total_money_spent": 0.15000000000000002, + "repositories": [ + { + "name": "parsel", + "time_spent": 52.766640186309814, + "files_processed": 4, + "money_spent": 0.15000000000000002 + } + ] +} \ No newline at end of file diff --git a/run b/run new file mode 100644 index 0000000..e69de29 From 96309deb1d193474a1eb9e19bb67d0d6f1a86417 Mon Sep 17 00:00:00 2001 From: Eugene Choi <4eugenechoi@gmail.com> Date: Mon, 28 Oct 2024 01:51:14 -0400 Subject: [PATCH 3/5] Add run_manager code, not completely working --- agent/agent_utils.py | 10 +++++ agent/agents.py | 91 +++++++++++++++++++++++++++++--------------- agent/run_agent.py | 58 +++++++++++++++++----------- 3 files changed, 105 insertions(+), 54 deletions(-) diff --git a/agent/agent_utils.py b/agent/agent_utils.py index 4fdea82..914dd9d 100644 --- a/agent/agent_utils.py +++ b/agent/agent_utils.py @@ -571,3 +571,13 @@ def read_yaml_config(config_file: str) -> dict: raise FileNotFoundError(f"The config file '{config_file}' does not exist.") with open(config_file, "r") as f: return yaml.load(f, Loader=yaml.FullLoader) + + +def parse_tasks(text: str) -> list[str]: + """Parse the tasks from the manager output.""" + tasks = [] + for line in text.strip().splitlines(): + if not line.strip()[0].isdigit(): + continue + tasks.append(line.strip()) + return tasks \ No newline at end of file diff --git a/agent/agents.py b/agent/agents.py index e0168a9..3fd7fec 100644 --- a/agent/agents.py +++ b/agent/agents.py @@ -61,6 +61,59 @@ class AgentTeams(Agents): def __init__(self, max_iteration: int, model_name: str): super().__init__(max_iteration) self.model = Model(model_name) + + def run_manager( + self, + message: str, + fnames: list[str], + log_dir: Path, + ) -> AgentReturn: + """Start agent manager""" + + log_dir = log_dir.resolve() + log_dir.mkdir(parents=True, exist_ok=True) + input_history_file = log_dir / ".manager.input.history" + chat_history_file = log_dir / ".manager.chat.history.md" + + # Set up logging + log_file = log_dir / "manager.log" + logging.basicConfig( + filename=log_file, + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + + # Redirect print statements to the log file + sys.stdout = open(log_file, "a") + sys.stderr = open(log_file, "a") + + # Configure httpx and backoff logging + handle_logging("httpx", log_file) + handle_logging("backoff", log_file) + + io = InputOutput( + yes=False, + input_history_file=input_history_file, + chat_history_file=chat_history_file, + ) + manager = Coder.create( + main_model=self.model, + read_only_fnames=fnames, + io=io, + ) + manager.max_reflection = self.max_iteration + manager.stream = True + + manager.run(message) + + sys.stdout.close() + sys.stderr.close() + # Restore original stdout and stderr + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + + return AgentReturn(log_file) + def run( self, message: str, @@ -81,11 +134,11 @@ def run( auto_lint = False log_dir = log_dir.resolve() log_dir.mkdir(parents=True, exist_ok=True) - input_history_file = log_dir / ".team.input.history" - chat_history_file = log_dir / ".team.chat.history.md" + input_history_file = log_dir / ".coder.input.history" + chat_history_file = log_dir / ".coder.chat.history.md" # Set up logging - log_file = log_dir / "team.log" + log_file = log_dir / "coder.log" logging.basicConfig( filename=log_file, level=logging.INFO, @@ -96,11 +149,6 @@ def run( sys.stdout = open(log_file, "a") sys.stderr = open(log_file, "a") - # Log the message - agent_message_log_file = log_dir / "agent_message.log" - with open(agent_message_log_file, "a") as f: - f.write(f"Message Sent: {message}\n\n") - # Configure httpx and backoff logging handle_logging("httpx", log_file) handle_logging("backoff", log_file) @@ -110,26 +158,12 @@ def run( input_history_file=input_history_file, chat_history_file=chat_history_file, ) - manager = Coder.create( - main_model=self.model, - fnames=fnames, - auto_lint=auto_lint, - auto_test=auto_test, - lint_cmds={"python": lint_cmd}, - test_cmd=test_cmd, - io=io, - ) - manager.max_reflection = 1 - manager.stream = True - - # Run the agent - manager_message = "First, add every file in the repo to your conversation. Second, write a plan of attack to implement the entire repo and return this plan." - manager.run(manager_message) - coder = Coder.create( main_model=self.model, - fnames=fnames, + + #make the coder import files on its own for now + fnames=[], auto_lint=auto_lint, auto_test=auto_test, lint_cmds={"python": lint_cmd}, @@ -138,12 +172,7 @@ def run( ) coder.max_reflection = self.max_iteration coder.stream = True - - # Run the agent - with open(chat_history_file, 'r', encoding='utf-8') as file: - plan = file.read() - coder_message = "follow this implementation plan: "+plan - coder.run(coder_message) + coder.run(message) sys.stdout.close() sys.stderr.close() diff --git a/agent/run_agent.py b/agent/run_agent.py index 71bc536..567cfbb 100644 --- a/agent/run_agent.py +++ b/agent/run_agent.py @@ -12,6 +12,7 @@ update_message_with_dependencies, get_lint_cmd, read_yaml_config, + parse_tasks, ) import subprocess from agent.agents import AiderAgents @@ -293,8 +294,8 @@ def run_team_for_repo( f"{repo_path} is not a git repo. Check if base_dir is correctly specified." ) - manager = AiderAgents(1, agent_config.model_name) - coder = AiderAgents(agent_config.max_iteration, agent_config.model_name) + manager = AgentTeams(agent_config.max_iteration, agent_config.model_name) + coder = AgentTeams(agent_config.max_iteration, agent_config.model_name) # Check if there are changes in the current branch if local_repo.is_dirty(): @@ -345,7 +346,8 @@ def run_team_for_repo( with open(agent_config_log_file, "w") as agent_config_file: yaml.dump(agent_config, agent_config_file) - manager_message = "Write a concise plan of attack to implement the entire repo, but don't actually do any coding. The plan should not include any reccommendations to add files and should be a maximum of 500 words." + # /ask will make aider not write any code, but only a plan + manager_message = "/ask You are a manager in charge of writing a plan to complete the implementations for all functions (i.e., those with pass statements) and pass the unit tests. Write a concise plan of attack to implement the entire repo, but don't actually do any coding. Please output the plan in the format of a list of numbered steps. Each step should specify a file to edit and a high-level description of the change to make. For example, '1.) file.py: add a function to calculate the sum of two numbers'. Note that we only need to edit the files that contain functions with pass statements, ie. those in the current context. Give me only the plan, with no extraneous text." with DirContext(repo_path): if agent_config is None: @@ -364,36 +366,46 @@ def run_team_for_repo( dependencies = import_dependencies[f] file_name = "all" file_log_dir = experiment_log_dir / file_name - lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info) + lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info, commit0_config_file) + + """ + #uncommenting below works, but the manager.run_manager line doesnt work idk why - agent_return = manager.run(manager_message, "", lint_cmd, target_edit_files, file_log_dir) - with open(agent_return.log_file, 'r', encoding='utf-8') as file: - plan = file.read() - coder_message = "follow this implementation plan: "+plan - + coder_message = f"Complete the following task, implementing the relevant incomplete functions (i.e., those with pass statements). You may add the specified file to the context if necessary:" + agent_return = coder.run(coder_message, "", lint_cmd, target_edit_files, file_log_dir) + """ + + agent_return = manager.run_manager(manager_message, target_edit_files, file_log_dir) + + #TODO: uncomment below after figuring out why manager.run_manager doesnt work + + # with open(agent_return.log_file, 'r', encoding='utf-8') as file: + # plan = file.read() + + # update_queue.put( + # ( + # "update_money_display", + # (repo_name, file_name, agent_return.last_cost), + # ) + # ) + + # tasks = parse_tasks(plan) - # for f in target_edit_files: - # update_queue.put(("set_current_file", (repo_name, f))) - # dependencies = import_dependencies[f] - # message = update_message_with_dependencies(coder_message, dependencies) - # file_name = f.replace(".py", "").replace("/", "__") - # file_log_dir = experiment_log_dir / file_name - # lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info) - # agent_return = coder.run(message, "", lint_cmd, [f], file_log_dir) + # for task in tasks: + # coder_message = f"Complete the following task, implementing the relevant incomplete functions (i.e., those with pass statements). You may add the specified file to the context if necessary: \n{task}" + + # agent_return = coder.run(coder_message, "", lint_cmd, target_edit_files, file_log_dir) + # #TODO: fix the display (right now it just displys one file) + + # update_queue.put( # ( # "update_money_display", # (repo_name, file_name, agent_return.last_cost), # ) # ) - update_queue.put( - ( - "update_money_display", - (repo_name, file_name, agent_return.last_cost), - ) - ) From 245400d7f75403bc31c7b2a94cd8f412c8df797d Mon Sep 17 00:00:00 2001 From: Eugene Choi <4eugenechoi@gmail.com> Date: Wed, 6 Nov 2024 14:59:44 -0500 Subject: [PATCH 4/5] Add working manager code --- agent/agent_utils.py | 29 ++++++++++++++----- agent/agents.py | 27 +++++++++++++++--- agent/run_agent.py | 67 ++++++++++++++++++++++++-------------------- 3 files changed, 82 insertions(+), 41 deletions(-) diff --git a/agent/agent_utils.py b/agent/agent_utils.py index 914dd9d..1f28265 100644 --- a/agent/agent_utils.py +++ b/agent/agent_utils.py @@ -572,12 +572,27 @@ def read_yaml_config(config_file: str) -> dict: with open(config_file, "r") as f: return yaml.load(f, Loader=yaml.FullLoader) - -def parse_tasks(text: str) -> list[str]: +def parse_tasks(text: str) -> list[tuple[str, str]]: """Parse the tasks from the manager output.""" - tasks = [] - for line in text.strip().splitlines(): - if not line.strip()[0].isdigit(): - continue - tasks.append(line.strip()) + tasks = [] + + # Extract the portion between PLAN_START and PLAN_END + plan_match = re.search(r"PLAN_START(.*?)PLAN_END", text, re.DOTALL) + if not plan_match: + return tasks + + # Get the plan content and split by patterns that indicate the start of a new task + plan_text = plan_match.group(1).strip() + task_blocks = re.split(r"\d+\.?\)?\s+", plan_text) # Split at task numbers + + for block in task_blocks: + if not block.strip(): + continue + # Match the file name and task description + match = re.search(r"([\w\-/\.0-9]+\.\w+):\s*(.*)", block.strip(), re.DOTALL) + if match: + file_name = match.group(1) + description = re.sub(r'\s+', ' ', match.group(2)).strip() # Remove extra spaces + tasks.append((file_name, description)) + return tasks \ No newline at end of file diff --git a/agent/agents.py b/agent/agents.py index 3fd7fec..0e0736b 100644 --- a/agent/agents.py +++ b/agent/agents.py @@ -57,6 +57,24 @@ def get_money_cost(self) -> float: last_cost = float(match.group(1)) return last_cost +class TeamsReturn(AgentReturn): + def __init__(self, log_file: Path): + super().__init__(log_file) + self.last_cost = self.get_money_cost() + + def get_money_cost(self) -> float: + """Get accumulated money cost from log file""" + last_cost = 0.0 + with open(self.log_file, "r") as file: + for line in file: + if "Tokens:" in line and "Cost:" in line: + match = re.search( + r"Cost: \$\d+\.\d+ message, \$(\d+\.\d+) session", line + ) + if match: + last_cost = float(match.group(1)) + return last_cost + class AgentTeams(Agents): def __init__(self, max_iteration: int, model_name: str): super().__init__(max_iteration) @@ -97,13 +115,14 @@ def run_manager( chat_history_file=chat_history_file, ) manager = Coder.create( + edit_format="ask", main_model=self.model, read_only_fnames=fnames, io=io, ) manager.max_reflection = self.max_iteration manager.stream = True - + manager.run(message) sys.stdout.close() @@ -112,7 +131,7 @@ def run_manager( sys.stdout = sys.__stdout__ sys.stderr = sys.__stderr__ - return AgentReturn(log_file) + return TeamsReturn(log_file) def run( self, @@ -163,7 +182,7 @@ def run( main_model=self.model, #make the coder import files on its own for now - fnames=[], + fnames=fnames, auto_lint=auto_lint, auto_test=auto_test, lint_cmds={"python": lint_cmd}, @@ -180,7 +199,7 @@ def run( sys.stdout = sys.__stdout__ sys.stderr = sys.__stderr__ - return AgentReturn(log_file) + return TeamsReturn(log_file) class AiderAgents(Agents): def __init__(self, max_iteration: int, model_name: str): diff --git a/agent/run_agent.py b/agent/run_agent.py index 567cfbb..7219bcf 100644 --- a/agent/run_agent.py +++ b/agent/run_agent.py @@ -15,8 +15,7 @@ parse_tasks, ) import subprocess -from agent.agents import AiderAgents -from agent.agents import AgentTeams +from agent.agents import AiderAgents, AgentTeams from typing import Optional, Type, cast from types import TracebackType from agent.class_types import AgentConfig @@ -30,7 +29,6 @@ import queue import time - class DirContext: def __init__(self, d: str): self.dir = d @@ -273,6 +271,7 @@ def run_team_for_repo( log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()), commit0_config_file: str = "", ) -> None: + """Run Aider Team for a given repository.""" # get repo info assert "commit0" in commit0_config["dataset_name"] @@ -294,7 +293,7 @@ def run_team_for_repo( f"{repo_path} is not a git repo. Check if base_dir is correctly specified." ) - manager = AgentTeams(agent_config.max_iteration, agent_config.model_name) + manager = AgentTeams(1, agent_config.model_name) coder = AgentTeams(agent_config.max_iteration, agent_config.model_name) # Check if there are changes in the current branch @@ -347,7 +346,19 @@ def run_team_for_repo( yaml.dump(agent_config, agent_config_file) # /ask will make aider not write any code, but only a plan - manager_message = "/ask You are a manager in charge of writing a plan to complete the implementations for all functions (i.e., those with pass statements) and pass the unit tests. Write a concise plan of attack to implement the entire repo, but don't actually do any coding. Please output the plan in the format of a list of numbered steps. Each step should specify a file to edit and a high-level description of the change to make. For example, '1.) file.py: add a function to calculate the sum of two numbers'. Note that we only need to edit the files that contain functions with pass statements, ie. those in the current context. Give me only the plan, with no extraneous text." + manager_message = f"""You are a manager in charge of writing a plan to complete the implementations for all functions (i.e., those with pass statements) and pass the unit tests. Write a concise plan of attack to implement the entire repo, but don't actually do any coding. Please output the plan in the format of a list of numbered steps. Each step should specify a file to edit and a high-level description of the change to make. Note that we only need to edit the files that contain functions with pass statements, ie. those in the current context. Give me only the plan, with no extraneous text. + + You MUST precede the plan with the keyword PLAN_START, and end it with the keyword PLAN_END. You MUST follow the formatting of the example plan below, with a number preceding each step on a new line, and one file name followed by a colon and a description of the change to make: + + PLAN_START + 1.) example_file.py: description of function(s) to implement in example_file.py + 2.) example_file2.py: description of function(s) to implement in example_file2.py + ... + PLAN_END + + Remember that you must modify all of the target edit files: {target_edit_files} + The plan does not neccessarily need to edit the whole file in one step, and it may be more granular as you see fit. + """ with DirContext(repo_path): if agent_config is None: @@ -361,51 +372,46 @@ def run_team_for_repo( #TODO: add support for unit test / lint feedback - for f in target_edit_files: - update_queue.put(("set_current_file", (repo_name, f))) - dependencies = import_dependencies[f] file_name = "all" file_log_dir = experiment_log_dir / file_name lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info, commit0_config_file) - """ - #uncommenting below works, but the manager.run_manager line doesnt work idk why + # #uncommenting below works, but the manager.run_manager line doesnt work idk why - coder_message = f"Complete the following task, implementing the relevant incomplete functions (i.e., those with pass statements). You may add the specified file to the context if necessary:" + # coder_message = f"Complete the following task, implementing the relevant incomplete functions (i.e., those with pass statements). You may add the specified file to the context if necessary:" - agent_return = coder.run(coder_message, "", lint_cmd, target_edit_files, file_log_dir) - """ + # agent_return = coder.run(coder_message, "", lint_cmd, target_edit_files, file_log_dir) agent_return = manager.run_manager(manager_message, target_edit_files, file_log_dir) - #TODO: uncomment below after figuring out why manager.run_manager doesnt work - - # with open(agent_return.log_file, 'r', encoding='utf-8') as file: - # plan = file.read() - # update_queue.put( # ( # "update_money_display", # (repo_name, file_name, agent_return.last_cost), # ) # ) + + with open(agent_return.log_file, 'r', encoding='utf-8') as file: + plan = file.read() - # tasks = parse_tasks(plan) + tasks = parse_tasks(plan) - # for task in tasks: - # coder_message = f"Complete the following task, implementing the relevant incomplete functions (i.e., those with pass statements). You may add the specified file to the context if necessary: \n{task}" + for file_name, description in tasks: + update_queue.put(("set_current_file", (repo_name, file_name))) - # agent_return = coder.run(coder_message, "", lint_cmd, target_edit_files, file_log_dir) - # #TODO: fix the display (right now it just displys one file) - + coder_message = f"Complete the following task, implementing the relevant incomplete functions (i.e., those with pass statements): \n{description}" + + agent_return = coder.run(coder_message, "", lint_cmd, [file_name], file_log_dir) + + #TODO: fix the display (right now it just displys one file) - # update_queue.put( - # ( - # "update_money_display", - # (repo_name, file_name, agent_return.last_cost), - # ) - # ) + update_queue.put( + ( + "update_money_display", + (repo_name, file_name, agent_return.last_cost), + ) + ) @@ -486,6 +492,7 @@ def run_agent( agent_config.use_lint_info, ) display.update_branch_display(branch) + with multiprocessing.Manager() as manager: update_queue = manager.Queue() with multiprocessing.Pool(processes=max_parallel_repos) as pool: From 1310628d8742fb6009890b96acad4d9cb40303ff Mon Sep 17 00:00:00 2001 From: Eugene Choi <4eugenechoi@gmail.com> Date: Fri, 8 Nov 2024 15:54:27 -0500 Subject: [PATCH 5/5] Update prompts & add spec to context --- agent/agents.py | 8 +++++++- agent/run_agent.py | 12 ++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/agent/agents.py b/agent/agents.py index 0e0736b..192b68f 100644 --- a/agent/agents.py +++ b/agent/agents.py @@ -8,6 +8,7 @@ from aider.io import InputOutput import re import os +import bz2 def handle_logging(logging_name: str, log_file: Path) -> None: @@ -108,6 +109,11 @@ def run_manager( # Configure httpx and backoff logging handle_logging("httpx", log_file) handle_logging("backoff", log_file) + + # Get the specifications + with bz2.open("spec.pdf.bz2", "rb") as in_file: + with open("spec.pdf", "wb") as out_file: + out_file.write(in_file.read()) io = InputOutput( yes=False, @@ -117,7 +123,7 @@ def run_manager( manager = Coder.create( edit_format="ask", main_model=self.model, - read_only_fnames=fnames, + read_only_fnames=fnames + ["spec.pdf"], io=io, ) manager.max_reflection = self.max_iteration diff --git a/agent/run_agent.py b/agent/run_agent.py index 7219bcf..43d4209 100644 --- a/agent/run_agent.py +++ b/agent/run_agent.py @@ -346,18 +346,18 @@ def run_team_for_repo( yaml.dump(agent_config, agent_config_file) # /ask will make aider not write any code, but only a plan - manager_message = f"""You are a manager in charge of writing a plan to complete the implementations for all functions (i.e., those with pass statements) and pass the unit tests. Write a concise plan of attack to implement the entire repo, but don't actually do any coding. Please output the plan in the format of a list of numbered steps. Each step should specify a file to edit and a high-level description of the change to make. Note that we only need to edit the files that contain functions with pass statements, ie. those in the current context. Give me only the plan, with no extraneous text. + manager_message = f"""You are a manager in charge of writing a plan to complete the implementations for all functions (i.e., those with pass statements) and pass the unit tests. Write a plan of attack to implement the entire repo, keeping in mind the most effective order in which tasks should be implemented. Please output the plan in the format of a list of numbered steps. Each step should specify a file to edit and a high-level description of the change to make. Note that we only need to edit the files that contain functions with pass statements, ie. those in the current context. Give me ONLY the plan, with no extraneous text. - You MUST precede the plan with the keyword PLAN_START, and end it with the keyword PLAN_END. You MUST follow the formatting of the example plan below, with a number preceding each step on a new line, and one file name followed by a colon and a description of the change to make: + You MUST precede the plan with the keyword PLAN_START, and end it with the keyword PLAN_END. You MUST follow the formatting of the example plan below, with a number preceding each step on a new line, and one file name followed by a colon and a detailed description of the change to make: PLAN_START - 1.) example_file.py: description of function(s) to implement in example_file.py - 2.) example_file2.py: description of function(s) to implement in example_file2.py + 1.) example_file.py: description of function(s) to implement in example_file.py, including any relevant context or dependencies + 2.) example_file2.py: description of function(s) to implement in example_file2.py, including any relevant context or dependencies ... PLAN_END Remember that you must modify all of the target edit files: {target_edit_files} - The plan does not neccessarily need to edit the whole file in one step, and it may be more granular as you see fit. + The plan does not neccessarily need to edit the whole file in one step, and it may be more granular as you see fit. Additionally, you should look at the file 'spec.pdf' for more information on the project requirements and specifications. """ with DirContext(repo_path): @@ -400,7 +400,7 @@ def run_team_for_repo( for file_name, description in tasks: update_queue.put(("set_current_file", (repo_name, file_name))) - coder_message = f"Complete the following task, implementing the relevant incomplete functions (i.e., those with pass statements): \n{description}" + coder_message = f"Complete the following task, implementing the relevant incomplete function(s) (i.e., those with pass statements): \n{description}" agent_return = coder.run(coder_message, "", lint_cmd, [file_name], file_log_dir)