diff --git a/src/agentlab/agents/dynamic_prompting.py b/src/agentlab/agents/dynamic_prompting.py index 92ad25b9..e5270a1d 100644 --- a/src/agentlab/agents/dynamic_prompting.py +++ b/src/agentlab/agents/dynamic_prompting.py @@ -560,6 +560,7 @@ class Hints(PromptElement): * Interacting with combobox, dropdowns and auto-complete fields can be tricky, sometimes you need to use select_option, while other times you need to use fill or click and wait for the reaction of the page. +* I want you to respond strictly with the action only. Do not say anything else. """ @@ -575,7 +576,7 @@ class ActionPrompt(PromptElement): _concrete_ex = """ click('a324') - + """ def __init__(self, action_set: AbstractActionSet, action_flags: ActionFlags) -> None: @@ -598,13 +599,13 @@ def __init__(self, action_set: AbstractActionSet, action_flags: ActionFlags) -> self._abstract_ex = f""" {self.action_set.example_action(abstract=True)} - + """ # self._concrete_ex = f""" # # {self.action_set.example_action(abstract=False)} - # + # # """ def _parse_answer(self, text_answer): @@ -789,7 +790,7 @@ def _prompt(self) -> str: prompt += f"\n<think>\n{self.thought}\n</think>\n" if self.flags.use_action_history: - prompt += f"\n<action>\n{self.action}\n</action>\n" + prompt += f"\n<action>\n{self.action}\n</action>\n" # prompt += f"{self.error.prompt}{self.html_diff.prompt}{self.ax_tree_diff.prompt}" prompt += f"{self.error.prompt}" diff --git a/src/agentlab/experiments/study.py b/src/agentlab/experiments/study.py index 391f419c..05d29557 100644 --- a/src/agentlab/experiments/study.py +++ b/src/agentlab/experiments/study.py @@ -30,6 +30,16 @@ logger = logging.getLogger(__name__) +base_url="http://webarena2.eastus.cloudapp.azure.com/" +os.environ["WA_SHOPPING"] = f"{base_url}:8082/" +os.environ["WA_SHOPPING_ADMIN"] = f"{base_url}:8083/admin" +os.environ["WA_REDDIT"] = f"{base_url}:8080" +os.environ["WA_GITLAB"] = f"{base_url}:9001" +os.environ["WA_WIKIPEDIA"] = f"{base_url}:8081/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing" 
+os.environ["WA_MAP"] = f"{base_url}:443" +os.environ["WA_HOMEPAGE"] = f"{base_url}:80" +os.environ["WA_FULL_RESET"] = f"" + def make_study( agent_args: list[AgentArgs] | AgentArgs, diff --git a/src/agentlab/llm/chat_api.py b/src/agentlab/llm/chat_api.py index 9ee68838..615c862a 100644 --- a/src/agentlab/llm/chat_api.py +++ b/src/agentlab/llm/chat_api.py @@ -324,7 +324,8 @@ def __call__(self, messages: list[dict], n_samples: int = 1, temperature: float tracking.TRACKER.instance(input_tokens, output_tokens, cost) if n_samples == 1: - res = AIMessage(completion.choices[0].message.content) + rec = split_reasoning_and_action(completion.choices[0].message.content) + res = AIMessage("<think>\nreasoning\n</think>" + rec[1]) if self.log_probs: res["log_probs"] = completion.choices[0].log_probs return res @@ -553,3 +554,54 @@ def make_model(self): temperature=self.temperature, max_tokens=self.max_new_tokens, ) + + +from typing import Tuple + +START_TAG = "[BEGIN FINAL RESPONSE]" +END_TAG = "[END FINAL RESPONSE]" +END_RESPONSE_TAG = "<|end|>" + +def split_reasoning_and_action(s: str) -> Tuple[str, str]: + """Return (reasoning_wrapped, action_wrapped) from a single string. 
+ reasoning_wrapped -> '<think>\\n{reasoning}\\n</think>' or '' if none + action_wrapped -> '<think>\\n</think>\\n<action>\\n{content}\\n</action>' + """ + txt = s.strip() + + # Locate tags + i = txt.find(START_TAG) + j = txt.find(END_TAG, i + len(START_TAG)) if i != -1 else -1 + + if i != -1 and j != -1: + reasoning = txt[:i].strip() + content = txt[i + len(START_TAG):j].strip() + else: + reasoning = "" + content = txt + + # Clean accidental echoes + if reasoning.endswith(START_TAG): + reasoning = reasoning[:-len(START_TAG)].rstrip() + if content.startswith(START_TAG): + content = content[len(START_TAG):].lstrip() + if content.endswith(END_TAG): + content = content[:-len(END_TAG)].rstrip() + if content.endswith(END_RESPONSE_TAG): + content = content[:-len(END_RESPONSE_TAG)].rstrip() + + # Normalize existing wrappers + if reasoning.startswith("<think>"): + reasoning = reasoning[len("<think>"):].lstrip() + if reasoning.endswith("</think>"): + reasoning = reasoning[:-len("</think>")].rstrip() + + # Strip any action wrappers inside content before re-wrapping + content = content.replace("<action>", "").replace("</action>", "").strip() + + reasoning_wrapped = f"<think>\n{reasoning}\n</think>" if reasoning else "" + action_wrapped = f"<think>\n</think>\n<action>\n{content}\n</action>" + + return reasoning_wrapped, action_wrapped + + diff --git a/tests/mytest.py b/tests/mytest.py new file mode 100644 index 00000000..f49a987d --- /dev/null +++ b/tests/mytest.py @@ -0,0 +1,132 @@ +from agentlab.experiments.study import make_study + +from agentlab.agents.generic_agent import AGENT_CUSTOM +from agentlab.agents.generic_agent import GenericAgentArgs +from pathlib import Path +from agentlab.llm.chat_api import ( + AnthropicModelArgs, + AzureModelArgs, + OpenAIModelArgs, + OpenRouterModelArgs, + SelfHostedModelArgs, +) + +from finetuning.toolkit_utils.chat_api import ToolkitModelArgs, VLLMModelArgs + +import os +os.environ["SNOW_INSTANCE_URL"]="https://empmassimo17.service-now.com/" +os.environ["SNOW_INSTANCE_UNAME"]="admin" +os.environ["SNOW_INSTANCE_PWD"]="AE_82fH4ZPntQuJ" +CHECKPOINT_ACCOUNT_NAME_SUFFIX 
= ( + # "ui_assist" + "adea" +) +DATA_ROOT_PATH = f"/mnt/{CHECKPOINT_ACCOUNT_NAME_SUFFIX}/data" +FINETUNING_PATH = Path(DATA_ROOT_PATH) / "finetuning" +MINIWOB_URL = str(FINETUNING_PATH) + "/.miniwob-plusplus/miniwob/html/miniwob/" +MINIWOB_URL = "file://" + MINIWOB_URL +if os.getenv("MINIWOB_URL", None) is None: + os.environ["MINIWOB_URL"] = MINIWOB_URL + +import bgym +from bgym import HighLevelActionSetArgs + +from agentlab.agents import dynamic_prompting as dp +from agentlab.experiments import args +from agentlab.llm.llm_configs import CHAT_MODEL_ARGS_DICT + +from agentlab.agents.generic_agent import GenericAgentArgs + + +from agentlab.agents.generic_agent.generic_agent_prompt import GenericPromptFlags + + + + + + +FLAGS_CUSTOM = GenericPromptFlags( + obs=dp.ObsFlags( + use_html=False, + use_ax_tree=True, + use_focused_element=True, + use_error_logs=True, + use_history=True, + use_past_error_logs=False, + use_action_history=True, + use_think_history=False, + use_diff=False, + html_type="pruned_html", + use_screenshot=False, + use_som=False, + extract_visible_tag=True, + extract_clickable_tag=False, + extract_coords="False", + filter_visible_elements_only=False, + ), + action=dp.ActionFlags( + action_set=HighLevelActionSetArgs( + subsets=["bid"], + multiaction=False, + ), + long_description=False, + individual_examples=True, + ), + use_plan=False, + use_criticise=False, + use_thinking=True, + use_memory=False, + use_concrete_example=True, + use_abstract_example=True, + use_hints=True, + enable_chat=False, + max_prompt_tokens=40_000, + be_cautious=True, + extra_instructions=None, +) + +dic={"Qwen/Qwen3-8B": VLLMModelArgs( # This is the example, already VLLMModelArgs + model_name="Qwen/Qwen3-8B", + training_total_tokens=128_000, + max_total_tokens=16000, + backend="vllm", + model_size=8, + n_gpus=2, + tensor_parallel_size=2, + # model_url="https://nonpaying-andra-coordinal.ngrok-free.dev/v1", + ), + "openai/gpt-oss-20b": VLLMModelArgs( + 
model_name="openai/gpt-oss-20b", + max_total_tokens=32_000, + model_size=21, + backend="vllm", + n_gpus=2, + tensor_parallel_size=2, + ) + , + "ServiceNow-AI/Apriel-1.5-15b-Thinker": VLLMModelArgs( + model_name="ServiceNow-AI/Apriel-1.5-15b-Thinker", + model_size=15, + max_total_tokens=40_000, + backend="vllm", + n_gpus=2, + tensor_parallel_size=2, + ),} + +FLAGS_CUSTOM.use_thinking = False +FLAGS_CUSTOM.obs.use_think_history = False + +AGENT_CUSTOM = GenericAgentArgs( + chat_model_args=dic["ServiceNow-AI/Apriel-1.5-15b-Thinker"], + flags=FLAGS_CUSTOM,) + +AGENT_CUSTOM.chat_model_args.temperature = 0.6 + +study = make_study( + benchmark="workarena_l2_agent_curriculum_eval", # or "webarena", "workarena_l1" ... + agent_args=[AGENT_CUSTOM], + comment="My first study", +) + +study.run(n_jobs=20, exp_root=Path("/home/toolkit/AGENTLAB_TEST/Workarena_l2/"), parallel_backend="ray") \ No newline at end of file