diff --git a/src/agentlab/agents/dynamic_prompting.py b/src/agentlab/agents/dynamic_prompting.py index 92ad25b9..e5270a1d 100644 --- a/src/agentlab/agents/dynamic_prompting.py +++ b/src/agentlab/agents/dynamic_prompting.py @@ -560,6 +560,7 @@ class Hints(PromptElement): * Interacting with combobox, dropdowns and auto-complete fields can be tricky, sometimes you need to use select_option, while other times you need to use fill or click and wait for the reaction of the page. +* I want you to respond strictly with the action only. Do not say anything else. """ @@ -575,7 +576,7 @@ class ActionPrompt(PromptElement): _concrete_ex = """ click('a324') - + """ def __init__(self, action_set: AbstractActionSet, action_flags: ActionFlags) -> None: @@ -598,13 +599,13 @@ def __init__(self, action_set: AbstractActionSet, action_flags: ActionFlags) -> self._abstract_ex = f""" {self.action_set.example_action(abstract=True)} - + """ # self._concrete_ex = f""" # # {self.action_set.example_action(abstract=False)} - # + # # """ def _parse_answer(self, text_answer): @@ -789,7 +790,7 @@ def _prompt(self) -> str: prompt += f"\n<think>\n{self.thought}\n</think>\n" if self.flags.use_action_history: - prompt += f"\n<action>\n{self.action}\n</action>\n" + prompt += f"\n<action>\n{self.action}\n</action>\n" # prompt += f"{self.error.prompt}{self.html_diff.prompt}{self.ax_tree_diff.prompt}" prompt += f"{self.error.prompt}" diff --git a/src/agentlab/experiments/study.py b/src/agentlab/experiments/study.py index 391f419c..05d29557 100644 --- a/src/agentlab/experiments/study.py +++ b/src/agentlab/experiments/study.py @@ -30,6 +30,16 @@ logger = logging.getLogger(__name__) +base_url="http://webarena2.eastus.cloudapp.azure.com/" +os.environ["WA_SHOPPING"] = f"{base_url}:8082/" +os.environ["WA_SHOPPING_ADMIN"] = f"{base_url}:8083/admin" +os.environ["WA_REDDIT"] = f"{base_url}:8080" +os.environ["WA_GITLAB"] = f"{base_url}:9001" +os.environ["WA_WIKIPEDIA"] = f"{base_url}:8081/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing" 
+os.environ["WA_MAP"] = f"{base_url}:443" +os.environ["WA_HOMEPAGE"] = f"{base_url}:80" +os.environ["WA_FULL_RESET"] = f"" + def make_study( agent_args: list[AgentArgs] | AgentArgs, diff --git a/src/agentlab/llm/chat_api.py b/src/agentlab/llm/chat_api.py index 9ee68838..615c862a 100644 --- a/src/agentlab/llm/chat_api.py +++ b/src/agentlab/llm/chat_api.py @@ -324,7 +324,8 @@ def __call__(self, messages: list[dict], n_samples: int = 1, temperature: float tracking.TRACKER.instance(input_tokens, output_tokens, cost) if n_samples == 1: - res = AIMessage(completion.choices[0].message.content) + rec = split_reasoning_and_action(completion.choices[0].message.content) + res = AIMessage("<think>\nreasoning\n</think>" + rec[1]) if self.log_probs: res["log_probs"] = completion.choices[0].log_probs return res @@ -553,3 +554,54 @@ def make_model(self): temperature=self.temperature, max_tokens=self.max_new_tokens, ) + + +from typing import Tuple + +START_TAG = "[BEGIN FINAL RESPONSE]" +END_TAG = "[END FINAL RESPONSE]" +END_RESPONSE_TAG = "<|end|>" + +def split_reasoning_and_action(s: str) -> Tuple[str, str]: + """Return (reasoning_wrapped, action_wrapped) from a single string. 
+ reasoning_wrapped -> '<think>\\n{reasoning}\\n</think>' or '' if none + action_wrapped -> '<think>\\n</think>\\n<action>\\n{content}\\n</action>' + """ + txt = s.strip() + + # Locate tags + i = txt.find(START_TAG) + j = txt.find(END_TAG, i + len(START_TAG)) if i != -1 else -1 + + if i != -1 and j != -1: + reasoning = txt[:i].strip() + content = txt[i + len(START_TAG):j].strip() + else: + reasoning = "" + content = txt + + # Clean accidental echoes + if reasoning.endswith(START_TAG): + reasoning = reasoning[:-len(START_TAG)].rstrip() + if content.startswith(START_TAG): + content = content[len(START_TAG):].lstrip() + if content.endswith(END_TAG): + content = content[:-len(END_TAG)].rstrip() + if content.endswith(END_RESPONSE_TAG): + content = content[:-len(END_RESPONSE_TAG)].rstrip() + + # Normalize existing wrappers + if reasoning.startswith("<think>"): + reasoning = reasoning[len("<think>"):].lstrip() + if reasoning.endswith("</think>"): + reasoning = reasoning[:-len("</think>")].rstrip() + + # Strip any action wrappers inside content before re-wrapping + content = content.replace("<action>", "").replace("</action>", "").strip() + + reasoning_wrapped = f"<think>\n{reasoning}\n</think>" if reasoning else "" + action_wrapped = f"<think>\n</think>\n<action>\n{content}\n</action>" + + return reasoning_wrapped, action_wrapped + + diff --git a/tests/mytest.py b/tests/mytest.py new file mode 100644 index 00000000..f49a987d --- /dev/null +++ b/tests/mytest.py @@ -0,0 +1,132 @@ +from agentlab.experiments.study import make_study + +from agentlab.agents.generic_agent import AGENT_CUSTOM +from agentlab.agents.generic_agent import GenericAgentArgs +from pathlib import Path +from agentlab.llm.chat_api import ( + AnthropicModelArgs, + AzureModelArgs, + OpenAIModelArgs, + OpenRouterModelArgs, + SelfHostedModelArgs, +) + +from finetuning.toolkit_utils.chat_api import ToolkitModelArgs, VLLMModelArgs + +import os +os.environ["SNOW_INSTANCE_URL"]="https://empmassimo17.service-now.com/" +os.environ["SNOW_INSTANCE_UNAME"]="admin" +os.environ["SNOW_INSTANCE_PWD"]="AE_82fH4ZPntQuJ" +CHECKPOINT_ACCOUNT_NAME_SUFFIX 
= ( + # "ui_assist" + "adea" +) +DATA_ROOT_PATH = f"/mnt/{CHECKPOINT_ACCOUNT_NAME_SUFFIX}/data" +FINETUNING_PATH = Path(DATA_ROOT_PATH) / "finetuning" +MINIWOB_URL = str(FINETUNING_PATH) + "/.miniwob-plusplus/miniwob/html/miniwob/" +MINIWOB_URL = "file://" + MINIWOB_URL +if os.getenv("MINIWOB_URL", None) is None: + os.environ["MINIWOB_URL"] = MINIWOB_URL + +import bgym +from bgym import HighLevelActionSetArgs + +from agentlab.agents import dynamic_prompting as dp +from agentlab.experiments import args +from agentlab.llm.llm_configs import CHAT_MODEL_ARGS_DICT + +from agentlab.agents.generic_agent import GenericAgentArgs + + +from agentlab.agents.generic_agent.generic_agent_prompt import GenericPromptFlags + + + + + + +FLAGS_CUSTOM = GenericPromptFlags( + obs=dp.ObsFlags( + use_html=False, + use_ax_tree=True, + use_focused_element=True, + use_error_logs=True, + use_history=True, + use_past_error_logs=False, + use_action_history=True, + use_think_history=False, + use_diff=False, + html_type="pruned_html", + use_screenshot=False, + use_som=False, + extract_visible_tag=True, + extract_clickable_tag=False, + extract_coords="False", + filter_visible_elements_only=False, + ), + action=dp.ActionFlags( + action_set=HighLevelActionSetArgs( + subsets=["bid"], + multiaction=False, + ), + long_description=False, + individual_examples=True, + ), + use_plan=False, + use_criticise=False, + use_thinking=True, + use_memory=False, + use_concrete_example=True, + use_abstract_example=True, + use_hints=True, + enable_chat=False, + max_prompt_tokens=40_000, + be_cautious=True, + extra_instructions=None, +) + +dic={"Qwen/Qwen3-8B": VLLMModelArgs( # This is the example, already VLLMModelArgs + model_name="Qwen/Qwen3-8B", + training_total_tokens=128_000, + max_total_tokens=16000, + backend="vllm", + model_size=8, + n_gpus=2, + tensor_parallel_size=2, + # model_url="https://nonpaying-andra-coordinal.ngrok-free.dev/v1", + ), + "openai/gpt-oss-20b": VLLMModelArgs( + 
model_name="openai/gpt-oss-20b", + max_total_tokens=32_000, + model_size=21, + backend="vllm", + n_gpus=2, + tensor_parallel_size=2, + ) + , + "ServiceNow-AI/Apriel-1.5-15b-Thinker": VLLMModelArgs( + model_name="ServiceNow-AI/Apriel-1.5-15b-Thinker", + model_size=15, + max_total_tokens=40_000, + backend="vllm", + n_gpus=2, + tensor_parallel_size=2, + ),} + +FLAGS_CUSTOM.use_thinking = False +FLAGS_CUSTOM.obs.use_think_history = False + +AGENT_CUSTOM = GenericAgentArgs( + chat_model_args=dic["ServiceNow-AI/Apriel-1.5-15b-Thinker"], + flags=FLAGS_CUSTOM,) + +AGENT_CUSTOM.chat_model_args.temperature = 0.6 + +study = make_study( + benchmark="workarena_l2_agent_curriculum_eval", # or "webarena", "workarena_l1" ... + agent_args=[AGENT_CUSTOM], + comment="My first study", +) + +study.run(n_jobs=20, exp_root=Path("/home/toolkit/AGENTLAB_TEST/Workarena_l2/"), parallel_backend="ray") \ No newline at end of file