From e26cff66fe338a39246c3488e8f2ad3615703bcd Mon Sep 17 00:00:00 2001 From: amanjaiswal73892 Date: Mon, 20 Oct 2025 21:00:13 +0000 Subject: [PATCH 1/6] revert to standard flags for generic_agent_hinter --- src/agentlab/agents/generic_agent_hinter/tmlr_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agentlab/agents/generic_agent_hinter/tmlr_config.py b/src/agentlab/agents/generic_agent_hinter/tmlr_config.py index d222b7c0..5a749721 100644 --- a/src/agentlab/agents/generic_agent_hinter/tmlr_config.py +++ b/src/agentlab/agents/generic_agent_hinter/tmlr_config.py @@ -23,7 +23,7 @@ use_think_history=True, # gpt-4o config except for this line use_diff=False, html_type="pruned_html", - use_screenshot=True, + use_screenshot=False, use_som=False, extract_visible_tag=True, extract_clickable_tag=True, From 59d5e9406de7ea6464eda9fbb71f11118a783a9b Mon Sep 17 00:00:00 2001 From: amanjaiswal73892 Date: Mon, 20 Oct 2025 21:07:44 +0000 Subject: [PATCH 2/6] Make LLM retreival topic index selection more robust --- src/agentlab/utils/hinting.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/agentlab/utils/hinting.py b/src/agentlab/utils/hinting.py index 6ba1f2d5..30aa253a 100644 --- a/src/agentlab/utils/hinting.py +++ b/src/agentlab/utils/hinting.py @@ -12,11 +12,13 @@ import pandas as pd import requests from agentlab.llm.chat_api import ChatModel - +import re +from agentlab.llm.response_api import APIPayload logger = logging.getLogger(__name__) class HintsSource: + def __init__( self, hint_db_path: str, @@ -27,7 +29,8 @@ def __init__( embedder_server: str = "http://localhost:5000", llm_prompt: str = """We're choosing hints to help solve the following task:\n{goal}.\n You need to choose the most relevant hints topic from the following list:\n\nHint topics:\n{topics}\n -Choose hint topic for the task and return only its number, e.g. 1. 
If you don't know the answer, return -1.""", +Choose hint topic for the task and return only its number. Use the following output format: +index for e.g. 1 for the first choice. If you don't know the answer, return -1""", ) -> None: self.hint_db_path = hint_db_path self.hint_retrieval_mode = hint_retrieval_mode @@ -96,7 +99,10 @@ def choose_hints_llm(self, llm, goal: str, task_name: str) -> list[str]: else: response: str = llm(APIPayload(messages=[llm.msg.user().add_text(prompt)])).think try: - topic_number = json.loads(response) + matches = re.findall(r"(-?\d+)", response) + if len(matches) > 1: + logger.warning(f"LLM selected multiple topics for retrieval using only the first one.") + topic_number = int(matches[0]) if topic_number < 0 or topic_number >= len(hint_topics): logger.error(f"Wrong LLM hint id response: {response}, no hints") return [] From 152b33347f91584f03d41cbc067097b547a8aa4b Mon Sep 17 00:00:00 2001 From: amanjaiswal73892 Date: Mon, 20 Oct 2025 21:38:52 +0000 Subject: [PATCH 3/6] Add a check for empty matches for llm retrieval in hinting.py --- src/agentlab/utils/hinting.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/agentlab/utils/hinting.py b/src/agentlab/utils/hinting.py index 30aa253a..d387f079 100644 --- a/src/agentlab/utils/hinting.py +++ b/src/agentlab/utils/hinting.py @@ -14,6 +14,7 @@ from agentlab.llm.chat_api import ChatModel import re from agentlab.llm.response_api import APIPayload + logger = logging.getLogger(__name__) @@ -100,8 +101,13 @@ def choose_hints_llm(self, llm, goal: str, task_name: str) -> list[str]: response: str = llm(APIPayload(messages=[llm.msg.user().add_text(prompt)])).think try: matches = re.findall(r"(-?\d+)", response) + if not matches: + logger.error(f"No choice tags found in LLM response: {response}") + return [] if len(matches) > 1: - logger.warning(f"LLM selected multiple topics for retrieval using only the first one.") + logger.warning( + f"LLM selected multiple topics for 
retrieval using only the first one." + ) topic_number = int(matches[0]) if topic_number < 0 or topic_number >= len(hint_topics): logger.error(f"Wrong LLM hint id response: {response}, no hints") From 8f8b1f7dcdd81779f9d0db9e775cfc338e790b78 Mon Sep 17 00:00:00 2001 From: amanjaiswal73892 Date: Mon, 20 Oct 2025 21:42:48 +0000 Subject: [PATCH 4/6] black --- src/agentlab/agents/generic_agent_hinter/generic_agent.py | 8 ++++++-- .../agents/generic_agent_hinter/generic_agent_prompt.py | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/agentlab/agents/generic_agent_hinter/generic_agent.py b/src/agentlab/agents/generic_agent_hinter/generic_agent.py index ef8f10c5..63061685 100644 --- a/src/agentlab/agents/generic_agent_hinter/generic_agent.py +++ b/src/agentlab/agents/generic_agent_hinter/generic_agent.py @@ -93,7 +93,9 @@ def __init__( self.flags = flags if self.flags.hint_db_path is not None: - assert os.path.exists(self.flags.hint_db_path), f"Hint database path {self.flags.hint_db_path} does not exist." + assert os.path.exists( + self.flags.hint_db_path + ), f"Hint database path {self.flags.hint_db_path} does not exist." 
self.action_set = self.flags.action.action_set.make_action_set() self._obs_preprocessor = dp.make_obs_preprocessor(flags.obs) @@ -118,7 +120,9 @@ def get_action(self, obs): # use those queries to retrieve from the database and pass to prompt if step-level self.queries = ( - self._get_queries()[0] if getattr(self.flags, "hint_level", "episode") == "step" else None + self._get_queries()[0] + if getattr(self.flags, "hint_level", "episode") == "step" + else None ) # get hints diff --git a/src/agentlab/agents/generic_agent_hinter/generic_agent_prompt.py b/src/agentlab/agents/generic_agent_hinter/generic_agent_prompt.py index 0fc08e41..5ccb73a9 100644 --- a/src/agentlab/agents/generic_agent_hinter/generic_agent_prompt.py +++ b/src/agentlab/agents/generic_agent_hinter/generic_agent_prompt.py @@ -19,6 +19,7 @@ logger = logging.getLogger(__name__) + @dataclass class GenericPromptFlags(dp.Flags): """ @@ -403,6 +404,8 @@ def _parse_answer(self, text_answer): ans_dict["queries"] = json.loads(raw_queries) except Exception as e: t = text_answer.replace("\n", "\\n") - logger.warning(f"Failed to parse queries: {e}. Queries block content: '{ans_dict['queries']}'. RAW llm answer: '{t}'. Will retry") + logger.warning( + f"Failed to parse queries: {e}. Queries block content: '{ans_dict['queries']}'. RAW llm answer: '{t}'. 
Will retry" + ) raise e return ans_dict From c41d817ba9be6575ba743e514fcf2146e00dd0f2 Mon Sep 17 00:00:00 2001 From: amanjaiswal73892 Date: Mon, 20 Oct 2025 21:43:36 +0000 Subject: [PATCH 5/6] add bug indicator in _get_queries method of generic_agent.py --- src/agentlab/agents/generic_agent_hinter/generic_agent.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/agentlab/agents/generic_agent_hinter/generic_agent.py b/src/agentlab/agents/generic_agent_hinter/generic_agent.py index 63061685..843879f8 100644 --- a/src/agentlab/agents/generic_agent_hinter/generic_agent.py +++ b/src/agentlab/agents/generic_agent_hinter/generic_agent.py @@ -208,6 +208,7 @@ def _get_queries(self): ) chat_messages = Discussion([system_prompt, query_prompt.prompt]) + # BUG: Parsing fails multiple times. ans_dict = retry( self.chat_llm, chat_messages, From 453e0cf939e70e7b365e197ab89dfaf3cfd23e98 Mon Sep 17 00:00:00 2001 From: amanjaiswal73892 Date: Mon, 20 Oct 2025 22:05:02 +0000 Subject: [PATCH 6/6] update prompt for llm retrieval. --- src/agentlab/utils/hinting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agentlab/utils/hinting.py b/src/agentlab/utils/hinting.py index d387f079..901d0361 100644 --- a/src/agentlab/utils/hinting.py +++ b/src/agentlab/utils/hinting.py @@ -31,7 +31,7 @@ def __init__( llm_prompt: str = """We're choosing hints to help solve the following task:\n{goal}.\n You need to choose the most relevant hints topic from the following list:\n\nHint topics:\n{topics}\n Choose hint topic for the task and return only its number. Use the following output format: -index for e.g. 1 for the first choice. If you don't know the answer, return -1""", +index for e.g. 0 for the topic with index 0. If you don't know the answer, return -1""", ) -> None: self.hint_db_path = hint_db_path self.hint_retrieval_mode = hint_retrieval_mode