prompt refactoring (fe3c7ce2) · Commits · 张泽凯 / VAGEN

vagen/env/frozenlake/env.py

+3 −3

Original line number	Diff line number	Diff line
		@@ -7,7 +7,7 @@ from gymnasium.envs.toy_text.frozen_lake import FrozenLakeEnv as GymFrozenLakeEn
		from vagen.env.utils.env_utils import NoLoggerWarnings, set_seed
		from vagen.env.utils.context_utils import convert_numpy_to_PIL
		from vagen.env.utils.parse_utils_4 import parse_function_map
		from .prompt_4 import system_prompt, init_observation_template, action_template, format_prompt
		from .prompt import system_prompt, init_observation_template, action_template, format_prompt
		from .env_config import FrozenLakeEnvConfig
		from .utils import generate_random_map, is_valid

		@@ -267,12 +267,12 @@ class FrozenLakeEnv(BaseEnv):
		# Format the observation string using the appropriate template
		if init_obs:
		# Initial observation doesn't include action results
		obs_str = init_observation_template(img_str) + "\n" + format_prompt_text
		obs_str = init_observation_template(img_str=img_str) + "\n" + format_prompt_text
		else:
		# Subsequent observations include action results
		obs_str = action_template(
		valid_action=self.valid_actions,
		observation=img_str,
		img_str=img_str,
		) + "\n" + format_prompt_text

		# Return observation dictionary with appropriate fields

vagen/env/frozenlake/prompt.py

+111 −114

Original line number	Diff line number	Diff line
		def system_prompt(**kwargs):
		    """Return the fixed system prompt describing the FrozenLake task.

		    Any keyword arguments are accepted and ignored, so the function can be
		    called both as ``system_prompt()`` and with extra keyword arguments.

		    Returns:
		        str: The system prompt text, terminated by a single newline.
		    """
		    # The text is assembled line by line so it does not depend on how the
		    # source file is indented; the legend uses plain "|" separators.
		    prompt_lines = (
		        "You are a FrozenLake solver.",
		        "",
		        "FrozenLake Quick Guide",
		        "Goal: Reach the goal (G).",
		        "",
		        "Symbols (If image is provided there are no symbols):",
		        "_ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal",
		        "",
		        "Rules:",
		        "1. Avoid falling into holes.",
		        "2. Frozen tiles are slippery, you may move perpendicular to your intended direction.",
		        "",
		        "Actions you can take: Left, Down, Right, Up.",
		        "",  # keeps the trailing newline of the original literal
		    )
		    return "\n".join(prompt_lines)

		def init_observation_template(observation=None, **kwargs):
		    """Build the first-turn observation message.

		    Both call styles that appear for this function are supported: the
		    positional form ``init_observation_template(text)`` and the keyword form
		    ``init_observation_template(img_str=text)``.

		    Args:
		        observation: Observation text (or image placeholder) passed
		            positionally or as ``observation=``.
		        **kwargs: ``img_str`` takes precedence over ``observation`` when
		            both are given; other keys are ignored.

		    Returns:
		        str: The "[Initial Observation]" message ending with a newline.
		    """
		    img_str = kwargs.get("img_str", observation)
		    if img_str is None:
		        # Placeholder text used when the caller supplies no observation.
		        img_str = "The player is on the above the target"
		    # Only the resolved observation is interpolated; a second, undefined
		    # name in the template would raise NameError at call time.
		    return f"[Initial Observation]:\n{img_str}\nDecide your next action(s).\n"

		def action_template(valid_action="Down", observation=None, **kwargs):
		    """Build the follow-up observation message shown after an action.

		    The observation can be supplied as ``observation`` (positional or
		    keyword) or as ``img_str``. The environment code passes the rendered
		    observation under the ``img_str`` key, so that key must be honoured;
		    otherwise the message would always contain the placeholder text.

		    Args:
		        valid_action: The action(s) extracted from the previous answer;
		            interpolated with ``str()`` formatting.
		        observation: Observation text or image placeholder.
		        **kwargs: ``img_str`` is used when ``observation`` is not given;
		            other keys are ignored.

		    Returns:
		        str: The message ending with a newline.
		    """
		    if observation is None:
		        observation = kwargs.get(
		            "img_str", "The player is on the above the target"
		        )
		    return (
		        f"After your answer, the extracted valid action is {valid_action}.\n"
		        "After that, the observation is:\n"
		        f"{observation}\n"
		        "Decide your next action(s).\n"
		    )

		def free_think_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give your thought process, and then your answer.
		Your response should be in the format of:
		<think>...</think><answer>...</answer>"""
		# Format configurations defining the structure of each response format.
		#
		# Keys of each entry:
		#   "format"          - tag skeleton the response must follow; every format
		#                       ends with <answer>...</answer>, matching its
		#                       description and example.
		#   "description"     - one-sentence instruction for the model.
		#   "additional_info" - optional extra instruction (symbol legend).
		#   "example"         - sample response; the literal "{action_sep}"
		#                       placeholder is filled in later via str.format().
		# Example grids are 4x4: every row has exactly four symbols.
		FORMAT_CONFIGS = {
		    "free_think": {
		        "format": "<think>...</think><answer>...</answer>",
		        "description": "You should first give your reasoning, and then your answer.",
		        "example": "<think>I can see the target is on my down left, I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer>",
		    },

		    "no_think": {
		        "format": "<answer>...</answer>",
		        "description": "You should provide only your answer.",
		        "example": "<answer>Down{action_sep}Left</answer>",
		    },

		    "grounding": {
		        "format": "<think><observation>...</observation><reasoning>...</reasoning></think><answer>...</answer>",
		        "description": "You should first describe the observation, then your reasoning, and finally your answer.",
		        "example": "<think><observation>The player is on the above the target</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>",
		    },

		    "worldmodeling": {
		        "format": "<think><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>",
		        "description": "You should first give your reasoning, then predict the next state, and finally your answer.",
		        "example": "<think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>The player will reach the target</prediction></think><answer>Down{action_sep}Left</answer>",
		    },

		    "grounding_worldmodeling": {
		        "format": "<think><observation>...</observation><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>",
		        "description": "You should first describe the observation, then your reasoning, then predict the next state, and finally your answer.",
		        "example": "<think><observation>The player is on the above the target</observation><reasoning>I should go down then left to reach the target</reasoning><prediction>The player will reach the target</prediction></think><answer>Down{action_sep}Left</answer>",
		    },

		    "grounding_symbol": {
		        "format": "<think><observation>...</observation><reasoning>...</reasoning></think><answer>...</answer>",
		        "description": "You should first describe the observation as a grid, then your reasoning, and finally your answer.",
		        "additional_info": "The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.",
		        "example": "<think><observation>_P__\nG___\n_OO_\n____</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>",
		    },

		    "worldmodeling_symbol": {
		        "format": "<think><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>",
		        "description": "You should first give your reasoning, then predict the next state, and finally your answer.",
		        "additional_info": "The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.",
		        "example": "<think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>____\n√___\n_OO_\n____</prediction></think><answer>Down{action_sep}Left</answer>",
		    },

		    "grounding_worldmodeling_symbol": {
		        "format": "<think><observation>...</observation><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>",
		        "description": "You should first describe the observation as a grid, then your reasoning, then predict the next state, and finally your answer.",
		        "additional_info": "The state should be represented as grids using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.",
		        "example": "<think><observation>_P__\nG___\n_OO_\n____</observation><reasoning>I should go down then left to reach the target</reasoning><prediction>____\n√___\n_OO_\n____</prediction></think><answer>Down{action_sep}Left</answer>",
		    },
		}

		if add_example:
		example = f"""e.g. <think>I can see the target is on my down left, I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer>"""
		return base_prompt + '\n' + example
		return base_prompt
		def format_prompt_generator(format_type):
		"""
		Generates a prompt function for the specified format type.

		def no_think_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should provide only your answer.
		Your response should be in the format of:
		<answer>...</answer>"""
		Args:
		format_type (str): The format type to generate a prompt function for

		if add_example:
		example = f"""e.g. <answer>Down{action_sep}Left</answer>"""
		return base_prompt + '\n' + example
		return base_prompt
		Returns:
		function: A function that generates a prompt for the specified format
		"""
		def prompt_function(**kwargs):
		"""
		Generate a prompt for the specified format.

		def grounding_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give the current state, then your thought process, and finally your answer.
		The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
		Your response should be in the format of:
		<current_state>...</current_state><think>...</think><answer>...</answer>"""
		Args:
		max_actions_per_step (int): Maximum number of actions allowed per step
		action_sep (str): Separator between actions
		add_example (bool): Whether to add an example

		if add_example:
		example = f"""e.g. <current_state>{{"player":(2,3),"target":(3,2)}}</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer>"""
		return base_prompt + '\n' + example
		return base_prompt
		Returns:
		str: The formatted prompt
		"""
		max_actions_per_step = kwargs.get("max_actions_per_step", 1)
		action_sep = kwargs.get("action_sep", "\|")
		add_example = kwargs.get("add_example", False)
		config = FORMAT_CONFIGS[format_type]

		def worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
		# Build the base prompt text
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give your thought process, then your answer, and finally predict the next state.
		The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
		Your response should be in the format of:
		<think>...</think><answer>...</answer><next_state>...</next_state>"""
		{config["description"]}"""

		if add_example:
		example = f"""e.g. <think>I can see the target is on my down left, I should go down then left</think><answer>Down{action_sep}Left</answer><next_state>{{"player":(3,2),"target":(3,2)}}</next_state>"""
		return base_prompt + '\n' + example
		return base_prompt
		# Add additional information if available
		if "additional_info" in config:
		base_prompt += f"\n{config['additional_info']}"

		def grounding_worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give the current state, then your thought process, then your answer, and finally predict the next state.
		The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
		# Add response format instruction
		base_prompt += f"""
		Your response should be in the format of:
		<current_state>...</current_state><think>...</think><answer>...</answer><next_state>...</next_state>"""
		{config["format"]}"""

		# Add example if requested
		if add_example:
		example = f"""e.g. <current_state>{{"player":(2,3),"target":(3,2)}}</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer><next_state>{{"player":(3,2),"target":(3,2)}}</next_state>"""
		return base_prompt + '\n' + example
		return base_prompt
		example = config["example"].format(action_sep=action_sep)
		return base_prompt + '\n' + f"e.g. {example}"

		def grounding_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
		    """Build the format instructions for the symbol-grid grounding format.

		    Args:
		        max_actions_per_step: Maximum number of actions allowed in one answer.
		        action_sep: Separator placed between actions in the answer.
		        add_example: When true, append an "e.g." sample response.

		    Returns:
		        str: The instruction text, without a trailing newline.
		    """
		    # Lines are joined explicitly so the prompt text does not depend on the
		    # source file's indentation; the symbol legend uses plain "|".
		    base_prompt = "\n".join((
		        f"You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.",
		        "You should first give the current state as a grid, then your thought process, and finally your answer.",
		        "The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.",
		        "Your response should be in the format of:",
		        "<current_state>...</current_state><think>...</think><answer>...</answer>",
		    ))
		    if not add_example:
		        return base_prompt

		    example = (
		        "e.g. <current_state>_P__\nG___\n_OO_\n____</current_state>"
		        "<think>I should go down then left to reach the target</think>"
		        f"<answer>Down{action_sep}Left</answer>"
		    )
		    return base_prompt + "\n" + example

		def worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
		    """Build the format instructions for the symbol-grid world-modeling format.

		    Args:
		        max_actions_per_step: Maximum number of actions allowed in one answer.
		        action_sep: Separator placed between actions in the answer.
		        add_example: When true, append an "e.g." sample response.

		    Returns:
		        str: The instruction text, without a trailing newline.
		    """
		    # Lines are joined explicitly so the prompt text does not depend on the
		    # source file's indentation; the symbol legend uses plain "|".
		    base_prompt = "\n".join((
		        f"You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.",
		        "You should first give your thought process, then your answer, and finally predict the next state as a grid.",
		        "The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.",
		        "Your response should be in the format of:",
		        "<think>...</think><answer>...</answer><next_state>...</next_state>",
		    ))
		    if not add_example:
		        return base_prompt

		    example = (
		        "e.g. <think>I can see the target is on my down left, I should go down then left</think>"
		        f"<answer>Down{action_sep}Left</answer>"
		        "<next_state>____\n√___\n_OO_\n____</next_state>"
		    )
		    return base_prompt + "\n" + example
		return prompt_function

		def grounding_worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give the current state as a grid, then your thought process, then your answer, and finally predict the next state as a grid.
		The state should be represented as grids using the symbols: _ Frozen \| O Hole \| G Goal \| P Player \| X Player fell into hole \| √ Player on goal.
		Your response should be in the format of:
		<current_state>...</current_state><think>...</think><answer>...</answer><next_state>...</next_state>"""
		# Generate the format prompt dictionary using the generator
		format_prompt = {format_type: format_prompt_generator(format_type)
		for format_type in FORMAT_CONFIGS}

		if add_example:
		example = f"""e.g. <current_state>_P__
		G___
		_OO_
		____</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer><next_state>____
		√___
		_OO_
		____</next_state>"""
		return base_prompt + '\n' + example
		return base_prompt
		if __name__ == "__main__":
		# Example usage
		max_actions_per_step = 2
		action_sep = "\|"

		# Dictionary mapping format names to their corresponding functions
		format_prompt = {
		"free_think": free_think_format_prompt,
		"no_think": no_think_format_prompt,
		"grounding": grounding_format_prompt,
		"worldmodeling": worldmodeling_format_prompt,
		"grounding_worldmodeling": grounding_worldmodeling_format_prompt,
		"grounding_symbol": grounding_symbol_format_prompt,
		"worldmodeling_symbol": worldmodeling_symbol_format_prompt,
		"grounding_worldmodeling_symbol": grounding_worldmodeling_symbol_format_prompt
		}
		No newline at end of file
		for key, func in format_prompt.items():
		print(f"{key} format prompt:")
		print(func(max_actions_per_step=max_actions_per_step, action_sep=action_sep))
		print("\n" + "="*50 + "\n")
		No newline at end of file

vagen/env/frozenlake/prompt_4.py→vagen/env/frozenlake/prompt_backup_1.py

+34 −39

Original line number	Diff line number	Diff line
		def system_prompt():
		    """Return the fixed system prompt describing the FrozenLake task.

		    Returns:
		        str: The system prompt text, terminated by a single newline.
		    """
		    # The text is assembled line by line so it does not depend on how the
		    # source file is indented; the legend uses plain "|" separators.
		    prompt_lines = (
		        "You are a FrozenLake solver.",
		        "",
		        "FrozenLake Quick Guide",
		        "Goal: Reach the goal (G).",
		        "",
		        "Symbols (If image is provided there are no symbols):",
		        "_ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal",
		        "",
		        "Rules:",
		        "1. Avoid falling into holes.",
		        "2. Frozen tiles are slippery, you may move perpendicular to your intended direction.",
		        "",
		        "Actions you can take: Left, Down, Right, Up.",
		        "",  # keeps the trailing newline of the original literal
		    )
		    return "\n".join(prompt_lines)

		@@ -25,7 +29,7 @@ Decide your next action(s).

		def free_think_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give your reasoning, and then your answer.
		You should first give your thought process, and then your answer.
		Your response should be in the format of:
		<think>...</think><answer>...</answer>"""

		@@ -47,94 +51,85 @@ Your response should be in the format of:

		def grounding_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first describe the observation, then your reasoning, and finally your answer.
		You should first give the current state, then your thought process, and finally your answer.
		The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
		Your response should be in the format of:
		<think><observation>...</observation><reasoning>...</reasoning></think><answer>...</answer>"""
		<current_state>...</current_state><think>...</think><answer>...</answer>"""

		if add_example:
		example = f"""e.g. <think><observation>The player is on the above the target</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>"""
		example = f"""e.g. <current_state>{{"player":(2,3),"target":(3,2)}}</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer>"""
		return base_prompt + '\n' + example
		return base_prompt

		def worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give your reasoning, then predict the next state, and finally your answer.
		You should first give your thought process, then your answer, and finally predict the next state.
		The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
		Your response should be in the format of:
		<think><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>"""
		<think>...</think><answer>...</answer><next_state>...</next_state>"""

		if add_example:
		example = f"""e.g. <think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>The player will reach the target</prediction></think><answer>Down{action_sep}Left</answer>"""
		example = f"""e.g. <think>I can see the target is on my down left, I should go down then left</think><answer>Down{action_sep}Left</answer><next_state>{{"player":(3,2),"target":(3,2)}}</next_state>"""
		return base_prompt + '\n' + example
		return base_prompt

		def grounding_worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first describe the observation, then your reasoning, then predict the next state, and finally your answer.
		You should first give the current state, then your thought process, then your answer, and finally predict the next state.
		The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
		Your response should be in the format of:
		<think><observation>...</observation><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>"""
		<current_state>...</current_state><think>...</think><answer>...</answer><next_state>...</next_state>"""

		if add_example:
		example = f"""e.g. <think><observation>The player is on the above the target</observation><reasoning>I should go down then left to reach the target</reasoning><prediction>The player will reach the target</prediction></think><answer>Down{action_sep}Left</answer>"""
		example = f"""e.g. <current_state>{{"player":(2,3),"target":(3,2)}}</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer><next_state>{{"player":(3,2),"target":(3,2)}}</next_state>"""
		return base_prompt + '\n' + example
		return base_prompt

		def grounding_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first describe the observation as a grid, then your reasoning, and finally your answer.
		You should first give the current state as a grid, then your thought process, and finally your answer.
		The state should be represented as a grid using the symbols: _ Frozen \| O Hole \| G Goal \| P Player \| X Player fell into hole \| √ Player on goal.
		Your response should be in the format of:
		<think><observation>_P__
		G___
		OO
		____</observation><reasoning>...</reasoning></think><answer>...</answer>"""
		<current_state>...</current_state><think>...</think><answer>...</answer>"""

		if add_example:
		example = f"""e.g. <think><observation>_P__
		example = f"""e.g. <current_state>_P__
		G___
		OO
		____</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>"""
		_OO_
		____</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer>"""
		return base_prompt + '\n' + example
		return base_prompt

		def worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give your reasoning, then predict the next state, and finally your answer.
		You should first give your thought process, then your answer, and finally predict the next state as a grid.
		The state should be represented as a grid using the symbols: _ Frozen \| O Hole \| G Goal \| P Player \| X Player fell into hole \| √ Player on goal.
		Your response should be in the format of:
		<think><reasoning>...</reasoning><prediction>____
		√___
		OO
		____</prediction></think><answer>...</answer>"""
		<think>...</think><answer>...</answer><next_state>...</next_state>"""

		if add_example:
		example = f"""e.g. <think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>____
		example = f"""e.g. <think>I can see the target is on my down left, I should go down then left</think><answer>Down{action_sep}Left</answer><next_state>____
		√___
		OO
		____</prediction></think><answer>Down{action_sep}Left</answer>"""
		_OO_
		____</next_state>"""
		return base_prompt + '\n' + example
		return base_prompt

		def grounding_worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
		base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first describe the observation as a grid, then your reasoning, then predict the next state, and finally your answer.
		You should first give the current state as a grid, then your thought process, then your answer, and finally predict the next state as a grid.
		The state should be represented as grids using the symbols: _ Frozen \| O Hole \| G Goal \| P Player \| X Player fell into hole \| √ Player on goal.
		Your response should be in the format of:
		<think><observation>_P__
		G___
		OO
		____</observation><reasoning>...</reasoning><prediction>____
		√___
		OO
		____</prediction></think><answer>...</answer>"""
		<current_state>...</current_state><think>...</think><answer>...</answer><next_state>...</next_state>"""

		if add_example:
		example = f"""e.g. <think><observation>_P__
		example = f"""e.g. <current_state>_P__
		G___
		OO
		____</observation><reasoning>I should go down then left to reach the target</reasoning><prediction>____
		_OO_
		____</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer><next_state>____
		√___
		OO
		____</prediction></think><answer>Down{action_sep}Left</answer>"""
		_OO_
		____</next_state>"""
		return base_prompt + '\n' + example
		return base_prompt

vagen/env/frozenlake/prompt_3.py→vagen/env/frozenlake/prompt_backup_2.py

+0 −0

File moved.

View file

vagen/env/frozenlake/prompt_2.py→vagen/env/frozenlake/prompt_backup_3.py

+0 −0

File moved.

View file

Admin message