minor (7fc93836) · Commits · 张泽凯 / VAGEN

vagen/env/frozenlake/env.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -6,8 +6,8 @@ from gymnasium.utils import seeding
		from gymnasium.envs.toy_text.frozen_lake import FrozenLakeEnv as GymFrozenLakeEnv
		from vagen.env.utils.env_utils import NoLoggerWarnings, set_seed
		from vagen.env.utils.context_utils import convert_numpy_to_PIL
		from vagen.env.utils.parse_utils import parse_function_map
		from .prompt import system_prompt, init_observation_template, action_template, format_prompt
		from vagen.env.utils.parse_utils_3 import parse_function_map
		from .prompt_3 import system_prompt, init_observation_template, action_template, format_prompt
		from .env_config import FrozenLakeEnvConfig
		from .utils import generate_random_map, is_valid

vagen/env/frozenlake/prompt_3.py

0 → 100644

+153 −0

Original line number	Diff line number	Diff line
		def system_prompt():
		    """Return the static system prompt for the FrozenLake task.

		    The text states the goal, the tile symbol legend, the two rules and
		    the four available actions. It takes no parameters and always
		    returns the same string.
		    """
		    # The legend uses plain "|" separators. A backslash before the pipe is
		    # not a valid escape sequence in a normal Python string literal: it
		    # triggers an invalid-escape warning and leaves a stray backslash in
		    # the prompt text.
		    return """You are a FrozenLake solver.
		FrozenLake Quick Guide
		Goal: Reach the goal (G).
		Symbols (If image is provided there are no symbols):
		_ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal
		Rules:
		1. Avoid falling into holes.
		2. Frozen tiles are slippery, you may move perpendicular to your intended direction.
		Actions you can take: Left, Down, Right, Up.
		"""

		def init_observation_template(observation):
		    """Wrap the first observation of an episode in its prompt text.

		    Args:
		        observation: Value interpolated verbatim below the
		            "[Initial Observation]:" header.

		    Returns:
		        The formatted message, ending with a request for the next action(s).
		    """
		    message = f"""[Initial Observation]:
		{observation}
		Decide your next action(s).
		"""
		    return message

		def action_template(valid_action, observation):
		    """Format the feedback message shown after an action has been applied.

		    Args:
		        valid_action: Value reported as the extracted valid action.
		        observation: Value interpolated as the resulting observation.

		    Returns:
		        The formatted message, ending with a request for the next action(s).
		    """
		    feedback = f"""After your answer, the extracted valid action is {valid_action}.
		After that, the observation is:
		{observation}
		Decide your next action(s).
		"""
		    return feedback

		def free_think_format_prompt(max_actions_per_step, action_sep, add_example=True):
		    """Build the format instructions for the "free_think" response style.

		    Args:
		        max_actions_per_step: Interpolated as the maximum number of
		            actions allowed per response.
		        action_sep: Separator text interpolated between actions.
		        add_example: When truthy, a one-line "e.g." sample response is
		            appended after a newline.

		    Returns:
		        The instruction text, optionally followed by the example.
		    """
		    instructions = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give your thought process, and then your answer.
		Your response should be in the format of:
		<think><reasoning>...</reasoning></think><answer>...</answer>"""

		    # Guard clause: without the example the instructions are the whole prompt.
		    if not add_example:
		        return instructions

		    sample = f"""e.g. <think><reasoning>I can see the target is on my down left, I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>"""
		    return "\n".join((instructions, sample))

		def no_think_format_prompt(max_actions_per_step, action_sep, add_example=True):
		    """Build the format instructions for the answer-only "no_think" style.

		    Args:
		        max_actions_per_step: Interpolated as the maximum number of
		            actions allowed per response.
		        action_sep: Separator text interpolated between actions.
		        add_example: When truthy, a one-line "e.g." sample response is
		            appended after a newline.

		    Returns:
		        The instruction text, optionally followed by the example.
		    """
		    instructions = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should provide only your answer.
		Your response should be in the format of:
		<answer>...</answer>"""

		    if not add_example:
		        return instructions

		    sample = f"""e.g. <answer>Down{action_sep}Left</answer>"""
		    return "\n".join((instructions, sample))

		def grounding_format_prompt(max_actions_per_step, action_sep, add_example=True):
		    """Build the format instructions for the "grounding" response style.

		    The requested response carries an <observation> section ahead of the
		    <reasoning> section inside <think>, followed by <answer>.

		    Args:
		        max_actions_per_step: Interpolated as the maximum number of
		            actions allowed per response.
		        action_sep: Separator text interpolated between actions.
		        add_example: When truthy, a one-line "e.g." sample response is
		            appended after a newline.

		    Returns:
		        The instruction text, optionally followed by the example.
		    """
		    instructions = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give the current state, then your thought process, and finally your answer.
		Your response should be in the format of:
		<think><observation>...</observation><reasoning>...</reasoning></think><answer>...</answer>"""

		    if not add_example:
		        return instructions

		    sample = f"""e.g. <think><observation>The player is on the above the target</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>"""
		    return "\n".join((instructions, sample))

		def worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
		    """Build the format instructions for the "worldmodeling" response style.

		    The requested response carries a <prediction> section after the
		    <reasoning> section inside <think>, followed by <answer>.

		    Args:
		        max_actions_per_step: Interpolated as the maximum number of
		            actions allowed per response.
		        action_sep: Separator text interpolated between actions.
		        add_example: When truthy, a one-line "e.g." sample response is
		            appended after a newline.

		    Returns:
		        The instruction text, optionally followed by the example.
		    """
		    # Doubled braces render as literal "{" / "}" in the emitted state format.
		    instructions = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give your thought process, then your answer, and finally predict the next state.
		The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
		Your response should be in the format of:
		<think><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>"""

		    if not add_example:
		        return instructions

		    # NOTE(review): the sample prediction is free text rather than the
		    # dict-style state format described above - confirm this is intended.
		    sample = f"""e.g. <think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>The player will reach the target</prediction></think><answer>Down{action_sep}Left</answer>"""
		    return "\n".join((instructions, sample))

		def grounding_worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
		    """Build the format instructions for the "grounding_worldmodeling" style.

		    The requested response carries <observation>, <reasoning> and
		    <prediction> sections inside <think>, followed by <answer>. Both
		    state sections use the dict-style player/target format.

		    Args:
		        max_actions_per_step: Interpolated as the maximum number of
		            actions allowed per response.
		        action_sep: Separator text interpolated between actions.
		        add_example: When truthy, a one-line "e.g." sample response is
		            appended after a newline.

		    Returns:
		        The instruction text, optionally followed by the example.
		    """
		    # Doubled braces render as literal "{" / "}" in the emitted state format.
		    instructions = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give the current state, then your thought process, then your answer, and finally predict the next state.
		The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
		Your response should be in the format of:
		<think><observation>{{"player":(row1,column1),"target":(row2,column2)}}</observation><reasoning>...</reasoning><prediction>{{"player":(row1,column1),"target":(row2,column2)}}</prediction></think><answer>...</answer>"""

		    if not add_example:
		        return instructions

		    sample = f"""e.g. <think><observation>{{"player":(2,3),"target":(3,2)}}</observation><reasoning>I should go down then left to reach the target</reasoning><prediction>{{"player":(3,2),"target":(3,2)}}</prediction></think><answer>Down{action_sep}Left</answer>"""
		    return "\n".join((instructions, sample))

		def grounding_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
		    """Build the format instructions for the "grounding_symbol" style.

		    The requested response carries an <observation> section holding the
		    current state as a symbol grid, then <reasoning>, inside <think>,
		    followed by <answer>.

		    Args:
		        max_actions_per_step: Interpolated as the maximum number of
		            actions allowed per response.
		        action_sep: Separator text interpolated between actions.
		        add_example: When truthy, an "e.g." sample response is appended
		            after a newline.

		    Returns:
		        The instruction text, optionally followed by the example.
		    """
		    # The legend uses plain "|" separators; a backslash before the pipe is
		    # an invalid escape sequence and would leak into the prompt.
		    # NOTE(review): the third grid row is written as "_OO_" so that every
		    # row of the sample grid has four columns - confirm the intended hole
		    # positions against the original example map.
		    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give the current state as a grid, then your thought process, and finally your answer.
		The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.
		Your response should be in the format of:
		<think><observation>_P__
		G___
		_OO_
		____</observation><reasoning>...</reasoning></think><answer>...</answer>"""

		    if not add_example:
		        return base_prompt

		    example = f"""e.g. <think><observation>_P__
		G___
		_OO_
		____</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>"""
		    return base_prompt + '\n' + example

		def worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
		    """Build the format instructions for the "worldmodeling_symbol" style.

		    The requested response carries <reasoning> and then a <prediction>
		    section holding the next state as a symbol grid, inside <think>,
		    followed by <answer>.

		    Args:
		        max_actions_per_step: Interpolated as the maximum number of
		            actions allowed per response.
		        action_sep: Separator text interpolated between actions.
		        add_example: When truthy, an "e.g." sample response is appended
		            after a newline.

		    Returns:
		        The instruction text, optionally followed by the example.
		    """
		    # The legend uses plain "|" separators; a backslash before the pipe is
		    # an invalid escape sequence and would leak into the prompt.
		    # NOTE(review): the third grid row is written as "_OO_" so that every
		    # row of the sample grid has four columns - confirm the intended hole
		    # positions against the original example map.
		    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give your thought process, then your answer, and finally predict the next state as a grid.
		The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.
		Your response should be in the format of:
		<think><reasoning>...</reasoning><prediction>____
		√___
		_OO_
		____</prediction></think><answer>...</answer>"""

		    if not add_example:
		        return base_prompt

		    example = f"""e.g. <think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>____
		√___
		_OO_
		____</prediction></think><answer>Down{action_sep}Left</answer>"""
		    return base_prompt + '\n' + example

		def grounding_worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
		    """Build the format instructions for "grounding_worldmodeling_symbol".

		    The requested response carries an <observation> grid, <reasoning>
		    and a <prediction> grid inside <think>, followed by <answer>.

		    Args:
		        max_actions_per_step: Interpolated as the maximum number of
		            actions allowed per response.
		        action_sep: Separator text interpolated between actions.
		        add_example: When truthy, an "e.g." sample response is appended
		            after a newline.

		    Returns:
		        The instruction text, optionally followed by the example.
		    """
		    # The legend uses plain "|" separators; a backslash before the pipe is
		    # an invalid escape sequence and would leak into the prompt.
		    # NOTE(review): the third row of each grid is written as "_OO_" so
		    # that every row has four columns - confirm the intended hole
		    # positions against the original example map.
		    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
		You should first give the current state as a grid, then your thought process, then your answer, and finally predict the next state as a grid.
		The state should be represented as grids using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.
		Your response should be in the format of:
		<think><observation>_P__
		G___
		_OO_
		____</observation><reasoning>...</reasoning><prediction>____
		√___
		_OO_
		____</prediction></think><answer>...</answer>"""

		    if not add_example:
		        return base_prompt

		    example = f"""e.g. <think><observation>_P__
		G___
		_OO_
		____</observation><reasoning>I should go down then left to reach the target</reasoning><prediction>____
		√___
		_OO_
		____</prediction></think><answer>Down{action_sep}Left</answer>"""
		    return base_prompt + '\n' + example

		# Registry of prompt builders: each key is a format name and each value is
		# the function in this module that builds the instructions for that format.
		format_prompt = dict(
		    free_think=free_think_format_prompt,
		    no_think=no_think_format_prompt,
		    grounding=grounding_format_prompt,
		    worldmodeling=worldmodeling_format_prompt,
		    grounding_worldmodeling=grounding_worldmodeling_format_prompt,
		    grounding_symbol=grounding_symbol_format_prompt,
		    worldmodeling_symbol=worldmodeling_symbol_format_prompt,
		    grounding_worldmodeling_symbol=grounding_worldmodeling_symbol_format_prompt,
		)
		No newline at end of file

vagen/env/sokoban/utils.py

+10 −0

Original line number	Diff line number	Diff line
		@@ -38,6 +38,7 @@ def generate_seeds(size,config,min_actions_to_succeed=5,seed=0,n_candidate: int
		pool.join()

		valid_seeds_with_actions = [(seed, gt_action_sequence) for seed, gt_action_sequence in results if gt_action_sequence and len(gt_action_sequence) <= min_actions_to_succeed]
		seed_to_length = {seed: len(gt_action_sequence) for seed, gt_action_sequence in valid_seeds_with_actions}
		valid_seeds = [seed for seed, _ in valid_seeds_with_actions]
		action_lengths = [len(gt_action_sequence) for _, gt_action_sequence in valid_seeds_with_actions]
		for _, gt_action_sequence in valid_seeds_with_actions:
		@@ -76,6 +77,15 @@ def generate_seeds(size,config,min_actions_to_succeed=5,seed=0,n_candidate: int
		for action, count in sorted(action_count.items(), key=lambda x: x[1], reverse=True):
		percentage = (count / len(valid_seeds_with_actions)) * 100
		print(f" {action}: {count} instances ({percentage:.2f}%)")

		distribution_of_generated_seeds = defaultdict(int)
		for seed in valid_seeds[:size]:
		length = seed_to_length[seed]
		distribution_of_generated_seeds[length] += 1
		print("\nDistribution of generated seeds:")
		for length, count in sorted(distribution_of_generated_seeds.items(), key=lambda x: x[0]):
		percentage = (count / len(valid_seeds)) * 100
		print(f" Length {length}: {count} instances ({percentage:.2f}%)")
		return valid_seeds[:size]

		def get_shortest_action_path(room_fixed, room_state, MAX_DEPTH=100):

Admin message