Commit 7fc93836 authored by jameskrw's avatar jameskrw
Browse files

minor

parent 94c75d97
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -6,8 +6,8 @@ from gymnasium.utils import seeding
from gymnasium.envs.toy_text.frozen_lake import FrozenLakeEnv as GymFrozenLakeEnv
from vagen.env.utils.env_utils import NoLoggerWarnings, set_seed
from vagen.env.utils.context_utils import convert_numpy_to_PIL
from vagen.env.utils.parse_utils import parse_function_map
from .prompt import system_prompt, init_observation_template, action_template, format_prompt
from vagen.env.utils.parse_utils_3 import parse_function_map
from .prompt_3 import system_prompt, init_observation_template, action_template, format_prompt
from .env_config import FrozenLakeEnvConfig
from .utils import generate_random_map, is_valid

+153 −0
Original line number Diff line number Diff line
def system_prompt():
    """Return the fixed system prompt that introduces the FrozenLake task.

    The text states the goal, the symbol legend used by text observations,
    the two rules (avoid holes, slippery tiles) and the four actions.

    Returns:
        The prompt as a single string; every line, including the last,
        ends with a newline.
    """
    guide_lines = (
        "You are a FrozenLake solver.",
        "FrozenLake Quick Guide",
        "Goal: Reach the goal (G).",
        "Symbols (If image is provided there are no symbols):",
        "_ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal",
        "Rules:",
        "1. Avoid falling into holes.",
        "2. Frozen tiles are slippery, you may move perpendicular to your intended direction.",
        # The trailing space after "Up." is part of the existing prompt text.
        "Actions you can take: Left, Down, Right, Up. ",
    )
    return "".join(line + "\n" for line in guide_lines)

def init_observation_template(observation):
    """Wrap the first observation of an episode in its prompt text.

    Args:
        observation: Rendered initial state; interpolated into the text as-is.

    Returns:
        The "[Initial Observation]" prompt, terminated by a newline.
    """
    header = "[Initial Observation]:"
    call_to_action = "Decide your next action(s)."
    return f"{header}\n{observation}\n{call_to_action}\n"

def action_template(valid_action, observation):
    """Build the follow-up prompt shown after the agent's actions were applied.

    Args:
        valid_action: The action(s) extracted from the previous answer;
            interpolated into the text as-is.
        observation: Rendered state after those actions; interpolated as-is.

    Returns:
        The feedback prompt, terminated by a newline.
    """
    feedback_lines = [
        f"After your answer, the extracted valid action is {valid_action}.",
        "After that, the observation is:",
        f"{observation}",
        "Decide your next action(s).",
    ]
    return "\n".join(feedback_lines) + "\n"

def free_think_format_prompt(max_actions_per_step, action_sep, add_example=True):
    """Build the format instructions for the free-form reasoning response style.

    Args:
        max_actions_per_step: Upper bound on actions per turn, shown in the text.
        action_sep: Separator the agent must place between actions.
        add_example: When truthy, append a one-line "e.g." sample response.

    Returns:
        The instruction text, without a trailing newline.
    """
    instructions = "\n".join((
        f"You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.",
        # The trailing space after "answer." is part of the existing prompt text.
        "You should first give your thought process, and then your answer. ",
        "Your response should be in the format of:",
        "<think><reasoning>...</reasoning></think><answer>...</answer>",
    ))
    if not add_example:
        return instructions

    sample = (
        "e.g. <think><reasoning>I can see the target is on my down left, "
        "I should go down then left to reach the target</reasoning></think>"
        f"<answer>Down{action_sep}Left</answer>"
    )
    return f"{instructions}\n{sample}"

def no_think_format_prompt(max_actions_per_step, action_sep, add_example=True):
    """Build the format instructions for the answer-only response style.

    Args:
        max_actions_per_step: Upper bound on actions per turn, shown in the text.
        action_sep: Separator the agent must place between actions.
        add_example: When truthy, append a one-line "e.g." sample response.

    Returns:
        The instruction text, without a trailing newline.
    """
    instructions = "\n".join((
        f"You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.",
        "You should provide only your answer.",
        "Your response should be in the format of:",
        "<answer>...</answer>",
    ))
    if not add_example:
        return instructions

    sample = f"e.g. <answer>Down{action_sep}Left</answer>"
    return f"{instructions}\n{sample}"

def grounding_format_prompt(max_actions_per_step, action_sep, add_example=True):
    """Build the format instructions for the "describe state, reason, answer" style.

    The response layout asks for an <observation> describing the current
    state, followed by <reasoning>, both inside <think>, then <answer>.

    Args:
        max_actions_per_step: Upper bound on actions per turn, shown in the text.
        action_sep: Separator the agent must place between actions.
        add_example: When truthy, append a one-line "e.g." sample response.

    Returns:
        The instruction text, without a trailing newline.
    """
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give the current state, then your thought process, and finally your answer.
Your response should be in the format of:
<think><observation>...</observation><reasoning>...</reasoning></think><answer>...</answer>"""

    if add_example:
        # The sample observation previously read "is on the above the target";
        # the grammar is corrected so the model is not shown malformed text.
        example = f"""e.g. <think><observation>The player is above the target</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>"""
        return base_prompt + '\n' + example
    return base_prompt

def worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
    """Build the format instructions for the "reason, predict next state, answer" style.

    The instructions require the predicted state to be written as
    {"player":(row,column),"target":(row,column)}; the response layout puts
    <reasoning> and <prediction> inside <think>, followed by <answer>.

    Args:
        max_actions_per_step: Upper bound on actions per turn, shown in the text.
        action_sep: Separator the agent must place between actions.
        add_example: When truthy, append a one-line "e.g." sample response.

    Returns:
        The instruction text, without a trailing newline.
    """
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give your thought process, then your answer, and finally predict the next state.
The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
Your response should be in the format of:
<think><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>"""

    if add_example:
        # The sample prediction uses the state format mandated above (it used to
        # be free text, contradicting the instructions), with the same values as
        # the grounding_worldmodeling example: player ends on the target.
        example = f"""e.g. <think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>{{"player":(3,2),"target":(3,2)}}</prediction></think><answer>Down{action_sep}Left</answer>"""
        return base_prompt + '\n' + example
    return base_prompt

def grounding_worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
    """Build the format instructions combining state description and prediction.

    The response layout asks for <observation>, <reasoning> and <prediction>
    inside <think>, followed by <answer>; both states use the
    {"player":(row,column),"target":(row,column)} notation.

    Args:
        max_actions_per_step: Upper bound on actions per turn, shown in the text.
        action_sep: Separator the agent must place between actions.
        add_example: When truthy, append a one-line "e.g." sample response.

    Returns:
        The instruction text, without a trailing newline.
    """
    state_schema = '{"player":(row1,column1),"target":(row2,column2)}'
    response_layout = (
        f"<think><observation>{state_schema}</observation>"
        "<reasoning>...</reasoning>"
        f"<prediction>{state_schema}</prediction></think>"
        "<answer>...</answer>"
    )
    instructions = "\n".join((
        f"You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.",
        "You should first give the current state, then your thought process, then your answer, and finally predict the next state.",
        f"The state should be in the format of {state_schema}",
        "Your response should be in the format of:",
        response_layout,
    ))
    if not add_example:
        return instructions

    sample = (
        'e.g. <think><observation>{"player":(2,3),"target":(3,2)}</observation>'
        "<reasoning>I should go down then left to reach the target</reasoning>"
        '<prediction>{"player":(3,2),"target":(3,2)}</prediction></think>'
        f"<answer>Down{action_sep}Left</answer>"
    )
    return instructions + "\n" + sample

def grounding_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
    """Build the format instructions for the "state as symbol grid, reason, answer" style.

    The response layout asks for an <observation> holding the current state
    as a symbol grid, followed by <reasoning>, both inside <think>, then
    <answer>.

    Args:
        max_actions_per_step: Upper bound on actions per turn, shown in the text.
        action_sep: Separator the agent must place between actions.
        add_example: When truthy, append an "e.g." sample response.

    Returns:
        The instruction text, without a trailing newline.
    """
    # Sample 4x4 grid. The third row used to be "*OO*", but "*" is not part of
    # the symbol legend given in the instructions; frozen tiles are "_".
    current_grid = "\n".join(("_P__", "G___", "_OO_", "____"))

    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give the current state as a grid, then your thought process, and finally your answer.
The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.
Your response should be in the format of:
<think><observation>{current_grid}</observation><reasoning>...</reasoning></think><answer>...</answer>"""

    if add_example:
        example = f"""e.g. <think><observation>{current_grid}</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>"""
        return base_prompt + '\n' + example
    return base_prompt

def worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
    """Build the format instructions for the "reason, predict next grid, answer" style.

    The response layout puts <reasoning> and a <prediction> holding the next
    state as a symbol grid inside <think>, followed by <answer>.

    Args:
        max_actions_per_step: Upper bound on actions per turn, shown in the text.
        action_sep: Separator the agent must place between actions.
        add_example: When truthy, append an "e.g." sample response.

    Returns:
        The instruction text, without a trailing newline.
    """
    # Sample predicted 4x4 grid. The third row used to be "*OO*", but "*" is
    # not part of the symbol legend given in the instructions; frozen tiles
    # are "_".
    predicted_grid = "\n".join(("____", "√___", "_OO_", "____"))

    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give your thought process, then your answer, and finally predict the next state as a grid.
The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.
Your response should be in the format of:
<think><reasoning>...</reasoning><prediction>{predicted_grid}</prediction></think><answer>...</answer>"""

    if add_example:
        example = f"""e.g. <think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>{predicted_grid}</prediction></think><answer>Down{action_sep}Left</answer>"""
        return base_prompt + '\n' + example
    return base_prompt

def grounding_worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
    """Build the format instructions combining a current-state grid and a predicted grid.

    The response layout asks for an <observation> grid, <reasoning> and a
    <prediction> grid inside <think>, followed by <answer>.

    Args:
        max_actions_per_step: Upper bound on actions per turn, shown in the text.
        action_sep: Separator the agent must place between actions.
        add_example: When truthy, append an "e.g." sample response.

    Returns:
        The instruction text, without a trailing newline.
    """
    # Sample 4x4 grids before and after the "Down, Left" move. The third row
    # of each used to be "*OO*", but "*" is not part of the symbol legend
    # given in the instructions; frozen tiles are "_".
    current_grid = "\n".join(("_P__", "G___", "_OO_", "____"))
    predicted_grid = "\n".join(("____", "√___", "_OO_", "____"))

    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give the current state as a grid, then your thought process, then your answer, and finally predict the next state as a grid.
The state should be represented as grids using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.
Your response should be in the format of:
<think><observation>{current_grid}</observation><reasoning>...</reasoning><prediction>{predicted_grid}</prediction></think><answer>...</answer>"""

    if add_example:
        example = f"""e.g. <think><observation>{current_grid}</observation><reasoning>I should go down then left to reach the target</reasoning><prediction>{predicted_grid}</prediction></think><answer>Down{action_sep}Left</answer>"""
        return base_prompt + '\n' + example
    return base_prompt

# Registry of response-format names -> the builder that produces the matching
# instruction text. Each builder takes (max_actions_per_step, action_sep,
# add_example=True) and returns a string.
format_prompt = dict(
    free_think=free_think_format_prompt,
    no_think=no_think_format_prompt,
    grounding=grounding_format_prompt,
    worldmodeling=worldmodeling_format_prompt,
    grounding_worldmodeling=grounding_worldmodeling_format_prompt,
    grounding_symbol=grounding_symbol_format_prompt,
    worldmodeling_symbol=worldmodeling_symbol_format_prompt,
    grounding_worldmodeling_symbol=grounding_worldmodeling_symbol_format_prompt,
)
 No newline at end of file
+10 −0
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@ def generate_seeds(size,config,min_actions_to_succeed=5,seed=0,n_candidate: int
    pool.join()

    valid_seeds_with_actions = [(seed, gt_action_sequence) for seed, gt_action_sequence in results if gt_action_sequence and len(gt_action_sequence) <= min_actions_to_succeed]
    seed_to_length = {seed: len(gt_action_sequence) for seed, gt_action_sequence in valid_seeds_with_actions}
    valid_seeds = [seed for seed, _ in valid_seeds_with_actions]
    action_lengths = [len(gt_action_sequence) for _, gt_action_sequence in valid_seeds_with_actions]
    for _, gt_action_sequence in valid_seeds_with_actions:
@@ -76,6 +77,15 @@ def generate_seeds(size,config,min_actions_to_succeed=5,seed=0,n_candidate: int
    for action, count in sorted(action_count.items(), key=lambda x: x[1], reverse=True):
        percentage = (count / len(valid_seeds_with_actions)) * 100
        print(f"  {action}: {count} instances ({percentage:.2f}%)")
    
    distribution_of_generated_seeds = defaultdict(int)
    for seed in valid_seeds[:size]:
        length = seed_to_length[seed]
        distribution_of_generated_seeds[length] += 1
    print("\nDistribution of generated seeds:")
    for length, count in sorted(distribution_of_generated_seeds.items(), key=lambda x: x[0]):
        percentage = (count / len(valid_seeds)) * 100
        print(f"  Length {length}: {count} instances ({percentage:.2f}%)")
    return valid_seeds[:size]
        
def get_shortest_action_path(room_fixed, room_state, MAX_DEPTH=100):