Commit fe3c7ce2 authored by jameskrw's avatar jameskrw
Browse files

prompt refactoring

parent 3e772d71
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ from gymnasium.envs.toy_text.frozen_lake import FrozenLakeEnv as GymFrozenLakeEn
from vagen.env.utils.env_utils import NoLoggerWarnings, set_seed
from vagen.env.utils.context_utils import convert_numpy_to_PIL
from vagen.env.utils.parse_utils_4 import parse_function_map
from .prompt_4 import system_prompt, init_observation_template, action_template, format_prompt
from .prompt import system_prompt, init_observation_template, action_template, format_prompt
from .env_config import FrozenLakeEnvConfig
from .utils import generate_random_map, is_valid

@@ -267,12 +267,12 @@ class FrozenLakeEnv(BaseEnv):
        # Format the observation string using the appropriate template
        if init_obs:
            # Initial observation doesn't include action results
            obs_str = init_observation_template(img_str) + "\n" + format_prompt_text
            obs_str = init_observation_template(img_str=img_str) + "\n" + format_prompt_text
        else:
            # Subsequent observations include action results
            obs_str = action_template(
                valid_action=self.valid_actions,
                observation=img_str,
                img_str=img_str,
            ) + "\n" + format_prompt_text
        
        # Return observation dictionary with appropriate fields
+111 −114
Original line number Diff line number Diff line
def system_prompt():
def system_prompt(**kwargs):
    """Return the fixed system prompt for the FrozenLake agent.

    Keyword arguments are accepted for call-site uniformity but are not
    read; the returned text is constant.

    Returns:
        str: The system prompt, terminated by a newline.
    """
    prompt_lines = (
        "You are a FrozenLake solver.",
        "",
        "FrozenLake Quick Guide",
        "Goal: Reach the goal (G).",
        "",
        "Symbols (If image is provided there are no symbols):",
        "_ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal",
        "",
        "Rules:",
        "1. Avoid falling into holes.",
        "2. Frozen tiles are slippery, you may move perpendicular to your intended direction.",
        "",
        # The trailing space after "Up." belongs to the prompt text as written.
        "Actions you can take: Left, Down, Right, Up. ",
    )
    return "\n".join(prompt_lines) + "\n"

def init_observation_template(observation):
def init_observation_template(**kwargs):
    """Build the prompt text for the very first observation of an episode.

    Args:
        **kwargs: Only ``img_str`` is read: the observation text to embed.
            A fixed sample sentence is used when it is omitted.

    Returns:
        str: The initial-observation prompt, terminated by a newline.
    """
    img_str = kwargs.get("img_str", "The player is on the above the target")
    # Interpolate only names bound above. The template previously also
    # referenced ``observation``, which this keyword-only signature never
    # defines, so every call raised NameError.
    return f"""[Initial Observation]:
{img_str}
Decide your next action(s).
"""

def action_template(valid_action, observation):
def action_template(**kwargs):
    """Build the prompt text shown after an action has been executed.

    Args:
        **kwargs: Recognised keys:
            valid_action: The action(s) extracted from the previous answer
                (defaults to ``"Down"``).
            observation: The observation text to embed.
            img_str: Alternative key for the observation text, used when
                ``observation`` is not supplied.

    Returns:
        str: The follow-up observation prompt, terminated by a newline.
    """
    valid_action = kwargs.get("valid_action", "Down")
    fallback_observation = "The player is on the above the target"
    # The environment passes the observation as ``img_str``. Reading only
    # ``observation`` silently replaced the real observation with the
    # fallback sentence. ``observation`` still wins when given, so existing
    # callers see no change.
    observation = kwargs.get("observation", kwargs.get("img_str", fallback_observation))
    return f"""After your answer, the extracted valid action is {valid_action}.
After that, the observation is:
{observation}
Decide your next action(s).
"""

def free_think_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give your thought process, and then your answer. 
Your response should be in the format of:
<think>...</think><answer>...</answer>"""
# Format configurations defining the structure of each response format.
#
# Each entry maps a format name to:
#   "format":          the tag skeleton the response must follow
#   "description":     one-sentence instruction on what to write, in order
#   "additional_info": (optional) extra constraints on the state notation
#   "example":         a sample response; "{action_sep}" is a str.format
#                      placeholder for the action separator
#
# Every "format" ends with <answer>...</answer> so the skeleton agrees with its
# own description and example. Example grids use only the symbols listed in
# "additional_info" ("_" for frozen tiles, never "*").
FORMAT_CONFIGS = {
    "free_think": {
        "format": "<think>...</think><answer>...</answer>",
        "description": "You should first give your reasoning, and then your answer.",
        "example": "<think>I can see the target is on my down left, I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer>"
    },

    "no_think": {
        "format": "<answer>...</answer>",
        "description": "You should provide only your answer.",
        "example": "<answer>Down{action_sep}Left</answer>"
    },

    "grounding": {
        "format": "<think><observation>...</observation><reasoning>...</reasoning></think><answer>...</answer>",
        "description": "You should first describe the observation, then your reasoning, and finally your answer.",
        "example": "<think><observation>The player is on the above the target</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>"
    },

    "worldmodeling": {
        "format": "<think><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>",
        "description": "You should first give your reasoning, then predict the next state, and finally your answer.",
        "example": "<think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>The player will reach the target</prediction></think><answer>Down{action_sep}Left</answer>"
    },

    "grounding_worldmodeling": {
        "format": "<think><observation>...</observation><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>",
        "description": "You should first describe the observation, then your reasoning, then predict the next state, and finally your answer.",
        "example": "<think><observation>The player is on the above the target</observation><reasoning>I should go down then left to reach the target</reasoning><prediction>The player will reach the target</prediction></think><answer>Down{action_sep}Left</answer>"
    },

    "grounding_symbol": {
        "format": "<think><observation>...</observation><reasoning>...</reasoning></think><answer>...</answer>",
        "description": "You should first describe the observation as a grid, then your reasoning, and finally your answer.",
        "additional_info": "The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.",
        "example": "<think><observation>_P__\nG___\n_OO_\n____</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>"
    },

    "worldmodeling_symbol": {
        "format": "<think><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>",
        "description": "You should first give your reasoning, then predict the next state, and finally your answer.",
        "additional_info": "The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.",
        "example": "<think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>____\n√___\n_OO_\n____</prediction></think><answer>Down{action_sep}Left</answer>"
    },

    "grounding_worldmodeling_symbol": {
        "format": "<think><observation>...</observation><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>",
        "description": "You should first describe the observation as a grid, then your reasoning, then predict the next state, and finally your answer.",
        "additional_info": "The state should be represented as grids using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.",
        "example": "<think><observation>_P__\nG___\n_OO_\n____</observation><reasoning>I should go down then left to reach the target</reasoning><prediction>____\n√___\n_OO_\n____</prediction></think><answer>Down{action_sep}Left</answer>"
    }
}

    if add_example:
        example = f"""e.g. <think>I can see the target is on my down left, I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer>"""
        return base_prompt + '\n' + example
    return base_prompt
def format_prompt_generator(format_type):
    """
    Generates a prompt function for the specified format type.
    
def no_think_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should provide only your answer.
Your response should be in the format of:
<answer>...</answer>"""
    Args:
        format_type (str): The format type to generate a prompt function for
        
    if add_example:
        example = f"""e.g. <answer>Down{action_sep}Left</answer>"""
        return base_prompt + '\n' + example
    return base_prompt
    Returns:
        function: A function that generates a prompt for the specified format
    """
    def prompt_function(**kwargs):
        """
        Generate a prompt for the specified format.
        
def grounding_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give the current state, then your thought process, and finally your answer.
The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
Your response should be in the format of:
<current_state>...</current_state><think>...</think><answer>...</answer>"""
        Args:
            max_actions_per_step (int): Maximum number of actions allowed per step
            action_sep (str): Separator between actions
            add_example (bool): Whether to add an example
            
    if add_example:
        example = f"""e.g. <current_state>{{"player":(2,3),"target":(3,2)}}</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer>"""
        return base_prompt + '\n' + example
    return base_prompt
        Returns:
            str: The formatted prompt
        """
        max_actions_per_step = kwargs.get("max_actions_per_step", 1)
        action_sep = kwargs.get("action_sep", "|")
        add_example = kwargs.get("add_example", False)
        config = FORMAT_CONFIGS[format_type]
        
def worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
        # Build the base prompt text
        base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give your thought process, then your answer, and finally predict the next state.
The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
Your response should be in the format of:
<think>...</think><answer>...</answer><next_state>...</next_state>"""
{config["description"]}"""
        
    if add_example:
        example = f"""e.g. <think>I can see the target is on my down left, I should go down then left</think><answer>Down{action_sep}Left</answer><next_state>{{"player":(3,2),"target":(3,2)}}</next_state>"""
        return base_prompt + '\n' + example
    return base_prompt
        # Add additional information if available
        if "additional_info" in config:
            base_prompt += f"\n{config['additional_info']}"
        
def grounding_worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give the current state, then your thought process, then your answer, and finally predict the next state.
The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
        # Add response format instruction
        base_prompt += f"""
Your response should be in the format of:
<current_state>...</current_state><think>...</think><answer>...</answer><next_state>...</next_state>"""
{config["format"]}"""
        
        # Add example if requested
        if add_example:
        example = f"""e.g. <current_state>{{"player":(2,3),"target":(3,2)}}</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer><next_state>{{"player":(3,2),"target":(3,2)}}</next_state>"""
        return base_prompt + '\n' + example
    return base_prompt
            example = config["example"].format(action_sep=action_sep)
            return base_prompt + '\n' + f"e.g. {example}"
        
def grounding_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
    """Build the format instructions for the symbolic "grounding" response style.

    Args:
        max_actions_per_step: Maximum number of actions allowed in one answer.
        action_sep: Separator the agent must place between actions.
        add_example: When true, append a worked example after the instructions.

    Returns:
        str: The instruction text, followed by a newline and an example when
        ``add_example`` is true.
    """
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give the current state as a grid, then your thought process, and finally your answer.
The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.
Your response should be in the format of:
<current_state>...</current_state><think>...</think><answer>...</answer>"""

    if add_example:
        example = f"""e.g. <current_state>_P__
G___
_OO_
____</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer>"""
        return base_prompt + '\n' + example
    # This return must sit outside the ``if``. Nested under it, the statement
    # was unreachable and the function returned None when no example was
    # requested.
    return base_prompt
    
def worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
    """Build the format instructions for the symbolic "world modeling" response style.

    Args:
        max_actions_per_step: Maximum number of actions allowed in one answer.
        action_sep: Separator the agent must place between actions.
        add_example: When true, append a worked example after the instructions.

    Returns:
        str: The instruction text, followed by a newline and an example when
        ``add_example`` is true.
    """
    instruction_lines = [
        f"You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.",
        "You should first give your thought process, then your answer, and finally predict the next state as a grid.",
        "The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.",
        "Your response should be in the format of:",
        "<think>...</think><answer>...</answer><next_state>...</next_state>",
    ]
    if not add_example:
        return "\n".join(instruction_lines)

    # The predicted grid spans four rows inside the <next_state> tag.
    predicted_grid = "\n".join(("____", "√___", "_OO_", "____"))
    instruction_lines.append(
        "e.g. <think>I can see the target is on my down left, I should go down then left</think>"
        f"<answer>Down{action_sep}Left</answer><next_state>{predicted_grid}</next_state>"
    )
    return "\n".join(instruction_lines)
    return prompt_function

def grounding_worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give the current state as a grid, then your thought process, then your answer, and finally predict the next state as a grid.
The state should be represented as grids using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.
Your response should be in the format of:
<current_state>...</current_state><think>...</think><answer>...</answer><next_state>...</next_state>"""
# Build the lookup table of prompt functions: one entry per key of
# FORMAT_CONFIGS, whose value is the function returned by
# format_prompt_generator for that key.
format_prompt = {format_type: format_prompt_generator(format_type) 
                for format_type in FORMAT_CONFIGS}

    if add_example:
        example = f"""e.g. <current_state>_P__
G___
_OO_
____</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer><next_state>____
√___
_OO_
____</next_state>"""
        return base_prompt + '\n' + example
    return base_prompt
if __name__ == "__main__":
    # Example usage
    max_actions_per_step = 2
    action_sep = "|"
    
# Lookup table from format name (string key) to the function that builds the
# format instructions for that response style. Each function is called with
# the per-step action limit and the action separator.
format_prompt = {
    "free_think": free_think_format_prompt,
    "no_think": no_think_format_prompt,
    "grounding": grounding_format_prompt,
    "worldmodeling": worldmodeling_format_prompt,
    "grounding_worldmodeling": grounding_worldmodeling_format_prompt,
    "grounding_symbol": grounding_symbol_format_prompt,
    "worldmodeling_symbol": worldmodeling_symbol_format_prompt,
    "grounding_worldmodeling_symbol": grounding_worldmodeling_symbol_format_prompt
}
 No newline at end of file
    for key, func in format_prompt.items():
        print(f"{key} format prompt:")
        print(func(max_actions_per_step=max_actions_per_step, action_sep=action_sep))
        print("\n" + "="*50 + "\n")
 No newline at end of file
+34 −39
Original line number Diff line number Diff line
def system_prompt():
    """Return the fixed system prompt for the FrozenLake agent.

    Returns:
        str: The system prompt, terminated by a newline.
    """
    prompt_lines = (
        "You are a FrozenLake solver.",
        "",
        "FrozenLake Quick Guide",
        "Goal: Reach the goal (G).",
        "",
        "Symbols (If image is provided there are no symbols):",
        "_ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal",
        "",
        "Rules:",
        "1. Avoid falling into holes.",
        "2. Frozen tiles are slippery, you may move perpendicular to your intended direction.",
        "",
        # The trailing space after "Up." belongs to the prompt text as written.
        "Actions you can take: Left, Down, Right, Up. ",
    )
    return "\n".join(prompt_lines) + "\n"

@@ -25,7 +29,7 @@ Decide your next action(s).

def free_think_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give your reasoning, and then your answer. 
You should first give your thought process, and then your answer. 
Your response should be in the format of:
<think>...</think><answer>...</answer>"""
    
@@ -47,94 +51,85 @@ Your response should be in the format of:

def grounding_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first describe the observation, then your reasoning, and finally your answer.
You should first give the current state, then your thought process, and finally your answer.
The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
Your response should be in the format of:
<think><observation>...</observation><reasoning>...</reasoning></think><answer>...</answer>"""
<current_state>...</current_state><think>...</think><answer>...</answer>"""
    
    if add_example:
        example = f"""e.g. <think><observation>The player is on the above the target</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>"""
        example = f"""e.g. <current_state>{{"player":(2,3),"target":(3,2)}}</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer>"""
        return base_prompt + '\n' + example
    return base_prompt

def worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give your reasoning, then predict the next state, and finally your answer.
You should first give your thought process, then your answer, and finally predict the next state.
The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
Your response should be in the format of:
<think><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>"""
<think>...</think><answer>...</answer><next_state>...</next_state>"""
    
    if add_example:
        example = f"""e.g. <think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>The player will reach the target</prediction></think><answer>Down{action_sep}Left</answer>"""
        example = f"""e.g. <think>I can see the target is on my down left, I should go down then left</think><answer>Down{action_sep}Left</answer><next_state>{{"player":(3,2),"target":(3,2)}}</next_state>"""
        return base_prompt + '\n' + example
    return base_prompt

def grounding_worldmodeling_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first describe the observation, then your reasoning, then predict the next state, and finally your answer.
You should first give the current state, then your thought process, then your answer, and finally predict the next state.
The state should be in the format of {{"player":(row1,column1),"target":(row2,column2)}}
Your response should be in the format of:
<think><observation>...</observation><reasoning>...</reasoning><prediction>...</prediction></think><answer>...</answer>"""
<current_state>...</current_state><think>...</think><answer>...</answer><next_state>...</next_state>"""
    
    if add_example:
        example = f"""e.g. <think><observation>The player is on the above the target</observation><reasoning>I should go down then left to reach the target</reasoning><prediction>The player will reach the target</prediction></think><answer>Down{action_sep}Left</answer>"""
        example = f"""e.g. <current_state>{{"player":(2,3),"target":(3,2)}}</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer><next_state>{{"player":(3,2),"target":(3,2)}}</next_state>"""
        return base_prompt + '\n' + example
    return base_prompt

def grounding_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first describe the observation as a grid, then your reasoning, and finally your answer.
You should first give the current state as a grid, then your thought process, and finally your answer.
The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.
Your response should be in the format of:
<think><observation>_P__
G___
*OO*
____</observation><reasoning>...</reasoning></think><answer>...</answer>"""
<current_state>...</current_state><think>...</think><answer>...</answer>"""
    
    if add_example:
        example = f"""e.g. <think><observation>_P__
        example = f"""e.g. <current_state>_P__
G___
*OO*
____</observation><reasoning>I should go down then left to reach the target</reasoning></think><answer>Down{action_sep}Left</answer>"""
_OO_
____</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer>"""
        return base_prompt + '\n' + example
    return base_prompt

def worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first give your reasoning, then predict the next state, and finally your answer.
You should first give your thought process, then your answer, and finally predict the next state as a grid.
The state should be represented as a grid using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.
Your response should be in the format of:
<think><reasoning>...</reasoning><prediction>____
√___
*OO*
____</prediction></think><answer>...</answer>"""
<think>...</think><answer>...</answer><next_state>...</next_state>"""
    
    if add_example:
        example = f"""e.g. <think><reasoning>I can see the target is on my down left, I should go down then left</reasoning><prediction>____
        example = f"""e.g. <think>I can see the target is on my down left, I should go down then left</think><answer>Down{action_sep}Left</answer><next_state>____
√___
*OO*
____</prediction></think><answer>Down{action_sep}Left</answer>"""
_OO_
____</next_state>"""
        return base_prompt + '\n' + example
    return base_prompt

def grounding_worldmodeling_symbol_format_prompt(max_actions_per_step, action_sep, add_example=True):
    base_prompt = f"""You can take up to {max_actions_per_step} action(s) at a time, separated by {action_sep}.
You should first describe the observation as a grid, then your reasoning, then predict the next state, and finally your answer.
You should first give the current state as a grid, then your thought process, then your answer, and finally predict the next state as a grid.
The state should be represented as grids using the symbols: _ Frozen | O Hole | G Goal | P Player | X Player fell into hole | √ Player on goal.
Your response should be in the format of:
<think><observation>_P__
G___
*OO*
____</observation><reasoning>...</reasoning><prediction>____
√___
*OO*
____</prediction></think><answer>...</answer>"""
<current_state>...</current_state><think>...</think><answer>...</answer><next_state>...</next_state>"""
    
    if add_example:
        example = f"""e.g. <think><observation>_P__
        example = f"""e.g. <current_state>_P__
G___
*OO*
____</observation><reasoning>I should go down then left to reach the target</reasoning><prediction>____
_OO_
____</current_state><think>I should go down then left to reach the target</think><answer>Down{action_sep}Left</answer><next_state>____
√___
*OO*
____</prediction></think><answer>Down{action_sep}Left</answer>"""
_OO_
____</next_state>"""
        return base_prompt + '\n' + example
    return base_prompt

Loading