Unverified Commit e51eb887 authored by Yaning Gao's avatar Yaning Gao Committed by GitHub
Browse files

Merge pull request #35 from RAGEN-AI/yn_dev

update inference logic
parents b697eb7b b22115a7
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -5,4 +5,4 @@ env1:
        use_state_reward: false
        prompt_format: free_think
    train_size: 10000 
    test_size: 1
 No newline at end of file
    test_size: 128
 No newline at end of file
+10 −0
Original line number Diff line number Diff line
# env1: frozenlake environment rendered in vision mode.
env1:
    env_name: frozenlake
    env_config:
        render_mode: vision
        prompt_format: grounding_worldmodeling
        # State reward is enabled; the grounding and world-modeling
        # weights below are equal (0.5 each).
        use_state_reward: true
        grounding_reward_weight: 0.5
        worldmodeling_reward_weight: 0.5
    # Split sizes — presumably the number of generated samples per split;
    # TODO confirm against the dataset-creation code.
    train_size: 10000
    test_size: 64
 No newline at end of file
+23 −0
Original line number Diff line number Diff line
# Environment variables exported to the Python processes launched below.
export VLLM_ATTENTION_BACKEND=XFORMERS
export PYTHONHASHSEED=0

# Absolute path of the directory containing this script.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Extract experiment name from the path.
# This takes the last 2 parts of the path and joins them with '-',
# e.g. .../sokoban/free_think -> sokoban-free_think
# (rev | cut -f1-2 | rev selects the trailing path components).
# SCRIPT_DIR is quoted so paths containing spaces or glob characters
# are not word-split or expanded.
EXPERIMENT_NAME=$(echo "$SCRIPT_DIR" | rev | cut -d'/' -f1-2 | rev | tr '/' '-')

echo "Experiment name: $EXPERIMENT_NAME"
# run python -m vagen.server.server in a tmux session first

# Step 1: (re)generate the train/test parquet files from the env config
# that sits next to this script.
python -m vagen.env.create_dataset \
    --yaml_path "$SCRIPT_DIR/env_config.yaml" \
    --train_path "data/$EXPERIMENT_NAME/train.parquet" \
    --test_path "data/$EXPERIMENT_NAME/test.parquet" \
    --force_gen

# Step 2: run inference on the generated test split. The inference and
# model configs are read from the parent directory of SCRIPT_DIR.
# NOTE(review): this step runs even if dataset creation above failed —
# confirm whether that is intended.
python -m vagen.inference.run_inference \
    --inference_config_path="$(dirname "$SCRIPT_DIR")/inference_config.yaml" \
    --model_config_path="$(dirname "$SCRIPT_DIR")/model_config.yaml" \
    --val_files_path="data/$EXPERIMENT_NAME/test.parquet" \
    --wandb_path_name="$EXPERIMENT_NAME"
+34 −18
Original line number Diff line number Diff line
@@ -13,21 +13,27 @@ models:
  #   max_tokens: 150
  #   temperature: 0.7
  
  # qwen_vl_72b:
  #   provider: together
  #   model_name: Qwen/Qwen2.5-VL-72B-Instruct
  #   max_tokens: 150
  #   temperature: 0.7

  # VLM-R1-MATH:
  #   provider: vllm
  #   model_name: omlab/VLM-R1-Qwen2.5VL-3B-Math-0305
  #   max_tokens: 150
  #   temperature: 0.7

  qwen_vl_3b_api:
    provider: routerapi
    model_name: qwen/qwen2.5-vl-3b-instruct:free
    max_tokens: 150
    temperature: 0.7
  # qwen_vl_3b_api:
  #   provider: routerapi
  #   model_name: qwen/qwen2.5-vl-3b-instruct:free
  #   max_tokens: 150
  #   temperature: 0.7

  # gpt4o:
  #   provider: openai
  #   model_name: gpt-4o
  # claude_3_sonnet:
  #   provider: claude
  #   model_name: claude-3-7-sonnet-20250219
  #   max_tokens: 150
  #   temperature: 0.7
  #   presence_penalty: 0.0
@@ -35,18 +41,28 @@ models:
  #   max_retries: 3
  #   timeout: 60

  claude_3_sonnet:
    provider: claude
    model_name: claude-3-7-sonnet-20250219
    max_tokens: 150
    temperature: 0.7
  # claude_3_sonnet:
  #   provider: claude
  #   model_name: claude-3-7-sonnet-20250219
  #   max_tokens: 150
  #   temperature: 0.7
  #   presence_penalty: 0.0
  #   frequency_penalty: 0.0
  #   max_retries: 3
  #   timeout: 60

  # Gemini_2.5_flask:
  #     provider: gemini
  #     model_name: gemini-2.5-flash-preview-04-17
  # claude_3_sonnet:
  #   provider: claude
  #   model_name: claude-3-7-sonnet-20250219
  #   max_tokens: 150
  #   temperature: 0.7
  
  Gemini_2.0_flask:
      provider: gemini
      model_name: gemini-2.0-flash
      max_tokens: 150
      temperature: 0.7
  
  # # Batch processing configuration example
  # claude_3_haiku_batch:
  #   provider: claude
+44 −0
Original line number Diff line number Diff line
# Four primitive_skill environment entries that share every setting
# except env_id.
# env_id options listed by the original author:
#   AlignTwoCube, PlaceTwoCube, PutAppleInDrawer, StackThreeCube

# env1: AlignTwoCube
env1:
    env_name: primitive_skill
    env_config:
        render_mode: vision
        prompt_format: grounding_worldmodeling_structured
        use_state_reward: true
        grounding_reward_weight: 100
        worldmodeling_reward_weight: 100
        env_id: 'AlignTwoCube'
    train_size: 10000
    test_size: 4

# env2: PlaceTwoCube
env2:
    env_name: primitive_skill
    env_config:
        render_mode: vision
        prompt_format: grounding_worldmodeling_structured
        use_state_reward: true
        grounding_reward_weight: 100
        worldmodeling_reward_weight: 100
        env_id: 'PlaceTwoCube'
    train_size: 10000
    test_size: 4

# env3: PutAppleInDrawer
env3:
    env_name: primitive_skill
    env_config:
        render_mode: vision
        prompt_format: grounding_worldmodeling_structured
        use_state_reward: true
        grounding_reward_weight: 100
        worldmodeling_reward_weight: 100
        env_id: 'PutAppleInDrawer'
    train_size: 10000
    test_size: 4

# env4: StackThreeCube
env4:
    env_name: primitive_skill
    env_config:
        render_mode: vision
        prompt_format: grounding_worldmodeling_structured
        use_state_reward: true
        grounding_reward_weight: 100
        worldmodeling_reward_weight: 100
        env_id: 'StackThreeCube'
    train_size: 10000
    test_size: 4
Loading