Commit 08291d44 authored by jameskrw
Browse files

Merge branch 'main' of github.com:JamesKrW/vagen

parents 5c1c6a3a 8a55cf49
Loading
Loading
Loading
Loading
+34 −0
Original line number Diff line number Diff line
# Settings for the environment benchmark run
benchmark:
  # Environment functions to benchmark, in the order listed
  functions:
    - system_prompt
    - reset
    - step
    - compute_reward
    - close

  # How many times each measurement is repeated (for statistical significance)
  iterations: 3

  # Steps to perform for each environment
  step_count: 5

  # Batch sizes exercised by the benchmark
  batch_sizes:
    - 8
    - 128

  # Upper bound on the number of parallel workers
  max_workers: 8

  # Directory where benchmark results are saved
  output_dir: 'env_benchmark_results'

  # Commands treated as valid during testing (optional, environment-specific)
  valid_commands:
    - 'Left'
    - 'Right'
    - 'Up'
    - 'Down'

# Datasets the benchmark runs against
datasets:
  - name: sokoban-vision
    train_path: data/sokoban-vision-benchmark/train.parquet
    test_path: data/sokoban-vision-benchmark/test.parquet
    use_split: both
 No newline at end of file
+7 −0
Original line number Diff line number Diff line
# Dataset entry named `sokoban-text`: the sokoban environment configured with
# the text render mode.
sokoban-text:
  env_name: sokoban
  # Options grouped under env_config; presumably forwarded to the environment
  # named by env_name -- TODO confirm against the dataset generator.
  env_config:
    render_mode: text
    max_actions_per_step: 1
  # NOTE(review): presumably the number of samples generated for the train and
  # test splits respectively -- confirm against the consumer of this file.
  train_size: 1000
  test_size: 64
+26 −0
Original line number Diff line number Diff line
#!/bin/bash
# Setup script for environment benchmark.
#
# Generates the benchmark dataset described by dataset_config.yaml, then runs
# the environment benchmark with benchmark_config.yaml. Both config files are
# looked up next to this script; the data/ and env_benchmark_results/
# directories are created relative to the current working directory.

# Stop at the first failing command (and on unset variables or failed
# pipelines) so that a failed dataset generation is never followed by a
# benchmark run and a misleading "Benchmark complete" message.
set -euo pipefail

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Where the generated parquet files are written
DATA_DIR="data/sokoban-vision-benchmark"

# Set environment variables before any python invocation so that dataset
# generation and the benchmark run with the same module search path.
# The previous PYTHONPATH is appended only when it is set and non-empty, which
# avoids a dangling ':' entry and keeps `set -u` from tripping on it.
export PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}"

# Create required directories
mkdir -p env_benchmark_results
mkdir -p "$DATA_DIR"

echo "Generating datasets for benchmark..."

# Generate dataset
python -m vagen.env.create_dataset \
    --yaml_path "$SCRIPT_DIR/dataset_config.yaml" \
    --train_path "$DATA_DIR/train.parquet" \
    --test_path "$DATA_DIR/test.parquet" \
    --force_gen

# Run benchmark
echo "Running environment benchmark"
python -m vagen.utils_benchmark.env_benchmark --config "$SCRIPT_DIR/benchmark_config.yaml"

echo "Benchmark complete. Results saved to env_benchmark_results directory."
 No newline at end of file
+34 −0
Original line number Diff line number Diff line
# Connection settings for the server under test
server:
  base_url: 'http://localhost:5000'
  timeout: 600
  max_workers: 48

# Settings for the service benchmark run
benchmark:
  # Service functions to benchmark, in the order listed
  functions:
    - create_environments_batch
    - reset_batch
    - step_batch
    - compute_reward_batch
    - get_system_prompts_batch
    - close_batch

  # How many times each measurement is repeated (for statistical significance)
  iterations: 3

  # Steps to perform for step_batch
  step_count: 5

  # Batch sizes exercised by the benchmark
  batch_sizes:
    - 128

  # Directory where benchmark results are saved
  output_dir: 'benchmark_results'

# Datasets the benchmark runs against
datasets:
  - name: sokoban-vision
    train_path: data/sokoban-vision-benchmark/train.parquet
    test_path: data/sokoban-vision-benchmark/test.parquet
    use_split: both
 No newline at end of file
+7 −0
Original line number Diff line number Diff line
# Dataset entry named `sokoban-vision`: the sokoban environment configured
# with the vision render mode.
sokoban-vision:
  env_name: sokoban
  # Options grouped under env_config; presumably forwarded to the environment
  # named by env_name -- TODO confirm against the dataset generator.
  env_config:
    render_mode: vision
    max_actions_per_step: 1
  # NOTE(review): presumably the number of samples generated for the train and
  # test splits respectively -- confirm against the consumer of this file.
  train_size: 1000
  test_size: 64
Loading