Merge branch 'main' of github.com:JamesKrW/vagen (08291d44) · Commits · 张泽凯 / VAGEN

scripts/benchmark/env_benchmark/sokoban/benchmark_config.yaml

0 → 100644

+34 −0

Original line number	Diff line number	Diff line
		# Benchmark parameters
		# NOTE(review): the nesting below was reconstructed from a flattened copy,
		# using the section comments as a guide; confirm against the consumer's
		# schema, in particular that `datasets` is a top-level key.
		benchmark:
		  # Functions to benchmark
		  functions:
		    - system_prompt
		    - reset
		    - step
		    - compute_reward
		    - close

		  # Number of iterations for statistical significance
		  iterations: 3

		  # Number of steps to perform for each environment
		  step_count: 5

		  # Batch sizes to test
		  batch_sizes: [8, 128]

		  # Maximum number of parallel workers
		  max_workers: 8

		  # Directory to save benchmark results
		  output_dir: "env_benchmark_results"

		  # Valid commands for testing (optional, environment-specific)
		  valid_commands: ["Left", "Right", "Up", "Down"]

		# Datasets to use for benchmarking
		# Each list item is one mapping; the path/split keys must be indented under
		# the `- name:` entry so they belong to that dataset.
		datasets:
		  - name: sokoban-vision
		    train_path: data/sokoban-vision-benchmark/train.parquet
		    test_path: data/sokoban-vision-benchmark/test.parquet
		    use_split: both
		No newline at end of file

0 → 100644

+7 −0

0 → 100755

+26 −0

Original line number	Diff line number	Diff line
		#!/bin/bash
		# Setup script for environment benchmark
		#
		# Generates the sokoban-vision benchmark dataset, then runs the environment
		# benchmark using the config files that sit next to this script. Paths under
		# data/ and env_benchmark_results/ are relative to the current working
		# directory, so run this from the directory that should hold them.

		# Stop at the first failing command (and on unset variables / failed pipes)
		# so a failed dataset generation does not fall through to the benchmark and
		# the final "Benchmark complete" message.
		set -euo pipefail

		# Create required directories
		mkdir -p env_benchmark_results
		mkdir -p data

		# Directory containing this script; used to locate the YAML configs.
		SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

		# Set environment variables
		# Exported before any python invocation so both steps see the same module
		# search path. The ${VAR:+...} form avoids a trailing ':' (and an unbound
		# variable error under `set -u`) when PYTHONPATH is not already set.
		export PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}"

		echo "Generating datasets for benchmark..."

		# Generate dataset
		python -m vagen.env.create_dataset \
		    --yaml_path "$SCRIPT_DIR/dataset_config.yaml" \
		    --train_path data/sokoban-vision-benchmark/train.parquet \
		    --test_path data/sokoban-vision-benchmark/test.parquet \
		    --force_gen

		# Run benchmark
		echo "Running environment benchmark"
		python -m vagen.utils_benchmark.env_benchmark --config "$SCRIPT_DIR/benchmark_config.yaml"

		echo "Benchmark complete. Results saved to env_benchmark_results directory."
		No newline at end of file

0 → 100644

+34 −0

Original line number	Diff line number	Diff line
		# Server configuration
		# NOTE(review): the nesting below was reconstructed from a flattened copy,
		# using the section comments as a guide; confirm against the consumer's
		# schema, in particular that `datasets` is a top-level key.
		server:
		  base_url: http://localhost:5000
		  # NOTE(review): unit of `timeout` is not stated here; presumably seconds.
		  timeout: 600
		  max_workers: 48

		# Benchmark parameters
		benchmark:
		  # Service functions to benchmark
		  functions:
		    - create_environments_batch
		    - reset_batch
		    - step_batch
		    - compute_reward_batch
		    - get_system_prompts_batch
		    - close_batch

		  # Number of iterations for statistical significance
		  iterations: 3

		  # Number of steps to perform for step_batch
		  step_count: 5

		  # Batch sizes to test
		  batch_sizes: [128]

		  # Directory to save benchmark results
		  output_dir: benchmark_results

		# Datasets to use for benchmarking
		# Each list item is one mapping; the path/split keys must be indented under
		# the `- name:` entry so they belong to that dataset.
		datasets:
		  - name: sokoban-vision
		    train_path: data/sokoban-vision-benchmark/train.parquet
		    test_path: data/sokoban-vision-benchmark/test.parquet
		    use_split: both
		No newline at end of file

0 → 100644

+7 −0