Commit ba1052dd authored by YaningGao's avatar YaningGao
Browse files

temp

parents 0e234e3b c4b89752
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -21,6 +21,6 @@ export PYTHONPATH=$(pwd):$PYTHONPATH

# Run benchmark
echo "Running environment benchmark"
python -m vagen.utils_benchmark.env_benchmark --config "$SCRIPT_DIR/benchmark_config.yaml"
python -m vagen.env.verify_env --config "$SCRIPT_DIR/benchmark_config.yaml"

echo "Benchmark complete. Results saved to env_benchmark_results directory."
 No newline at end of file
+2 −2
Original line number Diff line number Diff line
@@ -39,8 +39,8 @@ python3 -m vagen.trainer.main_ppo \
    actor_rollout_ref.actor.kl_loss_coef=0.001 \
    actor_rollout_ref.actor.kl_loss_type=mse \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.actor.fsdp_config.param_offload=False \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
    actor_rollout_ref.actor.fsdp_config.param_offload=True \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \
    actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
    actor_rollout_ref.rollout.name=vllm \
+1 −1
Original line number Diff line number Diff line
@@ -15,7 +15,7 @@ output_dir: "inference_outputs"
# WandB configuration
use_wandb: true
wandb_project: "vagen-inference"
val_generations_to_log_to_wandb: 10 
val_generations_to_log_to_wandb: 64 

# Display settings
show_progress: true
+11 −11
Original line number Diff line number Diff line
@@ -7,11 +7,17 @@ models:
  #   tensor_parallel_size: 2
  #   gpu_memory_utilization: 0.9

  qwen_vl_3b_api:
    provider: routerapi
    model_name: qwen/qwen2.5-vl-3b-instruct:free
    max_tokens: 150
    temperature: 0.7
  # qwen_vl_3b_api:
  #   provider: routerapi
  #   model_name: qwen/qwen2.5-vl-3b-instruct:free
  #   max_tokens: 150
  #   temperature: 0.7

  # VLM-R1-MATH:
  #   provider: vllm
  #   model_name: omlab/VLM-R1-Qwen2.5VL-3B-Math-0305
  #   max_tokens: 150
  #   temperature: 0.7

  # gpt4o:
  #   provider: openai
@@ -45,12 +51,6 @@ models:
  #   batch_poll_interval: 10  # Poll every 10 seconds
  #   batch_max_wait_time: 7200  # Wait up to 2 hours
  
  # VLM-R1-MATH:
  #   provider: vllm
  #   model_name: omlab/VLM-R1-Qwen2.5VL-3B-Math-0305
  #   max_tokens: 150
  #   temperature: 0.7
  
  # VLM-R1-OVD:
  #   provider: vllm
  #   model_name: omlab/VLM-R1-Qwen2.5VL-3B-OVD-0321
+9 −9
Original line number Diff line number Diff line
@@ -7,15 +7,15 @@ models:
  #   tensor_parallel_size: 2
  #   gpu_memory_utilization: 0.9

  # gpt4o:
  #   provider: openai
  #   model_name: gpt-4o
  #   max_tokens: 150
  #   temperature: 0.7
  #   presence_penalty: 0.0
  #   frequency_penalty: 0.0
  #   max_retries: 3
  #   timeout: 60
  gpt4o:
    provider: openai
    model_name: gpt-4o
    max_tokens: 150
    temperature: 0.7
    presence_penalty: 0.0
    frequency_penalty: 0.0
    max_retries: 3
    timeout: 60

  # claude_3_sonnet:
  #   provider: claude
Loading