Commit cf4fa1b2 authored by YaningGao's avatar YaningGao
Browse files

Merge branch 'main' of github.com:RAGEN-AI/vagen

parents 6bc165be 670462b9
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -25,6 +25,6 @@ mkdir -p benchmark_results

# Run benchmark
echo "Running service benchmark"
python -m vagen.utils_benchmark.service_benchmark --config "$SCRIPT_DIR/benchmark_config.yaml"
python -m vagen.env.verify_service --config "$SCRIPT_DIR/benchmark_config.yaml"

echo "Benchmark complete. Results saved to benchmark_results directory."
 No newline at end of file
+2 −2
Original line number Diff line number Diff line
@@ -5,5 +5,5 @@ svg-vision:
    dino_weight: 1.0
    structural_weight: 1.0
    dreamsim_weight: 1.0
  train_size: 1000
  test_size: 64
  train_size: 500
  test_size: 0
+9 −3
Original line number Diff line number Diff line
@@ -3,9 +3,12 @@ env1:
    env_config:
        prompt_format: free_think
        split: train
        dino_weight: 0.0
        dino_weight: 0.0001
        structural_weight: 0.0
        dreamsim_weight: 5.0
        device:
            dino: 0
            dreamsim: 0 
    train_size: 500
    test_size: 0

@@ -14,8 +17,11 @@ env2:
    env_config:
        prompt_format: free_think
        split: test
        dino_weight: 0.0
        dino_weight: 0.0001
        structural_weight: 0.0
        dreamsim_weight: 5.0
        device:
            dino: 0
            dreamsim: 0
    train_size: 0
    test_size: 128
 No newline at end of file
    test_size: 32
 No newline at end of file
+5 −5
Original line number Diff line number Diff line
@@ -17,11 +17,11 @@ models:
  #   max_retries: 3
  #   timeout: 60

  # claude_3_sonnet:
  #   provider: claude
  #   model_name: claude-3-7-sonnet-20250219
  #   max_tokens: 150
  #   temperature: 0.7
  claude_3_sonnet:
    provider: claude
    model_name: claude-3-7-sonnet-20250219
    max_tokens: 150
    temperature: 0.7
  
  # Gemini_2.5_flask:
  #     provider: gemini
+0 −8
Original line number Diff line number Diff line
env1:
    env_name: frozenlake  
    env_config:
        render_mode: vision
        prompt_format: grounding
        use_accuracy_reward: false
    train_size: 10000 
    test_size: 128
 No newline at end of file
Loading