Commit 7b3d95a8 authored by jameskrw
Browse files

updated state reward

parent 4a293a38
Loading
Loading
Loading
Loading
+3 −0
Original line number Original line Diff line number Diff line
@@ -78,4 +78,7 @@ python3 -m vagen.trainer.main_ppo \
    trainer.val_before_train=True \
    trainer.val_before_train=True \
    trainer.val_generations_to_log_to_wandb=8 \
    trainer.val_generations_to_log_to_wandb=8 \
    rollout_manager.n_trajectory=2 \
    rollout_manager.n_trajectory=2 \
    rollout_manager.use_service=True \
    rollout_manager.timeout=300 \
    rollout_manager.base_url="http://localhost:$PORT" \
    2>&1 | tee $EXPERIMENT_NAME.log
    2>&1 | tee $EXPERIMENT_NAME.log
+3 −0
Original line number Original line Diff line number Diff line
@@ -129,6 +129,9 @@ tmux send-keys -t "$TRAIN_SESSION" "python3 -m vagen.trainer.main_ppo \\
    trainer.val_before_train=True \\
    trainer.val_before_train=True \\
    trainer.val_generations_to_log_to_wandb=8 \\
    trainer.val_generations_to_log_to_wandb=8 \\
    rollout_manager.n_trajectory=2 \\
    rollout_manager.n_trajectory=2 \\
    rollout_manager.use_service=True \\
    rollout_manager.timeout=300 \\
    rollout_manager.base_url=\"http://localhost:$PORT\" \\
    2>&1 | tee $EXPERIMENT_NAME.log" C-m
    2>&1 | tee $EXPERIMENT_NAME.log" C-m


echo "-----------------------------------------"
echo "-----------------------------------------"
+2 −1
Original line number Original line Diff line number Diff line
@@ -86,6 +86,7 @@ python3 -m vagen.trainer.main_ppo \
    trainer.val_generations_to_log_to_wandb=8 \
    trainer.val_generations_to_log_to_wandb=8 \
    rollout_manager.n_trajectory=2 \
    rollout_manager.n_trajectory=2 \
    rollout_manager.use_service=True \
    rollout_manager.use_service=True \
    rollout_manager.timeout=240 \
    rollout_manager.timeout=300 \
    rollout_manager.base_url="http://localhost:5001" \
    rollout_manager.base_url="http://localhost:5001" \
    
    2>&1 | tee $EXPERIMENT_NAME.log
    2>&1 | tee $EXPERIMENT_NAME.log
+1 −1
Original line number Original line Diff line number Diff line
@@ -130,7 +130,7 @@ tmux send-keys -t "$TRAIN_SESSION" "python3 -m vagen.trainer.main_ppo \\
    trainer.val_generations_to_log_to_wandb=8 \\
    trainer.val_generations_to_log_to_wandb=8 \\
    rollout_manager.n_trajectory=2 \\
    rollout_manager.n_trajectory=2 \\
    rollout_manager.use_service=True \\
    rollout_manager.use_service=True \\
    rollout_manager.timeout=240 \\
    rollout_manager.timeout=300 \\
    rollout_manager.base_url=\"http://localhost:$PORT\" \\
    rollout_manager.base_url=\"http://localhost:$PORT\" \\
    2>&1 | tee $EXPERIMENT_NAME.log" C-m
    2>&1 | tee $EXPERIMENT_NAME.log" C-m


+1 −1
Original line number Original line Diff line number Diff line
@@ -82,7 +82,7 @@ python3 -m vagen.trainer.main_ppo \
    trainer.val_generations_to_log_to_wandb=8 \
    trainer.val_generations_to_log_to_wandb=8 \
    rollout_manager.n_trajectory=1 \
    rollout_manager.n_trajectory=1 \
    rollout_manager.use_service=True \
    rollout_manager.use_service=True \
    rollout_manager.timeout=240 \
    rollout_manager.timeout=300 \
    +rollout_manager.mini_batch_size=64 \
    +rollout_manager.mini_batch_size=64 \
    rollout_manager.base_url="http://localhost:5000" \
    rollout_manager.base_url="http://localhost:5000" \
    2>&1 | tee $EXPERIMENT_NAME.log
    2>&1 | tee $EXPERIMENT_NAME.log
Loading