Unverified Commit 138152e3 authored by Kangrui Wang's avatar Kangrui Wang Committed by GitHub
Browse files

Merge pull request #26 from RAGEN-AI/main

Merge Main
parents f0217ad7 4a7a1ae4
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -146,12 +146,15 @@ We thank [RAGEN](https://github.com/RAGEN-AI/RAGEN) for its innovative explorati

[verl](https://www.notion.so/VAGEN-Training-VLM-Agents-with-Multi-Turn-Reinforcement-Learning-1bfde13afb6e80b792f6d80c7c2fcad0?pvs=21): Volcano Engine Reinforcement Learning for LLM

[ArCHer](https://arxiv.org/abs/2402.19446v1): Hierarchical Multi-Turn RL Agent Training Framework

[Search-R1](https://github.com/PeterGriffinJin/Search-R1): Train your LLMs to reason and call a search engine with reinforcement learning

[Agent-R1](https://github.com/0russwest0/Agent-R1): Training Powerful LLM Agents with End-to-End Reinforcement Learning

[OpenManus-RL](https://github.com/OpenManus/OpenManus-RL): A live stream development of RL tuning for LLM agents


## Citation

If you find our repo useful, we appreciate it if you could cite our work at:
+18 −45
Original line number Diff line number Diff line
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import AutoModel, AutoImageProcessor
from PIL import Image
import torch.nn as nn
import threading
import logging
from typing import Dict, List, Tuple, Optional, Any, Union


# @TODO clean codes of this section

_model_cache = {}
_model_cache_lock = threading.Lock()
_model_counter = 0  

def get_dino_model(model_size="small", device="cuda:0"):
    """Return a process-wide cached DINOScoreCalculator for (model_size, device).

    A module-level lock guards the cache, so each unique
    (model_size, device) pair is constructed at most once per process.

    Args:
        model_size: DINO variant identifier (e.g. "small").
        device: Device string the model should live on.

    Returns:
        DINOScoreCalculator: Cached (or newly created) calculator.
    """
    global _model_counter
    key = f"{model_size}_{device}"

    with _model_cache_lock:
        calculator = _model_cache.get(key)
        if calculator is None:
            _model_counter += 1
            # Lazy import keeps module import time free of os dependency here.
            import os
            pid = os.getpid()
            logging.info(f"Process {pid}: Created DINO model #{_model_counter}: {model_size} on {device}")
            calculator = DINOScoreCalculator(model_size=model_size, device=device)
            _model_cache[key] = calculator
        return calculator
from transformers import AutoModel, AutoImageProcessor
from PIL import Image
import math

class AverageMeter(object):
    """Computes and stores the average and current value"""
@@ -46,6 +24,7 @@ class AverageMeter(object):
        self.count += n
        self.avg = self.sum / self.count


class BaseMetric:
    def __init__(self):
        self.meter = AverageMeter()
@@ -59,7 +38,7 @@ class BaseMetric:
        """
        values = []
        batch_size = len(next(iter(batch.values())))
        for index in tqdm(range(batch_size)):
        for index in range(batch_size):
            kwargs = {}
            for key in ["gt_im", "gen_im", "gt_svg", "gen_svg", "caption"]:
                if key in batch:
@@ -67,7 +46,7 @@ class BaseMetric:
            try:
                measure = self.metric(**kwargs)
            except Exception as e:
                print("Error calculating metric: {}".format(e))
                print(f"Error calculating metric: {e}")
                continue
            if math.isnan(measure):
                continue
@@ -85,14 +64,13 @@ class BaseMetric:
            return score, values

    def metric(self, **kwargs):
        """
        This method should be overridden by subclasses to provide the specific metric computation.
        """
        """This method should be overridden by subclasses"""
        raise NotImplementedError("The metric method must be implemented by subclasses.")
    
    def get_average_score(self):
        return self.meter.avg


class DINOScoreCalculator(BaseMetric): 
    def __init__(self, config=None, model_size='large', device='cuda:0'):
        super().__init__()
@@ -102,7 +80,6 @@ class DINOScoreCalculator(BaseMetric):
        self.model, self.processor = self.get_DINOv2_model(model_size)
        self.device = device
        self.model = self.model.to(self.device)

        self.metric = self.calculate_DINOv2_similarity_score

    def get_DINOv2_model(self, model_size):
@@ -117,8 +94,10 @@ class DINOScoreCalculator(BaseMetric):
        return AutoModel.from_pretrained(model_size), AutoImageProcessor.from_pretrained(model_size)

    def process_input(self, image, processor):
        """Process images efficiently in batches when possible"""
        if isinstance(image, list):
            if all(isinstance(img, Image.Image) for img in image):
                # Process all images in a single batch to maximize GPU utilization
                with torch.no_grad():
                    inputs = processor(images=image, return_tensors="pt").to(self.device)
                    outputs = self.model(**inputs)
@@ -132,18 +111,21 @@ class DINOScoreCalculator(BaseMetric):
        
        if isinstance(image, str):
            image = Image.open(image)
            
        if isinstance(image, Image.Image):
            with torch.no_grad():
                inputs = processor(images=image, return_tensors="pt").to(self.device)
                outputs = self.model(**inputs)
                features = outputs.last_hidden_state.mean(dim=1)
            return features
        elif isinstance(image, torch.Tensor):
            features = image.unsqueeze(0) if image.dim() == 1 else image
            return features
        else:
            raise ValueError("Input must be a file path, PIL Image, or tensor of features")
        return features

    def calculate_DINOv2_similarity_score(self, **kwargs):
        """Calculate similarity score between two images"""
        image1 = kwargs.get('gt_im')
        image2 = kwargs.get('gen_im')
        features1 = self.process_input(image1, self.processor)
@@ -151,26 +133,19 @@ class DINOScoreCalculator(BaseMetric):

        cos = nn.CosineSimilarity(dim=1)
        sim = cos(features1, features2).item()
        sim = (sim + 1) / 2
        sim = (sim + 1) / 2  # Convert from [-1, 1] to [0, 1] range

        return sim
    
    def calculate_batch_scores(self, gt_images: List[Any], gen_images: List[Any]) -> List[float]:
        """
        Calculate similarity scores for multiple image pairs in a single batch
        
        Args:
            gt_images: List of ground truth images (PIL Images, file paths, or tensors)
            gen_images: List of generated images (PIL Images, file paths, or tensors)
            
        Returns:
            List of similarity scores (float values between 0-1)
        Calculate similarity scores for multiple image pairs in a single batch.
        DINO can process all images in a batch efficiently.
        """      
        if not gt_images: 
            return []
        
        gt_features = self.process_input(gt_images, self.processor)
        
        gen_features = self.process_input(gen_images, self.processor)
        
        cos = nn.CosineSimilarity(dim=1)
@@ -179,5 +154,3 @@ class DINOScoreCalculator(BaseMetric):
        scores = [(sim.item() + 1) / 2 for sim in similarities]
        
        return scores
 No newline at end of file
    
    
 No newline at end of file
+16 −69
Original line number Diff line number Diff line
@@ -2,38 +2,9 @@ import torch
from PIL import Image
import os
from dreamsim import dreamsim
import threading
import logging

# Create global cache and lock, similar to DINO implementation
_model_cache = {}
_model_cache_lock = threading.Lock()
_model_counter = 0


def get_dreamsim_model(device="cuda:0"):
    """Return a cached singleton DreamSimScoreCalculator for the given device.

    The module-level lock ensures at most one model is loaded per device
    per process, avoiding duplicate (expensive) weight loading.

    Args:
        device: Device to run model on.

    Returns:
        DreamSimScoreCalculator: Cached (or newly created) calculator.
    """
    global _model_counter

    key = f"dreamsim_{device}"
    with _model_cache_lock:
        instance = _model_cache.get(key)
        if instance is None:
            _model_counter += 1
            pid = os.getpid()
            logging.info(f"Process {pid}: Created DreamSim model #{_model_counter} on {device}")
            instance = DreamSimScoreCalculator(device=device)
            _model_cache[key] = instance
        return instance

from concurrent.futures import ThreadPoolExecutor
from typing import List, Any

class DreamSimScoreCalculator:
    """
@@ -43,11 +14,6 @@ class DreamSimScoreCalculator:
    def __init__(self, pretrained=True, cache_dir="~/.cache", device=None):
        """
        Initialize DreamSim model.

        Args:
            pretrained: Whether to use pretrained model
            cache_dir: Cache directory for model weights
            device: Device to run the model on (defaults to CUDA if available, else CPU)
        """
        cache_dir = os.path.expanduser(cache_dir)

@@ -63,13 +29,6 @@ class DreamSimScoreCalculator:
    def calculate_similarity_score(self, gt_im, gen_im):
        """
        Calculate similarity score between ground truth and generated images.

        Args:
            gt_im: Ground truth PIL Image
            gen_im: Generated PIL Image

        Returns:
            float: Similarity score (1 - distance, normalized to [0, 1])
        """
        # Preprocess images
        img1 = self.preprocess(gt_im)
@@ -84,39 +43,27 @@ class DreamSimScoreCalculator:
            distance = self.model(img1, img2).item()

        # Convert distance to similarity score (1 - normalized distance)
        # DreamSim usually outputs values in range [0, 1] where lower means more similar
        # We invert it so that higher means more similar (1 = identical)
        similarity = 1.0 - min(1.0, max(0.0, distance))

        return similarity

    def calculate_batch_scores(self, gt_images, gen_images):
    def calculate_batch_scores(self, gt_images: List[Any], gen_images: List[Any]) -> List[float]:
        """
        Calculate similarity scores for a batch of image pairs.
        Calculate similarity scores for multiple image pairs.
        Since DreamSim doesn't natively support batch comparison, we process each pair individually.
        """
        if not gt_images or not gen_images:
            return []
            
        Args:
            gt_images: List of ground truth PIL Images
            gen_images: List of generated PIL Images
        batch_size = len(gt_images)
        
        Returns:
            List[float]: List of similarity scores
        """
        # Preprocess all images
        gt_processed = [self.preprocess(img) for img in gt_images]
        gen_processed = [self.preprocess(img) for img in gen_images]
        gt_processed = [self.preprocess(img).to(self.device) for img in gt_images]
        gen_processed = [self.preprocess(img).to(self.device) for img in gen_images]
        
        scores = []
        # Process each pair
        for gt, gen in zip(gt_processed, gen_processed):
            # Move to device
            gt = gt.to(self.device)
            gen = gen.to(self.device)

            # Calculate distance
        for i in range(batch_size):
            with torch.no_grad():
                distance = self.model(gt, gen).item()

            # Convert to similarity score
                distance = self.model(gt_processed[i], gen_processed[i]).item()
            similarity = 1.0 - min(1.0, max(0.0, distance))
            scores.append(similarity)
        
+5 −15
Original line number Diff line number Diff line
@@ -101,16 +101,8 @@ class SVGEnv(BaseEnv):
        
        return self._render(init_obs=True), {}

    def step(self, action_str: str, dino_model=None) -> Tuple[Dict, float, bool, Dict]:
        """Execute a step in the environment.
        
        Args:
            action_str: Raw text response from LLM
            dino_model: Optional DINO model for scoring
            
        Returns:
            Observation, reward, done, info
        """
    def step(self, action_str: str, dino_model=None, dreamsim_model=None) -> Tuple[Dict, float, bool, Dict]:
        """Execute a step in the environment."""
        # Process the LLM response to extract actions
        rst = self.parse_func(
            response=action_str,
@@ -170,7 +162,7 @@ class SVGEnv(BaseEnv):
                _, gen_image = process_and_rasterize_svg(self.gen_svg_code)
                self.gen_image = gen_image
                
                # Calculate score
                # Calculate score using service models if provided
                score_config = self.config.get_score_config()
                scores = calculate_total_score(
                    gt_im=self.gt_image,
@@ -178,7 +170,8 @@ class SVGEnv(BaseEnv):
                    gt_code=self.gt_svg_code,
                    gen_code=self.gen_svg_code,
                    score_config=score_config,
                    dino_model=dino_model
                    dino_model=dino_model,
                    dreamsim_model=dreamsim_model
                ) 
                
                # Set metrics and update reward
@@ -189,9 +182,6 @@ class SVGEnv(BaseEnv):
                metrics["turn_metrics"]["action_is_effective"] = scores["total_score"] > 0
                    
            except Exception as e:
                import traceback
                print(f"Error processing SVG: {e}")
                traceback.print_exc()
                # Reset actions and update metrics
                self.valid_actions = []
                metrics["turn_metrics"]["action_is_valid"] = False
+2 −15
Original line number Diff line number Diff line
@@ -12,7 +12,7 @@ class SvgEnvConfig(BaseEnvConfig):
    action_sep: str = "~~"
    # Score configuration
    model_size: str = "small"  # 'small', 'base', or 'large'
    dino_only: bool = False
    # Weights for different scoring components
    dino_weight: Optional[float] = None
    structural_weight: Optional[float] = None
    dreamsim_weight: Optional[float] = None
@@ -43,7 +43,6 @@ class SvgEnvConfig(BaseEnvConfig):
        id_fields = [
            "dataset_name", 
            "model_size", 
            "dino_only", 
            "format_reward", 
            "format_penalty"
        ]
@@ -65,7 +64,6 @@ class SvgEnvConfig(BaseEnvConfig):
        """Get the score configuration dictionary"""
        score_config = {
            "model_size": self.model_size,
            "dino_only": self.dino_only,
            "device": self.device  # Include processed device configuration in score config
        }
        
@@ -78,14 +76,3 @@ class SvgEnvConfig(BaseEnvConfig):
            score_config["dreamsim_weight"] = self.dreamsim_weight
            
        return score_config
 No newline at end of file


if __name__ == "__main__":
    # Demo: integer GPU indices are normalized by SvgEnvConfig into
    # "cuda:N" device strings (e.g. 1 -> "cuda:1", 2 -> "cuda:2").
    example = SvgEnvConfig(device={"dino": 1, "dreamsim": 2})

    for output in (
        example.config_id(),
        example.get_score_config(),
        f"Processed device config: {example.device}",
    ):
        print(output)
 No newline at end of file
Loading