# Source code for data_juicer.ops.mapper.image_segment_mapper

import numpy as np

from data_juicer.utils.constant import Fields, MetaKeys
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.mm_utils import load_data_with_context, load_image
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, TAGGING_OPS, UNFORKABLE, Mapper
from ..op_fusion import LOADED_IMAGES

# Registry key handed to every `register_module` decorator applied to the
# operator class defined in this module.
OP_NAME = 'image_segment_mapper'

# NOTE(review): presumably LazyLoader defers the real import of these heavy
# packages until they are first used -- confirm against
# data_juicer.utils.lazy_loader. Neither handle is referenced directly by the
# operator class in this module.
torch = LazyLoader('torch', 'torch')
ultralytics = LazyLoader('ultralytics', 'ultralytics')


@UNFORKABLE.register_module(OP_NAME)
@TAGGING_OPS.register_module(OP_NAME)
@OPERATORS.register_module(OP_NAME)
@LOADED_IMAGES.register_module(OP_NAME)
class ImageSegmentMapper(Mapper):
    """Run a FastSAM segmentation model over a sample's images and tag the
    sample with the resulting bounding boxes."""

    # NOTE(review): presumably read by the base operator to pick the
    # execution device -- confirm against Mapper.
    _accelerator = 'cuda'

    def __init__(self,
                 imgsz=1024,
                 conf=0.05,
                 iou=0.5,
                 model_path='FastSAM-x.pt',
                 *args,
                 **kwargs):
        """
        Initialization method.

        :param imgsz: resolution the images are resized to; forwarded to the
            model call as ``imgsz``
        :param conf: confidence score threshold forwarded to the model call
        :param iou: IoU (Intersection over Union) score threshold forwarded
            to the model call
        :param model_path: the path to the FastSAM model. Model name should
            be one of ['FastSAM-x.pt', 'FastSAM-s.pt'].
        :param args: extra positional args passed to the parent class
        :param kwargs: extra keyword args passed to the parent class;
            ``mem_required`` falls back to ``'800MB'`` when not supplied
        """
        # Only fills in the memory requirement if the caller did not set one.
        kwargs.setdefault('mem_required', '800MB')
        super().__init__(*args, **kwargs)

        self.imgsz = imgsz
        self.conf = conf
        self.iou = iou
        self.model_key = prepare_model(
            model_type='fastsam',
            model_path=model_path,
        )

    def process_single(self, sample, rank=None, context=False):
        """
        Store the bounding boxes of every image of ``sample`` under
        ``sample[Fields.meta][MetaKeys.bbox_tag]``.

        :param sample: the sample to tag; it is modified in place
        :param rank: forwarded to ``get_model`` when fetching the model
        :param context: forwarded to ``load_data_with_context`` when loading
            the images
        :return: the sample. Its bbox tag is a list holding one ``xywh`` box
            array per processed image, or an empty ``(0, 0, 4)`` float32
            array when there was nothing to process. A sample that already
            carries a bbox tag is returned untouched.
        """
        image_key = self.image_key
        tag_key = MetaKeys.bbox_tag

        # Guard: the sample carries no images at all.
        if not (image_key in sample and sample[image_key]):
            # N x M x 4 for N images, M boxes, 4 coords
            sample[Fields.meta][tag_key] = np.empty((0, 0, 4),
                                                    dtype=np.float32)
            return sample

        # Guard: the tag was already computed for this sample.
        if tag_key in sample[Fields.meta]:
            return sample

        keys_to_load = sample[image_key]
        sample, images = load_data_with_context(sample, context,
                                                keys_to_load, load_image)

        segmenter = get_model(self.model_key,
                              rank=rank,
                              use_cuda=self.use_cuda())

        # Attach the result list to the sample before running inference so
        # it is filled incrementally, one entry per processed image.
        boxes_per_image = []
        sample[Fields.meta][tag_key] = boxes_per_image

        for image in images:
            predictions = segmenter(image,
                                    retina_masks=True,
                                    imgsz=self.imgsz,
                                    conf=self.conf,
                                    iou=self.iou,
                                    verbose=False)
            first_result = predictions[0]
            xywh = first_result.boxes.xywh
            boxes_per_image.append(xywh.cpu().numpy())

        # match schema: fall back to the empty placeholder array when no
        # image produced an entry
        if not boxes_per_image:
            sample[Fields.meta][tag_key] = np.empty((0, 0, 4),
                                                    dtype=np.float32)

        return sample