Skip to content

LayerMask: Segformer B2 Clothes Ultra


  • Class name: LayerMask: SegformerB2ClothesUltra
  • Category: 😺dzNodes/LayerMask
  • Output node: False

This node specializes in segmenting clothing from images using the Segformer B2 model. It processes images to identify and isolate clothing items, leveraging advanced semantic segmentation techniques to achieve high precision and detail in the segmentation results.

Input types


  • image
    • The input image, which the node processes to segment clothing. It's crucial for the segmentation task, as the model applies semantic segmentation techniques to this input to isolate clothing items.
    • Comfy dtype: IMAGE
    • Python dtype: torch.Tensor
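  • face
    • Whether to include the face region in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • hair
    • Whether to include the hair region in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • hat
    • Whether to include hats in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • sunglass
    • Whether to include sunglasses in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • left_arm
    • Whether to include the left arm in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • right_arm
    • Whether to include the right arm in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • left_leg
    • Whether to include the left leg in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • right_leg
    • Whether to include the right leg in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • upper_clothes
    • Whether to include upper-body clothing in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • skirt
    • Whether to include skirts in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • pants
    • Whether to include pants in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • dress
    • Whether to include dresses in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • belt
    • Whether to include belts in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • shoe
    • Whether to include shoes (the model's left-shoe and right-shoe labels) in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • bag
    • Whether to include bags in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • scarf
    • Whether to include scarves in the output mask. Defaults to False.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool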
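  • detail_method
    • The edge-refinement method applied to the raw mask when process_detail is enabled. One of VITMatte, VITMatte(local), PyMatting or GuidedFilter.
    • Comfy dtype: COMBO[STRING]
    • Python dtype: str
  • detail_erode
    • The erosion amount passed to the VITMatte trimap generation. It is also added to detail_dilate to derive the detail range used by the GuidedFilter and PyMatting methods. Default 12, range 1–255.
    • Comfy dtype: INT
    • Python dtype: int
  • detail_dilate
    • The dilation amount passed to the VITMatte trimap generation. It is also added to detail_erode to derive the detail range used by the GuidedFilter and PyMatting methods. Default 6, range 1–255.
    • Comfy dtype: INT
    • Python dtype: int
  • black_point
    • The black point used when remapping the histogram of the refined mask. Default 0.15, range 0.01–0.98.
    • Comfy dtype: FLOAT
    • Python dtype: float
  • white_point
    • The white point used when remapping the histogram of the refined mask. Default 0.99, range 0.02–0.99.
    • Comfy dtype: FLOAT
    • Python dtype: float
  • process_detail
    • Whether to run the edge-refinement step selected by detail_method. When disabled, the raw segmentation mask is used as is. Defaults to True.
    • Comfy dtype: BOOLEAN
    • Python dtype: bool
  • device
    • The device passed to the VITMatte step: cuda or cpu.
    • Comfy dtype: COMBO[STRING]
    • Python dtype: str
  • max_megapixels
    • The maximum-megapixels limit passed to the VITMatte step. Default 2.0, range 1–999.
    • Comfy dtype: FLOAT
    • Python dtype: float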

Output types

  • image
    • Comfy dtype: IMAGE
    • The original image combined with the generated mask into an RGBA image, returned as a tensor. One image is returned for each image in the input batch.
    • Python dtype: torch.Tensor
  • mask
    • Comfy dtype: MASK
    • The segmentation mask marking the areas of the image that belong to the selected categories. It is a single-channel (grayscale) mask, optionally edge-refined, rather than a multi-class label map.
    • Python dtype: torch.Tensor

Usage tips

  • Infra type: GPU
  • Common nodes: unknown

Source code

class Segformer_B2_Clothes:
    """ComfyUI node that builds a mask for selected clothing / body-part labels.

    Each image in the batch is segmented with ``get_segmentation``; the labels
    whose flags are enabled become the foreground of the mask, which can then
    be edge-refined with the chosen ``detail_method``.
    """

    # NOTE(review): this excerpt had lost several lines (decorator, dict
    # braces, ``else:`` branches, ``if`` bodies, list appends, the ``torch.cat``
    # calls). They are reconstructed here from the label table below and the
    # documented inputs/outputs -- confirm against the upstream node source.

    def __init__(self):
        pass

    # Labels: 0: "Background", 1: "Hat", 2: "Hair", 3: "Sunglasses", 4: "Upper-clothes", 5: "Skirt",
    # 6: "Pants", 7: "Dress", 8: "Belt", 9: "Left-shoe", 10: "Right-shoe", 11: "Face",
    # 12: "Left-leg", 13: "Right-leg", 14: "Left-arm", 15: "Right-arm", 16: "Bag", 17: "Scarf"

    @classmethod
    def INPUT_TYPES(cls):
        """Return the ComfyUI input specification for this node."""
        method_list = ['VITMatte', 'VITMatte(local)', 'PyMatting', 'GuidedFilter', ]
        device_list = ['cuda','cpu']
        return {"required":
            {
                "image": ("IMAGE",),
                "face": ("BOOLEAN", {"default": False}),
                "hair": ("BOOLEAN", {"default": False}),
                "hat": ("BOOLEAN", {"default": False}),
                "sunglass": ("BOOLEAN", {"default": False}),
                "left_arm": ("BOOLEAN", {"default": False}),
                "right_arm": ("BOOLEAN", {"default": False}),
                "left_leg": ("BOOLEAN", {"default": False}),
                "right_leg": ("BOOLEAN", {"default": False}),
                "upper_clothes": ("BOOLEAN", {"default": False}),
                "skirt": ("BOOLEAN", {"default": False}),
                "pants": ("BOOLEAN", {"default": False}),
                "dress": ("BOOLEAN", {"default": False}),
                "belt": ("BOOLEAN", {"default": False}),
                "shoe": ("BOOLEAN", {"default": False}),
                "bag": ("BOOLEAN", {"default": False}),
                "scarf": ("BOOLEAN", {"default": False}),
                "detail_method": (method_list,),
                "detail_erode": ("INT", {"default": 12, "min": 1, "max": 255, "step": 1}),
                "detail_dilate": ("INT", {"default": 6, "min": 1, "max": 255, "step": 1}),
                "black_point": ("FLOAT", {"default": 0.15, "min": 0.01, "max": 0.98, "step": 0.01, "display": "slider"}),
                "white_point": ("FLOAT", {"default": 0.99, "min": 0.02, "max": 0.99, "step": 0.01, "display": "slider"}),
                "process_detail": ("BOOLEAN", {"default": True}),
                "device": (device_list,),
                "max_megapixels": ("FLOAT", {"default": 2.0, "min": 1, "max": 999, "step": 0.1}),
            }
        }

    RETURN_TYPES = ("IMAGE", "MASK", )
    RETURN_NAMES = ("image", "mask", )
    FUNCTION = "segformer_ultra"
    CATEGORY = '😺dzNodes/LayerMask'

    def segformer_ultra(self, image,
                        face, hat, hair, sunglass, upper_clothes, skirt, pants, dress, belt, shoe,
                        left_leg, right_leg, left_arm, right_arm, bag, scarf, detail_method,
                        detail_erode, detail_dilate, black_point, white_point, process_detail, device, max_megapixels,
                        ):
        """Segment every image in the batch and mask the selected labels.

        Args:
            image: Iterable of image tensors (one per batch entry).
            face ... scarf: Flags selecting which labels belong to the mask.
            detail_method: 'VITMatte', 'VITMatte(local)', 'PyMatting' or
                'GuidedFilter'; used only when ``process_detail`` is true.
            detail_erode, detail_dilate: Passed to the VITMatte trimap step;
                their sum drives the GuidedFilter / PyMatting detail range.
            black_point, white_point: Passed to ``histogram_remap``.
            process_detail: When false, the raw mask is used unrefined.
            device, max_megapixels: Forwarded to ``generate_VITMatte``.

        Returns:
            Tuple ``(images, masks)``, each concatenated along dim 0.
        """
        ret_images = []
        ret_masks = []

        # 'VITMatte(local)' is the only method that restricts the matting
        # step to local files.
        local_files_only = detail_method == 'VITMatte(local)'

        for i in image:
            pred_seg, _cloth = get_segmentation(i)
            i = torch.unsqueeze(i, 0)
            i = pil2tensor(tensor2pil(i).convert('RGB'))
            orig_image = tensor2pil(i).convert('RGB')

            # Label 0 (background) is always kept out of the mask; every label
            # whose flag is off joins it. Indices follow the label table above.
            labels_to_keep = [0]
            if not hat:
                labels_to_keep.append(1)
            if not hair:
                labels_to_keep.append(2)
            if not sunglass:
                labels_to_keep.append(3)
            if not upper_clothes:
                labels_to_keep.append(4)
            if not skirt:
                labels_to_keep.append(5)
            if not pants:
                labels_to_keep.append(6)
            if not dress:
                labels_to_keep.append(7)
            if not belt:
                labels_to_keep.append(8)
            if not shoe:
                # A single flag covers both the left-shoe and right-shoe labels.
                labels_to_keep.append(9)
                labels_to_keep.append(10)
            if not face:
                labels_to_keep.append(11)
            if not left_leg:
                labels_to_keep.append(12)
            if not right_leg:
                labels_to_keep.append(13)
            if not left_arm:
                labels_to_keep.append(14)
            if not right_arm:
                labels_to_keep.append(15)
            if not bag:
                labels_to_keep.append(16)
            if not scarf:
                labels_to_keep.append(17)

            mask = np.isin(pred_seg, labels_to_keep).astype(np.uint8)

            # Create the agnostic-mask image: invert so the selected labels
            # are white (255) and everything kept above is black.
            mask_image = Image.fromarray((1 - mask) * 255)
            mask_image = mask_image.convert("L")
            _mask = pil2tensor(mask_image)

            detail_range = detail_erode + detail_dilate
            if process_detail:
                if detail_method == 'GuidedFilter':
                    _mask = guided_filter_alpha(i, _mask, detail_range // 6 + 1)
                    _mask = tensor2pil(histogram_remap(_mask, black_point, white_point))
                elif detail_method == 'PyMatting':
                    _mask = tensor2pil(mask_edge_detail(i, _mask, detail_range // 8 + 1, black_point, white_point))
                else:
                    # Both 'VITMatte' and 'VITMatte(local)' take this branch.
                    _trimap = generate_VITMatte_trimap(_mask, detail_erode, detail_dilate)
                    _mask = generate_VITMatte(orig_image, _trimap, local_files_only=local_files_only, device=device, max_megapixels=max_megapixels)
                    _mask = tensor2pil(histogram_remap(pil2tensor(_mask), black_point, white_point))
            else:
                _mask = mask2image(_mask)

            ret_image = RGB2RGBA(orig_image, _mask.convert('L'))
            ret_images.append(pil2tensor(ret_image))
            # NOTE(review): the append lines were missing from the excerpt;
            # ``image2mask`` is assumed to be the project's PIL-to-MASK helper
            # -- confirm against the upstream source.
            ret_masks.append(image2mask(_mask))

        log(f"{NODE_NAME} Processed {len(ret_images)} image(s).", message_type='finish')
        return (torch.cat(ret_images, dim=0), torch.cat(ret_masks, dim=0),)