Human4D Image2SMPL¶
Documentation¶
- Class name:
Human4D_Img2SMPL
- Category:
MotionDiff
- Output node:
False
The Human4D_Img2SMPL node transforms 2D human images into 3D representations using the SMPL model. It runs a person detector over each input frame, then applies the 4D-Humans (HMR 2.0) mesh-recovery model to every detected person, producing SMPL meshes, camera estimates, and 2D keypoints for advanced motion analysis and visualization.
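The snippet below is a minimal sketch, not part of the node's source, of how the sample method could be invoked directly in Python. The human4d_model bundle is assumed to come from a Human4D model loader node (name assumed); the argument names and defaults mirror the INPUT_TYPES definition shown under Source code.
import torch

node = Human4D_Img2SMPL()
image = torch.rand(16, 512, 512, 3)   # ComfyUI IMAGE batch: [frames, height, width, channels] in 0..1
(smpl_multiple_subjects,) = node.sample(
    human4d_model=human4d_model,      # HUMAN4D_MODEL produced upstream by a loader node (assumed)
    image=image,
    det_confidence_thresh=0.25,
    det_iou_thresh=0.7,
    det_batch_size=10,
    hmr_batch_size=8,
)
verts_frames, metadata = smpl_multiple_subjects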
Input types¶
Required¶
human4d_model
- The human4D model encapsulates the necessary configurations and models for detecting humans in images and generating their 3D SMPL representations. It plays a crucial role in the node's ability to accurately process and transform 2D images into 3D models.
- Comfy dtype:
HUMAN4D_MODEL
- Python dtype:
SimpleNamespace
image
- The input image tensor containing human figures to be transformed into 3D SMPL models. This tensor is critical for the node to perform human detection and pose estimation.
- Comfy dtype:
IMAGE
- Python dtype:
torch.Tensor
det_confidence_thresh
- The confidence threshold for human detection. This parameter helps in filtering out detections with low confidence, ensuring that only high-confidence human figures are processed for 3D modeling.
- Comfy dtype:
FLOAT
- Python dtype:
float
det_iou_thresh
- The Intersection over Union (IoU) threshold used for non-maximum suppression during human detection. It controls how much overlap is tolerated between detected bounding boxes before lower-confidence duplicates are suppressed. A short sketch of how both detection thresholds reach the detector appears after the input list.
- Comfy dtype:
FLOAT
- Python dtype:
float
det_batch_size
- The batch size for processing detections. This parameter affects the throughput and efficiency of the human detection process, balancing between speed and memory usage.
- Comfy dtype:
INT
- Python dtype:
int
hmr_batch_size
- The batch size for the HMR (Human Mesh Recovery) process. It determines how many human figures are processed simultaneously for 3D modeling, impacting the node's performance and resource utilization.
- Comfy dtype:
INT
- Python dtype:
int
Optional¶
opt_scorehmr_refiner
- An optional ScoreHMR refiner intended to further refine the HMR estimates. Note that in the current source code, supplying this input raises NotImplementedError, so the refinement path is not yet functional.
- Comfy dtype:
SCORE_HMR_MODEL
- Python dtype:
Optional[Callable]
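To make the detection parameters concrete, here is a small sketch mirroring the get_boxes method in the source code below, showing how det_batch_size, det_confidence_thresh and det_iou_thresh are forwarded to the person detector; the detector is assumed to expose an Ultralytics-style predict API, as the source suggests.
from torch.utils.data import DataLoader
from tqdm import tqdm

def get_person_boxes(detector, image, det_batch_size, det_confidence_thresh, det_iou_thresh):
    boxes_per_frame = []
    for img_batch in tqdm(DataLoader(image, shuffle=False, batch_size=det_batch_size, num_workers=0)):
        results = detector.predict(
            [img.numpy() for img in img_batch],
            classes=[0],                 # detect only the person class
            conf=det_confidence_thresh,  # discard detections below this confidence
            iou=det_iou_thresh,          # NMS IoU threshold controlling box overlap
        )
        boxes_per_frame.extend([r.boxes.xyxy.cpu().numpy() for r in results])
    return boxes_per_frame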
Output types¶
smpl_multiple_subjects
- Comfy dtype:
SMPL_MULTIPLE_SUBJECTS
- The output is a comprehensive 3D representation of multiple human subjects derived from the 2D input frames: per-frame SMPL vertex tensors plus a metadata dictionary (mesh faces, camera data, frame size, focal length, and an OpenPose-style rendering helper). A sketch of how a downstream consumer might read this structure follows below.
- Python dtype:
Tuple[List[torch.Tensor], Dict]
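As a rough sketch (names here are illustrative) of how a downstream consumer might read this structure, based on the return statement in the source code:
verts_frames, meta = smpl_multiple_subjects

faces = meta["faces"]                         # SMPL face indices shared by all subjects
for frame_idx, verts in enumerate(verts_frames):
    if verts is None:                         # frames with no detected person are stored as None
        continue
    num_subjects, num_verts, _ = verts.shape  # one mesh per detected subject in this frame
print(meta["frame_width"], meta["frame_height"], meta["focal_length"])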
Usage tips¶
- Infra type:
GPU
- Common nodes: unknown
Source code¶
class Human4D_Img2SMPL:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "human4d_model": ("HUMAN4D_MODEL", ),
                "image": ("IMAGE",),
                "det_confidence_thresh": ("FLOAT", {"min": 0.1, "max": 1, "step": 0.05, "default": 0.25}),
                "det_iou_thresh": ("FLOAT", {"min": 0.1, "max": 1, "step": 0.05, "default": 0.7}),
                "det_batch_size": ("INT", {"min": 1, "max": 20, "default": 10}),
                "hmr_batch_size": ("INT", {"min": 1, "max": 20, "default": 8})
            },
            "optional": {
                "opt_scorehmr_refiner": ("SCORE_HMR_MODEL", )
            }
        }

    RETURN_TYPES = ("SMPL_MULTIPLE_SUBJECTS", )
    FUNCTION = "sample"
    CATEGORY = "MotionDiff"

    def get_boxes(self, detector, image, batch_size, **kwargs):
        boxes_images = []
        for img_batch in tqdm(DataLoader(image, shuffle=False, batch_size=batch_size, num_workers=0)):
            det_results = detector.predict([img.numpy() for img in img_batch], classes=[0], **kwargs)
            boxes_images.extend([det_result.boxes.xyxy.cpu().numpy() for det_result in det_results])
        return boxes_images

    def sample(self, human4d_model, image, det_confidence_thresh, det_iou_thresh, det_batch_size, hmr_batch_size, opt_scorehmr_refiner=None):
        models = human4d_model
        if opt_scorehmr_refiner is not None:
            raise NotImplementedError()
        image = image.__mul__(255.).to(torch.uint8)
        boxes_images = self.get_boxes(models.detector, image, conf=det_confidence_thresh, iou=det_iou_thresh, batch_size=det_batch_size)
        verts_frames = []
        cam_t_frames = []
        kps_2d_frames = []
        pbar = comfy.utils.ProgressBar(len(image))
        for img_pt, boxes in tqdm(zip(image, boxes_images)):
            img_cv2 = img_pt.numpy()[:, :, ::-1].copy()
            # Run HMR2.0 on all detected humans
            dataset = ViTDetDataset(models.model_cfg, img_cv2, boxes)
            dataloader = torch.utils.data.DataLoader(dataset, batch_size=hmr_batch_size, shuffle=False, num_workers=0)
            _all_verts = []
            _all_kps_2d = []
            for batch in dataloader:
                batch = recursive_to(batch, get_torch_device())
                if models.fp16:
                    batch = recursive_to(batch, torch.float16)
                with torch.no_grad():
                    out = models.human4d(batch)
                pred_cam = out['pred_cam']
                box_center = batch["box_center"].float()
                box_size = batch["box_size"].float()
                img_size = batch["img_size"].float()
                scaled_focal_length = models.model_cfg.EXTRA.FOCAL_LENGTH / models.model_cfg.MODEL.IMAGE_SIZE * img_size.max()
                pred_cam_t_full = cam_crop_to_full(pred_cam, box_center, box_size, img_size, scaled_focal_length).detach().cpu()
                batch_size = batch['img'].shape[0]
                for n in range(batch_size):
                    verts = out['pred_vertices'][n].detach().cpu() # Shape [num_verts, 3]
                    cam_t = pred_cam_t_full[n] # Shape [3]
                    kps_2d = out['pred_keypoints_2d'][n].detach().cpu() # Shape [44, 3]
                    verts = torch.from_numpy(vertices_to_trimesh(verts, cam_t.unsqueeze(0)).vertices)
                    _all_verts.append(verts)
                    _all_kps_2d.append(kps_2d)
            if len(_all_verts):
                verts_frames.append(
                    torch.stack(_all_verts) # Shape [num_subjects, num_verts, 3]
                )
                kps_2d_frames.append(
                    torch.stack(_all_kps_2d) # Shape [num_subjects, 44, 3]
                )
            else:
                verts_frames.append(None)
                cam_t_frames.append(None)
                kps_2d_frames.append(None)
            pbar.update(1)
        verts_frames # List of [num_subjects, num_verts, 3]
        kps_2d_frames # List of [num_subjects, 44, 3]
        rot2xyz = Rotation2xyz(device="cpu", smpl_model_path=smpl_models_dict["SMPL_NEUTRAL.pkl"])
        faces = rot2xyz.smpl_model.faces
        return ((
            verts_frames,
            {"faces": faces, "normalized_to_vertices": True, 'cam': cam_t_frames,
             "frame_width": int(img_size[0, 0].item()), "frame_height": int(img_size[0, 1].item()),
             "focal_length": scaled_focal_length,
             "render_openpose": partial(render_openpose, kps_2d_frames, boxes_images, int(img_size[0, 0].item()), int(img_size[0, 1].item()))}
            # In Comfy, IMAGE is a batched Tensor so all frames always share the same size
        ), )
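The camera handling above depends on cam_crop_to_full, which lifts the weak-perspective camera predicted in each person crop to a translation in the full image. The sketch below approximates the reference 4D-Humans formulation and is included only to clarify the roles of box_center, box_size, img_size and scaled_focal_length; it is not code from this repository.
import torch

def cam_crop_to_full_sketch(cam_bbox, box_center, box_size, img_size, focal_length):
    # cam_bbox: [B, 3] weak-perspective camera (scale, tx, ty) predicted in the crop
    img_w, img_h = img_size[:, 0], img_size[:, 1]
    cx, cy, b = box_center[:, 0], box_center[:, 1], box_size
    bs = b * cam_bbox[:, 0] + 1e-9
    tz = 2 * focal_length / bs                          # depth from crop scale and focal length
    tx = (2 * (cx - img_w / 2.) / bs) + cam_bbox[:, 1]  # shift by crop offset from image center
    ty = (2 * (cy - img_h / 2.) / bs) + cam_bbox[:, 2]
    return torch.stack([tx, ty, tz], dim=-1)            # [B, 3] camera translation in the full frame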