init mtp converter 241112

Wei-Chen-hub · Wei-Chen-hub · commit 5d8db61fdabf · 2024-11-12T16:32:42.000+08:00
diff --git a/mmhuman3d/data/data_converters/__init__.py b/mmhuman3d/data/data_converters/__init__.py
@@ -44,6 +44,7 @@
 from .mpi_inf_3dhp_hybrik import MpiInf3dhpHybrIKConverter
 from .mpii_neural_annot import MpiiNeuralConverter
 from .mscoco_neural_annot import MscocoNeuralConverter
+from .mtp import MtpConverter
 from .penn_action import PennActionConverter
 from .posetrack import PosetrackConverter
 from .pw3d import Pw3dConverter
@@ -92,5 +93,5 @@
     'Crowd3dConverter', 'BedlamConverter','Pw3dConverter', 'DecoConverter',
     'Hi4dConverter', 'Idea400Converter', 'Pw3dBedlamConverter',
     'RichConverter', 'SynbodyWhacConverter', 'ArcticConverter', 'SynHandConverter',
-    'SignAvatarConverter',
+    'SignAvatarConverter', 'MtpConverter', 
 ]
diff --git a/mmhuman3d/data/data_converters/mtp.py b/mmhuman3d/data/data_converters/mtp.py
@@ -0,0 +1,135 @@
+import glob
+import json
+import os
+import pdb
+import random
+import time
+from typing import List
+
+import cv2
+import numpy as np
+from tqdm import tqdm
+import torch
+# from scipy.spatial.distance import cdist
+
+# import mmcv
+# from mmhuman3d.models.body_models.builder import build_body_model
+# from mmhuman3d.core.conventions.keypoints_mapping import smplx
+from mmhuman3d.core.conventions.keypoints_mapping import (
+    convert_kps,
+    get_keypoint_idx,
+    get_keypoint_idxs_by_part,
+)
+from mmhuman3d.models.body_models.utils import batch_transform_to_camera_frame
+from mmhuman3d.models.body_models.utils import transform_to_camera_frame
+from mmhuman3d.data.data_structures.human_data import HumanData
+from .base_converter import BaseModeConverter
+from .builder import DATA_CONVERTERS
+from mmhuman3d.models.body_models.builder import build_body_model
+from mmhuman3d.core.cameras import build_cameras
+
+@DATA_CONVERTERS.register_module()
+class MtpConverter(BaseModeConverter):
+    """Converter for the MTP dataset.
+
+    Registered in ``DATA_CONVERTERS`` under its class name. In this revision
+    only the constructor and the face/hand bbox helper are implemented;
+    ``convert_by_mode`` is still a stub.
+    """
+    ACCEPTED_MODES = ['train', 'val']
+
+    def __init__(self, modes: List = None) -> None:
+        """Store the device, misc config and SMPL-X parameter shapes.
+
+        Args:
+            modes (List): Modes to convert, forwarded unchanged to the base
+                class. ``None`` (the default) is treated as an empty list.
+        """
+        # Build the empty list per call: a literal ``[]`` default is a single
+        # object shared by every instance created without ``modes``.
+        if modes is None:
+            modes = []
+
+        # Use the first CUDA device when one is available, else the CPU.
+        self.device = torch.device(
+            'cuda:0' if torch.cuda.is_available() else 'cpu')
+
+        # NOTE(review): 'prespective' looks like a misspelling of
+        # 'perspective'. It is kept byte-identical because the string is
+        # stored as data and consumers may match on it -- confirm before
+        # renaming.
+        self.misc_config = dict(
+            bbox_body_scale=1.2,
+            bbox_facehand_scale=1.0,
+            bbox_source='keypoints2d_original',
+            flat_hand_mean=True,
+            cam_param_type='prespective',
+            cam_param_source='original',
+            smplx_source='original',
+        )
+
+        # SMPL-X parameter name -> shape tuple (-1 for the leading axis);
+        # presumably used as reshape targets by the conversion code -- TODO
+        # confirm once ``convert_by_mode`` is implemented.
+        self.smplx_shape = {
+            'betas': (-1, 10),
+            'transl': (-1, 3),
+            'global_orient': (-1, 3),
+            'body_pose': (-1, 21, 3),
+            'left_hand_pose': (-1, 15, 3),
+            'right_hand_pose': (-1, 15, 3),
+            'leye_pose': (-1, 3),
+            'reye_pose': (-1, 3),
+            'jaw_pose': (-1, 3),
+            'expression': (-1, 10)
+        }
+
+        super().__init__(modes)
+
+    def _keypoints_to_scaled_bbox_fh(self,
+                                     keypoints,
+                                     occ=None,
+                                     scale=1.0,
+                                     convention='smplx'):
+        """Compute scaled head, left-hand and right-hand bboxes.
+
+        Args:
+            keypoints (np.ndarray): 2D keypoints. Rows are selected with the
+                keypoint indices of ``convention`` and every row must hold
+                exactly two values (x, y).
+            occ (np.ndarray, optional): Per-keypoint values indexed like
+                ``keypoints`` (presumably occlusion flags -- confirm against
+                the caller). A part whose summed values divided by its
+                keypoint count is >= 0.1 gets confidence 0, otherwise 1.
+                When ``None``, every part gets confidence 1.
+            scale (float): Factor applied to the box width and height around
+                the box centre.
+            convention (str): Keypoint convention passed to
+                ``get_keypoint_idxs_by_part``.
+
+        Returns:
+            tuple: ``(head, left_hand, right_hand)``; each entry is a
+            float32 array ``[xmin, ymin, xmax, ymax, conf]``.
+        """
+        bboxes = []
+        for body_part in ('head', 'left_hand', 'right_hand'):
+            kp_id = get_keypoint_idxs_by_part(
+                body_part, convention=convention)
+            kps = keypoints[kp_id]
+
+            # Identity check on purpose: ``occ == None`` on an ndarray is an
+            # elementwise comparison whose result cannot be used as an
+            # ``if`` condition.
+            if occ is None:
+                conf = 1
+            else:
+                occluded_ratio = np.sum(occ[kp_id]) / len(kp_id)
+                conf = 0 if occluded_ratio >= 0.1 else 1
+
+            xmin, ymin = np.amin(kps, axis=0)
+            xmax, ymax = np.amax(kps, axis=0)
+
+            # Scale the tight box about its centre.
+            width = (xmax - xmin) * scale
+            height = (ymax - ymin) * scale
+            x_center = 0.5 * (xmax + xmin)
+            y_center = 0.5 * (ymax + ymin)
+
+            xmin = x_center - 0.5 * width
+            xmax = x_center + 0.5 * width
+            ymin = y_center - 0.5 * height
+            ymax = y_center + 0.5 * height
+
+            bbox = np.stack([xmin, ymin, xmax, ymax, conf],
+                            axis=0).astype(np.float32)
+            bboxes.append(bbox)
+
+        return tuple(bboxes)
+
+    def convert_by_mode(self, dataset_path: str, out_path: str,
+                        mode: str) -> dict:
+        """Convert one mode of the dataset (not implemented yet).
+
+        Args:
+            dataset_path (str): Path to directory where raw images and
+                annotations are stored.
+            out_path (str): Path to directory to save preprocessed npz file
+            mode (str): Mode in accepted modes
+
+        Returns:
+            dict:
+                Intended to be a dict containing keys image_path, bbox_xywh,
+                keypoints2d, keypoints2d_mask, keypoints3d,
+                keypoints3d_mask, cam_param stored in HumanData() format.
+                The body is currently empty, so the method returns ``None``.
+        """
+        # TODO: get all images
diff --git a/tools/convert_datasets.py b/tools/convert_datasets.py
@@ -183,6 +183,10 @@
         type='MscocoNeuralConverter',  # real, have few multihuman
         prefix='mscoco',
         modes=['train']),
+    mtp=dict(
+        type='MtpConverter',  # real
+        prefix='mtp',
+        modes=['train', 'val']),
     renbody=dict(
         type='RenbodyConverter',  # real
         prefix='renbody',