import numpy as np


class ActionFeature:
    """Action feature indices for human and robot hand parameters.

    Each upper-case class attribute is a ``(start, end)`` pair (end exclusive)
    locating one hand feature component inside the concatenated action
    feature vector, whose total width is ``ALL_FEATURES[1]``.
    """

    ALL_FEATURES = (0, 192)

    HUMAN_LEFT_HAND = (0, 51)
    HUMAN_RIGHT_HAND = (51, 102)

    HUMAN_LEFT_TRANS = (0, 3)
    HUMAN_LEFT_ROT = (3, 6)
    HUMAN_LEFT_6D = (0, 6)
    HUMAN_LEFT_JOINTS = (6, 51)

    HUMAN_RIGHT_TRANS = (51, 54)
    HUMAN_RIGHT_ROT = (54, 57)
    HUMAN_RIGHT_6D = (51, 57)
    HUMAN_RIGHT_JOINTS = (57, 102)

    PADDING_FEATURES = (102, 192)  # not used now

    @classmethod
    def get_concatenated_action_feature_from_dict(cls, action_feature_dict):
        """Concatenate action features from a dictionary into a single feature vector.

        Args:
            action_feature_dict: Non-empty dictionary mapping feature names
                (names of the ``(start, end)`` class constants) to 2-D arrays
                of shape ``(batch, k)``. All arrays are expected to share the
                batch size of the first entry.

        Returns:
            Tuple ``(features, feature_mask)``. ``features`` is a float32 array
            of shape ``(batch, ALL_FEATURES[1])`` with each value written at
            its start index; ``feature_mask`` is a bool array of the same
            shape that is True on the columns that were written.

        Raises:
            ValueError: If the dictionary is empty or a value is not 2-D.
            AttributeError: If a key does not name a class constant.
        """
        if not action_feature_dict:
            # Without this guard, next(iter(...)) leaks a bare StopIteration.
            raise ValueError("action_feature_dict must contain at least one feature")

        batch_size = next(iter(action_feature_dict.values())).shape[0]
        total_dim = cls.ALL_FEATURES[1]
        features = np.zeros((batch_size, total_dim), dtype=np.float32)
        feature_mask = np.zeros((batch_size, total_dim), dtype=bool)

        for key, value in action_feature_dict.items():
            # Explicit check instead of `assert`, which is stripped under -O.
            if len(value.shape) != 2:
                raise ValueError(
                    f"Feature '{key}' must be 2-D (batch, dim), "
                    f"got shape {tuple(value.shape)}"
                )
            start = getattr(cls, key)[0]
            # The written width follows the value itself, not the constant's
            # nominal (start, end) span, so narrower or wider values are kept
            # exactly as provided by the caller.
            width = value.shape[1]
            features[:, start:start + width] = value
            feature_mask[:, start:start + width] = True

        return features, feature_mask

    @classmethod
    def get_dict_from_concatenated_action_feature(cls, feature, feature_mask):
        """Extract action features from concatenated vector into a dictionary.

        Every upper-case class constant whose name does not contain ``"ALL"``
        is examined, so overlapping constants (e.g. ``HUMAN_LEFT_HAND`` and
        ``HUMAN_LEFT_TRANS``) can each produce an entry.

        Args:
            feature: Concatenated feature array of shape ``(batch, dim)``.
            feature_mask: Boolean mask indicating which features are present.
                Only the first batch row is inspected.

        Returns:
            Dictionary mapping feature names to ``feature[:, start:start + k]``,
            where ``k`` is the number of True mask entries inside the
            constant's ``(start, end)`` span. Constants with ``k == 0`` are
            omitted. Keys appear in the (alphabetical) order of ``dir(cls)``.
        """
        index_ranges = {
            name: getattr(cls, name)
            for name in dir(cls)
            if name.isupper() and "ALL" not in name
        }

        action_feature_dict = {}
        for key, (start, end) in index_ranges.items():
            present = int(np.sum(feature_mask[0, start:end]))
            if present == 0:
                continue
            # Present features are taken as the first `present` columns of the
            # span, mirroring how the concatenation writes from `start`.
            action_feature_dict[key] = feature[:, start:start + present]
        return action_feature_dict

    @classmethod
    def get_loss_components(cls, action_type='angle'):
        """Get loss component definitions for different action types.

        Index ranges are derived from the class constants rather than
        hardcoded numbers.

        Args:
            action_type: 'angle' or 'keypoints'

        Returns:
            dict: Dictionary mapping component names to (start, end, weight) tuples

        Raises:
            ValueError: If ``action_type`` is neither 'angle' nor 'keypoints'.
        """
        if action_type == 'angle':
            return {
                'left_hand_6d': (*cls.HUMAN_LEFT_6D, 1.0),
                'left_hand_joints': (*cls.HUMAN_LEFT_JOINTS, 1.0),
                'right_hand_6d': (*cls.HUMAN_RIGHT_6D, 1.0),
                'right_hand_joints': (*cls.HUMAN_RIGHT_JOINTS, 1.0),
            }

        if action_type == 'keypoints':
            # For keypoints type, joints have different dimensions (21*3=63)
            # and start right after the corresponding 6D block.
            # NOTE(review): with the current constants the left joint range
            # (6, 69) extends past HUMAN_RIGHT_6D's start (51) and overlaps
            # the right joint range (57, 120) - confirm this is intended.
            keypoint_dim = 63  # 21 joints * 3D
            left_joints_start = cls.HUMAN_LEFT_6D[1]
            right_joints_start = cls.HUMAN_RIGHT_6D[1]
            return {
                'left_6d': (*cls.HUMAN_LEFT_6D, 1.0),
                'left_joints': (left_joints_start, left_joints_start + keypoint_dim, 1.0),
                'right_6d': (*cls.HUMAN_RIGHT_6D, 1.0),
                'right_joints': (right_joints_start, right_joints_start + keypoint_dim, 1.0),
            }

        raise ValueError(f"Unknown action type: {action_type}")

    @classmethod
    def get_hand_group_mapping(cls, action_type='angle'):
        """Get mapping from loss components to hand groups for weighted averaging.

        NOTE(review): ``action_type`` is currently unused, and the component
        names returned here ('left_trans', 'left_rot', ...) are not all keys
        produced by ``get_loss_components`` - verify against the caller.

        Args:
            action_type: Accepted for interface symmetry; has no effect.

        Returns:
            dict: Dictionary mapping hand group names to list of component names
        """
        return {
            'left_hand': ['left_trans', 'left_rot', 'left_joints'],
            'right_hand': ['right_trans', 'right_rot', 'right_joints'],
        }

    @classmethod
    def get_xhand_loss_components(cls):
        """Get loss components specific to XHand dataset.

        Returns:
            dict: Dictionary mapping component names to (start, end, weight)
            tuples, with weight 1.6 on the 6D blocks and 0.4 on the joints.
        """
        return {
            'left_hand_6d': (*cls.HUMAN_LEFT_6D, 1.6),
            'left_hand_joints': (*cls.HUMAN_LEFT_JOINTS, 0.4),
            'right_hand_6d': (*cls.HUMAN_RIGHT_6D, 1.6),
            'right_hand_joints': (*cls.HUMAN_RIGHT_JOINTS, 0.4),
        }


class StateFeature(ActionFeature):
    """Extended feature indices including state features like hand shape parameters (beta).

    Inherits from ActionFeature and widens the vector to 212 columns, adding
    additional state-specific features after the action features.
    """

    ALL_FEATURES = (0, 212)
    HUMAN_LEFT_BETA = (192, 202)   # MANO shape parameters for left hand, not used now
    HUMAN_RIGHT_BETA = (202, 212)  # MANO shape parameters for right hand, not used now


def calculate_fov(h, w, intrinsics):
    """Calculate horizontal and vertical field of view (FOV) from camera intrinsics.

    Args:
        h: Image height
        w: Image width
        intrinsics: 3x3 camera intrinsic matrix

    Returns:
        fov: np.array of shape (2,), float32, containing horizontal and
        vertical FOV in radians
    """
    fx = intrinsics[0][0]  # horizontal focal length
    fy = intrinsics[1][1]  # vertical focal length
    hfov = 2 * np.arctan(w / (2 * fx))
    vfov = 2 * np.arctan(h / (2 * fy))
    return np.array([hfov, vfov], dtype=np.float32)


def compute_new_intrinsics_crop(original_intrinsics, original_size, crop_size, resize_size):
    """Compute new camera intrinsics after square crop and resize operations.

    The crop is centred: the same offset ``(original_size - crop_size) / 2``
    is removed from both principal point coordinates.

    Args:
        original_intrinsics: Original 3x3 camera intrinsic matrix
        original_size: Original image size (single dimension for square)
        crop_size: Size of the square crop
        resize_size: Target size after resizing

    Returns:
        Updated 3x3 intrinsic matrix accounting for crop and resize
    """
    fx = original_intrinsics[0][0]
    fy = original_intrinsics[1][1]
    cx = original_intrinsics[0][2]
    cy = original_intrinsics[1][2]

    # Offset of the crop's top-left corner; cropping only shifts the
    # principal point, focal lengths are untouched.
    crop_offset = (original_size - crop_size) / 2
    cropped_cx = cx - crop_offset
    cropped_cy = cy - crop_offset

    # Resizing scales focal lengths and principal point uniformly.
    scale = resize_size / crop_size

    return np.array([
        [fx * scale, 0, cropped_cx * scale],
        [0, fy * scale, cropped_cy * scale],
        [0, 0, 1],
    ])


def compute_new_intrinsics_resize(original_intrinsics, resize_size, original_size=None):
    """Compute new camera intrinsics after resize operation.

    Args:
        original_intrinsics: Original 3x3 camera intrinsic matrix
        resize_size: Target size as (H, W) tuple
        original_size: Optional original image size as (H, W) tuple. When
            omitted, the original size is inferred as ``(2 * cy, 2 * cx)``,
            which is only accurate if the principal point lies at the image
            centre.

    Returns:
        Updated 3x3 intrinsic matrix accounting for the resize
    """
    fx = original_intrinsics[0][0]
    fy = original_intrinsics[1][1]
    cx = original_intrinsics[0][2]
    cy = original_intrinsics[1][2]

    target_h, target_w = resize_size

    if original_size is None:
        # Legacy behaviour: treat the principal point as the image centre.
        scale_x = target_w / (2 * cx)
        scale_y = target_h / (2 * cy)
    else:
        source_h, source_w = original_size
        scale_x = target_w / source_w
        scale_y = target_h / source_h

    # Focal lengths and principal point scale with their respective axis.
    return np.array([
        [fx * scale_x, 0, cx * scale_x],
        [0, fy * scale_y, cy * scale_y],
        [0, 0, 1],
    ])