Source code for handposeutils.calculations.similarity

import numpy as np
from typing import Tuple, List
from handposeutils.data.handpose import HandPose

def procrustes_alignment(pose1: HandPose, pose2: HandPose) -> Tuple[np.ndarray, np.ndarray, float]:
    """
    Perform Procrustes alignment between two 3D hand poses.

    The alignment process removes translation, scale, and rotation differences
    between two `HandPose` objects, returning their aligned coordinates and the
    Procrustes distance (sum of squared differences).

    Parameters
    ----------
    pose1 : HandPose
        First hand pose to align.
    pose2 : HandPose
        Second hand pose to align against.

    Returns
    -------
    aligned_pose1 : ndarray of shape (n_landmarks, 3)
        Centered, unit-scaled `pose1`, rotated onto `pose2`.
    aligned_pose2 : ndarray of shape (n_landmarks, 3)
        Centered, unit-scaled version of `pose2` for comparison.
    distance : float
        Procrustes distance between aligned poses. Lower values indicate
        greater similarity.

    Raises
    ------
    ValueError
        If the number of landmarks (or their dimensionality) differs between
        poses, or if either pose has zero scale (all landmarks coincide), in
        which case scale normalization is undefined.

    Notes
    -----
    - This method uses the Kabsch algorithm to compute the optimal rotation.
    - The Procrustes distance is **not** invariant to landmark correspondence errors.
    """
    # Step 1: Convert HandPoses to N x 3 numpy arrays
    p1 = np.array([coord.as_tuple() for coord in pose1.get_all_coordinates()], dtype=float)
    p2 = np.array([coord.as_tuple() for coord in pose2.get_all_coordinates()], dtype=float)

    if p1.shape != p2.shape:
        raise ValueError(f"Shape mismatch: pose1 has shape {p1.shape}, pose2 has shape {p2.shape}")

    # Step 2: Center both poses at the origin
    p1_centered = p1 - p1.mean(axis=0)
    p2_centered = p2 - p2.mean(axis=0)

    # Step 3: Normalize scale (Frobenius norm)
    scale1 = np.linalg.norm(p1_centered)
    scale2 = np.linalg.norm(p2_centered)
    if scale1 == 0 or scale2 == 0:
        # Dividing by a zero norm would silently turn every output into NaN.
        raise ValueError("Cannot align a degenerate pose: all landmarks coincide (zero scale).")
    p1_scaled = p1_centered / scale1
    p2_scaled = p2_centered / scale2

    # Step 4: Compute optimal rotation matrix using Kabsch algorithm.
    # Landmarks are stored as ROW vectors, so the rotation minimizing
    # ||p1_scaled @ rotation - p2_scaled|| is U @ Vt (the transpose of the
    # column-vector form V @ U.T).
    H = p1_scaled.T @ p2_scaled
    U, _, Vt = np.linalg.svd(H)
    rotation = U @ Vt

    # Fix reflection issues: force a proper rotation (det = +1) by flipping the
    # axis that belongs to the smallest singular value.
    if np.linalg.det(rotation) < 0:
        U[:, -1] *= -1
        rotation = U @ Vt

    # Step 5: Apply rotation to pose1
    p1_aligned = p1_scaled @ rotation
    p2_aligned = p2_scaled

    # Step 6: Compute Procrustes distance (residual sum of squares)
    distance = float(np.sum((p1_aligned - p2_aligned) ** 2))

    return p1_aligned, p2_aligned, distance
def euclidean_distance(pose1: HandPose, pose2: HandPose) -> float:
    """
    Return the mean landmark-to-landmark Euclidean distance of two hand poses.

    Corresponding landmarks are compared directly in their raw coordinates, so
    the result is sensitive to both scale and translation. It is STRONGLY
    recommended to normalize HandPoses before computing Euclidean distance.

    Parameters
    ----------
    pose1 : HandPose
        First hand pose.
    pose2 : HandPose
        Second hand pose.

    Returns
    -------
    mean_distance : float
        Mean Euclidean distance between corresponding landmarks.
        Lower values indicate greater similarity.

    Raises
    ------
    ValueError
        If the number of landmarks (or their dimensionality) differs between poses.
    """
    landmarks_a = np.array([point.as_tuple() for point in pose1.get_all_coordinates()])
    landmarks_b = np.array([point.as_tuple() for point in pose2.get_all_coordinates()])

    if landmarks_a.shape != landmarks_b.shape:
        raise ValueError(
            f"Shape mismatch: pose1 has shape {landmarks_a.shape}, pose2 has shape {landmarks_b.shape}"
        )

    # One distance per landmark (row-wise norm of the offsets), then the average.
    offsets = landmarks_a - landmarks_b
    per_landmark = np.linalg.norm(offsets, axis=1)
    return np.mean(per_landmark)
def cosine_similarity(pose1: HandPose, pose2: HandPose) -> float:
    """
    Compute cosine similarity between two 3D hand poses.

    Each pose is flattened into a single vector (63 values for 21 landmarks
    × 3 coordinates). The cosine similarity measures the angular difference
    between the vectors, making it invariant to scale but not to translation,
    so position is first normalized.

    The input poses are NOT modified: position normalization is performed on
    deep copies.

    Parameters
    ----------
    pose1 : HandPose
        First hand pose.
    pose2 : HandPose
        Second hand pose.

    Returns
    -------
    similarity : float
        Cosine similarity in the range [-1, 1].
        - `1.0` indicates identical orientation.
        - `0.0` indicates orthogonal poses.
        - `-1.0` indicates opposite orientation.

    Raises
    ------
    ValueError
        If the flattened poses have different lengths.

    Notes
    -----
    - This method normalizes translation via `HandPose.normalize_position()`.
    - Similarity is undefined for zero-length pose vectors (returns 0.0).
    """
    import copy

    # NOTE(review): normalize_position() is assumed to re-center the pose in
    # place (its return value was never used) -- confirm against HandPose.
    # Working on copies keeps that mutation away from the caller's objects.
    centered1 = copy.deepcopy(pose1)
    centered2 = copy.deepcopy(pose2)
    centered1.normalize_position()
    centered2.normalize_position()

    vec1 = np.array(
        [c for coord in centered1.get_all_coordinates() for c in coord.as_tuple()],
        dtype=float,
    )
    vec2 = np.array(
        [c for coord in centered2.get_all_coordinates() for c in coord.as_tuple()],
        dtype=float,
    )

    if vec1.shape != vec2.shape:
        raise ValueError(f"Shape mismatch: pose1 has shape {vec1.shape}, pose2 has shape {vec2.shape}")

    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)
    if norm1 == 0 or norm2 == 0:
        return 0.0  # Cannot compare with a zero vector

    return float(np.dot(vec1, vec2) / (norm1 * norm2))
def _joint_angle_descriptor(pose: HandPose) -> List[float]:
    """
    Helper that describes a hand pose by its interior finger-joint angles.

    For every finger, each landmark that has a neighbour on both sides along
    the finger contributes one angle: the angle (in radians) between the
    vector pointing to the previous landmark and the vector pointing to the
    next landmark.

    Parameters
    ----------
    pose : HandPose
        The hand pose to describe. Landmark indices 1-20 are read via
        `get_coordinate_by_index` (21-landmark MediaPipe layout).

    Returns
    -------
    list of float
        Ten joint angles in radians, two per finger, ordered thumb, index,
        middle, ring, pinky. A joint touching a zero-length segment is
        reported as 0.0.
    """
    # Landmark indices along each finger, base to tip.
    finger_chains = (
        [1, 2, 3, 4],      # thumb
        [5, 6, 7, 8],      # index
        [9, 10, 11, 12],   # middle
        [13, 14, 15, 16],  # ring
        [17, 18, 19, 20],  # pinky
    )

    descriptor = []
    for chain in finger_chains:
        # Slide a three-landmark window (previous, joint, next) along the finger.
        for prev_idx, joint_idx, next_idx in zip(chain, chain[1:], chain[2:]):
            prev_pt = pose.get_coordinate_by_index(prev_idx)
            joint_pt = pose.get_coordinate_by_index(joint_idx)
            next_pt = pose.get_coordinate_by_index(next_idx)

            # Both vectors originate at the joint.
            to_prev = np.array([prev_pt.x - joint_pt.x, prev_pt.y - joint_pt.y, prev_pt.z - joint_pt.z])
            to_next = np.array([next_pt.x - joint_pt.x, next_pt.y - joint_pt.y, next_pt.z - joint_pt.z])

            len_prev = np.linalg.norm(to_prev)
            len_next = np.linalg.norm(to_next)
            if len_prev == 0 or len_next == 0:
                descriptor.append(0.0)
                continue

            # Clip guards arccos against rounding slightly outside [-1, 1].
            cosine = np.clip(np.dot(to_prev, to_next) / (len_prev * len_next), -1.0, 1.0)
            descriptor.append(np.arccos(cosine))

    return descriptor
def joint_angle_similarity(pose1: HandPose, pose2: HandPose) -> float:
    """
    Compare two hand poses by the mean squared difference of their joint angles.

    A joint-angle descriptor is extracted from each pose with
    `_joint_angle_descriptor` and the two descriptors are compared element by
    element. Lower values indicate more similar poses.

    Parameters
    ----------
    pose1 : HandPose
        First hand pose.
    pose2 : HandPose
        Second hand pose.

    Returns
    -------
    float
        Mean squared difference between joint angles (radians squared).
        A value of 0.0 indicates identical angles.

    Raises
    ------
    ValueError
        If the angle descriptors have different lengths.

    See Also
    --------
    geometry.get_finger_curvature()
    """
    descriptor_a = _joint_angle_descriptor(pose1)
    descriptor_b = _joint_angle_descriptor(pose2)

    if len(descriptor_a) != len(descriptor_b):
        raise ValueError("Angle descriptors must be of same length")

    squared_gaps = (np.array(descriptor_a) - np.array(descriptor_b)) ** 2
    return float(np.mean(squared_gaps))
def compute_joint_angle_errors(pose1: HandPose, pose2: HandPose) -> np.ndarray:
    """
    Computes per-joint absolute angle differences between two hand poses.

    This method assumes the standard 21-landmark MediaPipe format. For each
    consecutive landmark triplet (a, b, c) along a finger, the angle between
    the segment a→b and the segment b→c is measured in radians, and the
    absolute difference of that angle between the two poses is reported.

    Parameters
    ----------
    pose1 : HandPose
        First hand pose. Must support integer indexing (`pose[i]`) returning
        coordinates that can be subtracted and expose `as_tuple()`.
    pose2 : HandPose
        Second hand pose, same requirements.

    Returns
    -------
    numpy.ndarray of shape (10,)
        Absolute differences in radians for each joint, ordered finger by
        finger (thumb, index, middle, ring, pinky; two joints each).

    Notes
    -----
    - A joint adjacent to a zero-length segment is assigned an angle of 0.0,
      matching the convention of `_joint_angle_descriptor`.
    - The cosine is computed without an additive epsilon in the denominator;
      an epsilon biases the angle whenever segment lengths are small (e.g.
      coordinates normalized to the unit interval).
    """
    def angle_between(v1, v2):
        # Angle in radians between two 3D vectors; 0.0 if either is zero-length.
        v1 = np.asarray(v1, dtype=float)
        v2 = np.asarray(v2, dtype=float)
        denom = np.linalg.norm(v1) * np.linalg.norm(v2)
        if denom == 0:
            return 0.0
        # Clip guards arccos against rounding slightly outside [-1, 1].
        return float(np.arccos(np.clip(np.dot(v1, v2) / denom, -1.0, 1.0)))

    def segment_angles(pose):
        # One angle per (a, b, c) triplet: between segment a->b and segment b->c.
        result = []
        for a, b, c in pairs:
            first_segment = pose[b] - pose[a]
            second_segment = pose[c] - pose[b]
            result.append(angle_between(first_segment.as_tuple(), second_segment.as_tuple()))
        return result

    pairs = [
        (1, 2, 3), (2, 3, 4),        # Thumb
        (5, 6, 7), (6, 7, 8),        # Index
        (9, 10, 11), (10, 11, 12),   # Middle
        (13, 14, 15), (14, 15, 16),  # Ring
        (17, 18, 19), (18, 19, 20),  # Pinky
    ]

    angles1 = segment_angles(pose1)
    angles2 = segment_angles(pose2)

    return np.abs(np.array(angles1) - np.array(angles2))
def pose_similarity(pose1: HandPose, pose2: HandPose, method: str = 'procrustes') -> float:
    """
    Score two hand poses with the selected similarity method.

    Supported methods
    -----------------
    - 'procrustes': Procrustes distance (lower = more similar)
    - 'euclidean' : Euclidean distance
    - 'cosine'    : Cosine similarity
    - 'joint_angle': Mean squared joint angle difference

    Parameters
    ----------
    pose1 : HandPose
        First hand pose.
    pose2 : HandPose
        Second hand pose.
    method : str, default='procrustes'
        Similarity computation method.

    Returns
    -------
    float
        Score produced by the chosen method. Scale and interpretation vary
        depending on the method.

    Raises
    ------
    NotImplementedError
        If the given method is not supported.
    """
    if method == 'procrustes':
        # Only the distance is needed; the aligned coordinate arrays are discarded.
        _aligned1, _aligned2, score = procrustes_alignment(pose1, pose2)
        return score

    if method == 'euclidean':
        return euclidean_distance(pose1, pose2)

    if method == 'cosine':
        return cosine_similarity(pose1, pose2)

    if method == 'joint_angle':
        return joint_angle_similarity(pose1, pose2)

    raise NotImplementedError(f"Similarity method '{method}' is not implemented.")
## --- Implementations for Embedding Similarity --- ##
def embedding_similarity(vec1: np.ndarray, vec2: np.ndarray, method: str = "cosine", **kwargs) -> Tuple[str, float]:
    """
    Computes similarity or distance between two embedding vectors or sequences.

    Supports:
    - Single embeddings (1D arrays)
    - Temporal embeddings (2D arrays of shape [sequence_length, embedding_dim]),
      where similarity is computed per frame and averaged.

    Parameters
    ----------
    vec1 : numpy.ndarray
        First embedding vector or sequence (any array-like is accepted).
    vec2 : numpy.ndarray
        Second embedding vector or sequence (any array-like is accepted).
    method : str, default="cosine"
        Method to compute similarity. Options:
        - 'cosine'
        - 'euclidean'
        - 'manhattan'
        - 'mahalanobis'
    **kwargs
        Additional parameters for specific methods. For example:
        - cov : array-like
            Covariance matrix for Mahalanobis distance. Defaults to the
            identity matrix when omitted or None.

    Returns
    -------
    tuple of (str, float)
        The method name and the computed similarity or distance score.
        For cosine similarity, higher is more similar (0.0 is returned when
        either vector has zero norm). For distances, lower is more similar.

    Raises
    ------
    ValueError
        If vectors have different shapes, or the covariance matrix has the
        wrong shape or is not invertible.
    NotImplementedError
        If the given method is not supported.
    """
    vec1 = np.asarray(vec1)
    vec2 = np.asarray(vec2)

    if vec1.shape != vec2.shape:
        raise ValueError(f"Vectors must be same shape. Got {vec1.shape} vs {vec2.shape}")

    # If both are 2D (sequence case), compute per-frame similarity and average
    if vec1.ndim == 2 and vec2.ndim == 2:
        scores = [
            embedding_similarity(frame1, frame2, method=method, **kwargs)[1]
            for frame1, frame2 in zip(vec1, vec2)
        ]
        return method, float(np.mean(scores))

    # --- Single vector similarity ---
    if method == "cosine":
        norm_a = np.linalg.norm(vec1)
        norm_b = np.linalg.norm(vec2)
        if norm_a == 0 or norm_b == 0:
            return "cosine", 0.0
        return "cosine", float(np.dot(vec1, vec2) / (norm_a * norm_b))

    if method == "euclidean":
        diff = vec1 - vec2
        return "euclidean", float(np.sqrt(np.sum(diff ** 2)))

    if method == "manhattan":
        return "manhattan", float(np.sum(np.abs(vec1 - vec2)))

    if method == "mahalanobis":
        diff = vec1 - vec2
        dim = len(vec1)

        cov = kwargs.get("cov")
        # Coerce so that nested lists work too; identity reduces to Euclidean.
        cov = np.eye(dim) if cov is None else np.asarray(cov, dtype=float)
        if cov.shape != (dim, dim):
            raise ValueError(f"Covariance matrix must be shape ({dim}, {dim}), got {cov.shape}")

        try:
            inv_cov = np.linalg.inv(cov)
        except np.linalg.LinAlgError as err:
            raise ValueError("Covariance matrix is not invertible.") from err

        dist = np.dot(np.dot(diff.T, inv_cov), diff)
        return "mahalanobis", float(np.sqrt(dist))

    raise NotImplementedError(f"Unknown method '{method}'.")