Source code for models.PointNet

# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Iterable

import torch
import torch.nn as nn
import torch.nn.functional as F


class PointNetFeatureExtractor(nn.Module):
    r"""PointNet feature extractor (extracts either global or local, i.e.,
    per-point features).

    Based on the original PointNet paper:

    .. note::

        If you use this code, please cite the original paper in addition
        to Kaolin.

        .. code-block::

            @article{qi2016pointnet,
              title={PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation},
              author={Qi, Charles R and Su, Hao and Mo, Kaichun and Guibas, Leonidas J},
              journal={arXiv preprint arXiv:1612.00593},
              year={2016}
            }

    Args:
        in_channels (int): Number of channels in the input pointcloud
            (default: 3, for X, Y, Z coordinates respectively).
        feat_size (int): Size of the global feature vector (default: 1024).
        layer_dims (Iterable[int]): Sizes of fully connected layers to be
            used in the feature extractor (excluding the input and the
            output layer sizes). Note: the number of layers in the feature
            extractor is implicitly parsed from this variable.
        global_feat (bool): Extract global features (i.e., one feature
            for the entire pointcloud) if set to True. If set to False,
            extract per-point (local) features (default: True).
        activation (function): Nonlinearity to be used as activation
            function after each batchnorm (default: F.relu).
        batchnorm (bool): Whether or not to use batchnorm layers
            (default: True).
        transposed_input (bool): Whether the input's second and third
            dimensions are already transposed. If so, a transpose operation
            can be avoided, improving performance. See the documentation of
            the forward method for more details.

    For example, to specify a PointNet feature extractor with 5 convolution
    layers (sizes 3 -> 10, 10 -> 20, 20 -> 40, 40 -> 500, 500 -> 1024),
    with 3 input channels in the pointcloud and a global feature vector of
    size 1024, see the example below.

    Example:

        >>> pointnet = PointNetFeatureExtractor(in_channels=3, feat_size=1024,
        ...                                     layer_dims=[10, 20, 40, 500])
        >>> x = torch.rand(2, 30, 3)
        >>> y = pointnet(x)
        >>> y.shape
        torch.Size([2, 1024])

    """

    def __init__(self,
                 in_channels: int = 3,
                 feat_size: int = 1024,
                 layer_dims: Iterable[int] = [64, 128],
                 global_feat: bool = True,
                 activation=F.relu,
                 batchnorm: bool = True,
                 transposed_input: bool = False):
        super(PointNetFeatureExtractor, self).__init__()

        if not isinstance(in_channels, int):
            raise TypeError('Argument in_channels expected to be of type int. '
                            'Got {0} instead.'.format(type(in_channels)))
        if not isinstance(feat_size, int):
            raise TypeError('Argument feat_size expected to be of type int. '
                            'Got {0} instead.'.format(type(feat_size)))
        if not hasattr(layer_dims, '__iter__'):
            raise TypeError('Argument layer_dims is not iterable.')
        for idx, layer_dim in enumerate(layer_dims):
            if not isinstance(layer_dim, int):
                raise TypeError('Elements of layer_dims must be of type int. '
                                'Found type {0} at index {1}.'.format(
                                    type(layer_dim), idx))
        if not isinstance(global_feat, bool):
            raise TypeError('Argument global_feat expected to be of type '
                            'bool. Got {0} instead.'.format(
                                type(global_feat)))

        # Store feat_size as a class attribute
        self.feat_size = feat_size

        # Store activation as a class attribute
        self.activation = activation

        # Store global_feat as a class attribute
        self.global_feat = global_feat

        # Add in_channels to the head of layer_dims (the first layer
        # has number of channels equal to `in_channels`). Also, add
        # feat_size to the tail of layer_dims.
        # Copy layer_dims to a new list, so that the caller's iterable is
        # not mutated by the insert/append below.
        layer_dims = list(layer_dims)
        layer_dims.insert(0, in_channels)
        layer_dims.append(feat_size)

        self.conv_layers = nn.ModuleList()
        if batchnorm:
            self.bn_layers = nn.ModuleList()
        for idx in range(len(layer_dims) - 1):
            self.conv_layers.append(nn.Conv1d(layer_dims[idx],
                                              layer_dims[idx + 1], 1))
            if batchnorm:
                self.bn_layers.append(nn.BatchNorm1d(layer_dims[idx + 1]))

        # Store whether or not to use batchnorm as a class attribute
        self.batchnorm = batchnorm

        self.transposed_input = transposed_input

    def forward(self, x: torch.Tensor):
        r"""Forward pass through the PointNet feature extractor.

        Args:
            x (torch.Tensor): Tensor representing a pointcloud
                (shape: :math:`B \times N \times D`, where :math:`B`
                is the batchsize, :math:`N` is the number of points in the
                pointcloud, and :math:`D` is the dimensionality of each point
                in the pointcloud).
                If self.transposed_input is True, then the shape is
                :math:`B \times D \times N`.

        """
        if not self.transposed_input:
            x = x.transpose(1, 2)

        # Number of points
        num_points = x.shape[2]

        # By default, initialize local features (per-point features)
        # to None.
        local_features = None

        # Apply a sequence of conv-batchnorm-nonlinearity operations.

        # For the first layer, store the features, as these will be
        # used to compute local features (if specified).
        if self.batchnorm:
            x = self.activation(self.bn_layers[0](self.conv_layers[0](x)))
        else:
            x = self.activation(self.conv_layers[0](x))
        if self.global_feat is False:
            local_features = x

        # Pass through the remaining layers (until the penultimate layer).
        for idx in range(1, len(self.conv_layers) - 1):
            if self.batchnorm:
                x = self.activation(self.bn_layers[idx](
                    self.conv_layers[idx](x)))
            else:
                x = self.activation(self.conv_layers[idx](x))

        # For the last layer, do not apply nonlinearity.
        if self.batchnorm:
            x = self.bn_layers[-1](self.conv_layers[-1](x))
        else:
            x = self.conv_layers[-1](x)

        # Max pooling.
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, self.feat_size)

        # If extracting global features, return at this point.
        if self.global_feat:
            return x

        # If extracting local features, compute local features by
        # concatenating global features and per-point features.
        x = x.view(-1, self.feat_size, 1).repeat(1, 1, num_points)
        return torch.cat((x, local_features), dim=1)
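A quick shape check makes the global/local distinction concrete. The following is an editorial usage sketch (not part of the original Kaolin source); the tensor sizes are arbitrary:

    # Editorial usage sketch: compare global vs. per-point (local) features.
    extractor_global = PointNetFeatureExtractor(in_channels=3, feat_size=1024,
                                                layer_dims=[64, 128])
    extractor_local = PointNetFeatureExtractor(in_channels=3, feat_size=1024,
                                               layer_dims=[64, 128],
                                               global_feat=False)
    points = torch.rand(2, 30, 3)  # batch of 2 clouds, 30 points each, XYZ
    print(extractor_global(points).shape)  # torch.Size([2, 1024])
    # Local features concatenate the repeated global vector (1024 channels)
    # with the first conv layer's output (64 channels): 1024 + 64 = 1088.
    print(extractor_local(points).shape)   # torch.Size([2, 1088, 30])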
class PointNetClassifier(nn.Module):
    r"""PointNet classifier. Uses the PointNet feature extractor, and
    adds classification layers on top.

    .. note::

        If you use this code, please cite the original paper in addition
        to Kaolin.

        .. code-block::

            @article{qi2016pointnet,
              title={PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation},
              author={Qi, Charles R and Su, Hao and Mo, Kaichun and Guibas, Leonidas J},
              journal={arXiv preprint arXiv:1612.00593},
              year={2016}
            }

    Args:
        in_channels (int): Number of channels in the input pointcloud
            (default: 3, for X, Y, Z coordinates respectively).
        feat_size (int): Size of the global feature vector (default: 1024).
        num_classes (int): Number of classes (for the classification task)
            (default: 2).
        dropout (float): Dropout ratio to use (default: 0.). Note: If the
            ratio is set to 0., we altogether skip using a dropout layer.
        classifier_layer_dims (Iterable[int]): Sizes of fully connected
            layers to be used in the classifier (excluding the input and
            the output layer sizes). Note: the number of layers is
            implicitly parsed from this variable.
        feat_layer_dims (Iterable[int]): Sizes of fully connected layers
            to be used in the feature extractor (excluding the input and
            the output layer sizes).
        activation (function): Nonlinearity to be used as activation
            function after each batchnorm (default: F.relu).
        batchnorm (bool): Whether or not to use batchnorm layers
            (default: True).
        transposed_input (bool): Whether the input's second and third
            dimensions are already transposed. If so, a transpose operation
            can be avoided, improving performance. See the documentation of
            PointNetFeatureExtractor for more details.

    Example:

        pointnet = PointNetClassifier(in_channels=6, feat_size=1024,
                                      feat_layer_dims=[32, 64, 256],
                                      classifier_layer_dims=[500, 200, 100])
        x = torch.rand(5, 30, 6)
        y = pointnet(x)
        print(y.shape)  # torch.Size([5, 2])

    """

    def __init__(self,
                 in_channels: int = 3,
                 feat_size: int = 1024,
                 num_classes: int = 2,
                 dropout: float = 0.,
                 classifier_layer_dims: Iterable[int] = [512, 256],
                 feat_layer_dims: Iterable[int] = [64, 128],
                 activation=F.relu,
                 batchnorm: bool = True,
                 transposed_input: bool = False):
        super(PointNetClassifier, self).__init__()

        if not isinstance(num_classes, int):
            raise TypeError('Argument num_classes must be of type int. '
                            'Got {0} instead.'.format(type(num_classes)))
        if not isinstance(dropout, float):
            raise TypeError('Argument dropout must be of type float. '
                            'Got {0} instead.'.format(type(dropout)))
        if dropout < 0 or dropout > 1:
            raise ValueError('Dropout ratio must always be in the range '
                             '[0, 1]. Got {0} instead.'.format(dropout))
        if not hasattr(classifier_layer_dims, '__iter__'):
            raise TypeError('Argument classifier_layer_dims is not iterable.')
        for idx, layer_dim in enumerate(classifier_layer_dims):
            if not isinstance(layer_dim, int):
                raise TypeError('Expected classifier_layer_dims to contain '
                                'int. Found type {0} at index {1}.'.format(
                                    type(layer_dim), idx))

        # Add feat_size to the head of classifier_layer_dims (the output
        # of the PointNet feature extractor has feat_size channels).
        # Copy to a new list, so that the caller's iterable is not mutated.
        classifier_layer_dims = list(classifier_layer_dims)
        classifier_layer_dims.insert(0, feat_size)

        # Note that `global_feat` MUST be set to True, for global
        # classification tasks.
        self.feature_extractor = PointNetFeatureExtractor(
            in_channels=in_channels, feat_size=feat_size,
            layer_dims=feat_layer_dims, global_feat=True,
            activation=activation, batchnorm=batchnorm,
            transposed_input=transposed_input
        )

        self.linear_layers = nn.ModuleList()
        if batchnorm:
            self.bn_layers = nn.ModuleList()
        for idx in range(len(classifier_layer_dims) - 1):
            self.linear_layers.append(nn.Linear(classifier_layer_dims[idx],
                                                classifier_layer_dims[idx + 1]))
            if batchnorm:
                self.bn_layers.append(nn.BatchNorm1d(
                    classifier_layer_dims[idx + 1]))

        self.last_linear_layer = nn.Linear(classifier_layer_dims[-1],
                                           num_classes)

        # Store activation as a class attribute
        self.activation = activation

        # Dropout layer (if dropout ratio is in the interval (0, 1]).
        if dropout > 0:
            self.dropout = nn.Dropout(p=dropout)
        else:
            self.dropout = None

        # Store whether or not to use batchnorm as a class attribute
        self.batchnorm = batchnorm

        self.transposed_input = transposed_input

    def forward(self, x):
        r"""Forward pass through the PointNet classifier.

        Args:
            x (torch.Tensor): Tensor representing a pointcloud
                (shape: :math:`B \times N \times D`, where :math:`B`
                is the batchsize, :math:`N` is the number of points in the
                pointcloud, and :math:`D` is the dimensionality of each point
                in the pointcloud).
                If self.transposed_input is True, then the shape is
                :math:`B \times D \times N`.

        """
        x = self.feature_extractor(x)
        for idx in range(len(self.linear_layers) - 1):
            if self.batchnorm:
                x = self.activation(self.bn_layers[idx](
                    self.linear_layers[idx](x)))
            else:
                x = self.activation(self.linear_layers[idx](x))
        # For the penultimate linear layer, apply dropout before batchnorm.
        if self.dropout:
            if self.batchnorm:
                x = self.activation(self.bn_layers[-1](self.dropout(
                    self.linear_layers[-1](x))))
            else:
                x = self.activation(self.dropout(self.linear_layers[-1](x)))
        else:
            if self.batchnorm:
                x = self.activation(self.bn_layers[-1](
                    self.linear_layers[-1](x)))
            else:
                x = self.activation(self.linear_layers[-1](x))
        # Return raw (unnormalized) class scores; apply a softmax (or use a
        # loss such as nn.CrossEntropyLoss, which expects raw logits)
        # downstream, as needed.
        x = self.last_linear_layer(x)
        return x
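Because the classifier returns raw (unnormalized) scores, it pairs naturally with nn.CrossEntropyLoss, which applies log-softmax internally. Below is a minimal training-step sketch, an editorial illustration rather than part of the Kaolin source; the learning rate, batch size, and random data are placeholder assumptions:

    # Editorial training-step sketch (random data; hyperparameters arbitrary).
    model = PointNetClassifier(in_channels=3, num_classes=10)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()  # expects raw logits

    points = torch.rand(8, 30, 3)        # B x N x D (transposed_input=False)
    labels = torch.randint(0, 10, (8,))  # one class label per pointcloud

    logits = model(points)               # shape: (8, 10)
    loss = criterion(logits, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()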
class PointNetSegmenter(nn.Module):
    r"""PointNet segmenter. Uses the PointNet feature extractor, and
    adds per-point segmentation layers on top.

    .. note::

        If you use this code, please cite the original paper in addition
        to Kaolin.

        .. code-block::

            @article{qi2016pointnet,
              title={PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation},
              author={Qi, Charles R and Su, Hao and Mo, Kaichun and Guibas, Leonidas J},
              journal={arXiv preprint arXiv:1612.00593},
              year={2016}
            }

    Args:
        in_channels (int): Number of channels in the input pointcloud
            (default: 3, for X, Y, Z coordinates respectively).
        feat_size (int): Size of the global feature vector (default: 1024).
        num_classes (int): Number of classes (for the segmentation task)
            (default: 2).
        dropout (float): Dropout ratio to use (default: 0.). Note: the
            segmenter validates this argument but does not currently
            instantiate a dropout layer.
        classifier_layer_dims (Iterable[int]): Sizes of the per-point
            (1x1 convolution) segmentation layers (excluding the input and
            the output layer sizes). Note: the number of layers is
            implicitly parsed from this variable.
        feat_layer_dims (Iterable[int]): Sizes of fully connected layers
            to be used in the feature extractor (excluding the input and
            the output layer sizes).
        activation (function): Nonlinearity to be used as activation
            function after each batchnorm (default: F.relu).
        batchnorm (bool): Whether or not to use batchnorm layers
            (default: True).
        transposed_input (bool): Whether the input's second and third
            dimensions are already transposed. If so, a transpose operation
            can be avoided, improving performance. See the documentation of
            PointNetFeatureExtractor for more details.

    Example:

        pointnet = PointNetSegmenter(in_channels=6, feat_size=1024,
                                     feat_layer_dims=[32, 64, 256],
                                     classifier_layer_dims=[500, 200, 100])
        x = torch.rand(5, 30, 6)
        y = pointnet(x)
        print(y.shape)  # torch.Size([5, 30, 2])

    """

    def __init__(self,
                 in_channels: int = 3,
                 feat_size: int = 1024,
                 num_classes: int = 2,
                 dropout: float = 0.,
                 classifier_layer_dims: Iterable[int] = [512, 256],
                 feat_layer_dims: Iterable[int] = [64, 128],
                 activation=F.relu,
                 batchnorm: bool = True,
                 transposed_input: bool = False):
        super(PointNetSegmenter, self).__init__()

        if not isinstance(num_classes, int):
            raise TypeError('Argument num_classes must be of type int. '
                            'Got {0} instead.'.format(type(num_classes)))
        if not isinstance(dropout, float):
            raise TypeError('Argument dropout must be of type float. '
                            'Got {0} instead.'.format(type(dropout)))
        if not hasattr(classifier_layer_dims, '__iter__'):
            raise TypeError('Argument classifier_layer_dims is not iterable.')
        for idx, layer_dim in enumerate(classifier_layer_dims):
            if not isinstance(layer_dim, int):
                raise TypeError('Expected classifier_layer_dims to contain '
                                'int. Found type {0} at index {1}.'.format(
                                    type(layer_dim), idx))

        # Copy classifier_layer_dims and feat_layer_dims to new lists, so
        # that the caller's iterables are not mutated (and so they can be
        # indexed below).
        classifier_layer_dims = list(classifier_layer_dims)
        feat_layer_dims = list(feat_layer_dims)

        # Add feat_size to the head of classifier_layer_dims (the first
        # per-point segmentation layer maps the concatenated local features
        # down to feat_size channels).
        classifier_layer_dims.insert(0, feat_size)

        # Note that `global_feat` MUST be set to False, for
        # segmentation tasks.
        self.feature_extractor = PointNetFeatureExtractor(
            in_channels=in_channels, feat_size=feat_size,
            layer_dims=feat_layer_dims, global_feat=False,
            activation=activation, batchnorm=batchnorm,
            transposed_input=transposed_input
        )

        # Compute the dimensionality of local features.
        # Local feature size = (global feature size) + (output size of the
        # first conv layer of the feature extractor, i.e., the first
        # element of feat_layer_dims).
        self.local_feat_size = feat_size + feat_layer_dims[0]

        self.conv_layers = nn.ModuleList()
        if batchnorm:
            self.bn_layers = nn.ModuleList()

        # First classifier layer
        self.conv_layers.append(nn.Conv1d(self.local_feat_size,
                                          classifier_layer_dims[0], 1))
        if batchnorm:
            self.bn_layers.append(nn.BatchNorm1d(classifier_layer_dims[0]))

        for idx in range(len(classifier_layer_dims) - 1):
            self.conv_layers.append(nn.Conv1d(classifier_layer_dims[idx],
                                              classifier_layer_dims[idx + 1],
                                              1))
            if batchnorm:
                self.bn_layers.append(nn.BatchNorm1d(
                    classifier_layer_dims[idx + 1]))

        self.last_conv_layer = nn.Conv1d(classifier_layer_dims[-1],
                                         num_classes, 1)

        # Store activation as a class attribute
        self.activation = activation

        # Store the number of classes as an attribute
        self.num_classes = num_classes

        # Store whether or not to use batchnorm as a class attribute
        self.batchnorm = batchnorm

        self.transposed_input = transposed_input

    def forward(self, x):
        r"""Forward pass through the PointNet segmentation model.

        Args:
            x (torch.Tensor): Tensor representing a pointcloud
                (shape: :math:`B \times N \times D`, where :math:`B`
                is the batchsize, :math:`N` is the number of points in the
                pointcloud, and :math:`D` is the dimensionality of each point
                in the pointcloud).
                If self.transposed_input is True, then the shape is
                :math:`B \times D \times N`.

        """
        batchsize = x.shape[0]
        num_points = x.shape[2] if self.transposed_input else x.shape[1]
        x = self.feature_extractor(x)
        for idx in range(len(self.conv_layers)):
            if self.batchnorm:
                x = self.activation(self.bn_layers[idx](
                    self.conv_layers[idx](x)))
            else:
                x = self.activation(self.conv_layers[idx](x))
        x = self.last_conv_layer(x)
        # Return per-point raw class scores, reshaped to
        # B x N x num_classes; apply a softmax downstream if normalized
        # scores are needed.
        x = x.transpose(2, 1).contiguous()
        return x.view(batchsize, num_points, self.num_classes)
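Analogously to the classifier, the segmenter's per-point logits can be flattened across the batch and point dimensions before applying nn.CrossEntropyLoss. A minimal editorial sketch (not part of the Kaolin source; random data and arbitrary sizes):

    # Editorial usage sketch: per-point classification with random data.
    model = PointNetSegmenter(in_channels=3, num_classes=4)
    criterion = nn.CrossEntropyLoss()

    points = torch.rand(8, 30, 3)          # B x N x D
    labels = torch.randint(0, 4, (8, 30))  # one class label per point

    logits = model(points)                 # shape: (8, 30, 4)
    loss = criterion(logits.reshape(-1, 4), labels.reshape(-1))
    print(loss.item())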