// Runs group normalization on the GPU by re-viewing the input so that cuDNN's
// batch-norm primitive computes per-(batch, group) statistics, then applies
// the user-supplied per-channel scale/shift with a custom kernel.
//
// inputs[0]: data tensor (N, C, spatial...). inputs[1]/inputs[2]: per-channel
// affine parameters — presumably inputs[1] = scale (gamma) and
// inputs[2] = bias (beta), matching the order passed to
// scaleShiftChannelsInplace below; TODO confirm against the plugin's
// declared input order and the kernel's signature.
// Returns the status of the final kernel launch (0 on success).
int GroupNormalizationPlugin::enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
    const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace,
    cudaStream_t stream) noexcept
{
    nvinfer1::Dims input_dims = inputDesc[0].dims;
    int batchSize = input_dims.d[0];
    int nbChannels = input_dims.d[1];
    // Channels per group. Assumes nbChannels % mNbGroups == 0 — presumably
    // validated at configure time; TODO confirm.
    int groupSize = nbChannels / mNbGroups;
    // Volume of one channel: product of all dims after N and C (note: cached
    // into a member as a side effect of enqueue).
    mChannelVolume
        = std::accumulate(input_dims.d + 2, input_dims.d + inputDesc[0].dims.nbDims, 1, std::multiplies<int>());
    // View the (N, C, spatial...) tensor as a 4D NCHW tensor of shape
    // (1, N*G, C/G, channelVolume): each (batch, group) pair becomes one
    // pseudo-channel, so spatial batch-norm normalizes exactly one group.
    CHECK_CUDNN(cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
        1,                     // n: batch folded into the channel axis
        batchSize * mNbGroups, // c: one pseudo-channel per (batch, group)
        groupSize,             // h: channels within a group
        mChannelVolume         // w: spatial volume of one channel
        ));
    CHECK_CUDNN(cudnnDeriveBNTensorDescriptor(bnDesc, desc, CUDNN_BATCHNORM_SPATIAL));
    // Issue all cuDNN work on the caller-provided stream.
    CHECK_CUDNN(cudnnSetStream(_cudnn_handle, stream));
    float a = 1.F; // alpha: write result as-is
    float b = 0.F; // beta: do not blend with existing output
    // "Training" mode so the statistics come from the current batch; the
    // zero exponential-average factor and null running-mean/var pointers mean
    // no running statistics are read or updated. bnScale/bnBias here are
    // internal identity parameters — the real per-channel affine is applied
    // in the next step.
    CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(_cudnn_handle, CUDNN_BATCHNORM_SPATIAL, &a, &b, desc,
        inputs[0], desc, outputs[0], bnDesc, bnScale, bnBias, 0.0, nullptr, nullptr, mEpsilon, nullptr,
        nullptr));
    float* output = static_cast<float*>(outputs[0]);
    // Apply the user-supplied per-channel scale and shift in place; the
    // group-wise batch-norm above could not, since its parameter axis was the
    // pseudo-channel (batch, group) axis, not the real channel axis.
    return scaleShiftChannelsInplace(output, batchSize, nbChannels, mChannelVolume,
        static_cast<const float*>(inputs[2]), static_cast<const float*>(inputs[1]), stream);
}