I am trying to train the Emotion FER+ (FerPlus)
emotion recognition model.
Training fails with the error `cuDNN failure 8: CUDNN_STATUS_EXECUTION_FAILED`.
I am using an NVIDIA Titan RTX GPU (24 GB).
I then changed minibatch_size from 32 to 1, but the error still occurs.
I am running inside the CNTK-GPU Docker container.
The complete error output is:
About to throw exception 'cuDNN failure 8: CUDNN_STATUS_EXECUTION_FAILED ; GPU=0 ; hostname=d9150da5d531 ; expr=cudnnConvolutionForward(*m_cudnn, &C::One, m_inT, ptr(in), *m_kernelT, ptr(kernel), *m_conv, m_fwdAlgo.selectedAlgo, ptr(workspace), workspace.BufferSize(), &C::Zero, m_outT, ptr(out))'
cuDNN failure 8: CUDNN_STATUS_EXECUTION_FAILED ; GPU=0 ; hostname=d9150da5d531 ; expr=cudnnConvolutionForward(*m_cudnn, &C::One, m_inT, ptr(in), *m_kernelT, ptr(kernel), *m_conv, m_fwdAlgo.selectedAlgo, ptr(workspace), workspace.BufferSize(), &C::Zero, m_outT, ptr(out))
Traceback (most recent call last):
File "train.py", line 193, in <module>
main(args.base_folder, args.training_mode)
File "train.py", line 124, in main
trainer.train_minibatch({input_var : images, label_var : labels})
File "/root/anaconda3/envs/cntk-py35/lib/python3.5/site-packages/cntk/train/trainer.py", line 184, in train_minibatch
device)
File "/root/anaconda3/envs/cntk-py35/lib/python3.5/site-packages/cntk/cntk_py.py", line 3065, in train_minibatch
return _cntk_py.Trainer_train_minibatch(self, *args)
RuntimeError: cuDNN failure 8: CUDNN_STATUS_EXECUTION_FAILED ; GPU=0 ; hostname=d9150da5d531 ; expr=cudnnConvolutionForward(*m_cudnn, &C::One, m_inT, ptr(in), *m_kernelT, ptr(kernel), *m_conv, m_fwdAlgo.selectedAlgo, ptr(workspace), workspace.BufferSize(), &C::Zero, m_outT, ptr(out))
[CALL STACK]
[0x7fc04da7ce89] + 0x732e89
[0x7fc045a71aaf] + 0xeabaaf
[0x7fc045a7b613] Microsoft::MSR::CNTK::CuDnnConvolutionEngine<float>:: ForwardCore (Microsoft::MSR::CNTK::Matrix<float> const&, Microsoft::MSR::CNTK::Matrix<float> const&, Microsoft::MSR::CNTK::Matrix<float>&, Microsoft::MSR::CNTK::Matrix<float>&) + 0x1a3
[0x7fc04dd4f8d3] Microsoft::MSR::CNTK::ConvolutionNode<float>:: ForwardProp (Microsoft::MSR::CNTK::FrameRange const&) + 0xa3
[0x7fc04dfba654] Microsoft::MSR::CNTK::ComputationNetwork::PARTraversalFlowControlNode:: ForwardProp (std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase> const&, Microsoft::MSR::CNTK::FrameRange const&) + 0xf4
[0x7fc04dcb6e33] std::_Function_handler<void (std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase> const&),void Microsoft::MSR::CNTK::ComputationNetwork::ForwardProp<std::vector<std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase>,std::allocator<std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase>>>>(std::vector<std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase>,std::allocator<std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase>>> const&)::{lambda(std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase> const&)#1}>:: _M_invoke (std::_Any_data const&, std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase> const&) + 0x63
[0x7fc04dd04ed9] void Microsoft::MSR::CNTK::ComputationNetwork:: TravserseInSortedGlobalEvalOrder <std::vector<std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase>,std::allocator<std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase>>>>(std::vector<std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase>,std::allocator<std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase>>> const&, std::function<void (std::shared_ptr<Microsoft::MSR::CNTK::ComputationNodeBase> const&)> const&) + 0x5b9
[0x7fc04dca64da] CNTK::CompositeFunction:: Forward (std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>> const&, std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>>&, CNTK::DeviceDescriptor const&, std::unordered_set<CNTK::Variable,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<CNTK::Variable>> const&, std::unordered_set<CNTK::Variable,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<CNTK::Variable>> const&) + 0x15da
[0x7fc04dc3d603] CNTK::Function:: Forward (std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>> const&, std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>>&, CNTK::DeviceDescriptor const&, std::unordered_set<CNTK::Variable,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<CNTK::Variable>> const&, std::unordered_set<CNTK::Variable,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<CNTK::Variable>> const&) + 0x93
[0x7fc04ddbf91b] CNTK::Trainer:: ExecuteForwardBackward (std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>> const&, std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>>&, CNTK::DeviceDescriptor const&, std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>>&) + 0x36b
[0x7fc04ddc06e4] CNTK::Trainer:: TrainLocalMinibatch (std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>> const&, std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>>&, bool, CNTK::DeviceDescriptor const&) + 0x94
[0x7fc04ddc178a] CNTK::Trainer:: TrainMinibatch (std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>> const&, bool, std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>>&, CNTK::DeviceDescriptor const&) + 0x5a
[0x7fc04ddc1852] CNTK::Trainer:: TrainMinibatch (std::unordered_map<CNTK::Variable,std::shared_ptr<CNTK::Value>,std::hash<CNTK::Variable>,std::equal_to<CNTK::Variable>,std::allocator<std::pair<CNTK::Variable const,std::shared_ptr<CNTK::Value>>>> const&, bool, CNTK::DeviceDescriptor const&) + 0x52
[0x7fc04eb2db22] + 0x229b22
[0x7fc057ea15e9] PyCFunction_Call + 0xf9
[0x7fc057f267c0] PyEval_EvalFrameEx + 0x6ba0
[0x7fc057f29b49] + 0x144b49
[0x7fc057f28df5] PyEval_EvalFrameEx + 0x91d5
[0x7fc057f29b49] + 0x144b49
[0x7fc057f28df5] PyEval_EvalFrameEx + 0x91d5
[0x7fc057f29b49] + 0x144b49
[0x7fc057f28df5] PyEval_EvalFrameEx + 0x91d5
[0x7fc057f29b49] + 0x144b49
[0x7fc057f29cd8] PyEval_EvalCodeEx + 0x48
[0x7fc057f29d1b] PyEval_EvalCode + 0x3b
[0x7fc057f4f020] PyRun_FileExFlags + 0x130
[0x7fc057f50623] PyRun_SimpleFileExFlags + 0x173
[0x7fc057f6b8c7] Py_Main + 0xca7
[0x400add] main + 0x15d
[0x7fc056f06830] __libc_start_main + 0xf0
[0x4008b9]
CNTK is now in maintenance mode (effectively deprecated). CNTK can export models to ONNX reasonably well, but importing ONNX models is not well supported.
ONNX Runtime (https://github.com/microsoft/onnxruntime) now supports training, so please try it instead. ONNX Runtime training is under active development and is supported, so if something doesn't quite work, the issue will likely be resolved quickly.