c++ cmake cuda

CMake project with mixed CUDA and C++ leads to error cudart_static.lib LNK2005 XXX already defined in cudart_static.lib


I have a CMake project similar to the one below:

# Minimum CMake release this project is written against.
cmake_minimum_required(VERSION 3.22)

# Top-level project; enables the C++, C and CUDA toolchains.
project(
  example
  DESCRIPTION "Example for interop between D3D11, CUDA 11.8 and libtorch 2.0.1 (CUDA 11.8)"
  LANGUAGES CXX C CUDA
)

# Host code is compiled as strict C++17 without compiler-specific extensions.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Multi-config generators (e.g. Visual Studio) ignore CMAKE_BUILD_TYPE; the
# configuration is chosen at build time instead, e.g.
#   cmake --build <build-dir> --target ALL_BUILD --config Debug
# Only Debug and Release are supported (RelWithDebInfo and MinSizeRel are not).
# NOTE(review): this is a non-FORCE cache entry placed after project(); if the
# generator has already cached CMAKE_CONFIGURATION_TYPES by then, this line has
# no effect and the list must be passed with -D on the command line - confirm.
set(CMAKE_CONFIGURATION_TYPES "debug;release" CACHE STRING "")

# Executables and static libraries go to <build>/bin/, shared/module libraries
# to <build>/lib/. The install prefix also points at <build>/bin/.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/")
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib/")
set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/bin/")

# Read the GitLab token from token.txt in the top-level source directory.
#
# file(STRINGS) with LIMIT_COUNT 1 is used instead of file(READ) so that only
# the first string of the file is taken; file(READ) would also pick up extra
# lines (including an empty terminating line).
#
# file(STRINGS) stops the configuration with its own error when the file cannot
# be read, so the file's existence is checked first. Otherwise the dedicated
# "Could not find" diagnostic below could never be emitted.
if(NOT EXISTS "${CMAKE_SOURCE_DIR}/token.txt")
    message(SEND_ERROR "Could not find ${CMAKE_SOURCE_DIR}/token.txt with token string inside")
else()
    file(
        STRINGS
        "${CMAKE_SOURCE_DIR}/token.txt"
        TOKEN
        LIMIT_COUNT 1
    )
    # Drop surrounding whitespace so a blank first line is reported as empty.
    string(STRIP "${TOKEN}" TOKEN)
    if("${TOKEN}" STREQUAL "")
        message(SEND_ERROR "Found ${CMAKE_SOURCE_DIR}/token.txt but token string is empty")
    endif()
endif()

include("${CMAKE_SOURCE_DIR}/cmake/download.cmake")

# CUDA device code: C++17 (required, matching the host-side CXX settings) for
# compute capability 7.5.
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_ARCHITECTURES 75)

# Switch read by the legacy FindCUDA module. find_package(CUDA) itself is
# deprecated and intentionally not called from this file.
# NOTE(review): presumably consumed through libtorch's package files - confirm.
set(CUDA_USE_STATIC_CUDA_RUNTIME ON)

# Location of the CUDA 11.8 toolkit. The default is the standard Windows
# install path; pass -DCUDA_HOME=<path> to use a different location.
if(NOT DEFINED CUDA_HOME)
    set(CUDA_HOME "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8")
endif()

# project() picks the CUDA compiler independently of CUDA_HOME. Warn when the
# detected nvcc does not belong to the toolkit version named by CUDA_HOME,
# because building against one toolkit while linking libraries from another can
# pull in two different CUDA runtimes.
if("${CUDA_HOME}" MATCHES "v([0-9]+\\.[0-9]+)/?$")
    set(example_expected_cuda_version "${CMAKE_MATCH_1}")
    string(REGEX MATCH "^[0-9]+\\.[0-9]+" example_nvcc_version "${CMAKE_CUDA_COMPILER_VERSION}")
    if(NOT "${example_nvcc_version}" STREQUAL ""
       AND NOT "${example_nvcc_version}" VERSION_EQUAL "${example_expected_cuda_version}")
        message(WARNING
            "CUDA compiler ${CMAKE_CUDA_COMPILER} has version ${CMAKE_CUDA_COMPILER_VERSION}, "
            "but CUDA_HOME points to toolkit ${example_expected_cuda_version} (${CUDA_HOME}). "
            "Select the matching compiler with -DCMAKE_CUDA_COMPILER=<path-to-nvcc> or the "
            "CUDACXX environment variable."
        )
    endif()
endif()

# Per-configuration setup. Everything below runs only for multi-config
# generators; with a single-config generator no target is defined here.
get_property(isMultiConfig GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(isMultiConfig)
    # NOTE(review): the targets "kernels" and ${PROJECT_NAME} are created inside
    # this loop. If CMAKE_CONFIGURATION_TYPES holds more than one supported
    # entry, the second iteration would try to create them again - confirm that
    # the project is always configured with a single configuration.
    foreach(config ${CMAKE_CONFIGURATION_TYPES})
        # The global property is looked up with the configuration name as
        # written in CMAKE_CONFIGURATION_TYPES; only afterwards is the loop
        # variable lower-cased for the comparisons and paths below.
        get_property(INTERNAL_DEPS_${config} GLOBAL PROPERTY INTERNAL_DEPS_PROP_${config})
        string(TOLOWER "${config}" config)
        
        # Common for all supported build types
        if (("${config}" STREQUAL "debug") OR ("${config}" STREQUAL "release"))

            ... # Dependencies such as ImGUI and DirectXTex
            
            find_package(Torch 2.0.1 REQUIRED)

            # Static library holding the CUDA kernels and their host wrappers.
            add_library(kernels
                STATIC
                src/kernels/default.cu
                src/kernels/simple.cu
                src/kernels/grayscale.cu
            )
            # C++17 is required by the library and by everything linking it.
            target_compile_features(
                kernels
                PUBLIC
                cxx_std_17
            )
            # Separable compilation is enabled, and device symbols are resolved
            # when this library is built.
            set_target_properties(
                kernels
                PROPERTIES
                CUDA_SEPARABLE_COMPILATION ON
                CUDA_RESOLVE_DEVICE_SYMBOLS ON
            )

            # The application itself, built as a Windows GUI executable (WIN32).
            add_executable(
                ${PROJECT_NAME}
                WIN32
                "${CMAKE_CURRENT_SOURCE_DIR}/src/load_sample.cpp"
                "${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp"
            )
            # CUDA_INCLUDE_DIRS is not set anywhere in the visible part of this
            # file; presumably it comes from find_package(Torch) - confirm.
            # The deps/<config> directory is expected to contain the headers of
            # the dependencies prepared above.
            target_include_directories(
                ${PROJECT_NAME}
                PUBLIC
                ${CUDA_INCLUDE_DIRS}
                ${TORCH_INCLUDE_DIRS}
                "${CMAKE_BINARY_DIR}/deps/${config}/include"
                "${CMAKE_CURRENT_SOURCE_DIR}/include"
            )
            target_link_directories(
                ${PROJECT_NAME}
                PUBLIC
                "${CMAKE_BINARY_DIR}/deps/${config}/lib"
            )
            # Windows/Direct3D system libraries, CUDA, libtorch and the kernels.
            # NOTE(review): CUDA_LIBRARIES is listed explicitly next to
            # TORCH_LIBRARIES and the CUDA-enabled "kernels" library; verify
            # that this does not put cudart_static.lib on the link line more
            # than once, possibly from different toolkit versions.
            target_link_libraries(
                ${PROJECT_NAME}
                PUBLIC
                d3d11.lib dxgi.lib dxguid.lib d3dcompiler.lib
                uuid.lib kernel32.lib user32.lib
                comdlg32.lib advapi32.lib shell32.lib
                ole32.lib oleaut32.lib
                ${CUDA_LIBRARIES}
                ${TORCH_LIBRARIES}
                kernels
                ... # Dependencies such as ImGUI and DirectXTex
            )
            set_target_properties(
                ${PROJECT_NAME}
                PROPERTIES
                CUDA_SEPARABLE_COMPILATION ON
                CUDA_RESOLVE_DEVICE_SYMBOLS ON
            )

            # Copy every file from shaders/ next to the executable of this
            # configuration. Both commands run at configure time, so shader
            # changes are only picked up when CMake is re-run.
            file(GLOB
                SHADERS
                "${CMAKE_SOURCE_DIR}/shaders/*.*"
            )
            file(COPY
                ${SHADERS}
                DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${config}/"
            )
        endif()
    endforeach()
endif()

The project is configured and built on:

The log from the configuration step (here debug type) can be seen below:

 cmake -Bbuild -G "Visual Studio 16 2019" -S. -DCMAKE_CONFIGURATION_TYPES="debug"
-- Selecting Windows SDK version 10.0.19041.0 to target Windows 10.0.19043.
-- The CXX compiler identification is MSVC 19.29.30152.0
-- The C compiler identification is MSVC 19.29.30152.0
-- The CUDA compiler identification is NVIDIA 12.2.91
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.29.30133/bin/Hostx64/x64/cl.exe - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.29.30133/bin/Hostx64/x64/cl.exe - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CUDA compiler ABI info
-- Detecting CUDA compiler ABI info - done
-- Check for working CUDA compiler: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2/bin/nvcc.exe - skipped
-- Detecting CUDA compile features
-- Detecting CUDA compile features - done
Selected libtorch installation type: local
Finding Torch 2.0.1 package
CMake Warning at CMakeLists.txt:286 (message):
  Will override variable NVTOOLEXT_HOME and environmental variable
  NVTOOLSEXT_PATH

CMake Warning at CMakeLists.txt:287 (message):
  Make sure to install NVXT with the CUDA 11.8 installer

CMake Warning at CMakeLists.txt:288 (message):
  NVXT for CUDA 12.x has been moved to a headers-only library

-- Found CUDA: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8 (found version "11.8")
-- Caffe2: CUDA detected: 12.2
-- Caffe2: CUDA nvcc is: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8/bin/nvcc.exe
-- Caffe2: CUDA toolkit directory: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8
-- Caffe2: Header version is: 11.8
CMake Warning at C:/Users/example/Projects/Example/LOCAL_DEPS/libtorch/2.0.1/debug/gpu/share/cmake/Caffe2/public/cuda.cmake:166 (message):
  Failed to compute shorthash for libnvrtc.so
Call Stack (most recent call first):
  C:/Users/example/Projects/Example/LOCAL_DEPS/libtorch/2.0.1/debug/gpu/share/cmake/Caffe2/Caffe2Config.cmake:88 (include)
  C:/Users/example/Projects/Example/LOCAL_DEPS/libtorch/2.0.1/debug/gpu/share/cmake/Torch/TorchConfig.cmake:68 (find_package)
  CMakeLists.txt:295 (find_package)


-- USE_CUDNN is set to 0. Compiling without cuDNN support
-- Autodetected CUDA architecture(s):  8.6
-- Added CUDA NVCC flags for: -gencode;arch=compute_86,code=sm_86
-- Found Torch: C:/Users/example/Projects/Example/LOCAL_DEPS/libtorch/2.0.1/debug/gpu/lib/torch.lib (Required is at least version "2.0.1")
-- Configuring done (25.1s)
-- Generating done (0.0s)
-- Build files have been written to: C:/Users/example/Projects/Example/build

As environment variables I have

In addition I have

The build step is where things get problematic, and it all boils down to the following pattern:

cudart_static.lib(cudart_generated_cuda_runtime_api.obj)  : error LNK2005: <SOME-CUDA-FUNCTION> already defined in cudart_static.lib(cudart_generated_cuda_runtime_api.obj) [C:\Users\example\Projects\Example\build\example.v
cxproj]

A direct example can be seen below

cudart_static.lib(cudart_generated_cuda_runtime_api.obj) : error LNK2005: cudaArrayGetInfo already defined in cudart_static.lib(cudart_generated_cuda_runtime_api.obj) [C:\Users\example\Projects\Example\build\example.v
cxproj]
cudart_static.lib(cudart_generated_cuda_runtime_api.obj) : error LNK2005: cudaArrayGetMemoryRequirements already defined in cudart_static.lib(cudart_generated_cuda_runtime_api.obj) [C:\Users\example\Projects\Example\build\example.vcxproj]

Everything ultimately fails with

Creating library C:/Users/example/Projects/Example/build/bin/debug/demo.lib and object C:/Users/example/Projects/Example/build/bin/debug/demo.exp
LINK : warning LNK4098: defaultlib 'LIBCMT' conflicts with use of other libs; use /NODEFAULTLIB:library [C:\Users\example\Projects\Example\build\demo.vcxproj]
C:\Users\example\Projects\Example\build\bin\debug\demo.exe : fatal error LNK1169: one or more multiply defined symbols found [C:\Users\example\Projects\Example\build\demo.vcxproj]

An example for one of the kernels (here default.cu which just passes D3D11 buffer data through):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
* Pass-through kernel: every thread reads one uchar4 element from the input surface and
* writes it unchanged to the same position of the output surface.
* Threads whose coordinates fall outside width x height do nothing.
* @param surface_in Source surface the data is read from. Requires to be mapped with read access
* @param surface_out Destination surface the data is written to. Requires to be mapped with write access
* @param width Number of elements per row
* @param height Number of rows
*/
__global__ void cuda_kernel_default(const cudaSurfaceObject_t surface_in, cudaSurfaceObject_t surface_out, size_t width, size_t height)
{
    const unsigned int col = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so only in-range threads do any work.
    if (col < width && row < height)
    {
        // Zero-initialised (x = red, y = green, z = blue, w = alpha) before the read fills it.
        uchar4 texel = { 0, 0, 0, 0 };

        // Surface functions take the x coordinate in bytes, hence the sizeof factor.
        surf2Dread<uchar4>(&texel, surface_in, col * sizeof(uchar4), row, cudaBoundaryModeClamp);
        surf2Dwrite<uchar4>(texel, surface_out, col * sizeof(uchar4), row, cudaBoundaryModeClamp);
    }
}

/*
* Host-side launcher for cuda_kernel_default, exported with C linkage.
* Launches one thread per element using 32x32 blocks and enough blocks to cover width x height.
* @return The status reported by cudaGetLastError() right after the launch
*/
extern "C"
cudaError_t cuda_default(const cudaSurfaceObject_t surface_in, cudaSurfaceObject_t surface_out, size_t width, size_t height)
{
    // Block dimensions
    const dim3 threads_per_block(32, 32);

    // Grid dimensions: ceiling division so partially filled blocks at the edges are included
    const dim3 blocks_per_grid(
        (width + threads_per_block.x - 1) / threads_per_block.x,
        (height + threads_per_block.y - 1) / threads_per_block.y);

    cuda_kernel_default<<<blocks_per_grid, threads_per_block>>>(surface_in, surface_out, width, height);

    return cudaGetLastError();
}

My main project main.cpp has the following includes:

#define NOMINMAX
#include <windows.h>
#include <windowsx.h>
#include <dxgi.h>                       // DirectX graphics infrastructure
#include <d3d11.h>                      // Direct3D functionality
#include <DirectXMath.h>                // Math (vectors and matrices), successor of the deprecated D3DXimage files, successor of the deprecated D3DX11SaveTextureToFile
#include <exception>
#include <string>
#include <cuda_runtime_api.h>           // Runtime CUDA API offers better code management but less control compared the the CUDA driver API
#include <cuda_d3d11_interop.h>         // Provides facilities for registering and mapping graphics resources (among others) from/to Direct3D to/from CUDA context
#include <torch/torch.h>                // Torch general
#include <torch/script.h>               // Torch script
...                                     // Other dependencies such as ImGUI and DirectXTex
#include "load_sample.h"                // Contains D3D11 stuff, creates and loads the cube and shaders for the scene

I know, and the error message makes it pretty clear, that I am pulling in the CUDA runtime static library twice somewhere, but I have no idea where.


UPDATE 1:

I followed the advice in the comment section and ran the configuration step with --trace-expand to see the full output (sadly too big to paste on Pastebin XD). I see that both add_library() and add_executable() are affected by CMake policy CMP0156:

CMake Warning (dev) at CMakeLists.txt:306 (add_library):
  Policy CMP0156 is not set: De-duplicate libraries on link lines based on
  linker capabilities.  Run "cmake --help-policy CMP0156" for policy details.
  Use the cmake_policy command to set the policy and suppress this warning.

  Since the policy is not set, legacy libraries de-duplication strategy will
  be applied.
This warning is for project developers.  Use -Wno-dev to suppress it.

CMake Warning (dev) at CMakeLists.txt:343 (add_executable):
  Policy CMP0156 is not set: De-duplicate libraries on link lines based on
  linker capabilities.  Run "cmake --help-policy CMP0156" for policy details.
  Use the cmake_policy command to set the policy and suppress this warning.

  Since the policy is not set, legacy libraries de-duplication strategy will
  be applied.
This warning is for project developers.  Use -Wno-dev to suppress it.

I am just learning what de-duplication means, but I do believe that part of my problem, if not the main culprit, lies there. I do recall that I was able to build the code last year when I was using - I believe - CMake 3.12 as the minimum required version.


Solution

  • While not a very exact answer I did find out that it was libtorch (specifically the CUDA version of it) that was the problem.

    I dug up an older project where I had D3D11 and CUDA interop (which is part of my current setup minus libtorch). It was building and running without any issues. Since I have transferred the code to my new project (with libtorch) it was not really likely that it broke all of a sudden. Just in case I did use my old code with the new CMakeLists.txt. Result

    Even though it's libtorch for CUDA 11.8, apparently it really does depend on which version of MSVC was used to build the binaries. If someone can provide more exact information on this (the release notes are too many to read through...), I would appreciate it.

    The solution was to move to libtorch 2.4.0 for CUDA 11.8. I did not change anything else in my project except set the path for libtorch to the newer version and everything works fine now.