396 lines
15 KiB
Diff
Executable file
396 lines
15 KiB
Diff
Executable file
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
|
index 8054d98..35934f5 100644
|
|
--- a/CMakeLists.txt
|
|
+++ b/CMakeLists.txt
|
|
@@ -56,6 +56,14 @@ option(USE_ROCKSDB "Use RocksDB" ON)
|
|
option(USE_SNPE "Use Qualcomm's SNPE library" OFF)
|
|
option(USE_THREADS "Use Threads" ON)
|
|
option(USE_ZMQ "Use ZMQ" OFF)
|
|
+if(MSVC)
|
|
+ if(BUILD_SHARED_LIBS)
|
|
+ set(USE_STATIC_RUNTIME_DEFAULT OFF)
|
|
+ else()
|
|
+ set(USE_STATIC_RUNTIME_DEFAULT ON)
|
|
+ endif()
|
|
+ option(USE_STATIC_RUNTIME "Link to the static runtime (/MT) instead of dynamic (/MD)" ${USE_STATIC_RUNTIME_DEFAULT})
|
|
+endif()
|
|
|
|
# External projects
|
|
include(ExternalProject)
|
|
@@ -99,7 +107,7 @@ else()
|
|
foreach(flag_var
|
|
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
|
|
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
|
|
- if (NOT ${BUILD_SHARED_LIBS})
|
|
+ if (USE_STATIC_RUNTIME)
|
|
if(${flag_var} MATCHES "/MD")
|
|
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
|
|
endif(${flag_var} MATCHES "/MD")
|
|
diff --git a/caffe2/core/logging.cc b/caffe2/core/logging.cc
|
|
index 1b4468e..1379f3a 100644
|
|
--- a/caffe2/core/logging.cc
|
|
+++ b/caffe2/core/logging.cc
|
|
@@ -117,7 +117,7 @@ using fLB::FLAGS_logtostderr;
|
|
|
|
#endif // CAFFE2_USE_GFLAGS
|
|
|
|
-CAFFE2_DEFINE_int(caffe2_log_level, google::ERROR,
|
|
+CAFFE2_DEFINE_int(caffe2_log_level, google::GLOG_ERROR,
|
|
"The minimum log level that caffe2 will output.");
|
|
|
|
// Google glog's api does not have an external function that allows one to check
|
|
@@ -134,15 +134,23 @@ bool IsGoogleLoggingInitialized();
|
|
namespace caffe2 {
|
|
bool InitCaffeLogging(int* argc, char** argv) {
|
|
if (*argc == 0) return true;
|
|
- if (!::google::glog_internal_namespace_::IsGoogleLoggingInitialized()) {
|
|
+#if !(defined(_MSC_VER) && defined(GLOG_IS_A_DLL))
|
|
+ // IsGoogleLoggingInitialized is not exported from the glog DLL
|
|
+ // so we can't call it. If our program calls InitGoogleLogging twice glog will
|
|
+ // abort it.
|
|
+ if (!::google::glog_internal_namespace_::IsGoogleLoggingInitialized())
|
|
+#endif
|
|
+ {
|
|
::google::InitGoogleLogging(argv[0]);
|
|
+#if !defined(_MSC_VER)
|
|
::google::InstallFailureSignalHandler();
|
|
+#endif
|
|
}
|
|
// If caffe2_log_level is set and is lower than the min log level by glog,
|
|
// we will transfer the caffe2_log_level setting to glog to override that.
|
|
FLAGS_minloglevel = std::min(FLAGS_caffe2_log_level, FLAGS_minloglevel);
|
|
// If caffe2_log_level is explicitly set, let's also turn on logtostderr.
|
|
- if (FLAGS_caffe2_log_level < google::ERROR) {
|
|
+ if (FLAGS_caffe2_log_level < google::GLOG_ERROR) {
|
|
FLAGS_logtostderr = 1;
|
|
}
|
|
// Also, transfer the caffe2_log_level verbose setting to glog.
|
|
@@ -154,7 +162,7 @@ bool InitCaffeLogging(int* argc, char** argv) {
|
|
|
|
void ShowLogInfoToStderr() {
|
|
FLAGS_logtostderr = 1;
|
|
- FLAGS_minloglevel = std::min(FLAGS_minloglevel, google::INFO);
|
|
+ FLAGS_minloglevel = std::min(FLAGS_minloglevel, google::GLOG_INFO);
|
|
}
|
|
} // namespace caffe2
|
|
|
|
diff --git a/caffe2/core/logging_is_google_glog.h b/caffe2/core/logging_is_google_glog.h
|
|
index 7dd2b4f..2df4435 100644
|
|
--- a/caffe2/core/logging_is_google_glog.h
|
|
+++ b/caffe2/core/logging_is_google_glog.h
|
|
@@ -8,7 +8,7 @@
|
|
// it. Some mobile platforms do not like stl_logging, so we add an
|
|
// overload in that case as well.
|
|
|
|
-#if !defined(__CUDACC__) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
|
|
+#if !defined(__CUDA_ARCH__) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
|
|
#include <glog/stl_logging.h>
|
|
#else // !defined(__CUDACC__) && !!defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
|
|
|
|
diff --git a/caffe2/image/image_input_op.h b/caffe2/image/image_input_op.h
|
|
index 9604e98..7d90014 100644
|
|
--- a/caffe2/image/image_input_op.h
|
|
+++ b/caffe2/image/image_input_op.h
|
|
@@ -214,13 +214,13 @@ ImageInputOp<Context>::ImageInputOp(
|
|
|
|
// hard-coded PCA eigenvectors and eigenvalues, based on RBG channel order
|
|
color_lighting_eigvecs_.push_back(
|
|
- std::vector<float>{-144.7125, 183.396, 102.2295});
|
|
+ std::vector<float>{-144.7125f, 183.396f, 102.2295f});
|
|
color_lighting_eigvecs_.push_back(
|
|
- std::vector<float>{-148.104, -1.1475, -207.57});
|
|
+ std::vector<float>{-148.104f, -1.1475f, -207.57f});
|
|
color_lighting_eigvecs_.push_back(
|
|
- std::vector<float>{-148.818, -177.174, 107.1765});
|
|
+ std::vector<float>{-148.818f, -177.174f, 107.1765f});
|
|
|
|
- color_lighting_eigvals_ = std::vector<float>{0.2175, 0.0188, 0.0045};
|
|
+ color_lighting_eigvals_ = std::vector<float>{0.2175f, 0.0188f, 0.0045f};
|
|
|
|
CAFFE_ENFORCE_GT(batch_size_, 0, "Batch size should be nonnegative.");
|
|
if (use_caffe_datum_) {
|
|
diff --git a/caffe2/operators/batch_matmul_op.cc b/caffe2/operators/batch_matmul_op.cc
|
|
index c2e578d..28cf030 100644
|
|
--- a/caffe2/operators/batch_matmul_op.cc
|
|
+++ b/caffe2/operators/batch_matmul_op.cc
|
|
@@ -34,7 +34,7 @@ size (C x K x N) where C is the batch size and i ranges from 0 to C-1.
|
|
b_dim1 = in[1].dims(2);
|
|
}
|
|
return vector<TensorShape> {
|
|
- CreateTensorShape(vector<int> {
|
|
+ CreateTensorShape(vector<TIndex> {
|
|
in[0].dims(0), a_dim0, b_dim1},
|
|
in[0].data_type())
|
|
};
|
|
diff --git a/caffe2/operators/layer_norm_op.cu b/caffe2/operators/layer_norm_op.cu
|
|
index df13fc3..68bbc97 100644
|
|
--- a/caffe2/operators/layer_norm_op.cu
|
|
+++ b/caffe2/operators/layer_norm_op.cu
|
|
@@ -252,8 +252,8 @@ bool LayerNormGradientOp<CUDAContext>::DoRunWithType<float>() {
|
|
auto* ginput = Output(0);
|
|
|
|
const auto canonical_axis = norm_inputs.canonical_axis_index(axis_);
|
|
- const int left = norm_inputs.size_to_dim(canonical_axis);
|
|
- const int right = norm_inputs.size_from_dim(canonical_axis);
|
|
+ const TIndex left = norm_inputs.size_to_dim(canonical_axis);
|
|
+ const TIndex right = norm_inputs.size_from_dim(canonical_axis);
|
|
|
|
ginput->ResizeLike(norm_inputs);
|
|
std::vector<TIndex> stats_dims(
|
|
@@ -261,7 +261,7 @@ bool LayerNormGradientOp<CUDAContext>::DoRunWithType<float>() {
|
|
stats_dims.push_back(1);
|
|
dmean_.Resize(stats_dims);
|
|
dstdev_.Resize(stats_dims);
|
|
- gscratch_.Resize(std::vector<size_t>{left, right});
|
|
+ gscratch_.Resize(std::vector<TIndex>{left, right});
|
|
|
|
std::vector<int> segs(left + 1);
|
|
std::iota(segs.begin(), segs.end(), 0);
|
|
@@ -291,7 +291,7 @@ bool LayerNormGradientOp<CUDAContext>::DoRunWithType<float>() {
|
|
dout.data<float>(),
|
|
gscratch_.mutable_data<float>());
|
|
|
|
- dstdev_.Resize(vector<size_t>{left, 1});
|
|
+ dstdev_.Resize(vector<TIndex>{left, 1});
|
|
// dstdev = reduce(temp1)
|
|
allocScratchAndReduce(
|
|
gscratch_.data<float>(),
|
|
diff --git a/caffe2/operators/lengths_top_k_op.cc b/caffe2/operators/lengths_top_k_op.cc
|
|
index c871d53..bad741a 100644
|
|
--- a/caffe2/operators/lengths_top_k_op.cc
|
|
+++ b/caffe2/operators/lengths_top_k_op.cc
|
|
@@ -14,7 +14,7 @@ bool LengthsTopKOp<T, Context>::RunOnDevice() {
|
|
|
|
output_topk_values->Resize(N * k_);
|
|
output_topk_indices->Resize(N * k_);
|
|
- std::vector<int> output_dims = std::vector<int>({N, k_});
|
|
+ std::vector<TIndex> output_dims = std::vector<TIndex>({N, k_});
|
|
output_topk_values->Reshape(output_dims);
|
|
output_topk_indices->Reshape(output_dims);
|
|
T* output_topk_values_data = output_topk_values->template mutable_data<T>();
|
|
diff --git a/caffe2/operators/pool_op_cudnn.cu b/caffe2/operators/pool_op_cudnn.cu
|
|
index a380d8d..b0cd326 100644
|
|
--- a/caffe2/operators/pool_op_cudnn.cu
|
|
+++ b/caffe2/operators/pool_op_cudnn.cu
|
|
@@ -467,6 +467,15 @@ class CuDNNPoolGradientOp : public ConvPoolOpBase<CUDAContext> {
|
|
cudnnPoolingDescriptor_t pooling_desc_;
|
|
cudnnPoolingMode_t mode_;
|
|
|
|
+// MSVC defines IN and OUT in minwindef.h
|
|
+#ifdef IN
|
|
+#undef IN
|
|
+#endif
|
|
+
|
|
+#ifdef OUT
|
|
+#undef OUT
|
|
+#endif
|
|
+
|
|
// Input: X, Y, dY
|
|
// Output: dX
|
|
INPUT_TAGS(IN, OUT, OUT_GRAD);
|
|
diff --git a/caffe2/operators/recurrent_op_cudnn.cc b/caffe2/operators/recurrent_op_cudnn.cc
|
|
index 7777813..58bc8c3 100644
|
|
--- a/caffe2/operators/recurrent_op_cudnn.cc
|
|
+++ b/caffe2/operators/recurrent_op_cudnn.cc
|
|
@@ -115,10 +115,11 @@ void RecurrentBaseOp<T>::initialize(
|
|
|
|
// RNN setup
|
|
{
|
|
- CUDNN_ENFORCE(cudnnSetRNNDescriptor(
|
|
+// Do not use #if condition inside CUDNN_ENFORCE
|
|
+// to avoid macro expansion errors.
|
|
#if CUDNN_MAJOR >= 7
|
|
+CUDNN_ENFORCE(cudnnSetRNNDescriptor(
|
|
cudnn_wrapper_.inline_cudnn_handle(),
|
|
-#endif
|
|
rnnDesc_,
|
|
hiddenSize,
|
|
numLayers,
|
|
@@ -126,10 +127,19 @@ void RecurrentBaseOp<T>::initialize(
|
|
rnnInput,
|
|
rnnDirection,
|
|
rnnMode,
|
|
-#if CUDNN_MAJOR >= 7
|
|
CUDNN_RNN_ALGO_STANDARD, // TODO: verify correctness / efficiency.
|
|
-#endif
|
|
cudnnTypeWrapper<T>::type));
|
|
+#else
|
|
+ CUDNN_ENFORCE(cudnnSetRNNDescriptor(
|
|
+ rnnDesc_,
|
|
+ hiddenSize,
|
|
+ numLayers,
|
|
+ dropoutDesc_,
|
|
+ rnnInput,
|
|
+ rnnDirection,
|
|
+ rnnMode,
|
|
+ cudnnTypeWrapper<T>::type));
|
|
+#endif // CUDNN_MAJOR >= 7
|
|
}
|
|
// X setup
|
|
{
|
|
diff --git a/caffe2/utils/CMakeLists.txt b/caffe2/utils/CMakeLists.txt
|
|
index f90af5a..919a638 100644
|
|
--- a/caffe2/utils/CMakeLists.txt
|
|
+++ b/caffe2/utils/CMakeLists.txt
|
|
@@ -27,6 +27,10 @@ exclude(Caffe2_CPU_SRCS "${Caffe2_CPU_SRCS}" ${Caffe2_GPU_SRCS})
|
|
# will directly link nnpack pthreadpool.
|
|
file(GLOB_RECURSE tmp pthreadpool*)
|
|
exclude(Caffe2_CPU_SRCS "${Caffe2_CPU_SRCS}" ${tmp})
|
|
+if(MSVC)
|
|
+ file(GLOB_RECURSE tmp *ThreadPool.cc)
|
|
+ exclude(Caffe2_CPU_SRCS "${Caffe2_CPU_SRCS}" ${tmp})
|
|
+endif()
|
|
|
|
# ---[ GPU test files
|
|
file(GLOB_RECURSE tmp *_gpu_test.cc)
|
|
diff --git a/caffe2/utils/GpuBitonicSort.cuh b/caffe2/utils/GpuBitonicSort.cuh
|
|
index f52bb50..a199bcb 100644
|
|
--- a/caffe2/utils/GpuBitonicSort.cuh
|
|
+++ b/caffe2/utils/GpuBitonicSort.cuh
|
|
@@ -39,9 +39,11 @@ __device__ inline void bitonicSort(K* keys,
|
|
// Assume the sort is taking place in shared memory
|
|
// static_assert(Power2SortSize * (sizeof(K) + sizeof(V)) < 32768,
|
|
// "sort data too large (>32768 bytes)");
|
|
- static_assert(math::integerIsPowerOf2(Power2SortSize),
|
|
+ static_assert(math::integerIsPowerOf2(
|
|
+ std::integral_constant<int, Power2SortSize>::value),
|
|
"sort size must be power of 2");
|
|
- static_assert(math::integerIsPowerOf2(ThreadsPerBlock),
|
|
+ static_assert(math::integerIsPowerOf2(
|
|
+ std::integral_constant<int, ThreadsPerBlock>::value),
|
|
"threads in block must be power of 2");
|
|
|
|
// If what we are sorting is too small, then not all threads
|
|
@@ -107,7 +109,8 @@ __device__ inline void warpBitonicSort(K* keys,
|
|
// Smaller sorts should use a warp shuffle sort
|
|
static_assert(Power2SortSize > kWarpSize,
|
|
"sort not large enough");
|
|
- static_assert(math::integerIsPowerOf2(Power2SortSize),
|
|
+ static_assert(math::integerIsPowerOf2(
|
|
+ std::integral_constant<int, Power2SortSize>::value),
|
|
"sort size must be power of 2");
|
|
static_assert(Power2SortSize <= kMaxBitonicSortSize,
|
|
"sort size <= 4096 only supported");
|
|
diff --git a/caffe2/utils/math.h b/caffe2/utils/math.h
|
|
index 6c352dc..a1eda9d 100644
|
|
--- a/caffe2/utils/math.h
|
|
+++ b/caffe2/utils/math.h
|
|
@@ -426,18 +426,10 @@ constexpr T roundUp(T a, T b) {
|
|
return divUp<T>(a, b) * b;
|
|
}
|
|
|
|
-// Returns true if the given integer type is a power-of-2 (positive only)
|
|
-// Note(jiayq): windows reported an error per
|
|
-// https://github.com/caffe2/caffe2/issues/997
|
|
-// and as a result will make it a macro.
|
|
-#ifdef _MSC_VER
|
|
-#define integerIsPowerOf2(v) ((v) && !((v) & ((v) - 1)))
|
|
-#else // _MSC_VER
|
|
template <typename T>
|
|
constexpr bool integerIsPowerOf2(T v) {
|
|
return (v && !(v & (v - 1)));
|
|
}
|
|
-#endif // _MSC_VER
|
|
|
|
// Returns log2(n) for a positive integer type
|
|
template <typename T>
|
|
diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake
|
|
index df70e2f..ca1b123 100644
|
|
--- a/cmake/Cuda.cmake
|
|
+++ b/cmake/Cuda.cmake
|
|
@@ -37,6 +37,10 @@ function(caffe2_detect_installed_gpus out_variable)
|
|
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
|
|
|
if(__nvcc_res EQUAL 0)
|
|
+ # nvcc outputs text containing line breaks when building with MSVC.
|
|
+ # The line below prevents CMake from inserting a variable with line
|
|
+ # breaks in the cache
|
|
+ string(REGEX MATCH "([1-9]\\.[0-9])" __nvcc_out "${__nvcc_out}")
|
|
string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
|
|
set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from caffe_detect_gpus tool" FORCE)
|
|
endif()
|
|
@@ -249,7 +253,7 @@ endif()
|
|
# Debug and Release symbol support
|
|
if (MSVC)
|
|
if (${CMAKE_BUILD_TYPE} MATCHES "Release")
|
|
- if (${BUILD_SHARED_LIBS})
|
|
+ if (NOT USE_STATIC_RUNTIME)
|
|
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MD")
|
|
else()
|
|
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MT")
|
|
@@ -259,7 +263,7 @@ if (MSVC)
|
|
"Caffe2 currently does not support the combination of MSVC, Cuda "
|
|
"and Debug mode. Either set USE_CUDA=OFF or set the build type "
|
|
"to Release")
|
|
- if (${BUILD_SHARED_LIBS})
|
|
+ if (NOT USE_STATIC_RUNTIME)
|
|
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MDd")
|
|
else()
|
|
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MTd")
|
|
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
|
index afb6b68..bdad8b6 100644
|
|
--- a/cmake/Dependencies.cmake
|
|
+++ b/cmake/Dependencies.cmake
|
|
@@ -182,6 +182,15 @@ if(USE_OPENCV)
|
|
message(WARNING "Not compiling with OpenCV. Suppress this warning with -DUSE_OPENCV=OFF")
|
|
set(USE_OPENCV OFF)
|
|
endif()
|
|
+ if(USE_OPENCV AND VCPKG_TARGET_TRIPLET MATCHES static)
|
|
+ find_package(LibLZMA QUIET)
|
|
+ if(LIBLZMA_FOUND)
|
|
+ list(APPEND Caffe2_DEPENDENCY_LIBS ${LIBLZMA_LIBRARIES})
|
|
+ else()
|
|
+ message(WARNING "Not compiling with OpenCV. Could not find liblzma. Suppress this warning with -DUSE_OPENCV=OFF")
|
|
+ set(USE_OPENCV OFF)
|
|
+ endif()
|
|
+ endif()
|
|
endif()
|
|
|
|
# ---[ FFMPEG
|
|
diff --git a/cmake/Modules/FindGlog.cmake b/cmake/Modules/FindGlog.cmake
|
|
index 1167532..9780ba5 100644
|
|
--- a/cmake/Modules/FindGlog.cmake
|
|
+++ b/cmake/Modules/FindGlog.cmake
|
|
@@ -27,6 +27,10 @@ if(MSVC)
|
|
endif()
|
|
if(TARGET ${GLOG_LIBRARY})
|
|
get_target_property(GLOG_INCLUDE_DIR ${GLOG_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES)
|
|
+ get_target_property(GLOG_TYPE ${GLOG_LIBRARY} TYPE)
|
|
+ if("${GLOG_TYPE}" STREQUAL "SHARED_LIBRARY")
|
|
+ add_definitions(-DGLOG_IS_A_DLL=1)
|
|
+ endif()
|
|
endif()
|
|
else()
|
|
find_library(GLOG_LIBRARY glog
|
|
diff --git a/cmake/ProtoBuf.cmake b/cmake/ProtoBuf.cmake
|
|
index 89975c8..e37d6da 100644
|
|
--- a/cmake/ProtoBuf.cmake
|
|
+++ b/cmake/ProtoBuf.cmake
|
|
@@ -13,6 +13,9 @@ function(custom_protobuf_find)
|
|
# so we turn it off here.
|
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations" PARENT_SCOPE)
|
|
endif()
|
|
+ if(MSVC)
|
|
+ set(protobuf_MSVC_STATIC_RUNTIME ${USE_STATIC_RUNTIME} CACHE BOOL "Link protobuf to the static MSVC runtime" FORCE)
|
|
+ endif()
|
|
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/protobuf/cmake)
|
|
caffe2_include_directories(${PROJECT_SOURCE_DIR}/third_party/protobuf/src)
|
|
list(APPEND Caffe2_DEPENDENCY_LIBS libprotobuf)
|
|
diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake
|
|
index b7423a6..2996484 100644
|
|
--- a/cmake/Summary.cmake
|
|
+++ b/cmake/Summary.cmake
|
|
@@ -19,6 +19,13 @@ function (Caffe2_print_configuration_summary)
|
|
message(STATUS " System : ${CMAKE_SYSTEM_NAME}")
|
|
message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}")
|
|
message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
|
|
+ if(MSVC)
|
|
+ if(USE_STATIC_RUNTIME)
|
|
+ message(STATUS " Runtime : static (/MT)")
|
|
+ else()
|
|
+ message(STATUS " Runtime : dynamic (/MD)")
|
|
+ endif()
|
|
+ endif()
|
|
message(STATUS " Protobuf compiler : ${PROTOBUF_PROTOC_EXECUTABLE}")
|
|
message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}")
|
|
message(STATUS " Build type : ${CMAKE_BUILD_TYPE}")
|