diff --git a/CMakeLists.txt b/CMakeLists.txt index c5603cd838d4ca4e0b6fbbbfae31f7a430ff8398..0277a02318b5fd146bc98fb27e7c353951712de9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,7 +75,7 @@ IF(ENABLE_LIBFABRIC) message(STATUS "LibFabric support enabled") message(STATUS "LIB_FABRIC: Path: ${LIBFABRIC_LIBRARY} Include: ${LIBFABRIC_INCLUDE_DIR}") add_definitions(-DLIBFABRIC_ENABLED) - SET(ASAPO_COMMON_FABRIC_LIBRARIES ${ASAPO_COMMON_FABRIC_LIBRARIES} fabric) + SET(ASAPO_COMMON_FABRIC_LIBRARIES ${ASAPO_COMMON_FABRIC_LIBRARIES} dl) IF(ENABLE_LIBFABRIC_LOCALHOST) message(STATUS "LIB_FABRIC: Enabled emulated RDMA when localhost is used. Should only be used for tests.") add_definitions(-DLIBFARBIC_ALLOW_LOCALHOST) diff --git a/common/cpp/include/asapo_fabric/asapo_fabric.h b/common/cpp/include/asapo_fabric/asapo_fabric.h index a9b9f8da1e9729d4b636dcc92ec804ce4f1ba926..9f53e284df5441de6a1d094d75f6cd8f3e1dcda3 100644 --- a/common/cpp/include/asapo_fabric/asapo_fabric.h +++ b/common/cpp/include/asapo_fabric/asapo_fabric.h @@ -78,7 +78,7 @@ class FabricFactory { virtual std::unique_ptr<FabricClient> CreateClient(Error* error) const = 0; }; -std::unique_ptr<FabricFactory> GenerateDefaultFabricFactory(); +std::unique_ptr<FabricFactory> GenerateDefaultFabricFactory(); // <- will try to load the library with dlopen first } } diff --git a/common/cpp/include/asapo_fabric/fabric_error.h b/common/cpp/include/asapo_fabric/fabric_error.h index 3c15ea6040cc8026c01fe468b98a67288c4a0534..5d37da5bd6a04d4a158d94c2a0827c61ea741ff2 100644 --- a/common/cpp/include/asapo_fabric/fabric_error.h +++ b/common/cpp/include/asapo_fabric/fabric_error.h @@ -5,7 +5,9 @@ namespace asapo { namespace fabric { enum class FabricErrorType { kNotSupported, - kOutdatedLibrary, + kLibraryNotFound, + kLibraryCompatibilityError, + kLibraryOutdated, kInternalError, // An error that was produced by LibFabric kInternalOperationCanceled, // An error that was produced by LibFabric kInternalConnectionError, // 
This might occur when the connection is unexpectedly closed @@ -20,10 +22,16 @@ using FabricErrorTemplate = ServiceErrorTemplate<FabricErrorType, ErrorType::kFa namespace FabricErrorTemplates { auto const kNotSupportedOnBuildError = FabricErrorTemplate { - "This build of ASAPO does not support LibFabric", FabricErrorType::kNotSupported + "This build of ASAPO does not support LibFabric", FabricErrorType::kNotSupported +}; +auto const kLibraryNotFoundError = FabricErrorTemplate { + "asapo-fabric, LibFabric or dependencies were not found", FabricErrorType::kLibraryNotFound +}; +auto const kLibraryCompatibilityError = FabricErrorTemplate { + "LibFabric was found but somehow some a function is missing", FabricErrorType::kLibraryCompatibilityError }; auto const kOutdatedLibraryError = FabricErrorTemplate { - "LibFabric outdated", FabricErrorType::kOutdatedLibrary + "LibFabric outdated", FabricErrorType::kLibraryOutdated }; auto const kInternalError = FabricErrorTemplate { "Internal LibFabric error", FabricErrorType::kInternalError diff --git a/common/cpp/src/asapo_fabric/CMakeLists.txt b/common/cpp/src/asapo_fabric/CMakeLists.txt index c818f9d1a4323c90bfaab7cf46cfbadcb9885e15..89e0591ee0cdc34f7784e8c7564d6b84e25f921c 100644 --- a/common/cpp/src/asapo_fabric/CMakeLists.txt +++ b/common/cpp/src/asapo_fabric/CMakeLists.txt @@ -2,7 +2,10 @@ set(TARGET_NAME asapo-fabric) include_directories(include) -set(SOURCE_FILES asapo_fabric.cpp) +set(SOURCE_FILES + asapo_fabric.cpp + fabric_factory_not_supported.cpp + ) IF(ENABLE_LIBFABRIC) set(SOURCE_FILES ${SOURCE_FILES} @@ -19,10 +22,6 @@ IF(ENABLE_LIBFABRIC) server/task/fabric_recv_any_task.cpp server/task/fabric_handshake_accepting_task.cpp ) -ELSE() - set(SOURCE_FILES ${SOURCE_FILES} - fabric_factory_not_supported.cpp - ) ENDIF() ################################ @@ -31,4 +30,4 @@ ENDIF() add_library(${TARGET_NAME} STATIC ${SOURCE_FILES} $<TARGET_OBJECTS:system_io>) -target_include_directories(${TARGET_NAME} PUBLIC 
${ASAPO_CXX_COMMON_INCLUDE_DIR} ${LIBFABRIC_INCLUDE_DIR}) +target_include_directories(${TARGET_NAME} PUBLIC ${ASAPO_CXX_COMMON_INCLUDE_DIR} ${LIBFABRIC_INCLUDE_DIR}) diff --git a/common/cpp/src/asapo_fabric/asapo_fabric.cpp b/common/cpp/src/asapo_fabric/asapo_fabric.cpp index 96aa4fd5215db008e4ec0a9548b8db7bba9e31a9..78a605f9d0da981d525d6a7d05a71642d2fde6e6 100644 --- a/common/cpp/src/asapo_fabric/asapo_fabric.cpp +++ b/common/cpp/src/asapo_fabric/asapo_fabric.cpp @@ -1,17 +1,46 @@ #include <asapo_fabric/asapo_fabric.h> +#include <dlfcn.h> +#include "fabric_factory_not_supported.h" #ifdef LIBFABRIC_ENABLED #include "fabric_factory_impl.h" -#else -#include "fabric_factory_not_supported.h" +#include "fabric_function_map.h" #endif using namespace asapo::fabric; std::unique_ptr<FabricFactory> asapo::fabric::GenerateDefaultFabricFactory() { #ifdef LIBFABRIC_ENABLED - return std::unique_ptr<FabricFactory>(new FabricFactoryImpl()); -#else - return std::unique_ptr<FabricFactory>(new FabricFactoryNotSupported()); + if (gffm().is_init_) { + return std::unique_ptr<FabricFactory>(new FabricFactoryImpl()); + } + + void* handle = dlopen("libfabric.so", RTLD_LAZY); + if (handle) { +#define ADD_FABRIC_CALL(fName) do { if (!(*((void**)&gffm().fName) = dlsym(handle, #fName))) goto functionNotFoundError; } while(0) + ADD_FABRIC_CALL(fi_version); + ADD_FABRIC_CALL(fi_dupinfo); + ADD_FABRIC_CALL(fi_freeinfo); + ADD_FABRIC_CALL(fi_getinfo); + ADD_FABRIC_CALL(fi_fabric); + ADD_FABRIC_CALL(fi_strerror); +#undef ADD_FABRIC_CALL + + gffm().is_init_ = true; + + return std::unique_ptr<FabricFactory>(new FabricFactoryImpl()); + functionNotFoundError: + dlclose(handle); + return std::unique_ptr<FabricFactory>(new FabricFactoryNotSupported(FabricErrorTemplates::kLibraryCompatibilityError)); + } else { + return std::unique_ptr<FabricFactory>(new FabricFactoryNotSupported(FabricErrorTemplates::kLibraryNotFoundError)); + } #endif + return std::unique_ptr<FabricFactory>(new 
FabricFactoryNotSupported(FabricErrorTemplates::kNotSupportedOnBuildError)); +} + +// Global fabric function map +extern FabricFunctionMap& gffm() { + static FabricFunctionMap gffm_ {}; + return gffm_; } diff --git a/common/cpp/src/asapo_fabric/common/fabric_context_impl.cpp b/common/cpp/src/asapo_fabric/common/fabric_context_impl.cpp index 2614c32bf25327ae237c323dfdaa66e5aed407bc..67e56366f05834ec4c4c32b3b3e63b8a72b55901 100644 --- a/common/cpp/src/asapo_fabric/common/fabric_context_impl.cpp +++ b/common/cpp/src/asapo_fabric/common/fabric_context_impl.cpp @@ -56,7 +56,7 @@ FabricContextImpl::~FabricContextImpl() { fi_close(&fabric_->fid); if (fabric_info_) - fi_freeinfo(fabric_info_); + gffm().fi_freeinfo(fabric_info_); } void FabricContextImpl::InitCommon(const std::string& networkIpHint, uint16_t serverListenPort, Error* error) { @@ -65,7 +65,7 @@ void FabricContextImpl::InitCommon(const std::string& networkIpHint, uint16_t se // The server must know where the packages are coming from, FI_SOURCE allows this. uint64_t additionalFlags = isServer ? FI_SOURCE : 0; - fi_info* hints = fi_allocinfo(); + fi_info* hints = gffm().fi_dupinfo(nullptr); #ifdef LIBFARBIC_ALLOW_LOCALHOST constexpr bool allowLocalhost = true; @@ -96,7 +96,7 @@ void FabricContextImpl::InitCommon(const std::string& networkIpHint, uint16_t se hints->domain_attr->mr_mode = FI_MR_ALLOCATED | FI_MR_VIRT_ADDR | FI_MR_PROV_KEY;// | FI_MR_LOCAL; hints->addr_format = FI_SOCKADDR_IN; - int ret = fi_getinfo( + int ret = gffm().fi_getinfo( kMinExpectedLibFabricVersion, networkIpHint.c_str(), isServer ? 
std::to_string(serverListenPort).c_str() : nullptr, additionalFlags, hints, &fabric_info_); @@ -106,7 +106,7 @@ void FabricContextImpl::InitCommon(const std::string& networkIpHint, uint16_t se } else { *error = ErrorFromFabricInternal("fi_getinfo", ret); } - fi_freeinfo(hints); + gffm().fi_freeinfo(hints); return; } // fprintf(stderr, fi_tostr(fabric_info_, FI_TYPE_INFO)); // Print the found fabric details @@ -119,9 +119,9 @@ void FabricContextImpl::InitCommon(const std::string& networkIpHint, uint16_t se // fabric_info_->rx_attr->total_buffered_recv = 0; // If something strange is happening with receive requests, we should set this to 0. - fi_freeinfo(hints); + gffm().fi_freeinfo(hints); - FI_OK(fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr)); + FI_OK(gffm().fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr)); FI_OK(fi_domain(fabric_, fabric_info_, &domain_, nullptr)); fi_av_attr av_attr{}; diff --git a/common/cpp/src/asapo_fabric/common/fabric_context_impl.h b/common/cpp/src/asapo_fabric/common/fabric_context_impl.h index b6db94a94794f929158045f4174890b26690616a..c4c66a1b7319628b8cddc8ac6e7befcd807e37f8 100644 --- a/common/cpp/src/asapo_fabric/common/fabric_context_impl.h +++ b/common/cpp/src/asapo_fabric/common/fabric_context_impl.h @@ -10,6 +10,7 @@ #include "task/fabric_waitable_task.h" #include "../fabric_internal_error.h" #include "task/fabric_alive_check_response_task.h" +#include "../fabric_function_map.h" namespace asapo { namespace fabric { diff --git a/common/cpp/src/asapo_fabric/fabric_factory_impl.cpp b/common/cpp/src/asapo_fabric/fabric_factory_impl.cpp index 3ae10ed3bb131a3cefb198341dd1d11f52fe6ab1..d2c2be821b12637117030bb809cd830a04bdf6f4 100644 --- a/common/cpp/src/asapo_fabric/fabric_factory_impl.cpp +++ b/common/cpp/src/asapo_fabric/fabric_factory_impl.cpp @@ -11,7 +11,7 @@ std::string fi_version_string(uint32_t version) { } bool FabricFactoryImpl::HasValidVersion(Error* error) const { - auto current_version = fi_version(); + auto 
current_version = gffm().fi_version(); if (FI_VERSION_LT(current_version, FabricContextImpl::kMinExpectedLibFabricVersion)) { std::string found_version_str = fi_version_string(current_version); diff --git a/common/cpp/src/asapo_fabric/fabric_factory_impl.h b/common/cpp/src/asapo_fabric/fabric_factory_impl.h index ce0ec84eeb0d4e5a61a0af811c71ba8f50846c6c..2df0f8049d96ed94f9da7415517ecc2a443aae85 100644 --- a/common/cpp/src/asapo_fabric/fabric_factory_impl.h +++ b/common/cpp/src/asapo_fabric/fabric_factory_impl.h @@ -1,4 +1,5 @@ #include <asapo_fabric/asapo_fabric.h> +#include "fabric_function_map.h" #ifndef ASAPO_FABRIC_FACTORY_IMPL_H #define ASAPO_FABRIC_FACTORY_IMPL_H diff --git a/common/cpp/src/asapo_fabric/fabric_factory_not_supported.cpp b/common/cpp/src/asapo_fabric/fabric_factory_not_supported.cpp index 09e33cd8cb71bee6c1740a6c53b97e375ac641d9..1e3be7d22aa463c9eb7c01d5e012ee73663555f0 100644 --- a/common/cpp/src/asapo_fabric/fabric_factory_not_supported.cpp +++ b/common/cpp/src/asapo_fabric/fabric_factory_not_supported.cpp @@ -1,16 +1,22 @@ #include "fabric_factory_not_supported.h" + +#include <utility> #include "fabric_internal_error.h" using namespace asapo::fabric; +FabricFactoryNotSupported::FabricFactoryNotSupported(FabricErrorTemplate reason) : reason_(std::move(reason)) { +} + + std::unique_ptr<FabricServer> asapo::fabric::FabricFactoryNotSupported::CreateAndBindServer( const AbstractLogger* logger, const std::string& host, uint16_t port, Error* error) const { - *error = FabricErrorTemplates::kNotSupportedOnBuildError.Generate(); + *error = reason_.Generate(); return nullptr; } std::unique_ptr<FabricClient> asapo::fabric::FabricFactoryNotSupported::CreateClient(Error* error) const { - *error = FabricErrorTemplates::kNotSupportedOnBuildError.Generate(); + *error = reason_.Generate(); return nullptr; } diff --git a/common/cpp/src/asapo_fabric/fabric_factory_not_supported.h b/common/cpp/src/asapo_fabric/fabric_factory_not_supported.h index 
789fe4e031eda5d096deeb300db654019d9b4400..3514c2f705d46d2d84727058af286e2a4d80facc 100644 --- a/common/cpp/src/asapo_fabric/fabric_factory_not_supported.h +++ b/common/cpp/src/asapo_fabric/fabric_factory_not_supported.h @@ -6,6 +6,11 @@ namespace asapo { namespace fabric { class FabricFactoryNotSupported : public FabricFactory { +private: + FabricErrorTemplate reason_; +public: + explicit FabricFactoryNotSupported(FabricErrorTemplate reason); + std::unique_ptr<FabricServer> CreateAndBindServer( const AbstractLogger* logger, const std::string& host, uint16_t port, Error* error) const override; diff --git a/common/cpp/src/asapo_fabric/fabric_function_map.h b/common/cpp/src/asapo_fabric/fabric_function_map.h new file mode 100644 index 0000000000000000000000000000000000000000..c35121fb8ddb5781c97e3a364a7aa601fbfbf996 --- /dev/null +++ b/common/cpp/src/asapo_fabric/fabric_function_map.h @@ -0,0 +1,20 @@ +#ifndef ASAPO_FABRIC_DYNAMIC_CALLS_H +#define ASAPO_FABRIC_DYNAMIC_CALLS_H + +#include <rdma/fabric.h> + +struct FabricFunctionMap { + bool is_init_ = false; + + uint32_t(*fi_version)(); + fi_info*(*fi_dupinfo)(const fi_info* info); + void(*fi_freeinfo)(fi_info* info); + int(*fi_getinfo)(uint32_t version, const char* node, const char* service, + uint64_t flags, const fi_info* hints, fi_info** info); + int(*fi_fabric)(fi_fabric_attr* attr, fid_fabric** fabric, void* context); + const char*(*fi_strerror)(int errnum); +}; + +FabricFunctionMap& gffm(); + +#endif //ASAPO_FABRIC_DYNAMIC_CALLS_H diff --git a/common/cpp/src/asapo_fabric/fabric_internal_error.cpp b/common/cpp/src/asapo_fabric/fabric_internal_error.cpp index fb8629e09f447a836f28496a09b2dfd8f8dfeb4b..cde5ddacd517469e97451d1429e47569a5a68534 100644 --- a/common/cpp/src/asapo_fabric/fabric_internal_error.cpp +++ b/common/cpp/src/asapo_fabric/fabric_internal_error.cpp @@ -1,9 +1,10 @@ #include "fabric_internal_error.h" +#include "fabric_function_map.h" #include <rdma/fi_errno.h> #include <asapo_fabric/fabric_error.h> 
asapo::Error asapo::fabric::ErrorFromFabricInternal(const std::string& where, int internalStatusCode) { - std::string errText = where + ": " + fi_strerror(-internalStatusCode); + std::string errText = where + ": " + gffm().fi_strerror(-internalStatusCode); switch (-internalStatusCode) { case FI_ECANCELED: return FabricErrorTemplates::kInternalOperationCanceledError.Generate(errText);