Skip to content
Snippets Groups Projects
Commit cf41cad3 authored by Carsten Patzke's avatar Carsten Patzke
Browse files

Dynamically load LibFabric and other dependencies

parent 3e28a07d
No related branches found
No related tags found
No related merge requests found
Showing
with 96 additions and 26 deletions
......@@ -75,7 +75,7 @@ IF(ENABLE_LIBFABRIC)
message(STATUS "LibFabric support enabled")
message(STATUS "LIB_FABRIC: Path: ${LIBFABRIC_LIBRARY} Include: ${LIBFABRIC_INCLUDE_DIR}")
add_definitions(-DLIBFABRIC_ENABLED)
SET(ASAPO_COMMON_FABRIC_LIBRARIES ${ASAPO_COMMON_FABRIC_LIBRARIES} fabric)
SET(ASAPO_COMMON_FABRIC_LIBRARIES ${ASAPO_COMMON_FABRIC_LIBRARIES} dl)
IF(ENABLE_LIBFABRIC_LOCALHOST)
message(STATUS "LIB_FABRIC: Enabled emulated RDMA when localhost is used. Should only be used for tests.")
add_definitions(-DLIBFARBIC_ALLOW_LOCALHOST)
......
......@@ -78,7 +78,7 @@ class FabricFactory {
virtual std::unique_ptr<FabricClient> CreateClient(Error* error) const = 0;
};
std::unique_ptr<FabricFactory> GenerateDefaultFabricFactory();
std::unique_ptr<FabricFactory> GenerateDefaultFabricFactory(); // <- will try to load the library with dlopen first
}
}
......
......@@ -5,7 +5,9 @@ namespace asapo {
namespace fabric {
enum class FabricErrorType {
kNotSupported,
kOutdatedLibrary,
kLibraryNotFound,
kLibraryCompatibilityError,
kLibraryOutdated,
kInternalError, // An error that was produced by LibFabric
kInternalOperationCanceled, // An error that was produced by LibFabric
kInternalConnectionError, // This might occur when the connection is unexpectedly closed
......@@ -20,10 +22,16 @@ using FabricErrorTemplate = ServiceErrorTemplate<FabricErrorType, ErrorType::kFa
namespace FabricErrorTemplates {
// Reported when this build of ASAPO was compiled without LibFabric support.
auto const kNotSupportedOnBuildError = FabricErrorTemplate {
    "This build of ASAPO does not support LibFabric", FabricErrorType::kNotSupported
};
// Error template for the case that the fabric library (or one of its dependencies)
// could not be located at runtime; maps to FabricErrorType::kLibraryNotFound.
auto const kLibraryNotFoundError = FabricErrorTemplate {
"asapo-fabric, LibFabric or dependencies were not found", FabricErrorType::kLibraryNotFound
};
// Error template for the case that the fabric library was loaded but does not export
// every function that is required; maps to FabricErrorType::kLibraryCompatibilityError.
auto const kLibraryCompatibilityError = FabricErrorTemplate {
    "LibFabric was found but a required function is missing", FabricErrorType::kLibraryCompatibilityError
};
// Error template for a LibFabric installation that is older than required;
// maps to FabricErrorType::kLibraryOutdated.
auto const kOutdatedLibraryError = FabricErrorTemplate {
    "LibFabric outdated", FabricErrorType::kLibraryOutdated
};
auto const kInternalError = FabricErrorTemplate {
"Internal LibFabric error", FabricErrorType::kInternalError
......
......@@ -2,7 +2,10 @@ set(TARGET_NAME asapo-fabric)
include_directories(include)
set(SOURCE_FILES asapo_fabric.cpp)
set(SOURCE_FILES
asapo_fabric.cpp
fabric_factory_not_supported.cpp
)
IF(ENABLE_LIBFABRIC)
set(SOURCE_FILES ${SOURCE_FILES}
......@@ -19,10 +22,6 @@ IF(ENABLE_LIBFABRIC)
server/task/fabric_recv_any_task.cpp
server/task/fabric_handshake_accepting_task.cpp
)
ELSE()
set(SOURCE_FILES ${SOURCE_FILES}
fabric_factory_not_supported.cpp
)
ENDIF()
################################
......@@ -31,4 +30,4 @@ ENDIF()
add_library(${TARGET_NAME} STATIC ${SOURCE_FILES} $<TARGET_OBJECTS:system_io>)
target_include_directories(${TARGET_NAME} PUBLIC ${ASAPO_CXX_COMMON_INCLUDE_DIR} ${LIBFABRIC_INCLUDE_DIR})
target_include_directories(${TARGET_NAME} PUBLIC ${ASAPO_CXX_COMMON_INCLUDE_DIR} ${LIBFABRIC_INCLUDE_DIR})
#include <asapo_fabric/asapo_fabric.h>
#include <dlfcn.h>
#include "fabric_factory_not_supported.h"
#ifdef LIBFABRIC_ENABLED
#include "fabric_factory_impl.h"
#else
#include "fabric_factory_not_supported.h"
#include "fabric_function_map.h"
#endif
using namespace asapo::fabric;
/**
 * Creates the default FabricFactory.
 *
 * With LIBFABRIC_ENABLED the LibFabric shared object is opened at runtime with dlopen and
 * the required entry points are resolved with dlsym into the global function map (gffm()).
 * Depending on the outcome the returned factory is:
 *  - FabricFactoryImpl, when every entry point was resolved (now or by an earlier call);
 *  - FabricFactoryNotSupported(kLibraryCompatibilityError), when a symbol is missing;
 *  - FabricFactoryNotSupported(kLibraryNotFoundError), when dlopen fails.
 * Without LIBFABRIC_ENABLED a FabricFactoryNotSupported(kNotSupportedOnBuildError) is returned.
 *
 * @return A factory; never nullptr.
 */
std::unique_ptr<FabricFactory> asapo::fabric::GenerateDefaultFabricFactory() {
#ifdef LIBFABRIC_ENABLED
    // The function map was already populated by a previous call.
    if (gffm().is_init_) {
        return std::unique_ptr<FabricFactory>(new FabricFactoryImpl());
    }

    void* handle = dlopen("libfabric.so", RTLD_LAZY);
    if (!handle) {
        return std::unique_ptr<FabricFactory>(new FabricFactoryNotSupported(FabricErrorTemplates::kLibraryNotFoundError));
    }

    // Resolve into a local map first, so the global map never keeps pointers into a
    // library that is closed again because one of the symbols is missing.
    FabricFunctionMap loaded {};
    bool all_found = true;
#define ADD_FABRIC_CALL(fName) \
    do { \
        if (!(*reinterpret_cast<void**>(&loaded.fName) = dlsym(handle, #fName))) { all_found = false; } \
    } while (0)
    ADD_FABRIC_CALL(fi_version);
    ADD_FABRIC_CALL(fi_dupinfo);
    ADD_FABRIC_CALL(fi_freeinfo);
    ADD_FABRIC_CALL(fi_getinfo);
    ADD_FABRIC_CALL(fi_fabric);
    ADD_FABRIC_CALL(fi_strerror);
#undef ADD_FABRIC_CALL

    if (!all_found) {
        dlclose(handle);
        return std::unique_ptr<FabricFactory>(new FabricFactoryNotSupported(FabricErrorTemplates::kLibraryCompatibilityError));
    }

    // The handle is deliberately not closed: the resolved function pointers stay in use.
    loaded.is_init_ = true;
    gffm() = loaded;
    return std::unique_ptr<FabricFactory>(new FabricFactoryImpl());
#else
    return std::unique_ptr<FabricFactory>(new FabricFactoryNotSupported(FabricErrorTemplates::kNotSupportedOnBuildError));
#endif
}
// Global fabric function map
// Returns a reference to the single FabricFunctionMap instance, which is a
// function-local static created on the first call.
FabricFunctionMap& gffm() {
    static FabricFunctionMap instance {};
    return instance;
}
......@@ -56,7 +56,7 @@ FabricContextImpl::~FabricContextImpl() {
fi_close(&fabric_->fid);
if (fabric_info_)
fi_freeinfo(fabric_info_);
gffm().fi_freeinfo(fabric_info_);
}
void FabricContextImpl::InitCommon(const std::string& networkIpHint, uint16_t serverListenPort, Error* error) {
......@@ -65,7 +65,7 @@ void FabricContextImpl::InitCommon(const std::string& networkIpHint, uint16_t se
// The server must know where the packages are coming from, FI_SOURCE allows this.
uint64_t additionalFlags = isServer ? FI_SOURCE : 0;
fi_info* hints = fi_allocinfo();
fi_info* hints = gffm().fi_dupinfo(nullptr);
#ifdef LIBFARBIC_ALLOW_LOCALHOST
constexpr bool allowLocalhost = true;
......@@ -96,7 +96,7 @@ void FabricContextImpl::InitCommon(const std::string& networkIpHint, uint16_t se
hints->domain_attr->mr_mode = FI_MR_ALLOCATED | FI_MR_VIRT_ADDR | FI_MR_PROV_KEY;// | FI_MR_LOCAL;
hints->addr_format = FI_SOCKADDR_IN;
int ret = fi_getinfo(
int ret = gffm().fi_getinfo(
kMinExpectedLibFabricVersion, networkIpHint.c_str(), isServer ? std::to_string(serverListenPort).c_str() : nullptr,
additionalFlags, hints, &fabric_info_);
......@@ -106,7 +106,7 @@ void FabricContextImpl::InitCommon(const std::string& networkIpHint, uint16_t se
} else {
*error = ErrorFromFabricInternal("fi_getinfo", ret);
}
fi_freeinfo(hints);
gffm().fi_freeinfo(hints);
return;
}
// fprintf(stderr, fi_tostr(fabric_info_, FI_TYPE_INFO)); // Print the found fabric details
......@@ -119,9 +119,9 @@ void FabricContextImpl::InitCommon(const std::string& networkIpHint, uint16_t se
// fabric_info_->rx_attr->total_buffered_recv = 0;
// If something strange is happening with receive requests, we should set this to 0.
fi_freeinfo(hints);
gffm().fi_freeinfo(hints);
FI_OK(fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr));
FI_OK(gffm().fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr));
FI_OK(fi_domain(fabric_, fabric_info_, &domain_, nullptr));
fi_av_attr av_attr{};
......
......@@ -10,6 +10,7 @@
#include "task/fabric_waitable_task.h"
#include "../fabric_internal_error.h"
#include "task/fabric_alive_check_response_task.h"
#include "../fabric_function_map.h"
namespace asapo {
namespace fabric {
......
......@@ -11,7 +11,7 @@ std::string fi_version_string(uint32_t version) {
}
bool FabricFactoryImpl::HasValidVersion(Error* error) const {
auto current_version = fi_version();
auto current_version = gffm().fi_version();
if (FI_VERSION_LT(current_version, FabricContextImpl::kMinExpectedLibFabricVersion)) {
std::string found_version_str = fi_version_string(current_version);
......
#include <asapo_fabric/asapo_fabric.h>
#include "fabric_function_map.h"
#ifndef ASAPO_FABRIC_FACTORY_IMPL_H
#define ASAPO_FABRIC_FACTORY_IMPL_H
......
#include "fabric_factory_not_supported.h"
#include <utility>
#include "fabric_internal_error.h"
using namespace asapo::fabric;
// Stores the error template that this factory hands out (via reason_.Generate())
// from CreateAndBindServer and CreateClient.
FabricFactoryNotSupported::FabricFactoryNotSupported(FabricErrorTemplate reason) : reason_(std::move(reason)) {
}
/**
 * Never creates a server: writes an error generated from the stored reason into
 * `error` and returns nullptr. The logger, host and port arguments are not used.
 *
 * @param error Receives the generated error; must not be null (it is dereferenced).
 * @return Always nullptr.
 */
std::unique_ptr<FabricServer> asapo::fabric::FabricFactoryNotSupported::CreateAndBindServer(
    const AbstractLogger* logger, const std::string& host, uint16_t port,
    Error* error) const {
    *error = reason_.Generate();
    return nullptr;
}
/**
 * Never creates a client: writes an error generated from the stored reason into
 * `error` and returns nullptr.
 *
 * @param error Receives the generated error; must not be null (it is dereferenced).
 * @return Always nullptr.
 */
std::unique_ptr<FabricClient> asapo::fabric::FabricFactoryNotSupported::CreateClient(Error* error) const {
    *error = reason_.Generate();
    return nullptr;
}
......@@ -6,6 +6,11 @@
namespace asapo {
namespace fabric {
class FabricFactoryNotSupported : public FabricFactory {
private:
FabricErrorTemplate reason_;
public:
explicit FabricFactoryNotSupported(FabricErrorTemplate reason);
std::unique_ptr<FabricServer> CreateAndBindServer(
const AbstractLogger* logger, const std::string& host, uint16_t port, Error* error) const override;
......
#ifndef ASAPO_FABRIC_DYNAMIC_CALLS_H
#define ASAPO_FABRIC_DYNAMIC_CALLS_H
#include <rdma/fabric.h>
/**
 * Table of LibFabric entry points that are resolved at runtime instead of at link time.
 * Each member carries the name of the LibFabric function it points to. All pointers
 * start out as nullptr and must only be called once is_init_ is true.
 */
struct FabricFunctionMap {
    // True once every function pointer below has been resolved.
    bool is_init_ = false;

    uint32_t (*fi_version)() = nullptr;
    fi_info* (*fi_dupinfo)(const fi_info* info) = nullptr;
    void (*fi_freeinfo)(fi_info* info) = nullptr;
    int (*fi_getinfo)(uint32_t version, const char* node, const char* service,
                      uint64_t flags, const fi_info* hints, fi_info** info) = nullptr;
    int (*fi_fabric)(fi_fabric_attr* attr, fid_fabric** fabric, void* context) = nullptr;
    const char* (*fi_strerror)(int errnum) = nullptr;
};
FabricFunctionMap& gffm();
#endif //ASAPO_FABRIC_DYNAMIC_CALLS_H
#include "fabric_internal_error.h"
#include "fabric_function_map.h"
#include <rdma/fi_errno.h>
#include <asapo_fabric/fabric_error.h>
asapo::Error asapo::fabric::ErrorFromFabricInternal(const std::string& where, int internalStatusCode) {
std::string errText = where + ": " + fi_strerror(-internalStatusCode);
std::string errText = where + ": " + gffm().fi_strerror(-internalStatusCode);
switch (-internalStatusCode) {
case FI_ECANCELED:
return FabricErrorTemplates::kInternalOperationCanceledError.Generate(errText);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment