Skip to content

Commit 79411cd

Browse files
committed
btl,mtl/ofi: set device only flag
In OFI, the FI_HMEM_DEVICE_ONLY registration flag signals to the provider that the memory resides only on the device and is not unified memory (which can migrate between the GPU and host). IPC is only usable with device-only memory and is not valid for unified memory. Without this flag, providers cannot provide optimizations like IPC. Set the flag if the address was found to be non-unified memory. This enables IPC copies in OFI. The flag is available starting in v1.13.0, so this adds a configure check to make sure we only use it if available. This also includes an indentation fix and typo fixes within the scope of the patch. Signed-off-by: Zach Dworkin <[email protected]> Signed-off-by: Alexia Ingerson <[email protected]>
1 parent 66fe583 commit 79411cd

File tree

3 files changed

+27
-9
lines changed

3 files changed

+27
-9
lines changed

config/opal_check_ofi.m4

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ AC_DEFUN([OPAL_CHECK_OFI],[
148148
149149
AC_DEFINE_UNQUOTED([OPAL_OFI_HAVE_FI_MR_IFACE],
150150
[${opal_check_fi_mr_attr_iface}],
151-
[check if iface avaiable in fi_mr_attr])
151+
[check if iface available in fi_mr_attr])
152152
153153
AC_CHECK_DECL([FI_HMEM_ROCR],
154154
[opal_check_fi_hmem_rocr=1],
@@ -157,7 +157,7 @@ AC_DEFUN([OPAL_CHECK_OFI],[
157157
158158
AC_DEFINE_UNQUOTED([OPAL_OFI_HAVE_FI_HMEM_ROCR],
159159
[${opal_check_fi_hmem_rocr}],
160-
[check if FI_HMEM_ROCR avaiable in fi_hmem_iface])
160+
[check if FI_HMEM_ROCR available in fi_hmem_iface])
161161
162162
AC_CHECK_DECL([FI_HMEM_ZE],
163163
[opal_check_fi_hmem_ze=1],
@@ -166,7 +166,16 @@ AC_DEFUN([OPAL_CHECK_OFI],[
166166
167167
AC_DEFINE_UNQUOTED([OPAL_OFI_HAVE_FI_HMEM_ZE],
168168
[${opal_check_fi_hmem_ze}],
169-
[check if FI_HMEM_ZE avaiable in fi_hmem_iface])])
169+
[check if FI_HMEM_ZE available in fi_hmem_iface])
170+
171+
AC_CHECK_DECL([FI_HMEM_DEVICE_ONLY],
172+
[opal_check_fi_hmem_device_only=1],
173+
[opal_check_fi_hmem_device_only=0],
174+
[#include <rdma/fi_domain.h>])
175+
176+
AC_DEFINE_UNQUOTED([OPAL_OFI_HAVE_FI_HMEM_DEVICE_ONLY],
177+
[${opal_check_fi_hmem_device_only}],
178+
[check if OPAL_OFI_HAVE_FI_HMEM_DEVICE_ONLY available])])
170179
171180
CPPFLAGS=${opal_check_ofi_save_CPPFLAGS}
172181
LDFLAGS=${opal_check_ofi_save_LDFLAGS}

ompi/mca/mtl/ofi/mtl_ofi_mr.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ ompi_mtl_ofi_reg_mem(void *reg_data, void *base, size_t size,
2121
struct iovec iov = {0};
2222
ompi_mtl_ofi_reg_t *mtl_reg = (ompi_mtl_ofi_reg_t *)reg;
2323
int dev_id;
24-
uint64_t flags;
24+
uint64_t flags, mr_flags = 0;
2525

2626
iov.iov_base = base;
2727
iov.iov_len = size;
@@ -41,7 +41,7 @@ ompi_mtl_ofi_reg_mem(void *reg_data, void *base, size_t size,
4141
attr.iface = FI_HMEM_CUDA;
4242
opal_accelerator.get_device(&attr.device.cuda);
4343
#if OPAL_OFI_HAVE_FI_HMEM_ROCR
44-
} else if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "rocm")) {
44+
} else if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "rocm")) {
4545
attr.iface = FI_HMEM_ROCR;
4646
opal_accelerator.get_device(&attr.device.cuda);
4747
#endif
@@ -53,11 +53,16 @@ ompi_mtl_ofi_reg_mem(void *reg_data, void *base, size_t size,
5353
} else {
5454
return OPAL_ERROR;
5555
}
56+
#if OPAL_OFI_HAVE_FI_HMEM_DEVICE_ONLY
57+
mr_flags = flags & MCA_ACCELERATOR_FLAGS_UNIFIED_MEMORY ? 0 :
58+
FI_HMEM_DEVICE_ONLY
59+
#endif
5660
}
5761
}
62+
5863
#endif
5964

60-
ret = fi_mr_regattr(ompi_mtl_ofi.domain, &attr, 0, &mtl_reg->ofi_mr);
65+
ret = fi_mr_regattr(ompi_mtl_ofi.domain, &attr, mr_flags, &mtl_reg->ofi_mr);
6166
if (0 != ret) {
6267
opal_show_help("help-mtl-ofi.txt", "Buffer Memory Registration Failed", true,
6368
opal_accelerator_base_selected_component.base_version.mca_component_name,

opal/mca/btl/ofi/btl_ofi_module.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ int mca_btl_ofi_reg_mem(void *reg_data, void *base, size_t size,
254254
mca_rcache_base_registration_t *reg)
255255
{
256256
int rc, dev_id;
257-
uint64_t flags;
257+
uint64_t flags, mr_flags = 0;
258258
static uint64_t access_flags = FI_REMOTE_WRITE | FI_REMOTE_READ | FI_READ | FI_WRITE;
259259
struct fi_mr_attr attr = {0};
260260
struct iovec iov = {0};
@@ -281,7 +281,7 @@ int mca_btl_ofi_reg_mem(void *reg_data, void *base, size_t size,
281281
attr.iface = FI_HMEM_CUDA;
282282
opal_accelerator.get_device(&attr.device.cuda);
283283
#if OPAL_OFI_HAVE_FI_HMEM_ROCR
284-
} else if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "rocm")) {
284+
} else if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "rocm")) {
285285
attr.iface = FI_HMEM_ROCR;
286286
opal_accelerator.get_device(&attr.device.cuda);
287287
#endif
@@ -293,11 +293,15 @@ int mca_btl_ofi_reg_mem(void *reg_data, void *base, size_t size,
293293
} else {
294294
return OPAL_ERROR;
295295
}
296+
#if OPAL_OFI_HAVE_FI_HMEM_DEVICE_ONLY
297+
mr_flags = flags & MCA_ACCELERATOR_FLAGS_UNIFIED_MEMORY ? 0 :
298+
FI_HMEM_DEVICE_ONLY
299+
#endif
296300
}
297301
}
298302
#endif
299303

300-
rc = fi_mr_regattr(btl->domain, &attr, 0, &ur->ur_mr);
304+
rc = fi_mr_regattr(btl->domain, &attr, mr_flags, &ur->ur_mr);
301305
if (0 != rc) {
302306
ur->ur_mr = NULL;
303307
return OPAL_ERR_OUT_OF_RESOURCE;

0 commit comments

Comments
 (0)