diff options
-rw-r--r-- | gnu/local.mk | 1 | ||||
-rw-r--r-- | gnu/packages/linux.scm | 3 | ||||
-rw-r--r-- | gnu/packages/patches/psm-disable-memory-stats.patch | 62 |
3 files changed, 65 insertions, 1 deletions
diff --git a/gnu/local.mk b/gnu/local.mk index 3f2e9f265f..a49c9109ab 100644 --- a/gnu/local.mk +++ b/gnu/local.mk @@ -1259,6 +1259,7 @@ dist_patch_DATA = \ %D%/packages/patches/procmail-CVE-2017-16844.patch \ %D%/packages/patches/proot-test-fhs.patch \ %D%/packages/patches/psm-arch.patch \ + %D%/packages/patches/psm-disable-memory-stats.patch \ %D%/packages/patches/psm-ldflags.patch \ %D%/packages/patches/psm-repro.patch \ %D%/packages/patches/pugixml-versioned-libdir.patch \ diff --git a/gnu/packages/linux.scm b/gnu/packages/linux.scm index 457f6579a8..a5564d9a90 100644 --- a/gnu/packages/linux.scm +++ b/gnu/packages/linux.scm @@ -5539,7 +5539,8 @@ libraries, which are often integrated directly into libfabric.") (patches (search-patches "psm-arch.patch" ; uname -p returns "unknown" on Debian 9 "psm-ldflags.patch" ; build shared lib with LDFLAGS - "psm-repro.patch")))) ; reproducibility + "psm-repro.patch" ; reproducibility + "psm-disable-memory-stats.patch")))) (build-system gnu-build-system) (outputs '("out" "debug")) (inputs `(("libuuid" ,util-linux))) diff --git a/gnu/packages/patches/psm-disable-memory-stats.patch b/gnu/packages/patches/psm-disable-memory-stats.patch new file mode 100644 index 0000000000..52cd88a4e1 --- /dev/null +++ b/gnu/packages/patches/psm-disable-memory-stats.patch @@ -0,0 +1,62 @@ +The memory statistics code leads to segfaults during initialization (on +machines with InfiniPath networking): + + (gdb) bt full + #0 ips_ptl_init (ep=0x1fc6af8, ptl=0x1fc6f88, ctl=0x1fc6d78) at ptl.c:224 + err = PSM_OK + num_of_send_bufs = 1024 + num_of_send_desc = 4096 + imm_size = 128 + context = 0x1fc6b70 + user_info = 0x1fc6b90 + enable_shcontexts = 0 + current_count = <optimized out> + #1 0x00007fb2aa672abf in __psm_ep_open_internal ( + unique_job_key=unique_job_key@entry=0x7ffed1ee5800 "<\207\020#5\271\267\200\354x\242e8\364zo", + devid_enabled=devid_enabled@entry=0x7ffed1ee5724, opts_i=opts_i@entry=0x7ffed1ee5810, mq=<optimized out>, + epo=epo@entry=0x7ffed1ee5710, epido=epido@entry=0x7ffed1ee5708) at psm_ep.c:929 + ep = 0x1fc6af8 + num_units = 1 + len = <optimized out> + err = <optimized out> + epaddr = 0x1e9dd78 + buf = "miriel044:2.0.", '\000' <repeats 113 times> + p = <optimized out> + e = <optimized out> + old_cpuaff = 0x0 + old_unit = 0x0 + yield_cnt = {e_void = 0xfa, e_str = 0xfa <error: Cannot access memory at address 0xfa>, e_int = 250, + e_uint = 250, e_long = 250, e_ulong = 250, e_ulonglong = 250} + no_cpuaff = {e_void = 0x0, e_str = 0x0, e_int = 0, e_uint = 0, e_long = 0, e_ulong = 0, e_ulonglong = 0} + env_unit_id = {e_void = 0xffffffffffffffff, + e_str = 0xffffffffffffffff <error: Cannot access memory at address 0xffffffffffffffff>, e_int = -1, + e_uint = 4294967295, e_long = -1, e_ulong = 18446744073709551615, e_ulonglong = 18446744073709551615} + env_port_id = {e_void = 0x0, e_str = 0x0, e_int = 0, e_uint = 0, e_long = 0, e_ulong = 0, e_ulonglong = 0} + env_sl = {e_void = 0x0, e_str = 0x0, e_int = 0, e_uint = 0, e_long = 0, e_ulong = 0, e_ulonglong = 0} + ptl_sizes = <optimized out> + default_cpuaff = <optimized out> + opts = {timeout = 180000000000, unit = -1, affinity = 0, shm_mbytes = 10, sendbufs_num = 1024, + network_pkey = 65535, port = 0, outsl = 0, service_id = 1152940698815692800, + path_res_type = PSM_PATH_RES_NONE, senddesc_num = 4096, imm_size = 128} + amsh_ptl = 0x1fc6e48 + ips_ptl = 0x1fc6f88 + self_ptl = 0x1fc99c8 + i = 3 + +It looks like ptl.c:24 is writing past the region that was malloc'd. + +Turning stats off solves the problem. + +diff --git a/psm_utils.c b/psm_utils.c +index c8651fe..5514921 100644 +--- a/psm_utils.c ++++ b/psm_utils.c +@@ -1058,7 +1058,7 @@ psmi_log_memstats(psmi_memtype_t type, int64_t nbytes) + return; + } + +-#define psmi_stats_mask PSMI_STATSTYPE_MEMORY ++#define psmi_stats_mask 0 + + #ifdef malloc + #undef malloc |