Since /sys is unavailable in build environments, the list of available TCP network interfaces cannot be obtained via /sys/class/net. This patch provides alternative code that uses the SIOCGIFCONF ioctl to get the names of the available TCP network interfaces. diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c index 81ad459..10024a6 100644 --- a/src/uct/tcp/tcp_iface.c +++ b/src/uct/tcp/tcp_iface.c @@ -12,6 +12,8 @@ #include <sys/poll.h> #include <netinet/tcp.h> #include <dirent.h> +#include <net/if.h> +#include <sys/ioctl.h> static ucs_config_field_t uct_tcp_iface_config_table[] = { {"", "MAX_SHORT=8k", NULL, @@ -483,6 +485,70 @@ static UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_iface_t, uct_iface_t, uct_md_h, uct_worker_h, const uct_iface_params_t*, const uct_iface_config_t*); +/* Fetch information about available network devices through an ioctl. */ +static ucs_status_t query_devices_ioctl(uct_md_h md, + uct_tl_resource_desc_t **resource_p, + unsigned *num_resources_p) +{ + int sock, err, i; + uct_tl_resource_desc_t *resources, *tmp; + unsigned num_resources; + ucs_status_t status; + struct ifconf conf; + struct ifreq reqs[10]; + + conf.ifc_len = sizeof reqs; + conf.ifc_req = reqs; + + sock = socket(SOCK_STREAM, AF_INET, 0); + if (sock < 0) { + ucs_error("socket(2) failed: %m"); + status = UCS_ERR_IO_ERROR; + goto out; + } + + err = ioctl(sock, SIOCGIFCONF, &conf); + if (err < 0) { + ucs_error("SIOCGIFCONF ioctl failed: %m"); + status = UCS_ERR_IO_ERROR; + goto out; + } + + resources = NULL; + num_resources = 0; + for (i = 0; i < conf.ifc_len / sizeof(struct ifreq); i++) { + const char *name = reqs[i].ifr_name; + + if (!ucs_netif_is_active(name)) { + continue; + } + + tmp = ucs_realloc(resources, sizeof(*resources) * (num_resources + 1), + "tcp resources"); + if (tmp == NULL) { + ucs_free(resources); + status = UCS_ERR_NO_MEMORY; + goto out; + } + resources = tmp; + + ucs_snprintf_zero(resources[i].tl_name, sizeof(resources[i].tl_name), + "%s", UCT_TCP_NAME); + ucs_snprintf_zero(resources[i].dev_name, sizeof(resources[i].dev_name), + "%s", name); + resources[i].dev_type = UCT_DEVICE_TYPE_NET; + ++num_resources; + } + + *num_resources_p = num_resources; + *resource_p = resources; + status = UCS_OK; + +out: + if (sock >= 0) close(sock); + return status; +} + static ucs_status_t uct_tcp_query_tl_resources(uct_md_h md, uct_tl_resource_desc_t **resource_p, unsigned *num_resources_p) @@ -496,9 +562,9 @@ static ucs_status_t uct_tcp_query_tl_resources(uct_md_h md, dir = opendir(netdev_dir); if (dir == NULL) { - ucs_error("opendir(%s) failed: %m", netdev_dir); - status = UCS_ERR_IO_ERROR; - goto out; + /* When /sys is unavailable, as can be the case in a container, + * resort to a good old 'ioctl'. */ + return query_devices_ioctl(md, resource_p, num_resources_p); } resources = NULL; @@ -543,6 +609,5 @@ static ucs_status_t uct_tcp_query_tl_resources(uct_md_h md, out_closedir: closedir(dir); -out: return status; }