[slurm-users] slurm 17 node feature Failed to reboot nodes

Tueur Volvo huitre39 at gmail.com
Tue Mar 27 06:39:06 MDT 2018

Hello, I migrated from Slurm 16 to Slurm 17.11.4,

but my node_features plugin no longer works: I apply my feature and the node
reboots, but once the machine has rebooted Slurm puts my node in the drain state.

With sinfo -r I get this error message:
Failed to reboot nodes machine415 into expected state for job 945

My problem is located in the Slurm source, in this file:
line 4222

static void *_wait_boot(void *arg)

    if (boot_node_bitmap && bit_set_count(boot_node_bitmap)) {
        char *node_list = bitmap2node_name(boot_node_bitmap);
        error("Failed to reboot nodes %s into expected state for job %u",
              node_list, job_ptr->job_id);
        (void) drain_nodes(node_list, "Node mode change failure",
        (void) job_requeue(getuid(), job_ptr->job_id, NULL, false, 0);

If I comment out this function, my node_features plugin works!

But I want to solve my problem by modifying my plugin code,
and I don't know what to change in my code.
What should I save as a variable in my code, with xmalloc or xrealloc?

I am sharing my basic source code. I think I should save active_features when
the function node_features_p_node_set is called, but how can I save this
variable? I tried to save *active_features into char **current_mode, but I
get the same error.

Thanks in advance for your help.

#include "config.h"

#define _GNU_SOURCE    /* For POLLRDHUP */
#include <ctype.h>
#include <poll.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#if defined(__FreeBSD__) || defined(__NetBSD__)

#include "slurm/slurm.h"

#include "src/common/assoc_mgr.h"
#include "src/common/bitstring.h"
#include "src/common/fd.h"
#include "src/common/gres.h"
#include "src/common/list.h"
#include "src/common/macros.h"
#include "src/common/pack.h"
#include "src/common/parse_config.h"
#include "src/common/slurm_protocol_api.h"
//#include "src/common/slurm_strcasestr.h"
#include "src/common/timers.h"
#include "src/common/uid.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/slurmctld/job_scheduler.h"
#include "src/slurmctld/locks.h"
#include "src/slurmctld/node_scheduler.h"
#include "src/slurmctld/reservation.h"
#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/state_save.h"

/* Plugin identification.
 * plugin_name    - human-readable description of this plugin
 * plugin_type    - "<major>/<minor>" type string; the major part here is
 *                  "node_features"
 * plugin_version - set to the Slurm version this plugin is compiled against
 * NOTE(review): presumably these are the symbols the Slurm plugin loader
 * looks up by name -- confirm against the Slurm plugin API documentation. */
const char plugin_name[]        = "node_features update_linux plugin";
const char plugin_type[]        = "node_features/update_linux";
const uint32_t plugin_version   = SLURM_VERSION_NUMBER;

/* Plugin entry point returning an int status for a configuration query.
 * NOTE(review): in this copy of the code the function body and its closing
 * brace are missing; they must be restored before this will compile. */
extern int node_features_p_get_config()  {

/* Validate a node update request.
 * IN node_ptr - node being updated (unused by this plugin)
 * IN update_node_msg - the update request (unused by this plugin)
 * RET always true: this plugin accepts every update */
extern bool node_features_p_node_update_valid(void *node_ptr,
                                              update_node_msg_t *update_node_msg)
{
    return true;
}

/* Report the estimated node boot time.
 * RET estimated boot time in seconds (fixed at 6 minutes) */
extern uint32_t node_features_p_boot_time(void)
{
    uint32_t boot_time = (6 * 60);    /* 6 minute estimated boot time */

    return boot_time;
}

/* Report whether a feature may be changed by this plugin.
 * Same behavior as node_features_p_changeable_feature(), under a different
 * spelling of the symbol name.
 * NOTE(review): presumably kept for older Slurm releases that look up the
 * misspelled symbol -- confirm which spelling the target release expects.
 * IN feature - feature name (not inspected)
 * RET always true: every feature is treated as changeable */
extern bool node_features_p_changible_feature(char *feature)
{
    return true;
}

/* Report whether a feature may be changed by this plugin.
 * IN feature - feature name (not inspected)
 * RET always true: every feature is treated as changeable */
extern bool node_features_p_changeable_feature(char *feature)
{
    return true;
}

/* Plugin entry point taking a memory-sort flag and a NUMA bitmap.
 * NOTE(review): no statements and no closing brace are present for this
 * function in this copy of the code; the body is presumably empty, but the
 * closing brace must be restored before this will compile. */
extern void node_features_p_step_config(bool mem_sort, bitstr_t
*numa_bitmap)  {

/* Translate a node's new feature specification.
 * IN new_features - requested feature string
 * RET the same pointer that was passed in; no copy is made.
 * NOTE(review): if the caller frees the returned string, returning the
 * argument itself could lead to a double free -- confirm ownership rules. */
extern char *node_features_p_node_xlate2(char *new_features)
{
    return new_features;
}

/* Load configuration.
 * This plugin has no configuration to load, so the call always succeeds.
 * RET SLURM_SUCCESS */
extern int init(void)
{
    int rc = SLURM_SUCCESS;

    return rc;
}

extern int fini(void)

    return SLURM_SUCCESS;

extern int node_features_p_get_node(char *node_list)

    return SLURM_SUCCESS;

/* Translate a node's feature specification.
 * IN new_features - newly requested features (ignored)
 * IN orig_features - previously active features (ignored)
 * IN avail_features - features available on the node
 * RET the avail_features pointer itself; no copy is made.
 * NOTE(review): if the caller frees the returned string, returning the
 * argument itself could lead to a double free -- confirm ownership rules. */
extern char *node_features_p_node_xlate(char *new_features,
                                        char *orig_features,
                                        char *avail_features)
{
    return avail_features;
}

/* Plugin entry point reporting node state through output string pointers.
 * NOTE(review): the parameter list is cut off after "char" and the body is
 * missing in this copy of the code; both must be restored before this will
 * compile. */
extern void node_features_p_node_state(char **avail_modes, char


/* Translate a job's feature request to the node features needed at boot
 * time.
 * IN job_features - job's feature request
 * RET the same pointer that was passed in; no translation or copy is made.
 * NOTE(review): if the caller frees the returned string, returning the
 * argument itself could lead to a double free -- confirm ownership rules. */
extern char *node_features_p_job_xlate(char *job_features)
{
    return job_features;
}

/* Test if a job's feature specification is valid */
extern int node_features_p_job_valid(char *job_features)

    return SLURM_SUCCESS;

/* Validate a node update request.
 * Behaves identically to node_features_p_node_update_valid() in this file.
 * NOTE(review): the "_g_" prefix differs from the "_p_" prefix used by every
 * other entry point here; confirm this symbol is really meant to be defined
 * inside the plugin and does not clash with a definition elsewhere.
 * IN node_ptr - node being updated (unused)
 * IN update_node_msg - the update request (unused)
 * RET always true */
bool node_features_g_node_update_valid(void *node_ptr,
                                       update_node_msg_t *update_node_msg)
{
    return true;
}

/* Set the node's active features based upon job constraints.
 * NOTE: Executed by the slurmd daemon.
 * IN/OUT active_features - New active features; truncated to an empty string
 *                          before returning. No node mode is actually
 *                          changed by this plugin.
 * NOTE(review): the truncation presumably keeps the caller from forwarding
 * the feature string to the reboot program -- confirm against slurmd.
 * RET error code (always SLURM_SUCCESS) */
extern int node_features_p_node_set(char *active_features)
{
    int error_code = SLURM_SUCCESS;

    /* Nothing to clear when no feature string was supplied. */
    if (active_features) {
        active_features[0] = '\0';
    }

    return error_code;
}

/* Return true if the plugin requires PowerSave mode for booting nodes.
 * RET always false: this plugin does not require PowerSave mode */
extern bool node_features_p_node_power(void)
{
    return false;
}

/* Plugin entry point taking an active feature string and a bitmap; the only
 * visible statement returns SLURM_SUCCESS.
 * NOTE(review): the parameter list is cut off after "bitstr_t" and the braces
 * are missing in this copy of the code; both must be restored before this
 * will compile. */
extern int node_features_p_node_update(char *active_features, bitstr_t

    return SLURM_SUCCESS;

/* Reload configuration */
extern int node_features_p_reconfig(void)

    return 1;

/* Determine if the specified user can modify the currently available node
 * features.
 * IN uid - user ID to check (not inspected)
 * RET always true: every user is permitted */
extern bool node_features_p_user_update(uid_t uid)
{
    return true;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.schedmd.com/pipermail/slurm-users/attachments/20180327/370d0fa8/attachment-0001.html>

More information about the slurm-users mailing list