[slurm-users] slurm 17 node feature Failed to reboot nodes
Tueur Volvo
huitre39 at gmail.com
Tue Mar 27 06:39:06 MDT 2018
Hello, I migrated from Slurm 16 to Slurm 17.11.4,
but my node feature plugin no longer works: I apply my feature and reboot, but once
the machine has rebooted, Slurm puts my node into the drain state.
With sinfo -r I get this error message:
Failed to reboot nodes machine415 into expected state for job 945
My problem is located in the Slurm source, in this file:
slurm-17.11.4/src/slurmctld/job_scheduler.c
line 4222
static void *_wait_boot(void *arg)
{
if (boot_node_bitmap && bit_set_count(boot_node_bitmap)) {
char *node_list = bitmap2node_name(boot_node_bitmap);
error("Failed to reboot nodes %s into expected state for job %u",
node_list, job_ptr->job_id);
(void) drain_nodes(node_list, "Node mode change failure",
getuid());
xfree(node_list);
(void) job_requeue(getuid(), job_ptr->job_id, NULL, false, 0);
}
If I comment out this function, my node feature plugin works!
But I want to solve my problem by modifying my plugin code,
and I don't know what to change in my code.
What should I save in a variable in my code? With xmalloc or xrealloc?
I am sharing my basic source code. I think that I should save active_features when
the function node_features_p_node_set is called, but how can I save this
variable? I tried to save *active_features into char **current_mode, but I
get the same error.
Thanks in advance for your help.
#include "config.h"
#define _GNU_SOURCE /* For POLLRDHUP */
#include <ctype.h>
#include <poll.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#if defined(__FreeBSD__) || defined(__NetBSD__)
#define POLLRDHUP POLLHUP
#endif
#include "slurm/slurm.h"
#include "src/common/assoc_mgr.h"
#include "src/common/bitstring.h"
#include "src/common/fd.h"
#include "src/common/gres.h"
#include "src/common/list.h"
#include "src/common/macros.h"
#include "src/common/pack.h"
#include "src/common/parse_config.h"
#include "src/common/slurm_protocol_api.h"
//#include "src/common/slurm_strcasestr.h"
#include "src/common/timers.h"
#include "src/common/uid.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/slurmctld/job_scheduler.h"
#include "src/slurmctld/locks.h"
#include "src/slurmctld/node_scheduler.h"
#include "src/slurmctld/reservation.h"
#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/state_save.h"
/*
 * Identification symbols exported by this plugin: a human-readable name,
 * the plugin type string, and the Slurm version it was compiled against.
 * NOTE(review): presumably read by Slurm's plugin loader - confirm against
 * the plugin API documentation.
 */
const char     plugin_name[]  = "node_features update_linux plugin";
const char     plugin_type[]  = "node_features/update_linux";
const uint32_t plugin_version = SLURM_VERSION_NUMBER;
/*
 * Report this plugin's configuration.
 * The plugin has no configuration to report, so this only signals success.
 * RET 0 (the value slurm/slurm_errno.h assigns to SLURM_SUCCESS; the literal
 *     is used so that this stub has no dependency on any header)
 */
extern int node_features_p_get_config() {
	/* A function declared to return int must return a value: using the
	 * result of a call that fell off the end is undefined behavior. */
	return 0;
}
/*
 * Decide whether a node update request is acceptable to this plugin.
 * IN node_ptr - unused
 * IN update_node_msg - unused
 * RET always true: no validation is performed
 */
extern bool node_features_p_node_update_valid(void *node_ptr,
					      update_node_msg_t *update_node_msg)
{
	const bool update_ok = true;

	return update_ok;
}
/*
 * Report the estimated node boot time.
 * RET 6 minutes, computed as 6 * 60
 */
extern uint32_t node_features_p_boot_time(void)
{
	enum { BOOT_MINUTES = 6, PER_MINUTE = 60 };

	return (uint32_t) (BOOT_MINUTES * PER_MINUTE);
}
/*
 * Report whether a feature can be changed by this plugin.
 * IN feature - feature name (not inspected)
 * RET always true
 */
extern bool node_features_p_changible_feature(char *feature)
{
	(void) feature;	/* the answer does not depend on the feature name */

	return true;
}
/*
 * Report whether a feature can be changed by this plugin.
 * IN feature - feature name (not inspected)
 * RET always true
 */
extern bool node_features_p_changeable_feature(char *feature)
{
	(void) feature;	/* the answer does not depend on the feature name */

	return true;
}
/*
 * Step configuration hook.
 * IN mem_sort - unused
 * IN numa_bitmap - unused
 * No-op: this plugin does nothing at this point.
 */
extern void node_features_p_step_config(bool mem_sort, bitstr_t *numa_bitmap)
{
	(void) mem_sort;
	(void) numa_bitmap;
}
/*
 * Translate a node's new feature specification.
 * This plugin performs no translation: the features are passed through.
 * IN new_features - feature string to translate (may be NULL)
 * RET copy of new_features, or NULL if new_features is NULL
 * NOTE: Slurm's node_features API documents the returned string as one the
 * caller releases with xfree(). Handing back the caller's own pointer would
 * leave it freed twice or dangling, so a private copy is returned instead.
 */
extern char *node_features_p_node_xlate2(char *new_features)
{
	return xstrdup(new_features);
}
/* Load configuration */
extern int init(void)
{
debug("init_new
plugin*********************************************************************************");
int rc = SLURM_SUCCESS;
return rc;
}
/*
 * Plugin teardown hook: emits a debug trace and releases nothing.
 * RET SLURM_SUCCESS
 */
extern int fini(void)
{
	int rc = SLURM_SUCCESS;

	debug("fini_*********************************************************************************");

	return rc;
}
/*
 * Node state query hook.
 * IN node_list - unused
 * RET SLURM_SUCCESS; only a debug trace is emitted
 */
extern int node_features_p_get_node(char *node_list)
{
	int rc = SLURM_SUCCESS;

	(void) node_list;
	debug("node_features_p_get_node_*********************************************************************************");

	return rc;
}
/*
 * Translate a node's feature specification.
 * This plugin ignores new_features and orig_features and reports the
 * available features as the result.
 * IN new_features - unused
 * IN orig_features - unused
 * IN avail_features - available features (may be NULL)
 * RET copy of avail_features, or NULL if avail_features is NULL
 * NOTE: Slurm's node_features API documents the returned string as one the
 * caller releases with xfree(). Handing back the caller's own pointer would
 * leave it freed twice or dangling, so a private copy is returned instead.
 */
extern char *node_features_p_node_xlate(char *new_features,
					char *orig_features,
					char *avail_features)
{
	return xstrdup(avail_features);
}
/*
 * Node state reporting hook.
 * IN/OUT avail_modes - left untouched
 * IN/OUT current_mode - left untouched
 * Only a debug trace is emitted; neither argument is read or written.
 */
extern void node_features_p_node_state(char **avail_modes,
				       char **current_mode)
{
	(void) avail_modes;
	(void) current_mode;

	debug("node_features_p_node_state_*********************************************************************************");
}
/*
 * Translate a job's feature request to the node features needed at boot
 * time. This plugin performs no translation.
 * IN job_features - the job's feature request (may be NULL)
 * RET copy of job_features, or NULL if job_features is NULL
 * NOTE: Slurm's node_features API documents the returned string as one the
 * caller releases with xfree(). Handing back the caller's own pointer would
 * leave it freed twice or dangling, so a private copy is returned instead.
 */
extern char *node_features_p_job_xlate(char *job_features)
{
	debug("node_features_p_job_xlate_*********************************************************************************");

	return xstrdup(job_features);
}
/*
 * Test if a job's feature specification is valid.
 * IN job_features - unused
 * RET SLURM_SUCCESS: every specification is accepted
 */
extern int node_features_p_job_valid(char *job_features)
{
	int rc = SLURM_SUCCESS;

	(void) job_features;
	debug("node_features_p_job_valid_*********************************************************************************");

	return rc;
}
/*
 * Decide whether a node update request is acceptable.
 * IN node_ptr - unused
 * IN update_node_msg - unused
 * RET always true
 * NOTE(review): this is the only function in the file with a "_g_" prefix
 * instead of "_p_" - confirm it is intended and does not clash with a symbol
 * of the same name defined elsewhere.
 */
bool node_features_g_node_update_valid(void *node_ptr,
				       update_node_msg_t *update_node_msg)
{
	const bool update_ok = true;

	return update_ok;
}
/*
 * Sets the node's active features based upon job constraints.
 * NOTE: Executed by the slurmd daemon.
 * IN/OUT active_features - New active features; truncated to an empty
 *	string in place. A NULL pointer is tolerated and left alone.
 * RET error code (always SLURM_SUCCESS)
 * NOTE: nothing is applied or remembered here - the requested feature
 * string is only blanked.
 */
extern int node_features_p_node_set(char *active_features)
{
	debug("node_features_p_node_set_*********************************************************************************");

	/* Guard the write: dereferencing a NULL feature string would crash */
	if (active_features) {
		active_features[0] = '\0';
	}

	return SLURM_SUCCESS;
}
/*
 * Report whether the plugin requires PowerSave mode for booting nodes.
 * RET always false
 */
extern bool node_features_p_node_power(void)
{
	const bool needs_power_save = false;

	debug("node_features_p_node_power_*********************************************************************************");

	return needs_power_save;
}
/*
 * Node feature update hook.
 * IN active_features - unused
 * IN node_bitmap - unused
 * RET SLURM_SUCCESS; only a debug trace is emitted
 */
extern int node_features_p_node_update(char *active_features,
				       bitstr_t *node_bitmap)
{
	int rc = SLURM_SUCCESS;

	(void) active_features;
	(void) node_bitmap;
	debug("_node_features_p_node_update********************************************************************************");

	return rc;
}
/*
 * Reload configuration.
 * There is nothing to reload: this only emits a debug trace.
 * RET 0 (the value slurm/slurm_errno.h assigns to SLURM_SUCCESS). The
 *     previous return value of 1 matched neither the success code nor what
 *     the other hooks in this file return, so a no-op reload was reported
 *     as a non-success result.
 */
extern int node_features_p_reconfig(void)
{
	debug("_node_features_p_reconfig********************************************************************************");

	return 0;
}
/*
 * Determine if the specified user can modify the currently available node
 * features.
 * IN uid - user to test (not inspected)
 * RET always true: every user is allowed
 */
extern bool node_features_p_user_update(uid_t uid)
{
	const bool allowed = true;

	(void) uid;
	debug("_node_features_p_user_update********************************************************************************");

	return allowed;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.schedmd.com/pipermail/slurm-users/attachments/20180327/370d0fa8/attachment-0001.html>
More information about the slurm-users
mailing list