Hi,

 

I want to run a simple test that uses one node and four cores. In my script I execute a binary four times, and each instance reports which core it is running on. These are my files:

#!/bin/bash

 

#SBATCH --job-name=test_jobs     # Job name

#SBATCH --output=test_job_%j.out    # Output file (%j expands to the job ID)

#SBATCH --error=test_job_%j.err     # Error file (%j expands to the job ID)

#SBATCH --ntasks=4

#SBATCH --ntasks-per-core=1

#SBATCH --cpus-per-task=1             # Number of CPU cores per task

 

echo "START : $(date)"

 

# Launch four single-task, single-core steps in the background
srun -n1 -c1 check_cores >> output_small.txt &

srun -n1 -c1 check_cores >> output_small.txt &

srun -n1 -c1 check_cores >> output_small.txt &

srun -n1 -c1 check_cores >> output_small.txt &

 

# Wait for all jobs to finish before exiting the job submission script

wait

 

echo "END : $(date)"

 

#include <stdio.h>

#include <unistd.h>

 

struct pstat

{

  int  pid;       //process id

  char tcomm[256];//filename of the executable

  char state[2];  //state (R is running, S is sleeping, D is sleeping in an

                  //uninterruptible wait, Z is zombie, T is traced or stopped)

  int ppid;//          process id of the parent process

  int pgrp;//          pgrp of the process

  int sid;//           session id

  int tty_nr;//        tty the process uses

  int tty_pgrp;//      pgrp of the tty

  int flags;//         task flags

  int min_flt;//       number of minor faults

  int cmin_flt;//      number of minor faults with child's

  int maj_flt;//       number of major faults

  int cmaj_flt;//      number of major faults with child's

  int utime;//         user mode jiffies

  int stime;//         kernel mode jiffies

  int cutime;//        user mode jiffies with child's

  int cstime;//        kernel mode jiffies with child's

  int priority;//      priority level

  int nice;//          nice level

  int num_threads;//   number of threads

  int it_real_value;//  (obsolete, always 0)

  int start_time;//    time the process started after system boot

  int vsize;//         virtual memory size

  int rss;//           resident set memory size

  int rsslim;//        current limit in bytes on the rss

  int start_code;//    address above which program text can run

  int end_code;//      address below which program text can run

  int start_stack;//   address of the start of the stack

  int esp;//           current value of ESP

  int eip;//           current value of EIP

  int pending;//       bitmap of pending signals

  int blocked;//       bitmap of blocked signals

  int sigign;//        bitmap of ignored signals

  int sigcatch;//      bitmap of catched signals

  int wchan;//         address where process went to sleep

  int i0;//             (place holder)

  int i1;//             (place holder)

  int exit_signal;//   signal to send to parent thread on exit

  int task_cpu;//      which CPU the task is scheduled on

  int rt_priority;//   realtime priority

  int policy;//        scheduling policy (man sched_setscheduler)

  int blkio_ticks;//   time spent waiting for block IO

  int gtime;//         guest time of the task in jiffies

  int cgtime;//        guest time of the task children in jiffies

} p ;

 

int main()

{

    char hostname[1024];

    FILE* f = fopen("/proc/self/stat", "r");

    if (f == NULL)

        return 1;   /* could not open /proc/self/stat */

 

    gethostname(hostname, sizeof(hostname));

 

    fscanf(f, "%d %255s %1s %d%d%d%d%d%d %d%d%d%d%d%d%d%d %d%d%d%d%d%d%d %d%d%d%d%d%d%d%d %d%d%d%d%d%d%d%d%d %d%d%d",

           &p.pid, p.tcomm, p.state, &p.ppid, &p.pgrp, &p.sid, &p.tty_nr, &p.tty_pgrp, &p.flags,

           &p.min_flt, &p.cmin_flt, &p.maj_flt, &p.cmaj_flt, &p.utime, &p.stime, &p.cutime, &p.cstime,

           &p.priority, &p.nice, &p.num_threads, &p.it_real_value, &p.start_time, &p.vsize, &p.rss,

           &p.rsslim, &p.start_code, &p.end_code, &p.start_stack, &p.esp, &p.eip, &p.pending, &p.blocked,

           &p.sigign, &p.sigcatch, &p.wchan, &p.i0, &p.i1, &p.exit_signal, &p.task_cpu, &p.rt_priority, &p.policy,

           &p.blkio_ticks, &p.gtime, &p.cgtime);

    fclose(f);

    sleep(15);   /* keep the task alive so the four srun steps overlap */

    printf("HOST %s\n", hostname);

    printf("CPU %d\n", p.task_cpu);

return 0;

}
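
(I compile this into the check_cores binary with gcc. For what it's worth, the current CPU could also be read more directly with glibc's sched_getcpu(); a minimal sketch of that variant, just for reference and not what I used in the runs below, would be:)

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    char hostname[1024];

    gethostname(hostname, sizeof(hostname));
    sleep(15);                            /* keep the task alive so the four steps overlap */
    printf("HOST %s\n", hostname);
    printf("CPU %d\n", sched_getcpu());   /* CPU this task is currently running on */
    return 0;
}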

 

When I run this script, I expect to receive output like this:

HOST clus04.hpc.local

CPU 0

HOST clus04.hpc.local

CPU 7

HOST clus04.hpc.local

CPU 1

HOST clus04.hpc.local

CPU 6

because the system has scheduled my job on node “clus04.hpc.local” (a node with two 6-core processors) and has reserved cores 0-1 of each processor.

 

However, the output I actually receive is:

HOST clus04.hpc.local

CPU 1

HOST clus04.hpc.local

CPU 1

HOST clus04.hpc.local

CPU 1

HOST clus04.hpc.local

CPU 6

(and other outputs in which one CPU is repeated), so it seems Slurm is correctly reserving four cores, but two or more of the processes then run on the same core.

 

The slurmctld.log file on the server shows this information:

[...]

[2024-01-25T09:38:42.867] DEBUG: JobId=206129 node clus04 vpus 1 cpus 4

[2024-01-25T09:38:42.867] ====================

[2024-01-25T09:38:42.867] JobId=206129 nhosts:1 ncpus:4 node_req:1 nodes=clus04

[2024-01-25T09:38:42.867] Node[0]:

[2024-01-25T09:38:42.867]   Mem(MB):0:0  Sockets:2  Cores:6  CPUs:4:0

[2024-01-25T09:38:42.867]   Socket[0] Core[0] is allocated

[2024-01-25T09:38:42.867]   Socket[0] Core[1] is allocated

[2024-01-25T09:38:42.867]   Socket[1] Core[0] is allocated

[2024-01-25T09:38:42.867]   Socket[1] Core[1] is allocated

[2024-01-25T09:38:42.867] --------------------

[2024-01-25T09:38:42.867] cpu_array_value[0]:4 reps:1

[2024-01-25T09:38:42.867] ====================

[2024-01-25T09:38:42.867] DEBUG: Dump job_resources: nhosts 1 cb 0-1,6-7

[...]

[2024-01-25T09:38:58.213] _job_complete: JobId=206129 WEXITSTATUS 0

[2024-01-25T09:38:58.213] deallocate_nodes:  JobId=206129

[2024-01-25T09:38:58.213] DEBUG: Dump job_resources: nhosts 1 cb 0-1,6-7

[2024-01-25T09:38:58.213] _job_complete: JobId=206129 done

 

And the slurmd.log file on node clus04 shows this information:

[...]

[2024-01-25T09:38:30.685] [206129.batch] debug:  task/cgroup: job abstract cores are '0-1,6-7'

[2024-01-25T09:38:30.685] [206129.batch] debug:  task/cgroup: step abstract cores are '0-1,6-7'

[2024-01-25T09:38:30.685] [206129.batch] debug:  task/cgroup: job physical cores are '0-1,6-7'

[2024-01-25T09:38:30.685] [206129.batch] debug:  task/cgroup: step physical cores are '0-1,6-7'

[...]

[2024-01-25T09:38:30.883] [206129.1] debug:  task/cgroup: job abstract cores are '0-1,6-7'

[2024-01-25T09:38:30.883] [206129.1] debug:  task/cgroup: step abstract cores are '0-1,6-7'

[2024-01-25T09:38:30.883] [206129.1] debug:  task/cgroup: job physical cores are '0-1,6-7'

[2024-01-25T09:38:30.883] [206129.1] debug:  task/cgroup: step physical cores are '0-1,6-7'

[...]

[2024-01-25T09:38:30.911] [206129.0] debug:  task/cgroup: job abstract cores are '0-1,6-7'

[2024-01-25T09:38:30.911] [206129.0] debug:  task/cgroup: step abstract cores are '0-1,6-7'

[2024-01-25T09:38:30.911] [206129.0] debug:  task/cgroup: job physical cores are '0-1,6-7'

[2024-01-25T09:38:30.911] [206129.0] debug:  task/cgroup: step physical cores are '0-1,6-7'

[...]

[2024-01-25T09:38:30.912] [206129.2] debug:  task/cgroup: job abstract cores are '0-1,6-7'

[2024-01-25T09:38:30.912] [206129.2] debug:  task/cgroup: step abstract cores are '0-1,6-7'

[2024-01-25T09:38:30.912] [206129.2] debug:  task/cgroup: job physical cores are '0-1,6-7'

[2024-01-25T09:38:30.912] [206129.2] debug:  task/cgroup: step physical cores are '0-1,6-7'

[...]

[2024-01-25T09:38:30.922] [206129.3] debug:  task/cgroup: job abstract cores are '0-1,6-7'

[2024-01-25T09:38:30.922] [206129.3] debug:  task/cgroup: step abstract cores are '0-1,6-7'

[2024-01-25T09:38:30.922] [206129.3] debug:  task/cgroup: job physical cores are '0-1,6-7'

[2024-01-25T09:38:30.922] [206129.3] debug:  task/cgroup: step physical cores are '0-1,6-7'

 

It seems that Slurm is correctly reserving four cores for my job, but when I run four “srun” commands inside the script, each srun is free to run on any of the four reserved cores rather than being pinned to exactly one core per task.
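
To double-check this, I suppose each task could print its CPU affinity mask instead of only the CPU it is currently on; a minimal sketch using glibc's sched_getaffinity() (my assumption of how to verify the binding, not something from my current setup) would be:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
    cpu_set_t mask;

    /* Query the affinity mask of the calling task (pid 0 means "self"). */
    if (sched_getaffinity(0, sizeof(mask), &mask) != 0) {
        perror("sched_getaffinity");
        return 1;
    }

    printf("allowed CPUs:");
    for (int cpu = 0; cpu < CPU_SETSIZE; cpu++)
        if (CPU_ISSET(cpu, &mask))
            printf(" %d", cpu);
    printf("\n");

    return 0;
}

If each of the four srun steps reported all four cores (0, 1, 6, 7) instead of one distinct core each, that would be consistent with the behaviour above.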

 

What am I doing wrong?

 

Thanks a lot!