[slurm-users] Core reserved/bound to a GPU
Manuel BERTRAND
Manuel.Bertrand at lis-lab.fr
Mon Aug 31 14:41:13 UTC 2020
Hi list,
I am totally new to Slurm and have just deployed a heterogeneous GPU/CPU
cluster by following the latest OpenHPC recipe on CentOS 8.2 (thanks
OpenHPC team for making those!)
Everything works great so far, but now I would like to bind a specific
core to each GPU on every node. By "bind" I mean making that core
unavailable to CPU-only jobs, so that the GPU stays usable whatever the
CPU workload on the node. I am asking because, as things stand, a
CPU-only user can monopolize a whole node and prevent a GPU user from
getting in: even when a GPU is free, there is no CPU left to drive it.
I am not sure what the best way to enforce this is. Hope this is clear :)
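To make the failure mode concrete, here is a simplified illustration
(the job scripts and sizes are made up, not taken from the real cluster):
a CPU-only user fills a GPU node with something like

    sbatch -p all -N 1 --exclusive cpu_job.sh

and a GPU user submitting afterwards with

    sbatch -p gpu --gres=gpu:1 -c 1 gpu_job.sh

stays pending for that node because no core is left, even though all
the GPUs on it are idle.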
Any help greatly appreciated!
Here are my gres.conf, cgroup.conf, and partition configuration, followed
by the output of 'scontrol show config':
########### gres.conf ############
NodeName=gpunode1 Name=gpu File=/dev/nvidia0
NodeName=gpunode1 Name=gpu File=/dev/nvidia1
NodeName=gpunode1 Name=gpu File=/dev/nvidia2
NodeName=gpunode1 Name=gpu File=/dev/nvidia3
NodeName=gpunode2 Name=gpu File=/dev/nvidia0
NodeName=gpunode2 Name=gpu File=/dev/nvidia1
NodeName=gpunode2 Name=gpu File=/dev/nvidia2
NodeName=gpunode3 Name=gpu File=/dev/nvidia0
NodeName=gpunode3 Name=gpu File=/dev/nvidia1
NodeName=gpunode3 Name=gpu File=/dev/nvidia2
NodeName=gpunode3 Name=gpu File=/dev/nvidia3
NodeName=gpunode3 Name=gpu File=/dev/nvidia4
NodeName=gpunode3 Name=gpu File=/dev/nvidia5
NodeName=gpunode3 Name=gpu File=/dev/nvidia6
NodeName=gpunode3 Name=gpu File=/dev/nvidia7
NodeName=gpunode4 Name=gpu File=/dev/nvidia0
NodeName=gpunode4 Name=gpu File=/dev/nvidia1
NodeName=gpunode5 Name=gpu File=/dev/nvidia0
NodeName=gpunode5 Name=gpu File=/dev/nvidia1
NodeName=gpunode5 Name=gpu File=/dev/nvidia2
NodeName=gpunode5 Name=gpu File=/dev/nvidia3
NodeName=gpunode5 Name=gpu File=/dev/nvidia4
NodeName=gpunode5 Name=gpu File=/dev/nvidia5
NodeName=gpunode6 Name=gpu File=/dev/nvidia0
NodeName=gpunode6 Name=gpu File=/dev/nvidia1
NodeName=gpunode6 Name=gpu File=/dev/nvidia2
NodeName=gpunode6 Name=gpu File=/dev/nvidia3
NodeName=gpunode7 Name=gpu File=/dev/nvidia0
NodeName=gpunode7 Name=gpu File=/dev/nvidia1
NodeName=gpunode7 Name=gpu File=/dev/nvidia2
NodeName=gpunode7 Name=gpu File=/dev/nvidia3
NodeName=gpunode8 Name=gpu File=/dev/nvidia0
NodeName=gpunode8 Name=gpu File=/dev/nvidia1
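(For reference: I noticed that gres.conf also accepts a Cores= field on
each line, but if I read the documentation correctly it only declares
which cores are local to each GPU so that tasks requesting the GPU get
bound near it; it does not keep CPU-only jobs off those cores. The core
IDs below are made up, not my real topology:

NodeName=gpunode1 Name=gpu File=/dev/nvidia0 Cores=0
NodeName=gpunode1 Name=gpu File=/dev/nvidia1 Cores=1

so I suspect this is not, by itself, what I am looking for.)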
########### cgroup.conf ############
CgroupAutomount=yes
TaskAffinity=no
ConstrainCores=yes
ConstrainRAMSpace=yes
ConstrainSwapSpace=yes
ConstrainKmemSpace=no
ConstrainDevices=yes
########### partitions configuration ###########
PartitionName=cpu Nodes=cpunode1,cpunode2,cpunode3,cpunode4,cpunode5
Default=NO DefaultTime=60 MaxTime=168:00:00 State=UP
PartitionName=gpu
Nodes=gpunode1,gpunode2,gpunode3,gpunode4,gpunode5,gpunode6,gpunode7,gpunode8
Default=NO DefaultTime=60 MaxTime=168:00:00 State=UP
PartitionName=all Nodes=ALL Default=YES DefaultTime=60 MaxTime=168:00:00
State=UP
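(One thing I was wondering about, without being sure it is the right
approach: the slurm.conf man page describes a per-partition
MaxCPUsPerNode option, apparently intended for exactly this kind of
CPU/GPU sharing. If, hypothetically, a GPU node had 32 cores and 8 GPUs,
something like

PartitionName=all Nodes=ALL Default=YES DefaultTime=60 MaxTime=168:00:00 MaxCPUsPerNode=24 State=UP

would leave 8 cores on that node that only the gpu partition can use.
But my nodes have different core and GPU counts, so a single value on
the partition does not obviously fit all of them.)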
########### Slurm configuration ###########
Configuration data as of 2020-08-31T16:23:54
AccountingStorageBackupHost = (null)
AccountingStorageEnforce = none
AccountingStorageHost = sms.mycluster
AccountingStorageLoc = N/A
AccountingStoragePort = 6819
AccountingStorageTRES = cpu,mem,energy,node,billing,fs/disk,vmem,pages
AccountingStorageType = accounting_storage/slurmdbd
AccountingStorageUser = N/A
AccountingStoreJobComment = No
AcctGatherEnergyType = acct_gather_energy/none
AcctGatherFilesystemType = acct_gather_filesystem/none
AcctGatherInterconnectType = acct_gather_interconnect/none
AcctGatherNodeFreq = 0 sec
AcctGatherProfileType = acct_gather_profile/none
AllowSpecResourcesUsage = No
AuthAltTypes = (null)
AuthInfo = (null)
AuthType = auth/munge
BatchStartTimeout = 10 sec
EpilogMsgTime = 2000 usec
EpilogSlurmctld = (null)
ExtSensorsType = ext_sensors/none
ExtSensorsFreq = 0 sec
FederationParameters = (null)
FirstJobId = 1
GetEnvTimeout = 2 sec
GresTypes = gpu
GpuFreqDef = high,memory=high
GroupUpdateForce = 1
GroupUpdateTime = 600 sec
HASH_VAL = Match
HealthCheckInterval = 300 sec
HealthCheckNodeState = ANY
HealthCheckProgram = /usr/sbin/nhc
InactiveLimit = 0 sec
JobAcctGatherFrequency = 30
JobAcctGatherType = jobacct_gather/none
JobAcctGatherParams = (null)
JobCompHost = localhost
JobCompLoc = /var/log/slurm_jobcomp.log
JobCompPort = 0
JobCompType = jobcomp/none
JobCompUser = root
JobContainerType = job_container/none
JobCredentialPrivateKey = (null)
JobCredentialPublicCertificate = (null)
JobDefaults = (null)
JobFileAppend = 0
JobRequeue = 1
JobSubmitPlugins = (null)
KeepAliveTime = SYSTEM_DEFAULT
KillOnBadExit = 0
KillWait = 30 sec
LaunchParameters = (null)
LaunchType = launch/slurm
Layouts =
Licenses = (null)
LogTimeFormat = iso8601_ms
MailDomain = (null)
MailProg = /usr/bin/mail
MaxArraySize = 1001
MaxDBDMsgs = 20052
MaxJobCount = 10000
MaxJobId = 67043328
MaxMemPerNode = UNLIMITED
MaxStepCount = 40000
PropagateResourceLimits = (null)
PropagateResourceLimitsExcept = MEMLOCK
RebootProgram = /sbin/reboot
ReconfigFlags = (null)
RequeueExit = (null)
RequeueExitHold = (null)
ResumeFailProgram = (null)
ResumeProgram = (null)
ResumeRate = 300 nodes/min
ResumeTimeout = 600 sec
ResvEpilog = (null)
ResvOverRun = 0 min
ResvProlog = (null)
ReturnToService = 2
RoutePlugin = route/default
SallocDefaultCommand = (null)
SbcastParameters = (null)
SchedulerParameters = (null)
SchedulerTimeSlice = 30 sec
SchedulerType = sched/backfill
SelectType = select/cons_tres
SelectTypeParameters = CR_CORE
SlurmUser = slurm(202)
SlurmctldAddr = (null)
SlurmctldDebug = debug2
SlurmctldHost[0] = sms.mycluster
SlurmctldLogFile = /var/log/slurmctld.log
SlurmctldPort = 6817
SlurmctldSyslogDebug = unknown
SlurmctldPrimaryOffProg = (null)
SlurmctldPrimaryOnProg = (null)
SlurmctldTimeout = 300 sec
SlurmctldParameters = enable_configless
SlurmdDebug = debug2
SlurmdLogFile = /var/log/slurmd.log
SlurmdParameters = (null)
SlurmdPidFile = /var/run/slurmd.pid
SlurmdPort = 6818
SlurmdSpoolDir = /var/spool/slurm/d
SlurmdSyslogDebug = unknown
SlurmdTimeout = 300 sec
SlurmdUser = root(0)
SlurmSchedLogFile = (null)
SlurmSchedLogLevel = 0
SlurmctldPidFile = /var/run/slurmctld.pid
SlurmctldPlugstack = (null)
SLURM_CONF = /etc/slurm/slurm.conf
SrunPortRange = 0-0
SrunProlog = (null)
StateSaveLocation = /var/spool/slurm/ctld
SuspendExcNodes = (null)
SuspendExcParts = (null)
SuspendProgram = (null)
SuspendRate = 60 nodes/min
SuspendTime = NONE
SuspendTimeout = 30 sec
SwitchType = switch/none
TaskEpilog = (null)
TaskPlugin = task/affinity,task/cgroup
TaskPluginParam = (null type)
TaskProlog = (null)
TCPTimeout = 2 sec
TmpFS = /scratch
TopologyParam = (null)
TopologyPlugin = topology/none
TrackWCKey = No
TreeWidth = 50
UsePam = No
UnkillableStepProgram = (null)
UnkillableStepTimeout = 60 sec
VSizeFactor = 0 percent
WaitTime = 0 sec
X11Parameters = (null)
Cgroup Support Configuration:
AllowedDevicesFile = /etc/slurm/cgroup_allowed_devices_file.conf
AllowedKmemSpace = (null)
AllowedRAMSpace = 100.0%
AllowedSwapSpace = 0.0%
CgroupAutomount = yes
CgroupMountpoint = /sys/fs/cgroup
ConstrainCores = yes
ConstrainDevices = yes
ConstrainKmemSpace = no
ConstrainRAMSpace = yes
ConstrainSwapSpace = yes
MaxKmemPercent = 100.0%
MaxRAMPercent = 100.0%
MaxSwapPercent = 100.0%
MemorySwappiness = (null)
MinKmemSpace = 30 MB
MinRAMSpace = 30 MB
TaskAffinity = no
Slurmctld(primary) at sms.mycluster is UP