[slurm-users] Core reserved/bound to a GPU
Manuel BERTRAND
Manuel.Bertrand at lis-lab.fr
Mon Aug 31 14:41:13 UTC 2020
Hi list,
I am totally new to Slurm and have just deployed a heterogeneous GPU/CPU
cluster by following the latest OpenHPC recipe on CentOS 8.2 (thanks
OpenHPC team for making those!)
Everything works great so far, but now I would like to bind a specific
core to each GPU on every node. By "bind" I mean making that core
unavailable to CPU-only jobs, so that the GPU stays usable whatever the
CPU workload on the node. I am asking because, as things stand, a
CPU-only user can monopolize a whole node and prevent a GPU user from
getting in: even when a GPU is free, there is no CPU left to drive it.
I am not sure what the best way to enforce this is. Hope this is clear :)
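To make the failure mode concrete, here is a simplified illustration
(the job scripts and sizes are made up, not taken from the real cluster):
a CPU-only user fills a GPU node with something like

    sbatch -p all -N 1 --exclusive cpu_job.sh

and a GPU user submitting afterwards with

    sbatch -p gpu --gres=gpu:1 -c 1 gpu_job.sh

stays pending for that node because no core is left, even though all
the GPUs on it are idle.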
Any help greatly appreciated!
Here are my gres.conf, cgroup.conf, and partition configuration, followed
by the output of 'scontrol show config':
########### gres.conf ############
NodeName=gpunode1 Name=gpu File=/dev/nvidia0
NodeName=gpunode1 Name=gpu File=/dev/nvidia1
NodeName=gpunode1 Name=gpu File=/dev/nvidia2
NodeName=gpunode1 Name=gpu File=/dev/nvidia3
NodeName=gpunode2 Name=gpu File=/dev/nvidia0
NodeName=gpunode2 Name=gpu File=/dev/nvidia1
NodeName=gpunode2 Name=gpu File=/dev/nvidia2
NodeName=gpunode3 Name=gpu File=/dev/nvidia0
NodeName=gpunode3 Name=gpu File=/dev/nvidia1
NodeName=gpunode3 Name=gpu File=/dev/nvidia2
NodeName=gpunode3 Name=gpu File=/dev/nvidia3
NodeName=gpunode3 Name=gpu File=/dev/nvidia4
NodeName=gpunode3 Name=gpu File=/dev/nvidia5
NodeName=gpunode3 Name=gpu File=/dev/nvidia6
NodeName=gpunode3 Name=gpu File=/dev/nvidia7
NodeName=gpunode4 Name=gpu File=/dev/nvidia0
NodeName=gpunode4 Name=gpu File=/dev/nvidia1
NodeName=gpunode5 Name=gpu File=/dev/nvidia0
NodeName=gpunode5 Name=gpu File=/dev/nvidia1
NodeName=gpunode5 Name=gpu File=/dev/nvidia2
NodeName=gpunode5 Name=gpu File=/dev/nvidia3
NodeName=gpunode5 Name=gpu File=/dev/nvidia4
NodeName=gpunode5 Name=gpu File=/dev/nvidia5
NodeName=gpunode6 Name=gpu File=/dev/nvidia0
NodeName=gpunode6 Name=gpu File=/dev/nvidia1
NodeName=gpunode6 Name=gpu File=/dev/nvidia2
NodeName=gpunode6 Name=gpu File=/dev/nvidia3
NodeName=gpunode7 Name=gpu File=/dev/nvidia0
NodeName=gpunode7 Name=gpu File=/dev/nvidia1
NodeName=gpunode7 Name=gpu File=/dev/nvidia2
NodeName=gpunode7 Name=gpu File=/dev/nvidia3
NodeName=gpunode8 Name=gpu File=/dev/nvidia0
NodeName=gpunode8 Name=gpu File=/dev/nvidia1
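(For reference: I noticed that gres.conf also accepts a Cores= field on
each line, but if I read the documentation correctly it only declares
which cores are local to each GPU so that tasks requesting the GPU get
bound near it; it does not keep CPU-only jobs off those cores. The core
IDs below are made up, not my real topology:

NodeName=gpunode1 Name=gpu File=/dev/nvidia0 Cores=0
NodeName=gpunode1 Name=gpu File=/dev/nvidia1 Cores=1

so I suspect this is not, by itself, what I am looking for.)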
########### cgroup.conf ############
CgroupAutomount=yes
TaskAffinity=no
ConstrainCores=yes
ConstrainRAMSpace=yes
ConstrainSwapSpace=yes
ConstrainKmemSpace=no
ConstrainDevices=yes
########### partitions configuration ###########
PartitionName=cpu Nodes=cpunode1,cpunode2,cpunode3,cpunode4,cpunode5
Default=NO DefaultTime=60 MaxTime=168:00:00 State=UP
PartitionName=gpu
Nodes=gpunode1,gpunode2,gpunode3,gpunode4,gpunode5,gpunode6,gpunode7,gpunode8
Default=NO DefaultTime=60 MaxTime=168:00:00 State=UP
PartitionName=all Nodes=ALL Default=YES DefaultTime=60 MaxTime=168:00:00
State=UP
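(One thing I was wondering about, without being sure it is the right
approach: the slurm.conf man page describes a per-partition
MaxCPUsPerNode option, apparently intended for exactly this kind of
CPU/GPU sharing. If, hypothetically, a GPU node had 32 cores and 8 GPUs,
something like

PartitionName=all Nodes=ALL Default=YES DefaultTime=60 MaxTime=168:00:00 MaxCPUsPerNode=24 State=UP

would leave 8 cores on that node that only the gpu partition can use.
But my nodes have different core and GPU counts, so a single value on
the partition does not obviously fit all of them.)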
########### Slurm configuration ###########
Configuration data as of 2020-08-31T16:23:54
AccountingStorageBackupHost = (null)
AccountingStorageEnforce = none
AccountingStorageHost = sms.mycluster
AccountingStorageLoc = N/A
AccountingStoragePort = 6819
AccountingStorageTRES = cpu,mem,energy,node,billing,fs/disk,vmem,pages
AccountingStorageType = accounting_storage/slurmdbd
AccountingStorageUser = N/A
AccountingStoreJobComment = No
AcctGatherEnergyType = acct_gather_energy/none
AcctGatherFilesystemType = acct_gather_filesystem/none
AcctGatherInterconnectType = acct_gather_interconnect/none
AcctGatherNodeFreq = 0 sec
AcctGatherProfileType = acct_gather_profile/none
AllowSpecResourcesUsage = No
AuthAltTypes = (null)
AuthInfo = (null)
AuthType = auth/munge
BatchStartTimeout = 10 sec
EpilogMsgTime = 2000 usec
EpilogSlurmctld = (null)
ExtSensorsType = ext_sensors/none
ExtSensorsFreq = 0 sec
FederationParameters = (null)
FirstJobId = 1
GetEnvTimeout = 2 sec
GresTypes = gpu
GpuFreqDef = high,memory=high
GroupUpdateForce = 1
GroupUpdateTime = 600 sec
HASH_VAL = Match
HealthCheckInterval = 300 sec
HealthCheckNodeState = ANY
HealthCheckProgram = /usr/sbin/nhc
InactiveLimit = 0 sec
JobAcctGatherFrequency = 30
JobAcctGatherType = jobacct_gather/none
JobAcctGatherParams = (null)
JobCompHost = localhost
JobCompLoc = /var/log/slurm_jobcomp.log
JobCompPort = 0
JobCompType = jobcomp/none
JobCompUser = root
JobContainerType = job_container/none
JobCredentialPrivateKey = (null)
JobCredentialPublicCertificate = (null)
JobDefaults = (null)
JobFileAppend = 0
JobRequeue = 1
JobSubmitPlugins = (null)
KeepAliveTime = SYSTEM_DEFAULT
KillOnBadExit = 0
KillWait = 30 sec
LaunchParameters = (null)
LaunchType = launch/slurm
Layouts =
Licenses = (null)
LogTimeFormat = iso8601_ms
MailDomain = (null)
MailProg = /usr/bin/mail
MaxArraySize = 1001
MaxDBDMsgs = 20052
MaxJobCount = 10000
MaxJobId = 67043328
MaxMemPerNode = UNLIMITED
MaxStepCount = 40000
PropagateResourceLimits = (null)
PropagateResourceLimitsExcept = MEMLOCK
RebootProgram = /sbin/reboot
ReconfigFlags = (null)
RequeueExit = (null)
RequeueExitHold = (null)
ResumeFailProgram = (null)
ResumeProgram = (null)
ResumeRate = 300 nodes/min
ResumeTimeout = 600 sec
ResvEpilog = (null)
ResvOverRun = 0 min
ResvProlog = (null)
ReturnToService = 2
RoutePlugin = route/default
SallocDefaultCommand = (null)
SbcastParameters = (null)
SchedulerParameters = (null)
SchedulerTimeSlice = 30 sec
SchedulerType = sched/backfill
SelectType = select/cons_tres
SelectTypeParameters = CR_CORE
SlurmUser = slurm(202)
SlurmctldAddr = (null)
SlurmctldDebug = debug2
SlurmctldHost[0] = sms.mycluster
SlurmctldLogFile = /var/log/slurmctld.log
SlurmctldPort = 6817
SlurmctldSyslogDebug = unknown
SlurmctldPrimaryOffProg = (null)
SlurmctldPrimaryOnProg = (null)
SlurmctldTimeout = 300 sec
SlurmctldParameters = enable_configless
SlurmdDebug = debug2
SlurmdLogFile = /var/log/slurmd.log
SlurmdParameters = (null)
SlurmdPidFile = /var/run/slurmd.pid
SlurmdPort = 6818
SlurmdSpoolDir = /var/spool/slurm/d
SlurmdSyslogDebug = unknown
SlurmdTimeout = 300 sec
SlurmdUser = root(0)
SlurmSchedLogFile = (null)
SlurmSchedLogLevel = 0
SlurmctldPidFile = /var/run/slurmctld.pid
SlurmctldPlugstack = (null)
SLURM_CONF = /etc/slurm/slurm.conf
SrunPortRange = 0-0
SrunProlog = (null)
StateSaveLocation = /var/spool/slurm/ctld
SuspendExcNodes = (null)
SuspendExcParts = (null)
SuspendProgram = (null)
SuspendRate = 60 nodes/min
SuspendTime = NONE
SuspendTimeout = 30 sec
SwitchType = switch/none
TaskEpilog = (null)
TaskPlugin = task/affinity,task/cgroup
TaskPluginParam = (null type)
TaskProlog = (null)
TCPTimeout = 2 sec
TmpFS = /scratch
TopologyParam = (null)
TopologyPlugin = topology/none
TrackWCKey = No
TreeWidth = 50
UsePam = No
UnkillableStepProgram = (null)
UnkillableStepTimeout = 60 sec
VSizeFactor = 0 percent
WaitTime = 0 sec
X11Parameters = (null)
Cgroup Support Configuration:
AllowedDevicesFile = /etc/slurm/cgroup_allowed_devices_file.conf
AllowedKmemSpace = (null)
AllowedRAMSpace = 100.0%
AllowedSwapSpace = 0.0%
CgroupAutomount = yes
CgroupMountpoint = /sys/fs/cgroup
ConstrainCores = yes
ConstrainDevices = yes
ConstrainKmemSpace = no
ConstrainRAMSpace = yes
ConstrainSwapSpace = yes
MaxKmemPercent = 100.0%
MaxRAMPercent = 100.0%
MaxSwapPercent = 100.0%
MemorySwappiness = (null)
MinKmemSpace = 30 MB
MinRAMSpace = 30 MB
TaskAffinity = no
Slurmctld(primary) at sms.mycluster is UP