[slurm-users] Oversubscribe partition not oversubscribing

Matthew Brown brownm12 at vt.edu
Wed Feb 5 18:51:31 UTC 2020


*I apologize if this shows up as a repost of my message from about a week
ago. I think I had not officially joined the list when I first posted and may
have sent it to the wrong email address.*

I'm trying to set up a small partition where oversubscription is allowed: I
want several jobs to be able to share the same cores simultaneously. The idea
is to support low-consumption interactive workloads in instructional settings
(e.g. students running Matlab during a class). I've followed all the
instructions I can find online and in the manuals for oversubscription, but
cores are still only being assigned to one job at a time.

Any suggestions?

Below, I've provided details about the configuration and what I'm seeing when
I try to make a job oversubscribe. In the example, I'm forcing the jobs onto
the same node and letting oversubscription follow from the FORCE:4 setting on
the partition, but I've tried every other combination I can think of. The
scheduler and slurmctld logs haven't shown anything that looked useful to me.
I went all the way to debug4 on the slurmctld and tried every debug flag that
seemed remotely useful. I've also restarted slurmctld and the nodes' slurmd,
but there was no change and no errors. It just won't oversubscribe.
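Concretely, the debugging and the extra attempts looked roughly like the
following (representative examples only; the exact debug flags and option
combinations varied):

    # raise slurmctld verbosity and enable scheduling-related debug flags
    scontrol setdebug debug4
    scontrol setdebugflags +SelectType
    scontrol setdebugflags +Reservation

    # restart the daemons (controller, then slurmd on dt045-dt047)
    systemctl restart slurmctld
    systemctl restart slurmd

    # one of the job-side variations: requesting oversubscription explicitly
    salloc --partition=interactive_q --nodelist=dt046 --reservation=INC0383888 --ntasks=24 --nodes=1 --account=arctest --oversubscribe srun sleep 600 &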

Thanks for looking - I'm hoping someone out there can help me out!

Matt



*#submit first job and it starts*
[dtlogin2 ~]$ salloc --partition=interactive_q --nodelist=dt046 --reservation=INC0383888 --ntasks=24 --nodes=1 --account=arctest srun sleep 600 &
[1] 5018
[dtlogin2 ~]$ salloc: Granted job allocation 221715
salloc: Waiting for resource configuration
salloc: Nodes dt046 are ready for job

*#submit second job aimed at the same cores as the first job; it stays queued
until the first job completes*
[dtlogin2 ~]$ salloc --partition=interactive_q --nodelist=dt046 --reservation=INC0383888 --ntasks=24 --nodes=1 --account=arctest srun sleep 600 &
[2] 5055
[dtlogin2 ~]$ salloc: Pending job allocation 221716
salloc: job 221716 queued and waiting for resources

[dtlogin2 ~]$ squeue --partition=interactive_q
             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
            221716 interacti     srun brownm12 PD       0:00      1 (Resources)
            221715 interacti     srun brownm12  R       0:23      1 dt046

*#job details while one is running and the other pending*
[dtlogin2 ~]$ scontrol show job 221716
JobId=221716 JobName=srun
   UserId=brownm12(1709627) GroupId=brownm12(1709627) MCS_label=N/A
   Priority=100 Nice=0 Account=arctest QOS=dt
   JobState=PENDING Reason=Resources Dependency=(null)
   Requeue=1 Restarts=0 BatchFlag=0 Reboot=0 ExitCode=0:0
   RunTime=00:00:00 TimeLimit=04:00:00 TimeMin=N/A
   SubmitTime=2020-01-30T15:53:05 EligibleTime=2020-01-30T15:53:05
   StartTime=2020-01-30T19:52:58 EndTime=2020-01-30T23:52:58 Deadline=N/A
   PreemptTime=None SuspendTime=None SecsPreSuspend=0
   LastSchedEval=2020-01-30T15:53:38
   Partition=interactive_q AllocNode:Sid=dtlogin2:2148
   ReqNodeList=dt046 ExcNodeList=(null)
   NodeList=(null) SchedNodeList=dt046
   NumNodes=1-1 NumCPUs=24 NumTasks=24 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
   TRES=cpu=24,mem=60000M,node=1
   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
   MinCPUsNode=1 MinMemoryCPU=2500M MinTmpDiskNode=0
   Features=(null) DelayBoot=00:00:00
   Gres=(null) Reservation=INC0383888
   OverSubscribe=YES Contiguous=0 Licenses=(null) Network=(null)
   Command=(null)
   WorkDir=/home/brownm12
   Power=

[dtlogin2 ~]$ scontrol show job 221715
JobId=221715 JobName=srun
   UserId=brownm12(1709627) GroupId=brownm12(1709627) MCS_label=N/A
   Priority=100 Nice=0 Account=arctest QOS=dt
   JobState=RUNNING Reason=None Dependency=(null)
   Requeue=1 Restarts=0 BatchFlag=0 Reboot=0 ExitCode=0:0
   RunTime=00:00:57 TimeLimit=04:00:00 TimeMin=N/A
   SubmitTime=2020-01-30T15:52:58 EligibleTime=2020-01-30T15:52:58
   StartTime=2020-01-30T15:52:58 EndTime=2020-01-30T19:52:58 Deadline=N/A
   PreemptTime=None SuspendTime=None SecsPreSuspend=0
   LastSchedEval=2020-01-30T15:52:58
   Partition=interactive_q AllocNode:Sid=dtlogin2:2148
   ReqNodeList=dt046 ExcNodeList=(null)
   NodeList=dt046
   BatchHost=dt046
   NumNodes=1 NumCPUs=24 NumTasks=24 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
   TRES=cpu=24,mem=60000M,node=1,billing=24
   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
   MinCPUsNode=1 MinMemoryCPU=2500M MinTmpDiskNode=0
   Features=(null) DelayBoot=00:00:00
   Gres=(null) Reservation=INC0383888
   OverSubscribe=YES Contiguous=0 Licenses=(null) Network=(null)
   Command=(null)
   WorkDir=/home/brownm12
   Power=

*#running config contains*
MaxTasksPerNode         = 512
SelectType              = select/cons_res
SelectTypeParameters    = CR_CORE_MEMORY
SLURM_VERSION           = 17.11.8
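(These lines were pulled from the running daemon's configuration; assuming
scontrol is the source, something like this reproduces them:)

    scontrol show config | grep -E 'MaxTasksPerNode|SelectType|SLURM_VERSION'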

*#partition config*
PartitionName=interactive_q
   AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL
   AllocNodes=ALL Default=NO QoS=dt_interactive_q
   DefaultTime=NONE DisableRootJobs=NO ExclusiveUser=NO GraceTime=0 Hidden=NO
   MaxNodes=4 MaxTime=04:00:00 MinNodes=1 LLN=NO MaxCPUsPerNode=UNLIMITED
   Nodes=dt04[5-7]
   PriorityJobFactor=40 PriorityTier=40 RootOnly=NO ReqResv=NO OverSubscribe=FORCE:4
   OverTimeLimit=NONE PreemptMode=OFF
   State=UP TotalCPUs=72 TotalNodes=3 SelectTypeParameters=NONE
   DefMemPerCPU=2500 MaxMemPerNode=UNLIMITED
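(The partition details above are scontrol output; to reproduce:)

    scontrol show partition interactive_q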

*#node config*
NodeName=dt045 Arch=x86_64 CoresPerSocket=12
   CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=(null)
   NodeAddr=dt045 NodeHostName=dt045 Version=17.11
   OS=Linux 3.10.0-693.21.1.el7.x86_64 #1 SMP Wed Mar 7 19:03:37 UTC 2018
   RealMemory=257357 AllocMem=0 FreeMem=244044 Sockets=2 Boards=1
   State=RESERVED ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=interactive_q
   BootTime=2019-08-15T17:15:54 SlurmdStartTime=2020-01-28T16:24:08
   CfgTRES=cpu=24,mem=257357M,billing=24
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=70 LowestJoules=2850 ConsumedJoules=12714880
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

[brownm12 at dtlogin2 ~]$ scontrol show nodes dt046
NodeName=dt046 Arch=x86_64 CoresPerSocket=12
   CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.02
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=(null)
   NodeAddr=dt046 NodeHostName=dt046 Version=17.11
   OS=Linux 3.10.0-693.21.1.el7.x86_64 #1 SMP Wed Mar 7 19:03:37 UTC 2018
   RealMemory=257357 AllocMem=0 FreeMem=250596 Sockets=2 Boards=1
   State=RESERVED ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=interactive_q
   BootTime=2020-01-27T14:03:46 SlurmdStartTime=2020-01-28T16:25:51
   CfgTRES=cpu=24,mem=257357M,billing=24
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=70 LowestJoules=2100 ConsumedJoules=12208440
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

[brownm12 at dtlogin2 ~]$ scontrol show nodes dt047
NodeName=dt047 Arch=x86_64 CoresPerSocket=12
   CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=(null)
   NodeAddr=dt047 NodeHostName=dt047 Version=17.11
   OS=Linux 3.10.0-693.21.1.el7.x86_64 #1 SMP Wed Mar 7 19:03:37 UTC 2018
   RealMemory=257357 AllocMem=0 FreeMem=242610 Sockets=2 Boards=1
   State=RESERVED ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=interactive_q
   BootTime=2019-07-05T16:20:41 SlurmdStartTime=2020-01-28T16:25:59
   CfgTRES=cpu=24,mem=257357M,billing=24
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=70 LowestJoules=2100 ConsumedJoules=12181165
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

*#effective qos limits*
[dtlogin2 ~]$ showqos
                Name   Flags   MaxTRESMins MaxJobsPA MaxJobsPU MaxSubmitPA MaxSubmitPU            MaxTRESPA                 MaxTRESPU
-------------------- ------- ------------- --------- --------- ----------- ----------- -------------------- -------------------------
    dt_interactive_q NoDecay                                10                                                 cpu=96,mem=250G,node=96
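(showqos is a local wrapper script on our system; the same limits should be
visible directly from the accounting database with something like the
following:)

    sacctmgr show qos where name=dt_interactive_q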

*#reservation being used to restrict access to the partition's nodes during
development*
[brownm12 at dtlogin2 ~]$ scontrol show res
ReservationName=INC0383888 StartTime=2019-09-12T01:02:15 EndTime=2020-09-11T01:02:15 Duration=365-00:00:00
   Nodes=dt[045-047] NodeCnt=3 CoreCnt=72 Features=(null) PartitionName=interactive_q Flags=OVERLAP,IGNORE_JOBS,SPEC_NODES
   TRES=cpu=72
   Users=(null) Accounts=test,redacted1,redacted2,redacted3 Licenses=(null) State=ACTIVE BurstBuffer=(null) Watts=n/a