[slurm-users] not allocating jobs even resources are free
navin srivastava
navin.altair at gmail.com
Fri Apr 24 17:44:45 UTC 2020
Hi Team,
We are facing an issue in our environment: resources are free, but jobs are sitting in the queue (PD state) and not running.
I have attached the slurm.conf file here.
Scenario:
There are pending jobs in only two partitions:
344 jobs are in PD state in the normal partition; the nodes belonging to the normal partition are full, so no more jobs can run there.
1300 jobs in the GPUsmall partition are queued, and enough CPUs are available to run them, but the jobs are not being scheduled on the free nodes.
There are no pending jobs in any other partition.
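For reference, the pending jobs in GPUsmall and the reason the scheduler reports for them can be listed with something like the following (the format string is just an example):

# pending jobs in the GPUsmall partition; the last column (%R) is the pending reason
squeue -p GPUsmall -t PD -o "%.18i %.9P %.8u %.2t %.10M %.6D %R"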
For example, here is the status of node18:
NodeName=node18 Arch=x86_64 CoresPerSocket=18
CPUAlloc=6 CPUErr=0 CPUTot=36 CPULoad=4.07
AvailableFeatures=K2200
ActiveFeatures=K2200
Gres=gpu:2
NodeAddr=node18 NodeHostName=node18 Version=17.11
OS=Linux 4.4.140-94.42-default #1 SMP Tue Jul 17 07:44:50 UTC 2018 (0b375e4)
RealMemory=1 AllocMem=0 FreeMem=79532 Sockets=2 Boards=1
State=MIXED ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
Partitions=GPUsmall,pm_shared
BootTime=2019-12-10T14:16:37 SlurmdStartTime=2019-12-10T14:24:08
CfgTRES=cpu=36,mem=1M,billing=36
AllocTRES=cpu=6
CapWatts=n/a
CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s
And node19:
NodeName=node19 Arch=x86_64 CoresPerSocket=18
CPUAlloc=16 CPUErr=0 CPUTot=36 CPULoad=15.43
AvailableFeatures=K2200
ActiveFeatures=K2200
Gres=gpu:2
NodeAddr=node19 NodeHostName=node19 Version=17.11
OS=Linux 4.12.14-94.41-default #1 SMP Wed Oct 31 12:25:04 UTC 2018 (3090901)
RealMemory=1 AllocMem=0 FreeMem=63998 Sockets=2 Boards=1
State=MIXED ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
Partitions=GPUsmall,pm_shared
BootTime=2020-03-12T06:51:54 SlurmdStartTime=2020-03-12T06:53:14
CfgTRES=cpu=36,mem=1M,billing=36
AllocTRES=cpu=16
CapWatts=n/a
CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s
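The node details above are scontrol output and can be reproduced with, for example:

scontrol show node node18
scontrol show node node19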
Could you please help me understand what the reason could be?
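If it helps, the pending reason for an individual job can also be checked with, for example (the job ID is a placeholder):

scontrol show job <jobid> | grep -i reason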
-------------- next part --------------
cat /etc/slurm/slurm.conf
# slurm.conf file generated by configurator.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
#Running_config_start
#ControlMachine=node0
ControlMachine=slurmmaster
ControlAddr=192.168.150.21
AuthType=auth/munge
CryptoType=crypto/munge
CacheGroups=1
ReturnToService=0
ProctrackType=proctrack/linuxproc
SlurmctldPort=6817
SlurmdPort=6818
SchedulerPort=7321
SlurmctldPidFile=/var/slurm/slurmctld.pid
SlurmdPidFile=/var/slurm/slurmd.pid
SlurmdSpoolDir=/var/slurm/spool/slurmd.%n.spool
StateSaveLocation=/var/slurm/state
SlurmctldLogFile=/var/slurm/log/slurmctld.log
SlurmdLogFile=/var/slurm/log/slurmd.%n.log.%h
SlurmUser=hpcadmin
MpiDefault=none
SwitchType=switch/none
TaskPlugin=task/affinity
TaskPluginParam=Sched
SlurmctldTimeout=120
SlurmdTimeout=300
InactiveLimit=0
KillWait=30
MinJobAge=3600
FastSchedule=1
SchedulerType=sched/builtin
#SchedulerParameters=enable_user_top
SelectType=select/cons_res
#SelectTypeParameters=CR_Core_Memory
SelectTypeParameters=CR_Core
AccountingStorageEnforce=associations
AccountingStorageHost=155.250.126.30
AccountingStorageType=accounting_storage/slurmdbd
#AccountingStoreJobComment=YES
ClusterName=merckhpc
JobCompType=jobcomp/slurmdbd
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/linux
SlurmctldDebug=5
SlurmdDebug=5
Waittime=0
#Running_config_end
#ControlAddr=
#BackupController=
#BackupAddr=
#
#CheckpointType=checkpoint/none
#DisableRootJobs=NO
#EnforcePartLimits=NO
Epilog=/etc/slurm/slurm.epilog.clean
#EpilogSlurmctld=
#FirstJobId=1
#MaxJobId=999999
GresTypes=gpu
#GroupUpdateForce=0
#GroupUpdateTime=600
#JobCheckpointDir=/var/slurm/checkpoint
#JobCredentialPrivateKey=
#JobCredentialPublicCertificate=
#JobFileAppend=0
#JobRequeue=1
#JobSubmitPlugins=1
#KillOnBadExit=0
#Licenses=foo*4,bar
#MailProg=/bin/mail
#MaxJobCount=5000
MaxJobCount=5000000
#MaxStepCount=40000
#MaxTasksPerNode=128
#MpiParams=ports=#-#
#PluginDir=
#PlugStackConfig=
#PrivateData=jobs
#Prolog=
#PrologSlurmctld=
#PropagatePrioProcess=0
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#SallocDefaultCommand=
#SrunEpilog=
#SrunProlog=
#TaskEpilog=
#TaskProlog=
#TopologyPlugin=topology/tree
#TmpFs=/tmp
#TrackWCKey=no
#TreeWidth=
#UnkillableStepProgram=
#UsePAM=0
#UsePAM=0
#
#
# TIMERS
#BatchStartTimeout=10
#CompleteWait=0
#EpilogMsgTime=2000
#GetEnvTimeout=2
#HealthCheckInterval=0
#HealthCheckProgram=
MessageTimeout=100
#ResvOverRun=0
#OverTimeLimit=0
#UnkillableStepTimeout=60
#VSizeFactor=0
SchedulerParameters=enable_user_top,default_queue_depth=1000000
#
#
# SCHEDULING
#DefMemPerCPU=0
#MaxMemPerCPU=0
#SchedulerRootFilter=1
#SchedulerTimeSlice=30
#
#
# JOB PRIORITY
PriorityType=priority/multifactor
#PriortyFlags=Ticket_Based
#PriorityDecayHalfLife=1-0
PriorityDecayHalfLife=2
#PriorityCalcPeriod=
#PriorityFavorSmall=YES
#PriorityMaxAge=7-0
PriorityUsageResetPeriod=DAILY
#PriorityWeightAge=1000
PriorityWeightFairshare=500000
#PriorityWeightJobSize=1000
#PriorityWeightPartition=1000
#PriorityWeightQOS=
PriorityFlags=FAIR_TREE
#
#
# LOGGING AND ACCOUNTING
#AccountingStorageHost=
#AccountingStorageLoc=
#AccountingStoragePass=
#AccountingStoragePort=
#AccountingStorageUser=
#DebugFlags=
#JobCompHost=
#JobCompLoc=
#JobCompPass=
#JobCompPort=
#JobCompUser=
#SlurmSchedLogFile=
#SlurmSchedLogLevel=
#
#
# POWER SAVE SUPPORT FOR IDLE NODES (optional)
#SuspendProgram=
#ResumeProgram=
#SuspendTimeout=
#ResumeTimeout=
#ResumeRate=
#SuspendExcNodes=
#SuspendExcParts=
#SuspendRate=
#SuspendTime=
#
#
# COMPUTE NODES
NodeName=node[1-12] Sockets=2 CoresPerSocket=10 State=UNKNOWN
NodeName=node[13-16] Sockets=2 CoresPerSocket=10 Feature=HIGHMEM State=UNKNOWN
NodeName=node32 Sockets=2 CoresPerSocket=10 State=UNKNOWN
NodeName=node[17-26] Sockets=2 CoresPerSocket=18 Feature=K2200 Gres=gpu:2
NodeName=node[27] Sockets=2 CoresPerSocket=18 Feature=K40 Gres=gpu:2
NodeName=node[28-31] Sockets=2 CoresPerSocket=28
PartitionName=normal Nodes=node[1-10,13-16,28-31],node32 Default=YES MaxTime=INFINITE State=UP Shared=YES
PartitionName=medium Nodes=node32 Default=NO MaxTime=INFINITE State=UP Shared=YES
PartitionName=GPUsmall Nodes=node[18-19,21-26] Default=NO MaxTime=INFINITE State=UP Shared=YES
PartitionName=priority Nodes=node[17,20] Default=NO MaxTime=INFINITE State=UP Shared=YES Priority=2000 OverSubscribe=NO
PartitionName=smalljobs Nodes=node[12,17,20] Default=NO MaxTime=INFINITE State=UP Shared=YES Priority=100 OverSubscribe=NO
PartitionName=big_scratch Nodes=node[13-16,27-31] Default=NO MaxTime=INFINITE State=UP Shared=YES Priority=100
PartitionName=GPUbig Nodes=node[27] Default=NO MaxTime=INFINITE State=UP Shared=YES
PartitionName=shared Nodes=node[1-10,13-31],lc1 Default=NO MaxTime=INFINITE State=UP Shared=YES