[slurm-users] require info on merging diff core count nodes under single queue or partition

Sudeep Narayan Banerjee snbanerjee at iitgn.ac.in
Mon May 18 12:46:23 UTC 2020


Dear Support,

Nodes node[11-22] each have 2 sockets with 16 cores per socket (32 cores total), while nodes node[23-24] each have 2 sockets with 20 cores per socket (40 cores total). In the attached slurm.conf, can we merge all of nodes 11-24 (which have different core counts) under a single queue or partition name?
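
For clarity, the merged definition I have in mind would look roughly like this (the NodeName lines are as already defined in the attached slurm.conf, and the PartitionName line is the commented-out candidate near the bottom of that file):

NodeName=node[11-22] Sockets=2 CoresPerSocket=16 ThreadsPerCore=1 Procs=32 State=IDLE
NodeName=node[23-24] Sockets=2 CoresPerSocket=20 ThreadsPerCore=1 Procs=40 State=IDLE
PartitionName=main_new Nodes=node[11-24] Default=YES Shared=YES Priority=10 PreemptMode=suspend MaxTime=2-0:0 State=UP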



-- 
Thanks & Regards,
Sudeep Narayan Banerjee
System Analyst | Scientist B
Information System Technology Facility
Academic Block 5 | Room 110
Indian Institute of Technology Gandhinagar
Palaj, Gujarat 382355 INDIA

-------------- next part --------------
# slurm.conf file generated by configurator.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
ControlMachine=hpc
#ControlAddr=
#BackupAddr=
#
AuthType=auth/munge
CacheGroups=0
#CheckpointType=checkpoint/none
#CryptoType=crypto/none
#DisableRootJobs=NO
#EnforcePartLimits=NO
#Epilog=
#EpilogSlurmctld=
#FirstJobId=1
#MaxJobId=999999
#GresTypes=gpu
#GroupUpdateForce=0
#GroupUpdateTime=600
#JobCheckpointDir=/var/slurm/checkpoint
#JobCredentialPrivateKey=
#JobCredentialPublicCertificate=
#JobFileAppend=0
#JobRequeue=1
#JobSubmitPlugins=1
#KillOnBadExit=0
#Licenses=foo*4,bar
#MailProg=/bin/mail
MaxJobCount=5000
MaxStepCount=40000
MaxTasksPerNode=128
MpiDefault=none
#MpiParams=ports=#-#
#PluginDir=
#PlugStackConfig=
#PrivateData=jobs
ProctrackType=proctrack/pgid
#Prolog=
#PrologSlurmctld=
#PropagatePrioProcess=0
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
ReturnToService=1
#SallocDefaultCommand=
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
SlurmdPort=6818
SlurmdSpoolDir=/tmp/slurmd
SlurmUser=root
#SlurmdUser=root
#SrunEpilog=
#SrunProlog=
StateSaveLocation=/tmp
SwitchType=switch/none
#TaskEpilog=
TaskPlugin=task/none
#TaskPluginParam=
#TaskProlog=
#TopologyPlugin=topology/tree
#TmpFs=/tmp
#TrackWCKey=no
#TreeWidth=
#UnkillableStepProgram=
#UsePAM=0
#
#
# TIMERS
#BatchStartTimeout=10
#CompleteWait=0
#EpilogMsgTime=2000
#GetEnvTimeout=2
#HealthCheckInterval=0
#HealthCheckProgram=
InactiveLimit=0
KillWait=30
MessageTimeout=80
#ResvOverRun=0
MinJobAge=300
#OverTimeLimit=0
SlurmctldTimeout=120
SlurmdTimeout=300
#UnkillableStepTimeout=60
#VSizeFactor=0
Waittime=0
#
#
# SCHEDULING
#DefMemPerCPU=0
FastSchedule=1
#MaxMemPerCPU=0
#SchedulerRootFilter=1
#SchedulerTimeSlice=30
SchedulerType=sched/backfill
SchedulerPort=7321
SelectType=select/cons_res
SelectTypeParameters=CR_Core_Memory
#
#
# JOB PRIORITY
#PriorityType=priority/basic

PriorityType=priority/multifactor
#PriorityDecayHalfLife=
DebugFlags=NO_CONF_HASH
#PriorityCalcPeriod=
#PriorityFavorSmall=
#PriorityMaxAge=
#PriorityUsageResetPeriod=
#PriorityWeightAge=
#PriorityWeightFairshare=
#PriorityWeightJobSize=
#PriorityWeightPartition=
#PriorityWeightQOS=
#
#
# LOGGING AND ACCOUNTING
AccountingStorageEnforce=limits
#AccountingStorageHost=
#AccountingStorageLoc=
#AccountingStoragePass=
#AccountingStoragePort=
AccountingStorageType=accounting_storage/mysql
#AccountingStorageUser=
AccountingStoreJobComment=YES
ClusterName=cluster-iitgn
#DebugFlags=
#JobCompHost=
#JobCompLoc=
#JobCompPass=
#JobCompPort=
JobCompType=jobcomp/mysql
#JobCompUser=
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
SlurmctldDebug=3
SlurmctldLogFile=/var/log/slurmctld.log
SlurmdDebug=3
SlurmdLogFile=/var/log/slurmd.log
#SlurmSchedLogFile=
#SlurmSchedLogLevel=
#
#
# POWER SAVE SUPPORT FOR IDLE NODES (optional)
#SuspendProgram=
#ResumeProgram=
#SuspendTimeout=
#ResumeTimeout=
#ResumeRate=
#SuspendExcNodes=
#SuspendExcParts=
#SuspendRate=
#SuspendTime=
GresTypes=gpu
#
#
# COMPUTE NODES

NodeName=node[1-10] Sockets=2 CoresPerSocket=8 ThreadsPerCore=1 Procs=16  RealMemory=60000  State=IDLE
NodeName=gpu[1-2] CPUs=16 Gres=gpu:2 State=IDLE

NodeName=node[11-22] Sockets=2 CoresPerSocket=16 ThreadsPerCore=1 Procs=32 State=IDLE
NodeName=node[23-24] Sockets=2 CoresPerSocket=20 ThreadsPerCore=1 Procs=40 State=IDLE
NodeName=gpu[3-4] CPUs=32 Gres=gpu:1 State=IDLE

#NodeName=hpc CPUs=12 State=UNKNOWN

PartitionName=serial Nodes=gpu1 Default=YES Shared=YES Priority=20 PreemptMode=suspend MaxTime=1-0:0 MaxCPUsPerNode=10 State=UP


PartitionName=main Nodes=node[1-10] Default=YES Shared=YES Priority=10 PreemptMode=suspend MaxTime=2-0:0 State=UP
PartitionName=main_new Nodes=node[11-22] Default=YES Shared=YES Priority=10 PreemptMode=suspend MaxTime=2-0:0 State=UP
#PartitionName=main_new Nodes=node[11-24] Default=YES Shared=YES Priority=10 PreemptMode=suspend MaxTime=2-0:0 State=UP

PartitionName=gsgroup Nodes=node[23-24] Default=NO Shared=YES Priority=30 PreemptMode=suspend MaxTime=2-0:0 State=UP Allowgroups=GauravS_grp 
PartitionName=pdgroup Nodes=node[9-10] Default=NO Shared=YES Priority=30 PreemptMode=suspend MaxTime=3-0:0 State=UP Allowgroups=PD_grp 
PartitionName=ssmgroup Nodes=gpu[3-4] Default=NO Shared=YES Priority=30 PreemptMode=suspend MaxTime=7-0:0 State=UP Allowgroups=SSM_grp 


PartitionName=gpu Nodes=gpu[1-2] Default=NO Shared=yes  MaxTime=3-0:0 State=UP
PartitionName=gpu_new Nodes=gpu[3-4] Default=NO Shared=yes  MaxTime=3-0:0 State=UP

#PartitionName=extgrp Nodes=node[1-10],gpu[1-2] Default=NO Shared=YES Priority=40 MaxTime=3-0:0 State=UP Allowgroups=External_grp
#PartitionName=extgrp Nodes=gpu[1-2] Default=NO Shared=YES Priority=40 MaxTime=3-0:0 State=UP Allowgroups=External_grp

