scontrol show config
:AccountingStorageBackupHost = (null) AccountingStorageEnforce = associations,limits,qos AccountingStorageHost = -Some-server- AccountingStorageExternalHost = (null) AccountingStorageParameters = (null) AccountingStoragePort = 6819 AccountingStorageTRES = cpu,mem,energy,node,billing,fs/disk,vmem,pages AccountingStorageType = accounting_storage/slurmdbd AccountingStorageUser = N/A AccountingStoreFlags = job_comment,job_env,job_extra,job_script AcctGatherEnergyType = (null) AcctGatherFilesystemType = (null) AcctGatherInterconnectType = (null) AcctGatherNodeFreq = 0 sec AcctGatherProfileType = (null) AllowSpecResourcesUsage = No AuthAltTypes = (null) AuthAltParameters = (null) AuthInfo = (null) AuthType = auth/munge BatchStartTimeout = 10 sec BcastExclude = /lib,/usr/lib,/lib64,/usr/lib64 BcastParameters = (null) BOOT_TIME = 2024-03-15T13:32:05 BurstBufferType = (null) CliFilterPlugins = (null) ClusterName = cluster CommunicationParameters = (null) CompleteWait = 0 sec CoreSpecPlugin = (null) CpuFreqDef = Unknown CpuFreqGovernors = OnDemand,Performance,UserSpace CredType = cred/munge DebugFlags = (null) DefMemPerCPU = 3500 DependencyParameters = (null) DisableRootJobs = Yes EioTimeout = 60 EnforcePartLimits = ALL Epilog = (null) EpilogMsgTime = 2000 usec EpilogSlurmctld = (null) ExtSensorsType = (null) ExtSensorsFreq = 0 sec FairShareDampeningFactor = 1 FederationParameters = (null) FirstJobId = 1 GetEnvTimeout = 2 sec GresTypes = gpu GpuFreqDef = (null) GroupUpdateForce = 1 GroupUpdateTime = 600 sec HASH_VAL = Match HealthCheckInterval = 300 sec HealthCheckNodeState = ANY HealthCheckProgram = /usr/sbin/nhc InactiveLimit = 0 sec InteractiveStepOptions = --interactive --preserve-env --pty $SHELL JobAcctGatherFrequency = 30 JobAcctGatherType = jobacct_gather/linux JobAcctGatherParams = (null) JobCompHost = localhost JobCompLoc = (null) JobCompParams = (null) JobCompPort = 0 JobCompType = (null) JobCompUser = root JobContainerType = (null) JobDefaults = (null) 
JobFileAppend = 0 JobRequeue = 1 JobSubmitPlugins = lua KillOnBadExit = 0 KillWait = 30 sec LaunchParameters = (null) Licenses = (null) LogTimeFormat = iso8601_ms MailDomain = (null) MailProg = /bin/mail MaxArraySize = 1001 MaxBatchRequeue = 5 MaxDBDMsgs = 20024 MaxJobCount = 10000 MaxJobId = 67043328 MaxMemPerNode = UNLIMITED MaxNodeCount = 6 MaxStepCount = 40000 MaxTasksPerNode = 512 MCSPlugin = (null) MCSParameters = (null) MessageTimeout = 10 sec MinJobAge = 300 sec MpiDefault = pmix_v2 MpiParams = (null) NEXT_JOB_ID = 3626 NodeFeaturesPlugins = (null) OverTimeLimit = 0 min PluginDir = /usr/lib64/slurm PlugStackConfig = (null) PowerParameters = (null) PowerPlugin = (null) PreemptMode = GANG,SUSPEND PreemptParameters = (null) PreemptType = preempt/qos PreemptExemptTime = 00:00:00 PrEpParameters = (null) PrEpPlugins = prep/script PriorityParameters = (null) PrioritySiteFactorParameters = (null) PrioritySiteFactorPlugin = (null) PriorityDecayHalfLife = 7-00:00:00 PriorityCalcPeriod = 00:05:00 PriorityFavorSmall = No PriorityFlags = PriorityMaxAge = 7-00:00:00 PriorityUsageResetPeriod = NONE PriorityType = priority/multifactor PriorityWeightAge = 0 PriorityWeightAssoc = 0 PriorityWeightFairShare = 0 PriorityWeightJobSize = 0 PriorityWeightPartition = 0 PriorityWeightQOS = 5000 PriorityWeightTRES = (null) PrivateData = none ProctrackType = proctrack/cgroup Prolog = (null) PrologEpilogTimeout = 65534 PrologSlurmctld = (null) PrologFlags = (null) PropagatePrioProcess = 0 PropagateResourceLimits = ALL PropagateResourceLimitsExcept = (null) RebootProgram = /etc/slurm/slurmupdate.sh ReconfigFlags = (null) RequeueExit = (null) RequeueExitHold = (null) ResumeFailProgram = (null) ResumeProgram = (null) ResumeRate = 300 nodes/min ResumeTimeout = 60 sec ResvEpilog = (null) ResvOverRun = 0 min ResvProlog = (null) ReturnToService = 1 SchedulerParameters = bf_max_job_user=2 SchedulerTimeSlice = 30 sec SchedulerType = sched/backfill ScronParameters = (null) SelectType = 
select/cons_tres SelectTypeParameters = CR_CORE_MEMORY SlurmUser = slurm(888) SlurmctldAddr = (null) SlurmctldDebug = info SlurmctldHost[0] = -some-server- SlurmctldLogFile = /var/log/slurm/slurmctld.log SlurmctldPort = 6817 SlurmctldSyslogDebug = (null) SlurmctldPrimaryOffProg = (null) SlurmctldPrimaryOnProg = (null) SlurmctldTimeout = 120 sec SlurmctldParameters = (null) SlurmdDebug = info SlurmdLogFile = /var/log/slurm/slurmd.log SlurmdParameters = (null) SlurmdPidFile = /var/run/slurmd.pid SlurmdPort = 6818 SlurmdSpoolDir = /var/spool/slurm SlurmdSyslogDebug = (null) SlurmdTimeout = 300 sec SlurmdUser = root(0) SlurmSchedLogFile = (null) SlurmSchedLogLevel = 0 SlurmctldPidFile = /var/run/slurmctld.pid SLURM_CONF = /etc/slurm/slurm.conf SLURM_VERSION = 23.11.1 SrunEpilog = (null) SrunPortRange = 0-0 SrunProlog = (null) StateSaveLocation = /var/spool/slurmctld SuspendExcNodes = (null) SuspendExcParts = (null) SuspendExcStates = (null) SuspendProgram = (null) SuspendRate = 60 nodes/min SuspendTime = INFINITE SuspendTimeout = 30 sec SwitchParameters = (null) SwitchType = (null) TaskEpilog = (null) TaskPlugin = task/cgroup,task/affinity TaskPluginParam = none TaskProlog = (null) TCPTimeout = 2 sec TmpFS = /tmp TopologyParam = (null) TopologyPlugin = topology/default TrackWCKey = No TreeWidth = 16 UsePam = No UnkillableStepProgram = (null) UnkillableStepTimeout = 60 sec VSizeFactor = 0 percent WaitTime = 0 sec X11Parameters = (null) Cgroup Support Configuration: AllowedRAMSpace = 100.0% AllowedSwapSpace = 0.0% CgroupMountpoint = /sys/fs/cgroup CgroupPlugin = autodetect ConstrainCores = yes ConstrainDevices = yes ConstrainRAMSpace = yes ConstrainSwapSpace = no EnableControllers = no IgnoreSystemd = no IgnoreSystemdOnFailure = no MaxRAMPercent = 100.0% MaxSwapPercent = 100.0% MemorySwappiness = (null) MinRAMSpace = 30 MB MPI Plugins Configuration: PMIxCliTmpDirBase = (null) PMIxCollFence = (null) PMIxDebug = 0 PMIxDirectConn = yes PMIxDirectConnEarly = no 
PMIxDirectConnUCX = no PMIxDirectSameArch = no PMIxEnv = (null) PMIxFenceBarrier = no PMIxNetDevicesUCX = (null) PMIxTimeout = 300 PMIxTlsUCX = (null)
PartitionName=test-partition AllowGroups=sysadmin,users AllowAccounts=ALL AllowQos=ALL AllocNodes=ALL Default=NO QoS=N/A DefaultTime=NONE DisableRootJobs=YES ExclusiveUser=NO GraceTime=0 Hidden=NO MaxNodes=UNLIMITED MaxTime=UNLIMITED MinNodes=0 LLN=NO MaxCPUsPerNode=UNLIMITED MaxCPUsPerSocket=UNLIMITED Nodes=vserv-[275-277] PriorityJobFactor=1 PriorityTier=100 RootOnly=NO ReqResv=NO OverSubscribe=FORCE:1 OverTimeLimit=NONE PreemptMode=GANG,SUSPEND State=UP TotalCPUs=32 TotalNodes=4 SelectTypeParameters=NONE JobDefaults=(null) DefMemPerNode=UNLIMITED MaxMemPerNode=UNLIMITED TRES=cpu=32,mem=304933M,node=4,billing=32
| Name      | Priority | GraceTime | Preempt   | PreemptMode | Flags       | UsageFactor | MaxTRESPU       | MaxJobsPU | MaxSubmitPU |
|-----------|----------|-----------|-----------|-------------|-------------|-------------|-----------------|-----------|-------------|
| normal    | 50       | 00:00:00  |           | cluster     |             | 1.000000    |                 | 20        | 50          |
| preempter | 1000     | 00:00:00  | preempted | gang,suspe+ |             | 1.000000    |                 |           |             |
| preempted | 0        | 00:00:00  |           | gang,suspe+ | OverPartQOS | 1.000000    | cpu=100,node=10 |           |             |
| JOBID | QOS       | ST | TIME | NODELIST(REASON) | PARTITION      | PRIORITY |
|-------|-----------|----|------|------------------|----------------|----------|
| 3629  | preempted | PD | 0:00 | (Resources)      | test-partition | 1        |
| 3627  | preempted | R  | 0:20 | vserv-276        | test-partition | 1        |
| 3628  | preempted | R  | 0:20 | vserv-277        | test-partition | 1        |
| 3626  | preempted | R  | 0:27 | vserv-275        | test-partition | 1        |
| 3630  | preempter | PD | 0:00 | (Resources)      | test-partition | 5000     |
Nischey Verma
Nischey Verma