[slurm-users] Slurm Configuration assistance: Unable to use srun after installation (slurm on fedora 33)

Johnsy K. John johnsyjohnk at gmail.com
Sun Apr 18 23:43:25 UTC 2021


Hello SchedMD team,

I would like to use your Slurm workload manager for learning purposes.
I installed the software (downloaded from
https://www.schedmd.com/downloads.php ) and followed the steps described
in:

https://slurm.schedmd.com/download.html
https://slurm.schedmd.com/quickstart_admin.html


My Linux OS is Fedora 33, and I did the installation logged in as root.
After installation and configuration as described in
https://slurm.schedmd.com/quickstart_admin.html
I got errors when I tried to run srun.
Details of the installation and usage are as follows:

Using root permissions, I copied the tarball to /root/installations/:

cd /root/installations/

tar --bzip2 -x -f slurm-20.11.5.tar.bz2

cd slurm-20.11.5/

./configure --enable-debug --prefix=/usr/local --sysconfdir=/usr/local/etc

make
make install
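
One thing I am not sure about: the quickstart guide says Slurm's default
authentication plugin is auth/munge, and I do not remember installing
MUNGE explicitly before running ./configure. If it is required, I assume
the Fedora steps would be something like this (the package names and the
key-creation command are my assumption from the MUNGE documentation):

dnf install munge munge-devel   # munge-devel so configure can build auth/munge
/usr/sbin/create-munge-key      # generate /etc/munge/munge.key
systemctl enable --now munge    # start the munged daemon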


The following steps are based on
https://wiki.fysik.dtu.dk/niflheim/Slurm_configuration

mkdir /var/spool/slurmctld /var/log/slurm
chown johnsy /var/spool/slurmctld
chown johnsy /var/log/slurm
chmod 755 /var/spool/slurmctld /var/log/slurm
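
A gap I noticed while writing this up: my slurm.conf below sets
SlurmdSpoolDir=/var/spool/slurmd, but nothing above creates that
directory. I assume I also need (slurmd runs as root by default, so no
chown):

mkdir /var/spool/slurmd
chmod 755 /var/spool/slurmd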

touch /var/run/slurmctld.pid /var/run/slurmd.pid

touch /var/log/slurm/slurmctld.log
chown johnsy /var/log/slurm/slurmctld.log

touch /var/log/slurm/slurm_jobacct.log /var/log/slurm/slurm_jobcomp.log
chown johnsy /var/log/slurm/slurm_jobacct.log /var/log/slurm/slurm_jobcomp.log

ldconfig -n /usr/local/lib
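
After this, I believe the daemons still need to be started by hand. While
debugging, the quickstart guide suggests running them in the foreground
with verbose logging (the paths follow from my --prefix above):

/usr/local/sbin/slurmctld -D -vvv    # controller, foreground, verbose
/usr/local/sbin/slurmd -D -vvv       # compute node daemon, run as root

Is that the right approach here, or is there a systemd unit I should have
installed?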


Now when I tried an example command:

srun cat /proc/cpuinfo


I get the following error:

srun: error: Unable to allocate resources: Unable to contact slurm controller (connect failure)
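
My guess from the message is that slurmctld is not running or not
reachable. These are the checks I plan to run (scontrol and sinfo are the
standard client commands; the pgrep check is just my own sanity test):

pgrep -a slurmctld    # is the controller process alive at all?
scontrol ping         # can a client reach the controller?
sinfo                 # do the node and partition show up?

Also, since SlurmctldLogFile is unset in my config, I assume the
controller logs go to syslog rather than to the slurmctld.log file I
created above.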



The slurm.conf configuration file that I created is:

######################################################################################################
# slurm.conf file generated by configurator.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
SlurmctldHost=homepc
#SlurmctldHost=
#
#DisableRootJobs=NO
#EnforcePartLimits=NO
#Epilog=
#EpilogSlurmctld=
#FirstJobId=1
#MaxJobId=999999
#GresTypes=
#GroupUpdateForce=0
#GroupUpdateTime=600
#JobFileAppend=0
#JobRequeue=1
#JobSubmitPlugins=1
#KillOnBadExit=0
#LaunchType=launch/slurm
#Licenses=foo*4,bar
#MailProg=/bin/mail
#MaxJobCount=5000
#MaxStepCount=40000
#MaxTasksPerNode=128
MpiDefault=none
#MpiParams=ports=#-#
#PluginDir=
#PlugStackConfig=
#PrivateData=jobs
ProctrackType=proctrack/cgroup
#Prolog=
#PrologFlags=
#PrologSlurmctld=
#PropagatePrioProcess=0
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#RebootProgram=
ReturnToService=1
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurmd
SlurmUser=johnsy
#SlurmdUser=root
#SrunEpilog=
#SrunProlog=
StateSaveLocation=/var/spool
SwitchType=switch/none
#TaskEpilog=
TaskPlugin=task/affinity
#TaskProlog=
#TopologyPlugin=topology/tree
#TmpFS=/tmp
#TrackWCKey=no
#TreeWidth=
#UnkillableStepProgram=
#UsePAM=0
#
#
# TIMERS
#BatchStartTimeout=10
#CompleteWait=0
#EpilogMsgTime=2000
#GetEnvTimeout=2
#HealthCheckInterval=0
#HealthCheckProgram=
InactiveLimit=0
KillWait=30
#MessageTimeout=10
#ResvOverRun=0
MinJobAge=300
#OverTimeLimit=0
SlurmctldTimeout=120
SlurmdTimeout=300
#UnkillableStepTimeout=60
#VSizeFactor=0
Waittime=0
#
#
# SCHEDULING
#DefMemPerCPU=0
#MaxMemPerCPU=0
#SchedulerTimeSlice=30
SchedulerType=sched/backfill
SelectType=select/cons_tres
SelectTypeParameters=CR_Core
#
#
# JOB PRIORITY
#PriorityFlags=
#PriorityType=priority/basic
#PriorityDecayHalfLife=
#PriorityCalcPeriod=
#PriorityFavorSmall=
#PriorityMaxAge=
#PriorityUsageResetPeriod=
#PriorityWeightAge=
#PriorityWeightFairshare=
#PriorityWeightJobSize=
#PriorityWeightPartition=
#PriorityWeightQOS=
#
#
# LOGGING AND ACCOUNTING
#AccountingStorageEnforce=0
#AccountingStorageHost=
#AccountingStoragePass=
#AccountingStoragePort=
AccountingStorageType=accounting_storage/none
#AccountingStorageUser=
AccountingStoreJobComment=YES
ClusterName=cluster
#DebugFlags=
#JobCompHost=
#JobCompLoc=
#JobCompPass=
#JobCompPort=
JobCompType=jobcomp/none
#JobCompUser=
#JobContainerType=job_container/none
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
SlurmctldDebug=info
#SlurmctldLogFile=
SlurmdDebug=info
#SlurmdLogFile=
#SlurmSchedLogFile=
#SlurmSchedLogLevel=
#
#
# POWER SAVE SUPPORT FOR IDLE NODES (optional)
#SuspendProgram=
#ResumeProgram=
#SuspendTimeout=
#ResumeTimeout=
#ResumeRate=
#SuspendExcNodes=
#SuspendExcParts=
#SuspendRate=
#SuspendTime=
#
#
# COMPUTE NODES
NodeName=localhost CPUs=12 Sockets=1 CoresPerSocket=6 ThreadsPerCore=2 State=UNKNOWN
PartitionName=debug Nodes=localhost Default=YES MaxTime=INFINITE State=UP
######################################################################################################
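
Two things in this file I am unsure about, in case they matter:

1. SlurmctldHost=homepc but NodeName=localhost. On a single-machine
setup, do both need to match the machine's real hostname (hostname -s)?

2. StateSaveLocation=/var/spool, while the directory I created and
chowned to johnsy is /var/spool/slurmctld. Since SlurmUser=johnsy must be
able to write the state files, should StateSaveLocation be
/var/spool/slurmctld instead?

To check the COMPUTE NODES line against the actual hardware, I assume I
can compare it with what slurmd itself detects:

/usr/local/sbin/slurmd -C    # prints NodeName=... CPUs=... Sockets=... for this machine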