[slurm-users] Slurm Configuration assistance: Unable to use srun after installation (slurm on fedora 33)
Johnsy K. John
johnsyjohnk at gmail.com
Sun Apr 18 23:43:25 UTC 2021
Hello SchedMD team,
I would like to use your Slurm workload manager for learning purposes.
I installed the software (downloaded from https://www.schedmd.com/downloads.php)
and followed the steps described in:
https://slurm.schedmd.com/download.html
https://slurm.schedmd.com/quickstart_admin.html
My Linux OS is Fedora 33, and I installed Slurm while logged in as root.
After installation and configuration as described at
https://slurm.schedmd.com/quickstart_admin.html
I got an error when I tried to run srun.
Details of the installation and usage are as follows:
Using root permissions, I copied the tarball to /root/installations/ and ran:
cd /root/installations/
tar --bzip -x -f slurm-20.11.5.tar.bz2
cd slurm-20.11.5/
./configure --enable-debug --prefix=/usr/local --sysconfdir=/usr/local/etc
make
make install
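As a quick sanity check that the build landed under the configured prefix (assuming the standard bin/sbin layout under /usr/local), one can run:
# confirm the client tools and daemons were installed, and check the version
ls -l /usr/local/bin/srun /usr/local/sbin/slurmctld /usr/local/sbin/slurmd
srun --version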
The following steps are based on
https://wiki.fysik.dtu.dk/niflheim/Slurm_configuration
mkdir /var/spool/slurmctld /var/log/slurm
chown johnsy /var/spool/slurmctld
chown johnsy /var/log/slurm
chmod 755 /var/spool/slurmctld /var/log/slurm
cp /var/run/slurmctld.pid /var/run/slurmd.pid
touch /var/log/slurm/slurmctld.log
chown johnsy /var/log/slurm/slurmctld.log
touch /var/log/slurm/slurm_jobacct.log /var/log/slurm/slurm_jobcomp.log
chown johnsy /var/log/slurm/slurm_jobacct.log /var/log/slurm/slurm_jobcomp.log
ldconfig -n /usr/lib64
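Note that, per https://slurm.schedmd.com/quickstart_admin.html, the slurmctld and slurmd daemons must be running before srun can contact the controller. A minimal sketch of starting both by hand on a single test node (the -D and -v flags, documented in the slurmctld/slurmd man pages, keep them in the foreground with verbose logging; slurmd must run as root):
# start the controller and the node daemon in the foreground for debugging
slurmctld -D -vvv &
slurmd -D -vvv &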
Then I tried an example command:
srun /proc/cpuinfo
I got the following error:
srun: error: Unable to allocate resources: Unable to contact slurm controller (connect failure)
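The message indicates that srun could not reach slurmctld on SlurmctldPort 6817. Two checks worth running at this point (pgrep lists matching processes; scontrol ping reports whether the controller answers):
# are slurmctld/slurmd running, and does the controller respond?
pgrep -a slurm
scontrol ping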
The slurm.conf configuration file that I created is:
######################################################################################################
######################################################################################################
# slurm.conf file generated by configurator.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
SlurmctldHost=homepc
#SlurmctldHost=
#
#DisableRootJobs=NO
#EnforcePartLimits=NO
#Epilog=
#EpilogSlurmctld=
#FirstJobId=1
#MaxJobId=999999
#GresTypes=
#GroupUpdateForce=0
#GroupUpdateTime=600
#JobFileAppend=0
#JobRequeue=1
#JobSubmitPlugins=1
#KillOnBadExit=0
#LaunchType=launch/slurm
#Licenses=foo*4,bar
#MailProg=/bin/mail
#MaxJobCount=5000
#MaxStepCount=40000
#MaxTasksPerNode=128
MpiDefault=none
#MpiParams=ports=#-#
#PluginDir=
#PlugStackConfig=
#PrivateData=jobs
ProctrackType=proctrack/cgroup
#Prolog=
#PrologFlags=
#PrologSlurmctld=
#PropagatePrioProcess=0
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#RebootProgram=
ReturnToService=1
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurmd
SlurmUser=johnsy
#SlurmdUser=root
#SrunEpilog=
#SrunProlog=
StateSaveLocation=/var/spool
SwitchType=switch/none
#TaskEpilog=
TaskPlugin=task/affinity
#TaskProlog=
#TopologyPlugin=topology/tree
#TmpFS=/tmp
#TrackWCKey=no
#TreeWidth=
#UnkillableStepProgram=
#UsePAM=0
#
#
# TIMERS
#BatchStartTimeout=10
#CompleteWait=0
#EpilogMsgTime=2000
#GetEnvTimeout=2
#HealthCheckInterval=0
#HealthCheckProgram=
InactiveLimit=0
KillWait=30
#MessageTimeout=10
#ResvOverRun=0
MinJobAge=300
#OverTimeLimit=0
SlurmctldTimeout=120
SlurmdTimeout=300
#UnkillableStepTimeout=60
#VSizeFactor=0
Waittime=0
#
#
# SCHEDULING
#DefMemPerCPU=0
#MaxMemPerCPU=0
#SchedulerTimeSlice=30
SchedulerType=sched/backfill
SelectType=select/cons_tres
SelectTypeParameters=CR_Core
#
#
# JOB PRIORITY
#PriorityFlags=
#PriorityType=priority/basic
#PriorityDecayHalfLife=
#PriorityCalcPeriod=
#PriorityFavorSmall=
#PriorityMaxAge=
#PriorityUsageResetPeriod=
#PriorityWeightAge=
#PriorityWeightFairshare=
#PriorityWeightJobSize=
#PriorityWeightPartition=
#PriorityWeightQOS=
#
#
# LOGGING AND ACCOUNTING
#AccountingStorageEnforce=0
#AccountingStorageHost=
#AccountingStoragePass=
#AccountingStoragePort=
AccountingStorageType=accounting_storage/none
#AccountingStorageUser=
AccountingStoreJobComment=YES
ClusterName=cluster
#DebugFlags=
#JobCompHost=
#JobCompLoc=
#JobCompPass=
#JobCompPort=
JobCompType=jobcomp/none
#JobCompUser=
#JobContainerType=job_container/none
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
SlurmctldDebug=info
#SlurmctldLogFile=
SlurmdDebug=info
#SlurmdLogFile=
#SlurmSchedLogFile=
#SlurmSchedLogLevel=
#
#
# POWER SAVE SUPPORT FOR IDLE NODES (optional)
#SuspendProgram=
#ResumeProgram=
#SuspendTimeout=
#ResumeTimeout=
#ResumeRate=
#SuspendExcNodes=
#SuspendExcParts=
#SuspendRate=
#SuspendTime=
#
#
# COMPUTE NODES
NodeName=localhost CPUs=12 Sockets=1 CoresPerSocket=6 ThreadsPerCore=2 State=UNKNOWN
PartitionName=debug Nodes=localhost Default=YES MaxTime=INFINITE State=UP
######################################################################################################
######################################################################################################
######################################################################################################
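For reference, the hardware values in the NodeName line (CPUs, Sockets, CoresPerSocket, ThreadsPerCore) can be cross-checked against what slurmd actually detects on the machine:
# print this host's hardware configuration as a slurm.conf NodeName line
slurmd -C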