Best for:
Advantages:
Example Commands:
# srun launches a job and blocks until resources are granted;
# --pty attaches your terminal to the process on the compute node.

# Basic interactive session (2 hours, 1 CPU -- presumably the cluster's
# default limits; confirm against your site's documentation)
srun --pty bash
# Interactive session with custom resources: 4 h walltime, 8 GB RAM, 4 CPU cores
srun --time=4:00:00 --mem=8G --cpus-per-task=4 --pty bash
# Interactive Python session (the REPL runs on the compute node)
srun --time=2:00:00 --mem=4G --pty python
# Interactive session with one GPU of any available type
srun --time=8:00:00 --mem=16G --gres=gpu:1 --pty bash
Best for:
Advantages:
Example Script (job.sh):
#!/bin/bash
# Batch job (job.sh): 4 hours walltime, 8 GB RAM, 4 CPU cores.
# Comments are kept on their own lines: sbatch does not reliably support
# trailing inline comments on #SBATCH directive lines -- the trailing text
# can be passed to the option parser and rejected.
#SBATCH --time=4:00:00
#SBATCH --mem=8G
#SBATCH --cpus-per-task=4
# %j in the file-name patterns below is replaced with the numeric job ID
#SBATCH --output=output_%j.log
#SBATCH --error=error_%j.log

# Your commands here
python my_script.py
Submit with:
# Submit the batch script to the scheduler (prints the assigned job ID)
sbatch job.sh
Best for:
Advantages:
Example Array Job:
#!/bin/bash
#SBATCH --time=4:00:00
#SBATCH --mem=4G
# Run 5 copies with task IDs 1..5; in the log name, %A is the array job ID
# and %a is this copy's task ID. (Comments stay on their own lines --
# sbatch does not reliably support inline comments on #SBATCH directives.)
#SBATCH --array=1-5
#SBATCH --output=array_%A_%a.log

# Example of different behaviors based on the array task ID:
# each task processes a different dataset file.
case "$SLURM_ARRAY_TASK_ID" in
  1) python process.py --dataset "data1.csv";;
  2) python process.py --dataset "data2.csv";;
  3) python process.py --dataset "data3.csv";;
  4) python process.py --dataset "data4.csv";;
  5) python process.py --dataset "data5.csv";;
esac
#!/bin/bash
# Single-core analysis job: 2 hours walltime, 4 GB RAM, 1 CPU.
#SBATCH --time=2:00:00
#SBATCH --mem=4G
#SBATCH --cpus-per-task=1
# NOTE(review): 'source activate' is legacy conda syntax; newer conda installs
# may require 'conda activate' (after 'conda init') -- confirm on this cluster.
source activate myenv # If using conda
python my_analysis.py
#!/bin/bash
# GPU training job: 8 hours walltime, 16 GB RAM, 4 CPU cores, and one
# NVIDIA A100 (gpu:a100:1 pins the GPU type; gpu:1 would accept any type).
#SBATCH --time=8:00:00
#SBATCH --mem=16G
#SBATCH --gres=gpu:a100:1
#SBATCH --cpus-per-task=4
# NOTE(review): 'source activate' is legacy conda syntax; newer conda installs
# may require 'conda activate' (after 'conda init') -- confirm on this cluster.
source activate pytorch_env
python train_model.py
#!/bin/bash
#SBATCH --time=4:00:00
#SBATCH --mem=4G
# 3 learning rates x 3 batch sizes x 2 optimizers = 18 combinations, so the
# array needs task IDs 0..17. (The original 0-9 launched only 10 tasks and
# never reached most of the second optimizer's combinations.)
#SBATCH --array=0-17
#SBATCH --output=paramSweep_%A_%a.log

# Define parameter arrays
LEARNING_RATES=(0.001 0.01 0.1)
BATCH_SIZES=(32 64 128)
OPTIMIZERS=("adam" "sgd")

# Decode this task's ID into one (lr, batch-size, optimizer) triple.
# Mixed-radix encoding: ID = opt_idx*9 + bs_idx*3 + lr_idx.
lr_idx=$(( SLURM_ARRAY_TASK_ID % 3 ))
bs_idx=$(( (SLURM_ARRAY_TASK_ID / 3) % 3 ))
opt_idx=$(( SLURM_ARRAY_TASK_ID / 9 ))

# Run with this task's specific parameters (expansions quoted defensively)
python train.py \
  --lr "${LEARNING_RATES[$lr_idx]}" \
  --batch-size "${BATCH_SIZES[$bs_idx]}" \
  --optimizer "${OPTIMIZERS[$opt_idx]}"
# Check job status (lists your pending and running jobs)
squeue -u $USER
# Cancel a job (replace <job_id> with the numeric ID printed by sbatch/squeue)
scancel <job_id>
# Check job details (full scheduler record for a pending or running job)
scontrol show job <job_id>
# View completed job info (accounting data: state, runtime, resources used)
sacct -j <job_id>
# Cancel all your jobs
scancel -u $USER
# Check estimated start time of a pending job
squeue --start -j <job_id>
- Use squeue and sacct to track jobs
- Use --output and --error to capture logs
- Request memory with --mem or --mem-per-cpu
- Increase --time if the job didn't complete
- Use squeue --start to check a pending job's estimated start time