linux shell ksh torque

monitoring job titles on torque cluster using ksh


If I use qstat I can get the list of jobs currently running

host     username othername  NameTask_JOBXXXX_G1_namesubtask                  --      1   1     -- 8783: Q 00:00
host     username othername  NameTask2_JOBXXXX_G2_namesubtask                       --      1   1     -- 8783: C 00:00

So far, I could track the status with a ksh script:

#!/usr/bin/ksh
# Every 2 seconds print the number of queued, running and completed jobs on
# one line, then the task name and time (fields 4 and 11) of the last five
# completed jobs, then a blank separator line.
# NOTE(review): mystat is not defined in this snippet; presumably it is a
# local wrapper around qstat -- confirm.
while sleep 2; do
  # Take a single snapshot per cycle so that the three counts and the
  # completed-job list all describe the same state.
  snapshot=$(mystat | grep JOB)

  # The status letter follows the 'NNNN:' column, so match it as ': X'.
  # Matching a bare letter would also count a Q or C inside a task name.
  printf '%s Queued   ' "$(printf '%s\n' "${snapshot}" | grep -c ': Q')"
  printf '%s Running   ' "$(printf '%s\n' "${snapshot}" | grep -c ': R')"
  printf '%s Completed   \n' "$(printf '%s\n' "${snapshot}" | grep -c ': C')"

  # After squeezing runs of spaces, field 4 is the task name and field 11
  # is the time column.
  printf '%s\n' "$(printf '%s\n' "${snapshot}" | grep ': C' | tail -n 5 \
    | tr -s ' ' | cut -d ' ' -f 4,11)"
  echo
done

which gives me, every 2 seconds, the number of Queued, Running, and Completed jobs, plus the task name and completion time of the last 5 completed jobs.

How do I extend this to also get a count of how many rows with each different 'NameTask_' are currently running? I would like to have the results of the previous script, but grouped by NameTask_ and G_.


Solution

  • Seems like a job for arrays

        # Summarise one job listing read from standard input.
        #
        # Input:  one job per line, 11 whitespace-separated fields, with the
        #         task name (NameTask_..._namesubtask) in field 4, the status
        #         letter in field 10 and the status time in field 11.
        #         Lines with fewer than 10 fields (blank lines, short headers)
        #         are skipped.
        # Output: job count per status, the times of the last 5 completed (C)
        #         jobs in input order, job count per NameTask prefix and job
        #         count per namesubtask suffix.
        # Usage:  mystat | grep JOB | summarize_jobs
        #
        # Declared with the 'function' keyword so that the typeset variables
        # are local in both ksh93 and bash; the counters therefore start from
        # zero on every call (e.g. once per cycle of a monitoring loop).
        function summarize_jobs {
          typeset -A jobCnt       # Associative array: job count by status
          typeset -A ntCnt        # Associative array: job count by NameTask
          typeset -A stCnt        # Associative array: job count by namesubtask
          typeset -a cjList       # Indexed array: times of Completed jobs
          typeset host uName oName tName v w x y z stat statTime
          typeset st n s
          typeset -i i istart=0 nDone=0

          # -r keeps backslashes literal; the '|| [[ -n ... ]]' part still
          # processes a final line that has no trailing newline.
          while read -r host uName oName tName v w x y z stat statTime ||
                [[ -n ${host} ]]
          do
            # No status field: not a job row.  An empty key would also be an
            # invalid array subscript.
            [[ -n ${stat} ]] || continue

            # Plain assignments instead of ((arr[key]++)) so that the key is
            # never re-parsed as part of an arithmetic expression.
            jobCnt[${stat}]=$(( ${jobCnt[${stat}]:-0} + 1 ))

            if [[ ${stat} == C ]]; then
              cjList[${#cjList[@]}]=${statTime}   # Append, keeping input order
            fi

            ntCnt[${tName%%_*}]=$(( ${ntCnt[${tName%%_*}]:-0} + 1 ))   # Text before first '_'
            stCnt[${tName##*_}]=$(( ${stCnt[${tName##*_}]:-0} + 1 ))   # Text after last '_'
          done

          for st in "${!jobCnt[@]}"; do
            printf 'Number %s jobs: %s\n' "${st}" "${jobCnt[${st}]}"
          done

          echo "Last 5 Completed jobs:"
          nDone=${#cjList[@]}
          if (( nDone > 5 )); then
            (( istart = nDone - 5 ))      # Only the last 5 entries
          fi
          for (( i = istart; i < nDone; i++ )); do
            printf '%s\n' "${cjList[i]}"
          done

          echo "Job counts by NameTask: "
          for n in "${!ntCnt[@]}"; do
            printf '%s %s\n' "${n}" "${ntCnt[${n}]}"
          done

          echo "Job counts by NameSubTask: "
          for s in "${!stCnt[@]}"; do
            printf '%s %s\n' "${s}" "${stCnt[${s}]}"
          done
        }

        # Process the listing supplied on standard input.
        summarize_jobs