Version 15 (modified by mmamonski, 11 years ago) (diff)

--

This document serves as a guide to conducting the UMD verification process for a QCG-Computing installation, especially the job and parallel job criteria. It is intended only for system administrators, not end users (who should use QCG-Icon or the broker clients), as it relies on the low-level QCG-Computing interface.

JOBEXEC_JOB_1

Execute a simple job in the appliance.

$ qcg-comp -c -J /usr/share/qcg-comp/doc/examples/jsdl/sleep.xml 
Activity Id: 3f6558f5-76af-42cb-ad12-b37df187cf47
$ qcg-comp -s -a 3f6558f5-76af-42cb-ad12-b37df187cf47            
status = Executing
$ qcg-comp -s -a 3f6558f5-76af-42cb-ad12-b37df187cf47
status = Executing
$ qcg-comp -s -a 3f6558f5-76af-42cb-ad12-b37df187cf47
status = Finished (exit status = 0)

JOBEXEC_JOB_2

Execute a simple job in the appliance that uses both input and output files.

$ cat > bash.xml << EOF 
<?xml version="1.0" encoding="UTF-8"?>

<jsdl:JobDefinition
 xmlns:jsdl="http://schemas.ggf.org/jsdl/2005/11/jsdl"
 xmlns:jsdl-hpcpa="http://schemas.ggf.org/jsdl/2006/07/jsdl-hpcpa"
 xmlns:jsdl-qcg-comp-factory="http://schemas.qoscosgrid.org/comp/2011/04/jsdl/factory">
   <jsdl:JobDescription>
      <jsdl:JobIdentification>
         <jsdl:JobName>JOBEXEC_JOB_2</jsdl:JobName>
      </jsdl:JobIdentification>
      <jsdl:Application>
        <jsdl:ApplicationName>bash</jsdl:ApplicationName>
        <jsdl-hpcpa:HPCProfileApplication>
            <jsdl-hpcpa:Argument>script.sh</jsdl-hpcpa:Argument>
	    <jsdl-hpcpa:Output>JOBEXEC_JOB_2.out</jsdl-hpcpa:Output>
        </jsdl-hpcpa:HPCProfileApplication>
      </jsdl:Application>      
      <jsdl:DataStaging>
        <jsdl:FileName>script.sh</jsdl:FileName>
        <jsdl:CreationFlag>overwrite</jsdl:CreationFlag>
        <jsdl:Source>
            <jsdl:URI>script.sh</jsdl:URI>
        </jsdl:Source>
      </jsdl:DataStaging>
      <jsdl:DataStaging>
        <jsdl:FileName>JOBEXEC_JOB_2.out</jsdl:FileName>
        <jsdl:CreationFlag>overwrite</jsdl:CreationFlag>
        <jsdl:Target>
            <jsdl:URI>JOBEXEC_JOB_2.out</jsdl:URI>
        </jsdl:Target>
      </jsdl:DataStaging>
      <jsdl:Resources>
	   <jsdl:TotalCPUCount>
            <jsdl:Exact>1</jsdl:Exact>
         </jsdl:TotalCPUCount>
 	</jsdl:Resources>
      </jsdl:JobDescription>
</jsdl:JobDefinition>
EOF
$ cat > script.sh << EOF
#!/bin/bash

hostname
EOF
$ qcg-comp -i -J bash.xml 
Staging file: script.sh
All files staged in.
$ qcg-comp -c -J bash.xml 
Activity Id: 72533fe7-764b-4b0c-93f9-b4dc7d73b5d4
$ qcg-comp -s -a 72533fe7-764b-4b0c-93f9-b4dc7d73b5d4
status = Queued
$ qcg-comp -s -a 72533fe7-764b-4b0c-93f9-b4dc7d73b5d4
status = Finished (exit status = 0)
$ qcg-com -o -J bash.xml 
-bash: qcg-com: command not found
$ qcg-comp -o -J bash.xml 
File JOBEXEC_JOB_2.out staged out.
All files staged out.
$ cat JOBEXEC_JOB_2.out 
grass1.man.poznan.pl

JOBEXEC_JOB_3

Submit a simple job and terminate it while it is still executing.

$ qcg-comp -c -J /usr/share/qcg-comp/doc/examples/jsdl/sleep.xml 
Activity Id: 116c0702-2705-4c64-9d72-51c264061f8c
$ qcg-comp -s -a  116c0702-2705-4c64-9d72-51c264061f8c
status = Queued
$ qcg-comp -s -a  116c0702-2705-4c64-9d72-51c264061f8c
status = Executing
$ qcg-comp -t -a  116c0702-2705-4c64-9d72-51c264061f8c
Activity is being terminated.
$ qcg-comp -s -a  116c0702-2705-4c64-9d72-51c264061f8c # for a very short time after job termination the Executing state may still be reported
status = Executing 
$ qcg-comp -s -a  116c0702-2705-4c64-9d72-51c264061f8c
status = Cancelled

JOBEXEC_EXECMNGR_3

Job Execution Appliances must be able to collect information from the underlying execution manager.

$ qcg-comp -G | xmllint --format - | grep Queue
    <qcm:Queues>
      <qcm:Queue default="true" enabled="true" name="plgrid" runningJobs="0" started="true" waitingJobs="0" walltime="PT0H0M0S" qcm:default="true" qcm:enabled="true" qcm:name="plgrid" qcm:runningJobs="0" qcm:started="true" qcm:waitingJobs="0" qcm:walltime="PT0H0M0S"/>
    </qcm:Queues>

PARALLEL_JOB_1

Job Execution Appliances that also provide the Parallel Job Capability must allow users to submit a job requesting more than one execution slot.

$ cat  > parallel-1.xml << EOF 
<?xml version="1.0" encoding="UTF-8"?>

<jsdl:JobDefinition
 xmlns:jsdl="http://schemas.ggf.org/jsdl/2005/11/jsdl"
 xmlns:jsdl-hpcpa="http://schemas.ggf.org/jsdl/2006/07/jsdl-hpcpa"
 xmlns:jsdl-qcg-comp-factory="http://schemas.qoscosgrid.org/comp/2011/04/jsdl/factory">
   <jsdl:JobDescription>
      <jsdl:JobIdentification>
         <jsdl:JobName>PARALLEL_JOB_1</jsdl:JobName>
      </jsdl:JobIdentification>
      <jsdl:Application>
        <jsdl-hpcpa:HPCProfileApplication>
            <jsdl-hpcpa:Executable>/bin/sleep</jsdl-hpcpa:Executable>
            <jsdl-hpcpa:Argument>60</jsdl-hpcpa:Argument>
        </jsdl-hpcpa:HPCProfileApplication>
      </jsdl:Application>      
     <jsdl:Resources>
	   <jsdl:TotalCPUCount>
            <jsdl:Exact>2</jsdl:Exact>
         </jsdl:TotalCPUCount>
 	</jsdl:Resources>
      </jsdl:JobDescription>
</jsdl:JobDefinition>
EOF
$ qcg-comp -c -J parallel-1.xml
Activity Id: 2757fdde-e055-432a-ae28-e627ff8f6aa6
$ qcg-comp -g -a 2757fdde-e055-432a-ae28-e627ff8f6aa6 -x | grep Local
Local job ID: 15297.grass1.man.poznan.pl
$ qstat -f 15297 | grep exec
    exec_host = grass1.man.poznan.pl/2+grass1.man.poznan.pl/1

PARALLEL_JOB_2

PARALLEL_JOB_3

PARALLEL_MPI_1

Parallel Job Appliances must support the execution of MPI jobs.

$ cat > hello-mpi.c  << EOF
#include <stdio.h>
#include <mpi.h>

int main(int argc, char *argv[]) {
  int numprocs, rank, namelen;
  char processor_name[MPI_MAX_PROCESSOR_NAME];

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name(processor_name, &namelen);

  printf("Process %d on %s out of %d\n", rank, processor_name, numprocs);

  MPI_Finalize();
}
EOF
$ module load openmpi
$ mpicc hello-mpi.c -o hello-mpi
$ cat > bash.xml  << EOF
<?xml version="1.0" encoding="UTF-8"?>

<jsdl:JobDefinition
 xmlns:jsdl="http://schemas.ggf.org/jsdl/2005/11/jsdl"
 xmlns:jsdl-hpcpa="http://schemas.ggf.org/jsdl/2006/07/jsdl-hpcpa"
 xmlns:jsdl-qcg-comp-factory="http://schemas.qoscosgrid.org/comp/2011/04/jsdl/factory">
   <jsdl:JobDescription>
      <jsdl:JobIdentification>
         <jsdl:JobName>PARALLEL_MPI_1</jsdl:JobName>
      </jsdl:JobIdentification>
      <jsdl:Application>
        <jsdl:ApplicationName>bash</jsdl:ApplicationName>
        <jsdl-hpcpa:HPCProfileApplication>
            <jsdl-hpcpa:Argument>script.sh</jsdl-hpcpa:Argument>
	    <jsdl-hpcpa:Output>PARALLEL_MPI_1.out</jsdl-hpcpa:Output>
        </jsdl-hpcpa:HPCProfileApplication>
      </jsdl:Application>      
      <jsdl:DataStaging>
        <jsdl:FileName>hello-mpi</jsdl:FileName>
        <jsdl:CreationFlag>overwrite</jsdl:CreationFlag>
        <jsdl:Source>
            <jsdl:URI>hello-mpi</jsdl:URI>
        </jsdl:Source>
      </jsdl:DataStaging>
      <jsdl:DataStaging>
        <jsdl:FileName>script.sh</jsdl:FileName>
        <jsdl:CreationFlag>overwrite</jsdl:CreationFlag>
        <jsdl:Source>
            <jsdl:URI>script.sh</jsdl:URI>
        </jsdl:Source>
      </jsdl:DataStaging>
      <jsdl:DataStaging>
        <jsdl:FileName>PARALLEL_MPI_1.out</jsdl:FileName>
        <jsdl:CreationFlag>overwrite</jsdl:CreationFlag>
        <jsdl:Target>
            <jsdl:URI>PARALLEL_MPI_1.out</jsdl:URI>
        </jsdl:Target>
      </jsdl:DataStaging>
      <jsdl:Resources>
	   <jsdl:TotalCPUCount>
            <jsdl:Exact>2</jsdl:Exact>
         </jsdl:TotalCPUCount>
 	</jsdl:Resources>
      </jsdl:JobDescription>
</jsdl:JobDefinition>
EOF
$ cat > script.sh << EOF
#!/bin/bash

chmod a+x hello-mpi
module load openmpi
mpiexec ./hello-mpi
EOF
$ qcg-comp -i -J bash.xml 
Staging file: hello-mpi
Staging file: script.sh
All files staged in.
$ qcg-comp -c -J bash.xml 
Activity Id: 45caa67c-2973-4bac-93e8-58b331f492c7
$ qcg-comp -s -a 45caa67c-2973-4bac-93e8-58b331f492c7
status = Finished (exit status = 0)
$ qcg-comp -o -J bash.xml                            
File PARALLEL_MPI_1.out staged out.
All files staged out.
$ cat PARALLEL_MPI_1.out 
Process 0 on grass1.man.poznan.pl out of 2
Process 1 on grass1.man.poznan.pl out of 2

PARALLEL_MPI_2

Parallel Job Appliances must support the execution of MPI jobs that are compiled at submission time.

$ cat > hello-mpi.c  << EOF
#include <stdio.h>
#include <mpi.h>

int main(int argc, char *argv[]) {
  int numprocs, rank, namelen;
  char processor_name[MPI_MAX_PROCESSOR_NAME];

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name(processor_name, &namelen);

  printf("Process %d on %s out of %d\n", rank, processor_name, numprocs);

  MPI_Finalize();
}
EOF
$ cat > bash.xml  << EOF
<?xml version="1.0" encoding="UTF-8"?>

<jsdl:JobDefinition
 xmlns:jsdl="http://schemas.ggf.org/jsdl/2005/11/jsdl"
 xmlns:jsdl-hpcpa="http://schemas.ggf.org/jsdl/2006/07/jsdl-hpcpa"
 xmlns:jsdl-qcg-comp-factory="http://schemas.qoscosgrid.org/comp/2011/04/jsdl/factory">
   <jsdl:JobDescription>
      <jsdl:JobIdentification>
         <jsdl:JobName>PARALLEL_MPI_2</jsdl:JobName>
      </jsdl:JobIdentification>
      <jsdl:Application>
        <jsdl:ApplicationName>bash</jsdl:ApplicationName>
        <jsdl-hpcpa:HPCProfileApplication>
            <jsdl-hpcpa:Argument>script.sh</jsdl-hpcpa:Argument>
	    <jsdl-hpcpa:Output>PARALLEL_MPI_2.out</jsdl-hpcpa:Output>
        </jsdl-hpcpa:HPCProfileApplication>
      </jsdl:Application>      
      <jsdl:DataStaging>
        <jsdl:FileName>hello-mpi.c</jsdl:FileName>
        <jsdl:CreationFlag>overwrite</jsdl:CreationFlag>
        <jsdl:Source>
            <jsdl:URI>hello-mpi.c</jsdl:URI>
        </jsdl:Source>
      </jsdl:DataStaging>
      <jsdl:DataStaging>
        <jsdl:FileName>script.sh</jsdl:FileName>
        <jsdl:CreationFlag>overwrite</jsdl:CreationFlag>
        <jsdl:Source>
            <jsdl:URI>script.sh</jsdl:URI>
        </jsdl:Source>
      </jsdl:DataStaging>
      <jsdl:DataStaging>
        <jsdl:FileName>PARALLEL_MPI_2.out</jsdl:FileName>
        <jsdl:CreationFlag>overwrite</jsdl:CreationFlag>
        <jsdl:Target>
            <jsdl:URI>PARALLEL_MPI_2.out</jsdl:URI>
        </jsdl:Target>
      </jsdl:DataStaging>
      <jsdl:Resources>
	   <jsdl:TotalCPUCount>
            <jsdl:Exact>2</jsdl:Exact>
         </jsdl:TotalCPUCount>
 	</jsdl:Resources>
      </jsdl:JobDescription>
</jsdl:JobDefinition>
EOF
$ cat > script.sh << EOF
#!/bin/bash

module load openmpi
mpicc hello-mpi.c -o hello-mpi
mpiexec ./hello-mpi
EOF
$ qcg-comp -i -J bash.xml 
Staging file: hello-mpi.c
Staging file: script.sh
All files staged in.
$ qcg-comp -c -J bash.xml 
Activity Id: 45caa67c-2973-4bac-93e8-58b331f492c7
$ qcg-comp -s -a 45caa67c-2973-4bac-93e8-58b331f492c7
status = Finished (exit status = 0)
$ qcg-comp -o -J bash.xml                            
File PARALLEL_MPI_2.out staged out.
All files staged out.
$ cat PARALLEL_MPI_2.out 
Process 0 on grass1.man.poznan.pl out of 2
Process 1 on grass1.man.poznan.pl out of 2

PARALLEL_OMP_1

PARALLEL_OMP_2