[16] | 1 | #include <stdio.h> |
---|
| 2 | #include <unistd.h> |
---|
| 3 | #include <string.h> |
---|
| 4 | #include <drmaa.h> |
---|
| 5 | |
---|
| 6 | #define JOB_CHUNK 8 |
---|
| 7 | #define NBULKS 1 |
---|
| 8 | |
---|
| 9 | static drmaa_job_template_t *create_job_template(const char *job_path, int seconds, |
---|
| 10 | int as_bulk_job); |
---|
| 11 | |
---|
| 12 | int main(int argc, char *argv[]) |
---|
| 13 | { |
---|
| 14 | char diagnosis[DRMAA_ERROR_STRING_BUFFER]; |
---|
| 15 | const char *all_jobids[NBULKS*JOB_CHUNK + JOB_CHUNK+1]; |
---|
| 16 | char jobid[100]; |
---|
| 17 | int drmaa_errno, i, pos = 0; |
---|
| 18 | const char *job_path = NULL; |
---|
| 19 | drmaa_job_template_t *jt = NULL; |
---|
| 20 | |
---|
| 21 | if (argc<2) |
---|
| 22 | { |
---|
| 23 | fprintf(stderr, "usage: example <path-to-job>\n"); |
---|
| 24 | return 1; |
---|
| 25 | } |
---|
| 26 | |
---|
| 27 | job_path = argv[1]; |
---|
| 28 | if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) |
---|
| 29 | { |
---|
| 30 | fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis); |
---|
| 31 | return 1; |
---|
| 32 | } |
---|
| 33 | |
---|
| 34 | /* |
---|
| 35 | * submit some bulk jobs |
---|
| 36 | */ |
---|
| 37 | if (!(jt = create_job_template(job_path, 5, 1))) |
---|
| 38 | { |
---|
| 39 | fprintf(stderr, "create_job_template() failed\n"); |
---|
| 40 | return 1; |
---|
| 41 | } |
---|
| 42 | for (i=0; i<NBULKS; i++) |
---|
| 43 | { |
---|
| 44 | drmaa_job_ids_t *jobids = NULL; |
---|
| 45 | int j; |
---|
| 46 | while ((drmaa_errno=drmaa_run_bulk_jobs(&jobids, jt, 2, 8, 3, diagnosis, |
---|
| 47 | sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) |
---|
| 48 | { |
---|
| 49 | fprintf(stderr, "drmaa_run_bulk_jobs() failed - retry: %s %s\n", diagnosis, |
---|
| 50 | drmaa_strerror(drmaa_errno)); |
---|
| 51 | sleep(1); |
---|
| 52 | } |
---|
| 53 | |
---|
| 54 | if (drmaa_errno != DRMAA_ERRNO_SUCCESS) |
---|
| 55 | { |
---|
| 56 | fprintf(stderr, "drmaa_run_bulk_jobs() failed: %s %s\n", diagnosis, |
---|
| 57 | drmaa_strerror(drmaa_errno)); |
---|
| 58 | return 1; |
---|
| 59 | } |
---|
| 60 | printf("submitted bulk job with jobids:\n"); |
---|
| 61 | for (j=0; j<3; j++) |
---|
| 62 | { |
---|
| 63 | drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1); |
---|
| 64 | all_jobids[pos++] = strdup(jobid); |
---|
| 65 | printf("\t \"%s\"\n", jobid); |
---|
| 66 | } |
---|
| 67 | drmaa_release_job_ids(jobids); |
---|
| 68 | } |
---|
| 69 | drmaa_delete_job_template(jt, NULL, 0); |
---|
| 70 | /* |
---|
| 71 | * submit some sequential jobs |
---|
| 72 | */ |
---|
| 73 | if (!(jt = create_job_template(job_path, 5, 0))) |
---|
| 74 | { |
---|
| 75 | fprintf(stderr, "create_sleeper_job_template() failed\n"); |
---|
| 76 | return 1; |
---|
| 77 | } |
---|
| 78 | |
---|
| 79 | for (i=0; i<3; i++) |
---|
| 80 | { |
---|
| 81 | while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, |
---|
| 82 | sizeof(diagnosis)-1)) == |
---|
| 83 | DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) |
---|
| 84 | { |
---|
| 85 | fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis); |
---|
| 86 | sleep(1); |
---|
| 87 | } |
---|
| 88 | |
---|
| 89 | if (drmaa_errno != DRMAA_ERRNO_SUCCESS) |
---|
| 90 | { |
---|
| 91 | fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis); |
---|
| 92 | return 1; |
---|
| 93 | } |
---|
| 94 | |
---|
| 95 | printf("\t \"%s\"\n", jobid); |
---|
| 96 | all_jobids[pos++] = strdup(jobid); |
---|
| 97 | } |
---|
| 98 | /* set string array end mark */ |
---|
| 99 | /*all_jobids[pos] = NULL; |
---|
| 100 | drmaa_delete_job_template(jt, NULL, 0);*/ |
---|
| 101 | /* |
---|
| 102 | * synchronize with all jobs |
---|
| 103 | */ |
---|
| 104 | drmaa_errno = drmaa_synchronize(all_jobids, DRMAA_TIMEOUT_WAIT_FOREVER, 0, diagnosis, |
---|
| 105 | sizeof(diagnosis)-1); |
---|
| 106 | if (drmaa_errno != DRMAA_ERRNO_SUCCESS) |
---|
| 107 | { |
---|
| 108 | fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis); |
---|
| 109 | return 1; |
---|
| 110 | } |
---|
| 111 | printf("synchronized with all jobs\n"); |
---|
| 112 | |
---|
| 113 | /* |
---|
| 114 | * wait all those jobs |
---|
| 115 | */ |
---|
| 116 | for (pos=0; pos<6; pos++) |
---|
| 117 | { |
---|
| 118 | int stat; |
---|
| 119 | int aborted, exited, exit_status, signaled; |
---|
| 120 | drmaa_errno = drmaa_wait(all_jobids[pos], jobid, sizeof(jobid)-1, &stat,DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1); |
---|
| 121 | if (drmaa_errno != DRMAA_ERRNO_SUCCESS) |
---|
| 122 | { |
---|
| 123 | fprintf(stderr, "drmaa_wait(%s) failed: %s\n", all_jobids[pos], diagnosis); |
---|
| 124 | return 1; |
---|
| 125 | } |
---|
| 126 | /* |
---|
| 127 | * report how job finished |
---|
| 128 | */ |
---|
| 129 | drmaa_wifaborted(&aborted, stat, NULL, 0); |
---|
| 130 | if (aborted) |
---|
| 131 | { |
---|
| 132 | printf("job \"%s\" never ran\n", all_jobids[pos]); |
---|
| 133 | } |
---|
| 134 | else |
---|
| 135 | { |
---|
| 136 | drmaa_wifexited(&exited, stat, NULL, 0); |
---|
| 137 | if (exited) |
---|
| 138 | { |
---|
| 139 | drmaa_wexitstatus(&exit_status, stat, NULL, 0); |
---|
| 140 | printf("job \"%s\" finished regularly with exit status %d\n", |
---|
| 141 | all_jobids[pos], exit_status); |
---|
| 142 | } |
---|
| 143 | else |
---|
| 144 | { |
---|
| 145 | drmaa_wifsignaled(&signaled, stat, NULL, 0); |
---|
| 146 | if (signaled) |
---|
| 147 | { |
---|
| 148 | char termsig[DRMAA_SIGNAL_BUFFER+1]; |
---|
| 149 | drmaa_wtermsig(termsig, DRMAA_SIGNAL_BUFFER, stat, NULL, 0); |
---|
| 150 | printf("job \"%s\" finished due to signal %s\n", |
---|
| 151 | all_jobids[pos], termsig); |
---|
| 152 | } |
---|
| 153 | else |
---|
| 154 | { |
---|
| 155 | printf("job \"%s\" finished with unclear conditions\n", |
---|
| 156 | all_jobids[pos]); |
---|
| 157 | } |
---|
| 158 | } |
---|
| 159 | } |
---|
| 160 | } |
---|
| 161 | |
---|
| 162 | if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) |
---|
| 163 | { |
---|
| 164 | fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis); |
---|
| 165 | return 1; |
---|
| 166 | } |
---|
| 167 | return 0; |
---|
| 168 | } |
---|
| 169 | |
---|
| 170 | static drmaa_job_template_t *create_job_template(const char *job_path, int seconds, int as_bulk_job) |
---|
| 171 | { |
---|
| 172 | const char *job_argv[2]; |
---|
| 173 | drmaa_job_template_t *jt = NULL; |
---|
| 174 | char buffer[100]; |
---|
| 175 | if (drmaa_allocate_job_template(&jt, NULL, 0)!=DRMAA_ERRNO_SUCCESS) |
---|
| 176 | { |
---|
| 177 | return NULL; |
---|
| 178 | } |
---|
| 179 | /* run in users home directory */ |
---|
| 180 | drmaa_set_attribute(jt, DRMAA_WD, DRMAA_PLACEHOLDER_HD, NULL, 0); |
---|
| 181 | |
---|
| 182 | /* the job to be run */ |
---|
| 183 | drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, job_path, NULL, 0); |
---|
| 184 | /* the job's arguments */ |
---|
| 185 | sprintf(buffer, "%d", seconds); |
---|
| 186 | job_argv[0] = buffer; |
---|
| 187 | job_argv[1] = NULL; |
---|
| 188 | drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0); |
---|
| 189 | drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, "@core_limit = 100mb @comment = this is my very first job @class = workq", NULL, 0); |
---|
| 190 | /* join output/error file */ |
---|
| 191 | drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "y", NULL, 0); |
---|
| 192 | /* path for output */ |
---|
| 193 | if (!as_bulk_job) |
---|
| 194 | { |
---|
| 195 | drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":"DRMAA_PLACEHOLDER_HD"/DRMAA_JOB", NULL, 0); |
---|
| 196 | } |
---|
| 197 | else |
---|
| 198 | { |
---|
| 199 | drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH,":"DRMAA_PLACEHOLDER_HD"/DRMAA_JOB."DRMAA_PLACEHOLDER_INCR, NULL, 0); |
---|
| 200 | } |
---|
| 201 | return jt; |
---|
| 202 | } |
---|