source: drmaa_utils/trunk/drmaa_utils/drmaa_run.c @ 40

Revision 40, 14.1 KB checked in by mmamonski, 19 months ago (diff)

Dont be -pedantic

  • Property svn:keywords set to Id
Line 
1/* $Id$ */
2/*
3 * HPC-BASH - part of the DRMAA utilities library
4 * Poznan Supercomputing and Networking Center Copyright (C) 2011
5 *
6 *  This program is free software: you can redistribute it and/or modify
7 *  it under the terms of the GNU General Public License as published by
8 *  the Free Software Foundation, either version 3 of the License, or
9 *  (at your option) any later version.
10 *
11 *  This program is distributed in the hope that it will be useful,
12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 *  GNU General Public License for more details.
15 *
16 *  You should have received a copy of the GNU General Public License
17 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 */
19
20
#include <drmaa_utils/drmaa.h>
#include <drmaa_utils/logging.h>
#include <drmaa_utils/exception.h>
#include <drmaa_utils/xmalloc.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <dlfcn.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
32
33
34
35#define DRMAA_LIBRARY_PATH "DRMAA_LIBRARY_PATH"
36
37typedef int (*drmaa_init_function_t)(const char *, char *, size_t );
38typedef int (*drmaa_exit_function_t)(char *, size_t );
39typedef int (*drmaa_allocate_job_template_function_t)(drmaa_job_template_t **, char *, size_t);
40typedef int (*drmaa_delete_job_template_function_t)(drmaa_job_template_t *, char *, size_t);
41typedef int (*drmaa_set_attribute_function_t)(drmaa_job_template_t *, const char *, const char *, char *, size_t);
42typedef int (*drmaa_get_attribute_function_t)(drmaa_job_template_t *, const char *, char *, size_t , char *, size_t);
43typedef int (*drmaa_set_vector_attribute_function_t)(drmaa_job_template_t *, const char *, const char *[], char *, size_t);
44typedef int (*drmaa_get_vector_attribute_function_t)(drmaa_job_template_t *, const char *, drmaa_attr_values_t **, char *, size_t);
45typedef int (*drmaa_run_job_function_t)(char *, size_t, const drmaa_job_template_t *, char *, size_t);
46typedef int (*drmaa_control_function_t)(const char *, int, char *, size_t);
47typedef int (*drmaa_job_ps_function_t)(const char *, int *, char *, size_t);
48typedef int (*drmaa_wait_function_t)(const char *, char *, size_t, int *, signed long, drmaa_attr_values_t **, char *, size_t);
49typedef int (*drmaa_wifexited_function_t)(int *, int, char *, size_t);
50typedef int (*drmaa_wexitstatus_function_t)(int *exit_status, int, char *, size_t);
51typedef int (*drmaa_wifsignaled_function_t)(int *signaled, int, char *, size_t);
52typedef int (*drmaa_wtermsig_function_t)(char *signal, size_t signal_len, int, char *, size_t);
53typedef int (*drmaa_wcoredump_function_t)(int *core_dumped, int, char *, size_t);
54typedef int (*drmaa_wifaborted_function_t)(int *aborted, int, char *, size_t);
55typedef char (*drmaa_strerror_function_t)(int);
56typedef int (*drmaa_get_contact_function_t)(char *, size_t , char *, size_t);
57typedef int (*drmaa_version_function_t)(unsigned int *, unsigned int *, char *, size_t);
58typedef int (*drmaa_get_DRM_system_function_t)(char *, size_t, char *, size_t);
59typedef int (*drmaa_get_DRMAA_implementation_function_t)(char *, size_t, char *, size_t);
60
61typedef struct
62{
63        drmaa_init_function_t init;
64        drmaa_exit_function_t exit;
65        drmaa_allocate_job_template_function_t allocate_job_template;
66        drmaa_delete_job_template_function_t delete_job_template;
67        drmaa_set_attribute_function_t set_attribute;
68        drmaa_get_attribute_function_t get_attribute;
69        drmaa_set_vector_attribute_function_t set_vector_attribute;
70        drmaa_get_vector_attribute_function_t get_vector_attribute;
71        drmaa_run_job_function_t run_job;
72        drmaa_control_function_t control;
73        drmaa_job_ps_function_t job_ps;
74        drmaa_wait_function_t wait;
75        drmaa_wifexited_function_t wifexited;
76        drmaa_wexitstatus_function_t wexitstatus;
77        drmaa_wifsignaled_function_t wifsignaled;
78        drmaa_wtermsig_function_t wtermsig;
79        drmaa_wcoredump_function_t wcoredump;
80        drmaa_wifaborted_function_t wifaborted;
81        drmaa_strerror_function_t strerror;
82        drmaa_get_contact_function_t get_contact;
83        drmaa_version_function_t version;
84        drmaa_get_DRM_system_function_t get_DRM_system;
85        drmaa_get_DRMAA_implementation_function_t get_DRMAA_implementation;
86        void *handle;
87} fsd_drmaa_api_t;
88
/*
 * Options of a single drmaa-run invocation, as produced by parse_args().
 *
 * parse_args() zero-fills the structure and then sets only
 * native_specification, command, command_args and command_argc; the
 * remaining members are not assigned anywhere in this file.
 * All pointers refer into the caller's argv and are not owned.
 */
typedef struct
{
	char *native_specification;	/* text following "-native=", or NULL */
	char *walltime;			/* not set by parse_args() */
	char *rusage_file;		/* not set by parse_args() */
	bool interactive;		/* not set by parse_args() */
	bool print_rusage;		/* not set by parse_args() */
	char *command;			/* command to submit as the job */
	char **command_args;		/* arguments following the command */
	int command_argc;		/* number of entries in command_args */
} fsd_drmaa_run_opt_t;
100
101
102static fsd_drmaa_api_t load_drmaa();
103static void unload_drmaa(fsd_drmaa_api_t *drmaa_api);
104
105static fsd_drmaa_run_opt_t parse_args(int argc, char **argv);
106
107static int run_and_wait(fsd_drmaa_api_t drmaa_api, fsd_drmaa_run_opt_t run_opt);
108
109int main(int argc, char **argv)
110{
111        fsd_drmaa_api_t drmaa_api = { .handle = NULL };
112        fsd_drmaa_run_opt_t run_opt;
113        int status = -1;
114
115        fsd_log_enter(("(argc=%d)", argc));
116
117        TRY
118         {
119                drmaa_api = load_drmaa();
120                run_opt = parse_args(argc,argv);
121                status = run_and_wait(drmaa_api, run_opt);
122         }
123        EXCEPT_DEFAULT
124         {
125                fsd_log_fatal(("Error"));
126         }
127        FINALLY
128         {
129                unload_drmaa(&drmaa_api);
130         }
131        END_TRY
132
133        exit(status);
134}
135
136
137fsd_drmaa_api_t load_drmaa()
138{
139        fsd_drmaa_api_t api;
140        const char *path_to_drmaa = getenv(DRMAA_LIBRARY_PATH);
141
142        fsd_log_enter(("(path=%s)", path_to_drmaa));
143
144        memset(&api, 0, sizeof(api));
145
146        if (!path_to_drmaa) {
147#ifdef __APPLE__
148                path_to_drmaa = DRMAA_DIR_PREFIX"/lib/libdrmaa.dylib";
149#else
150                path_to_drmaa = DRMAA_DIR_PREFIX"/lib/libdrmaa.so";
151#endif
152        }
153
154        api.handle = dlopen(path_to_drmaa, RTLD_LAZY | RTLD_GLOBAL);
155
156        if (!api.handle) {
157                const char *msg = dlerror();
158
159                if (!msg)
160                        fsd_log_fatal(("Could not load DRMAA library: %s (DRMAA_LIBRARY_PATH=%s)\n", msg, path_to_drmaa));
161                else
162                        fsd_log_fatal(("Could not load DRMAA library (DRMAA_LIBRARY_PATH=%s)\n", path_to_drmaa));
163
164                fsd_exc_raise_code(FSD_ERRNO_INVALID_VALUE);
165        }
166
167        if ((api.init = (drmaa_init_function_t)dlsym(api.handle, "drmaa_init")) == 0)
168                goto fault;
169        if ((api.exit = (drmaa_exit_function_t)dlsym(api.handle, "drmaa_exit")) == 0)
170                goto fault;
171        if ((api.allocate_job_template = (drmaa_allocate_job_template_function_t)dlsym(api.handle, "drmaa_allocate_job_template")) == 0)
172                goto fault;
173        if ((api.delete_job_template = (drmaa_delete_job_template_function_t)dlsym(api.handle, "drmaa_delete_job_template")) == 0)
174                goto fault;
175        if ((api.set_attribute = (drmaa_set_attribute_function_t)dlsym(api.handle, "drmaa_set_attribute")) == 0)
176                goto fault;
177        if ((api.get_attribute = (drmaa_get_attribute_function_t)dlsym(api.handle, "drmaa_get_attribute")) == 0)
178                goto fault;
179        if ((api.set_vector_attribute = (drmaa_set_vector_attribute_function_t)dlsym(api.handle, "drmaa_set_vector_attribute")) == 0)
180                goto fault;
181        if ((api.get_vector_attribute = (drmaa_get_vector_attribute_function_t)dlsym(api.handle, "drmaa_get_vector_attribute")) == 0)
182                goto fault;
183        if ((api.run_job = (drmaa_run_job_function_t)dlsym(api.handle, "drmaa_run_job")) == 0)
184                goto fault;
185        if ((api.control = (drmaa_control_function_t)dlsym(api.handle, "drmaa_control")) == 0)
186                goto fault;
187        if ((api.job_ps = (drmaa_job_ps_function_t)dlsym(api.handle, "drmaa_job_ps")) == 0)
188                goto fault;
189        if ((api.wait = (drmaa_wait_function_t)dlsym(api.handle, "drmaa_wait")) == 0)
190                goto fault;
191        if ((api.wifexited = (drmaa_wifexited_function_t)dlsym(api.handle, "drmaa_wifexited")) == 0)
192                goto fault;
193        if ((api.wexitstatus = (drmaa_wexitstatus_function_t)dlsym(api.handle, "drmaa_wexitstatus")) == 0)
194                goto fault;
195        if ((api.wifsignaled = (drmaa_wifsignaled_function_t)dlsym(api.handle, "drmaa_wifsignaled")) == 0)
196                goto fault;
197        if ((api.wtermsig = (drmaa_wtermsig_function_t)dlsym(api.handle, "drmaa_wtermsig")) == 0)
198                goto fault;
199        if ((api.wcoredump = (drmaa_wcoredump_function_t)dlsym(api.handle, "drmaa_wcoredump")) == 0)
200                goto fault;
201        if ((api.wifaborted = (drmaa_wifaborted_function_t)dlsym(api.handle, "drmaa_wifaborted")) == 0)
202                goto fault;
203        if ((api.strerror = (drmaa_strerror_function_t)dlsym(api.handle, "drmaa_strerror")) == 0)
204                goto fault;
205        if ((api.get_contact = (drmaa_get_contact_function_t)dlsym(api.handle, "drmaa_get_contact")) == 0)
206                goto fault;
207        if ((api.version = (drmaa_version_function_t)dlsym(api.handle, "drmaa_version")) == 0)
208                goto fault;
209        if ((api.get_DRM_system = (drmaa_get_DRM_system_function_t)dlsym(api.handle, "drmaa_get_DRM_system")) == 0)
210                goto fault;
211        if ((api.get_DRMAA_implementation = (drmaa_get_DRMAA_implementation_function_t)dlsym(api.handle, "drmaa_get_DRMAA_implementation")) == 0)
212                goto fault;
213
214        return api;
215
216fault:
217        fsd_log_fatal(("Failed to dlsym DRMAA function"));
218
219        if (api.handle)
220                dlclose(api.handle);
221
222        /*make invalid */
223        memset(&api, 0, sizeof(api));
224
225        return api;
226}
227
228void unload_drmaa(fsd_drmaa_api_t *drmaa_api_handle)
229{
230        fsd_log_enter(("()"));
231       
232        if (drmaa_api_handle->handle)
233                dlclose(drmaa_api_handle->handle);
234}
235
236static fsd_drmaa_run_opt_t parse_args(int argc, char **argv)
237{
238        fsd_drmaa_run_opt_t options;
239
240        memset(&options, 0, sizeof(options));
241
242        argv++;
243        argc--;
244
245        while (argc >= 0 && argv[0][0] == '-')
246        {
247
248                if (strncmp(argv[0],"-native=", 8) == 0) {
249                        options.native_specification = argv[0] + 8;
250                        fsd_log_info(("native specification = '%s'", options.native_specification));
251                } else {
252                        fsd_log_fatal(("unknown option: %s", argv[0]));
253                        exit(1); /* TODO exception */
254                }
255
256                argv++;
257                argc--;
258        }
259
260        /* TODO arg count check */
261        options.command = argv[0];
262        argv++;
263        argc--;
264
265        options.command_args = argv;
266        options.command_argc = argc;
267
268        return options;
269}
270
271int run_and_wait(fsd_drmaa_api_t api, fsd_drmaa_run_opt_t run_opt)
272{
273        char working_directory[1024] = ".";
274        drmaa_job_template_t *jt = NULL;
275        char errbuf[DRMAA_ERROR_STRING_BUFFER] = "";
276        char stdin_name[1048] = "";
277        char stdout_name[1048] = "";
278        char stderr_name[1048] = "";
279        char jobid[DRMAA_JOBNAME_BUFFER] = "";
280        int status;
281
282        extern char **environ;
283
284
285        if ((api.init(NULL, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS))
286                goto fault;
287        if ((api.allocate_job_template(&jt, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS))
288                goto fault;
289
290
291        if ((api.set_attribute(jt, DRMAA_REMOTE_COMMAND, run_opt.command, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS)) goto fault;
292
293        /*  args */
294        if (run_opt.command_argc > 0) {
295                char **args_vector = NULL;
296                int i;
297
298
299                fsd_calloc(args_vector, run_opt.command_argc + 1, char *);
300
301                for (i = 0; i < run_opt.command_argc; i++) {
302                        args_vector[i] = run_opt.command_args[i];
303                }
304
305                if ((api.set_vector_attribute(jt, DRMAA_V_ARGV, (const char **) args_vector, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS)) goto fault;
306        }
307       
308        unsetenv("module");
309
310        /*  environment */
311        if ((api.set_vector_attribute(jt, DRMAA_V_ENV, (const char **) environ, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS)) goto fault;
312
313        /*  working directory */
314        getcwd(working_directory, sizeof(working_directory));
315
316        if ((api.set_attribute(jt, DRMAA_WD, working_directory, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS)) goto fault;
317
318        if (run_opt.native_specification && (api.set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, run_opt.native_specification, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS)) goto fault;
319
320
321        /* stdout.PID stderr.PID */
322        sprintf(stdin_name, ":%s/.stdin.%u", working_directory, (unsigned int) getpid());
323        sprintf(stdout_name, ":%s/.stdout.%u", working_directory, (unsigned int) getpid());
324        sprintf(stderr_name, ":%s/.stderr.%u", working_directory, (unsigned int) getpid());
325
326
327        /* read stdin */
328        if (! isatty(0)) {
329                int fd = -1;
330                char buf[1024] = "";
331                int bread = -1;
332
333                if ((fd = open(stdin_name + 1, O_WRONLY | O_EXCL | O_CREAT, 0600)) < 0) {
334                        perror("open failed:");
335                        exit(3);
336                }
337
338                while ((bread = read(0, buf, sizeof(buf))) > 0 ) {
339                        write(fd, buf, bread);
340                }
341
342                close(fd);
343
344                if (api.set_attribute(jt, DRMAA_INPUT_PATH, stdin_name, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS)
345                        goto fault;
346        }
347
348        if (api.set_attribute(jt, DRMAA_OUTPUT_PATH, stdout_name, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS) goto fault;
349        if (api.set_attribute(jt, DRMAA_ERROR_PATH, stderr_name, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS) goto fault;
350
351        /* run */
352        if (api.run_job(jobid, sizeof(jobid) - 1, jt, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS) {
353                fsd_log_fatal(("Failed to submit a job: %s ", errbuf));
354                exit(2); /* TODO exception */
355        }
356
357        /* wait */
358
359        if (api.wait(jobid, NULL, 0, &status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS) {
360                fsd_log_fatal(("Failed to wait for a job %s: %s ", jobid, errbuf));
361                exit(132); /* TODO Exception */
362        }
363
364        /*  print stdout and stderr */
365        {
366                char buf[1024] = "";
367                struct stat stat_buf;
368                int breads;
369                int tries_count = 0;
370
371
372                fsd_log_info(("opening stdout file: %s", stdout_name));
373retry1:
374                if (stat(stdout_name + 1, &stat_buf) == -1) {
375                        if (tries_count > 3)
376                                fsd_log_fatal(("Failed to get stdout (%s) of job %s", stdout_name + 1, jobid));
377                        else {
378                                sleep(3);
379                                tries_count++;
380                                goto retry1;
381                        }
382                } else {
383                        int fd = open(stdout_name + 1, O_RDONLY);
384
385                        if (fd < 0) { perror("open failed"); exit(3); }
386
387                        fsd_log_info(("opened stdout file:%s", stdout_name));
388
389                        while ((breads = read(fd, buf, sizeof(buf))) > 0) {
390                                write(1, buf, breads);
391                        }
392
393                        close(fd);
394
395                        unlink(stdout_name + 1);
396                }
397retry2:
398                if (stat(stderr_name + 1, & stat_buf) == -1) {
399                        if (tries_count > 3)
400                                fsd_log_fatal(("Failed to get stderr (%s) of job %s\n", stderr_name + 1, jobid));
401                        else {
402                                sleep(3);
403                                tries_count++;
404                                goto retry2;
405                        }
406                } else {
407                        int fd = open(stderr_name + 1, O_RDONLY);
408
409                        if (fd < 0) { perror("open failed"); exit(3); }
410
411                        while ((breads = read(fd, buf, sizeof(buf))) > 0) {
412                                write(2, buf, breads);
413                        }
414
415                        close(fd);
416
417                        unlink(stderr_name + 1);
418                }
419
420        }
421
422        if (strlen(stdin_name) != 0) {
423                unlink(stdin_name + 1);
424        }
425
426        /* exit with appropriate code */
427        {
428                int exited = 0;
429                int signaled = 0;
430                int exit_status = 1;
431
432
433                if (api.wifexited(&exited, status, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS) {
434                        goto fault;
435                }
436
437                if (api.wifsignaled(&signaled, status, errbuf, sizeof(errbuf) - 1) != DRMAA_ERRNO_SUCCESS) {
438                        goto fault;
439                }
440
441                if (exited) {
442                        (void) api.wexitstatus(&exit_status, status, errbuf, sizeof(errbuf) - 1);
443                } else {
444                        if (signaled) {
445                                exit_status = 128;
446                        } else {
447                                exit_status = 1;
448                        }
449                }
450
451                api.exit(errbuf, sizeof(errbuf) - 1);
452                fsd_log_info(("exit_status = %d", exit_status));
453                return exit_status;
454        }
455fault:
456        fsd_log_fatal(("Error"));
457        return 1;
458}
459
Note: See TracBrowser for help on using the repository browser.