source: trunk/drmaa_utils/drmaa_utils/job.h @ 1

Revision 1, 6.3 KB checked in by mmamonski, 13 years ago (diff)

Torque/PBS DRMAA initial commit

Line 
1/* $Id: job.h 255 2010-08-05 15:53:36Z mamonski $ */
2/*
3 * FedStage DRMAA utilities library
4 * Copyright (C) 2006-2008  FedStage Systems
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 */
19
20/**
21 * @file job.h
22 * Job managing functions.
23 */
24
25#ifndef __DRMAA_UTILS__JOB_H
26#define __DRMAA_UTILS__JOB_H
27
28#ifdef HAVE_CONFIG_H
29#       include <config.h>
30#endif
31
32#include <sys/time.h>
33
34#include <drmaa_utils/common.h>
35#include <drmaa_utils/thread.h>
36
37/**
38 * Create new job structure.
39 * @return Sole reference to newly created job.
40 */
41fsd_job_t *
42fsd_job_new( char *job_id );
43
/**
 * Job state flags.
 * Each flag occupies a single bit so that flags can be OR-ed together
 * (see fsd_job_s#flags).
 */
typedef enum {
        /**
         * Job is in queued state (either active or held in queue).
         */
        FSD_JOB_QUEUED             = 1<<0,
        /** Job is held in queue. */
        FSD_JOB_HOLD               = 1<<1,
        /** Job is running (suspended or not). */
        FSD_JOB_RUNNING            = 1<<2,
        /**
         * Set when job was suspended within session by
         * drmaa_control(DRMAA_CONTROL_SUSPEND).
         */
        FSD_JOB_SUSPENDED          = 1<<3,
        /**
         * Whether we know that job terminated and its status
         * is waiting to be reaped.
         */
        FSD_JOB_TERMINATED         = 1<<4,
        /**
         * It is known that job was aborted by user.
         */
        FSD_JOB_ABORTED            = 1<<5,

        /**
         * Job was submitted in current session.
         * It is set for all jobs which are or were
         * in fsd_drmaa_session_t#jobs set.
         */
        FSD_JOB_CURRENT_SESSION    = 1<<6,

        /**
         * Job is removed from session (but references to job still
         * exist).  No such job shall exist in session's submitted
         * jobs set.
         */
        FSD_JOB_DISPOSED           = 1<<7,

        /**
         * NOTE(review): undocumented in the original; presumably set
         * when the job was found to be missing from the DRM queue
         * (compare fsd_job_s#on_missing) - confirm.
         */
        FSD_JOB_MISSING            = 1<<8,

        /** Bits describing a queued job (queued and/or held). */
        FSD_JOB_QUEUED_MASK      = FSD_JOB_QUEUED | FSD_JOB_HOLD,
        /** Bits describing a running job (running and/or suspended). */
        FSD_JOB_RUNNING_MASK     = FSD_JOB_RUNNING | FSD_JOB_SUSPENDED,
        /** Bits describing a finished job (terminated and/or aborted). */
        FSD_JOB_TERMINATED_MASK  = FSD_JOB_TERMINATED | FSD_JOB_ABORTED,
        /**
         * Combination of hold, running, suspended and terminated bits.
         * NOTE(review): FSD_JOB_QUEUED and FSD_JOB_ABORTED are not part
         * of this mask although they appear in the masks above; the
         * value is kept as-is - confirm that the omission is intended.
         */
        FSD_JOB_STATE_MASK       = FSD_JOB_HOLD | FSD_JOB_RUNNING
                | FSD_JOB_SUSPENDED | FSD_JOB_TERMINATED
} fsd_job_flag_t;
91
92
93/** Submitted job data. */
94struct fsd_job_s {
95        /** Release reference to job. */
96        void (*release)( fsd_job_t *self );
97
98        /** Destroy job data. */
99        void (*destroy)( fsd_job_t *self );
100
101        /** Implements drmaa_control(). */
102        void (*control)( fsd_job_t *self, int action );
103
104        /** Fetch job state from DRM. */
105        void (*update_status)( fsd_job_t *self );
106
107        /** Return job termination status and resource usage for drmaa_wait(). */
108        void (*get_termination_status)( fsd_job_t *self,
109                        int *status, fsd_iter_t **rusage_out );
110
111        /**
112         * Called whenever job is detected to be missing from DRM queue
113         * while stil being within DRMAA session.
114         */
115        void (*on_missing)( fsd_job_t *self );
116
117        /**
118         * Points to next job in list.
119         * Used by #fsd_job_set_t.
120         */
121        fsd_job_t *next;
122
123        /** Number of references. */
124        int ref_cnt;
125
126        /** Job identifier (as null terminated string). */
127        char *job_id;
128
129        /** DRMAA session which job was submitted in. */
130        fsd_drmaa_session_t *session;
131
132        /**
133         * Time of last update of job status and rusage information
134         * (when status, exit_status, cpu_usage, mem_usage and flags
135         * fields was updated according to DRM).
136         */
137        time_t last_update_time;
138
139        /** Job state flags.  @see job_flag_t */
140        unsigned flags;
141
142        /**
143         * State of job (as returned by drmaa_job_ps())
144         * from last retrieval from DRM.
145         */
146        int state;
147
148        /** Exit status of job as from <tt>wait(2)</tt>. */
149        int exit_status;
150        /** Time of job submission (local). */
151        time_t submit_time;
152        /** Time when job started execution (taken from DRM). */
153        time_t start_time;
154        /** Time when job ended execution (taken from DRM). */
155        time_t end_time;
156        /** CPU time usage in seconds. */
157        long cpu_usage;
158        /** Resident memory usage in bytes. */
159        long mem_usage;
160        /** Virtual memory usage in bytes. */
161        long vmem_usage;
162        /** Total run time in seconds. */
163        long walltime;
164        /** List of space separated execution host names. */
165        char *execution_hosts;
166        /** Batch queue name */
167        char *queue;
168        /** Account string */
169        char *project;
170
171        /** Mutex for accessing fsd_job_s structure (beside #next pointer). */
172        fsd_mutex_t  mutex;
173        /** Job status changed condition. */
174        fsd_cond_t   status_cond;
175        /** Able to destroy condition variable (ref_cnt==1). */
176        fsd_cond_t   destroy_cond;
177};
178
179
180
181/** Create empty set of jobs. */
182fsd_job_set_t *
183fsd_job_set_new(void);
184
185struct fsd_job_set_s {
186        /** Destroy set of jobs (including contained job handles). */
187        void (*
188        destroy)( fsd_job_set_t *self );
189
190        /** Adds job to set. */
191        void (*
192        add)( fsd_job_set_t *self, fsd_job_t *job );
193
194        /** Remove job from set. */
195        void (*
196        remove)( fsd_job_set_t *self, fsd_job_t *job );
197
198        /**
199         * Finds job with given job_id.
200         * @param job_set Set of jobs to search in.
201         * @param job_id Job identifier.
202         * @return If successful job handle is returned
203         * and caller have exclusive access right to it.
204         * It should be released by #release().
205         * \c NULL is returned when job was not found.
206         */
207        fsd_job_t* (*
208        get)( fsd_job_set_t *self, const char *job_id );
209
210        /** Whether the set is empty. */
211        bool (*
212        empty)( fsd_job_set_t *self );
213
214        /**
215         * Find any job in set which was terminated (either successfully or not).
216         * It is usefull for drmaa_wait( DRMAA_JOB_IDS_ANY ) implementation.
217         * @param job_set Set of jobs to search in.
218         * @return New reference to terminated job
219         *   or \c NULL if no such job is present in set.
220         * @see fsd_job_find
221         */
222        fsd_job_t* (*
223        find_terminated)( fsd_job_set_t *self );
224
225        /**
226         * Return idenetifiers of all jobs in set.
227         * @param job_set Set of jobs.
228         * @return Vector of job idenetifiers
229         *   when done free it with fsd_free_vector.
230         */
231        char** (*
232        get_all_job_ids)( fsd_job_set_t *self );
233
234        void (*
235        signal_all)( fsd_job_set_t *self );
236
237        fsd_job_t    **tab;
238        size_t         tab_size;
239        uint32_t       tab_mask;
240        /** Number of jobs in set. */
241        unsigned       n_jobs;
242        /** Mutex for job set data (e.g. for adding/removing job from set). */
243        fsd_mutex_t    mutex;
244};
245
246#endif /* __DRMAA_UTILS__JOB_H */
Note: See TracBrowser for help on using the repository browser.