source: trunk/drmaa_utils/drmaa_utils/session.h @ 1

Revision 1, 8.2 KB checked in by mmamonski, 13 years ago (diff)

Torque/PBS DRMAA initial commit

Line 
1/* $Id: session.h 2 2009-10-12 09:51:22Z mamonski $ */
2/*
3 *  FedStage DRMAA utilities library
4 *  Copyright (C) 2006-2008  FedStage Systems
5 *
6 *  This program is free software: you can redistribute it and/or modify
7 *  it under the terms of the GNU General Public License as published by
8 *  the Free Software Foundation, either version 3 of the License, or
9 *  (at your option) any later version.
10 *
11 *  This program is distributed in the hope that it will be useful,
12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 *  GNU General Public License for more details.
15 *
16 *  You should have received a copy of the GNU General Public License
17 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 */
19
20/**
21 * @file session.h
22 * DRMAA session.
23 */
24
25#ifndef __DRMAA_UTILS__SESSION_H
26#define __DRMAA_UTILS__SESSION_H
27
28#include <sys/time.h>
29
30#include <drmaa_utils/common.h>
31#include <drmaa_utils/thread.h>
32
33/** Creates a new DRMAA session for the given @p contact string. */
34fsd_drmaa_session_t *
35fsd_drmaa_session_new( const char *contact );
36
37/**
38 * Get the current (global) DRMAA session
39 * and increase its reference count.
40 */
41fsd_drmaa_session_t *
42fsd_drmaa_session_get(void);
43
44/**
45 * Action to take when a submitted job is missing from the DRM queue.
46 */
47typedef enum {
48        /**
49         * Always raise #FSD_ERRNO_INTERNAL_ERROR (one per job)
50         * in drmaa_wait() or drmaa_synchronize()
51         * when a job's disappearance is detected.
52         */
53        FSD_REVEAL_MISSING_JOBS,
54        /**
55         * Treat missing jobs as terminated, with a status
56         * depending on the last known state and action (drmaa_control()).
57         */
58        FSD_IGNORE_MISSING_JOBS,
59        /**
60         * Treat missing jobs as terminated,
61         * but only if they had not yet entered the running state.
62         */
63        FSD_IGNORE_QUEUED_MISSING_JOBS
64} fsd_missing_jobs_behaviour_t;
65
66
67/** DRMAA session data: operations (function pointers) followed by session state. */
68struct fsd_drmaa_session_s {
69        /** Release one reference to the DRMAA session. */
70        void (*
71        release)( fsd_drmaa_session_t *self );
72
73        /**
74         * Destroy session after ensuring no other references to it are held.
75         *
76         * May block until the reference count decreases to 1.
77         * May return before actual session destruction if another thread
78         * has already taken responsibility for freeing the session.
79         */
80        void (*
81        destroy)(
82                        fsd_drmaa_session_t *self );
83
84        /**
85         * Destroy session without waiting for references to be released.
86         */
87        void (*
88        destroy_nowait)( fsd_drmaa_session_t *self );
89
90
91        /** Implements drmaa_run_job(). */
92        char* (*
93        run_job)(
94                        fsd_drmaa_session_t *self,
95                        const fsd_template_t *jt
96                        );
97
98        /** Implements drmaa_run_bulk_jobs(). */
99        fsd_iter_t* (*
100        run_bulk)(
101                        fsd_drmaa_session_t *self,
102                        const fsd_template_t *jt,
103                        int start, int end, int incr
104                        );
105
106        /** Implements drmaa_control(). */
107        void (*
108        control_job)(
109                        fsd_drmaa_session_t *self,
110                        const char *job_id, int action
111                        );
112
113        /** Implements drmaa_job_ps(). */
114        void (*
115        job_ps)(
116                        fsd_drmaa_session_t *self,
117                        const char *job_id, int *remote_ps
118                        );
119
120        /** Implements drmaa_synchronize(). */
121        void (*
122        synchronize)(
123                        fsd_drmaa_session_t *self,
124                        const char **input_job_ids, const struct timespec *timeout,
125                        bool dispose
126                        );
127
128        /**
129         * Implements the drmaa_wait() call.
130         * @param self     DRMAA session.
131         * @param job_id   Job identifier to wait for or #DRMAA_JOB_IDS_SESSION_ANY
132         * @param timeout  Timeout after which the call shall raise
133         *   #FSD_DRMAA_ERRNO_EXIT_TIMEOUT if the job has not terminated, or \c NULL.
134         * @param status   Terminated job status (output only).
135         * @param rusage   Job resource usage (output only).
136         * @see drmaa_wait()
137         * @see #wait_for_single_job  #wait_for_any_job
138         */
139        char* (*
140        wait)(
141                        fsd_drmaa_session_t *self,
142                        const char *job_id, const struct timespec *timeout,
143                        int *status, fsd_iter_t **rusage
144                        );
145
146        /**
147         * Construct a new job object.
148         * @return Reference to the newly created job.
149         */
150        fsd_job_t* (*
151        new_job)( fsd_drmaa_session_t *self, const char *job_id );
152        /** NOTE(review): undocumented; presumably the submission routine shared by #run_job and #run_bulk - confirm. */
153        char* (*
154        run_impl)(
155                        fsd_drmaa_session_t *self,
156                        const fsd_template_t *jt, int bulk_incr
157                        );
158
159        /**
160         * Wait until the given job terminates (either successfully or not).
161         * @param self     DRMAA session.
162         * @param job_id   Identifier of job to wait for.
163         * @param timeout  If not \c NULL and the job does not terminate
164         *   in the given amount of time, the function returns and
165         *   #FSD_DRMAA_ERRNO_EXIT_TIMEOUT is raised.
166         * @param status   If not \c NULL job status code is stored here.
167         * @param rusage   If not \c NULL list of used resources is returned.
168         * @param dispose  If \c true job information is removed from session
169         *   at the end of call and further accesses to this job_id will
170         *   raise #FSD_DRMAA_ERRNO_INVALID_JOB.  Otherwise job data is held.
171         * @see #wait_for_any_job #wait
172         */
173        void (*
174        wait_for_single_job)(
175                        fsd_drmaa_session_t *self,
176                        const char *job_id, const struct timespec *timeout,
177                        int *status, fsd_iter_t **rusage, bool dispose
178                        );
179
180        /**
181         * Wait until any job left in the session terminates.
182         * @return  Identifier of the waited job.
183         *   Freeing responsibility is left to the caller.
184         * @see #wait_for_single_job #wait
185         */
186        char* (*
187        wait_for_any_job)(
188                        fsd_drmaa_session_t *self,
189                        const struct timespec *timeout,
190                        int *status, fsd_iter_t **rusage,
191                        bool dispose
192                        );
193        /** NOTE(review): undocumented; presumably blocks on @p wait_condition (with @p mutex) until a job status change or @p timeout - confirm. */
194        void (*
195        wait_for_job_status_change)(
196                        fsd_drmaa_session_t *self,
197                        fsd_cond_t *wait_condition,
198                        fsd_mutex_t *mutex,
199                        const struct timespec *timeout
200                        );
201        /** NOTE(review): undocumented; presumably the body of the separate waiting thread (see #enable_wait_thread) - confirm. */
202        void* (*
203        wait_thread)( fsd_drmaa_session_t *self );
204        /** NOTE(review): undocumented; presumably stops the thread running #wait_thread - confirm. */
205        void (*
206        stop_wait_thread)( fsd_drmaa_session_t *self );
207
208        /**
209         * Make the status of all jobs held in the session up to date.
210         */
211        void (*
212        update_all_jobs_status)( fsd_drmaa_session_t *self );
213
214        /** Return list of all jobs within session. */
215        char** (*
216        get_submited_job_ids)(
217                        fsd_drmaa_session_t *self
218                        );
219
220        /**
221         * Get job with given identifier.
222         * @return New reference to job or \c NULL
223         *   if no job found.
224         */
225        fsd_job_t* (*
226        get_job)(
227                        fsd_drmaa_session_t *self, const char *job_id
228                        );
229
230        /**
231         * Tries to load configuration from the following locations in order
232         * (configuration settings from later locations override
233         * those from earlier):
234         * /etc/basename.conf, ~/.basename.conf, $basename_CONF.
235         */
236        void (*
237        load_configuration)(
238                        fsd_drmaa_session_t *self, const char *basename
239                        );
240        /** NOTE(review): undocumented; presumably reads settings from @p filename or from the @p configuration buffer of @p config_len bytes - confirm. */
241        void (*
242        read_configuration)(
243                        fsd_drmaa_session_t *self,
244                        const char *filename, bool must_exist,
245                        const char *configuration, size_t config_len
246                        );
247        /** NOTE(review): undocumented; presumably transfers values from #configuration into the session fields below - confirm. */
248        void (*
249        apply_configuration)(
250                        fsd_drmaa_session_t *self
251                        );
252
253        /** Reference counter. */
254        int ref_cnt;
255
256        /**
257         * Whether #destroy was called and is waiting for references
258         * to be released.  If set, pending #wait calls shall
259         * return with #FSD_DRMAA_ERRNO_NO_ACTIVE_SESSION error.
260         */
261        bool destroy_requested;
262
263        /**
264         * Copy of contact string passed to drmaa_init()
265         * or default contact if drmaa_init() was called with \c NULL.
266         */
267        char *contact;
268
269        /**
270         * Jobs submitted within this DRMAA session
271         * and not disposed yet.
272         */
273        fsd_job_set_t *jobs;
274
275        /** DRMAA configuration. */
276        fsd_conf_dict_t *configuration;
277
278        /** Queue polling delay (time delta). */
279        struct timespec pool_delay;
280
281        /**
282         * Cache job state for a number of seconds.
283         * If positive, drmaa_job_ps() returns the remembered state without
284         * communicating with DRM for \a cache_job_state seconds since last
285         * update.
286         */
287        int cache_job_state;
288
289        /** Whether to wait for jobs in a separate thread. */
290        bool enable_wait_thread;
291
292        /**
293         * Configuration dictionary which maps
294         * job categories to native specification.
295         */
296        fsd_conf_dict_t *job_categories;
297
298        /**
299         * How to behave when a submitted job disappears from the DRM queue.
300         */
301        fsd_missing_jobs_behaviour_t missing_jobs;
302
303        fsd_mutex_t mutex; /**< Mutex for accessing session data. */
304        fsd_cond_t wait_condition;  /**< Condition variable for drmaa_wait() */
305        fsd_cond_t destroy_condition;  /**< Condition variable for ref_cnt==1 */
306
307        /**
308         * Mutex for accessing connection to DRM.
309         *
310         * To prevent deadlocks #mutex should be acquired first
311         * when both session data and DRM connection are needed.
312         */
313        fsd_mutex_t drm_connection_mutex;
314        /** NOTE(review): the three fields below are undocumented; presumably handle and state flags of the #wait_thread thread - confirm. */
315        fsd_thread_t wait_thread_handle;
316        bool wait_thread_started;
317        bool wait_thread_run_flag;
318};
319
320/** Context pairing a DRMAA session pointer with a mutex. */
321typedef struct fsd_drmaa_ctx_s {
322        fsd_drmaa_session_t *session; /**< Session pointer. NOTE(review): presumably the current global session - confirm. */
323        fsd_mutex_t session_mutex; /**< NOTE(review): presumably guards #session - confirm. */
324       
325} fsd_drmaa_ctx_t;
326
327/** Context object with external linkage; its definition is not in this header. */
328extern fsd_drmaa_ctx_t fsd_drmaa_ctx;
329
330#endif /* __DRMAA_UTILS__SESSION_H */
331
Note: See TracBrowser for help on using the repository browser.