Changeset 83 for trunk/pbs_drmaa
- Timestamp: 01/07/13 17:03:23 (12 years ago)
- Location: trunk/pbs_drmaa
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
trunk/pbs_drmaa/job.c
r71 r83 319 319 struct attrl *i = NULL; 320 320 char pbs_state = 0; 321 int exit_status = - 2;321 int exit_status = -101; 322 322 const char *cpu_usage = NULL; 323 323 const char *mem_usage = NULL; … … 394 394 fsd_log_debug(( "pbs_state: %c", pbs_state )); 395 395 396 if( exit_status != - 2)396 if( exit_status != -101 ) 397 397 { 398 398 fsd_log_debug(( "exit_status: %d", exit_status )); 399 399 self->exit_status = exit_status; 400 401 if (self->exit_status < 0) 402 { 403 self->exit_status = -1; 404 fsd_log_error(("ExitStatus = %d, probably system problem, report job %s to the local administrator", exit_status, self->job_id)); 405 } 406 400 407 } 401 408 if(pbs_state){ … … 405 412 self->flags &= FSD_JOB_TERMINATED_MASK; 406 413 self->flags |= FSD_JOB_TERMINATED; 407 if (exit_status != - 2) { /* has exit code */414 if (exit_status != -101) { /* has exit code */ 408 415 if( self->exit_status == 0) 409 416 self->state = DRMAA_PS_DONE; -
trunk/pbs_drmaa/pbs_conn.c
r76 r83 54 54 55 55 static void pbsdrmaa_pbs_reconnect_internal( pbsdrmaa_pbs_conn_t *self, bool reconnect); 56 57 static void pbsdrmaa_pbs_check_connect_internal( pbsdrmaa_pbs_conn_t *self, bool reconnect); 58 59 #define IS_TRANSIENT_ERROR (pbs_errno == PBSE_PROTOCOL || pbs_errno == PBSE_EXPIRED || pbs_errno == PBSOLDE_PROTOCOL || pbs_errno == PBSOLDE_EXPIRED) 60 56 61 57 62 pbsdrmaa_pbs_conn_t * … … 80 85 self->connection_fd = -1; 81 86 self->last_usage = time(NULL); 82 83 /*ignore SIGPIPE - other iwse pbs_disconnect cause the program to exit */87 88 /*ignore SIGPIPE - otherwise pbs_disconnect cause the program to exit */ 84 89 signal(SIGPIPE, SIG_IGN); 85 90 … … 111 116 { 112 117 fsd_log_enter(("")); 118 113 119 TRY 114 120 { … … 134 140 pbsdrmaa_pbs_submit( pbsdrmaa_pbs_conn_t *self, struct attropl *attrib, char *script, char *destination ) 135 141 { 136 137 142 char *volatile job_id = NULL; 143 volatile bool first_try = true; 144 volatile bool conn_lock = false; 145 146 fsd_log_enter(("")); 147 148 TRY 149 { 150 conn_lock = fsd_mutex_lock(&self->session->super.drm_connection_mutex); 151 152 pbsdrmaa_pbs_reconnect_internal(self, false); 153 154 retry: 155 job_id = pbs_submit(self->connection_fd, attrib, script, destination, NULL); 156 157 fsd_log_info(("pbs_submit(%s, %s) = %s", script, destination, job_id)); 158 159 if(job_id == NULL) 160 { 161 fsd_log_error(( "pbs_submit failed, pbs_errno = %d", pbs_errno )); 162 if (IS_TRANSIENT_ERROR && first_try) 163 { 164 pbsdrmaa_pbs_reconnect_internal(self, true); 165 first_try = false; 166 goto retry; 167 } 168 else 169 { 170 pbsdrmaa_exc_raise_pbs( "pbs_submit"); 171 } 172 } 173 } 174 EXCEPT_DEFAULT 175 { 176 fsd_free(job_id); 177 fsd_exc_reraise(); 178 } 179 FINALLY 180 { 181 if(conn_lock) 182 conn_lock = fsd_mutex_unlock(&self->session->super.drm_connection_mutex); 183 } 184 END_TRY 185 186 187 fsd_log_return(("%s", job_id)); 188 189 return job_id; 138 190 } 139 191 … … 182 234 int tries_left = 
self->session->max_retries_count; 183 235 int sleep_time = 1; 184 185 236 186 237 fsd_log_enter(("(%d)", self->connection_fd));
Note: See TracChangeset for help on using the changeset viewer.