Changeset 45 for trunk/pbs_drmaa/job.c
- Timestamp: 11/28/11 15:02:58 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/pbs_drmaa/job.c
r40 r45 90 90 TRY 91 91 { 92 int tr y_count;93 const int max_tries = 3;92 int tries_left = session->max_retries_count; 93 int sleep_time = 1; 94 94 95 95 conn_lock = fsd_mutex_lock( &self->session->drm_connection_mutex ); 96 96 97 97 /*TODO reconnect */ 98 for( try_count=0; try_count < max_tries; try_count++)98 while ( true ) 99 99 { 100 100 switch( action ) … … 151 151 } 152 152 153 if( rc == PBSE_NONE ) 154 break; 155 else if( rc == PBSE_INTERNAL ) 153 retry_connect: 154 if ( rc == PBSE_NONE ) 155 break; 156 else if (( rc == PBSE_INTERNAL || rc == PBSE_PROTOCOL || rc == PBSE_EXPIRED) && (tries_left--)) 156 157 { 157 /* 158 * In PBS Pro pbs_sigjob raises internal server error (PBSE_INTERNAL) 159 * when job just changed its state to running. 160 */ 161 fsd_log_debug(( "repeating request (%d of %d)", 162 try_count+2, max_tries )); 163 sleep( 1 ); 158 if (rc == PBSE_PROTOCOL || rc == PBSE_EXPIRED) 159 { 160 if ( session->pbs_conn >= 0) 161 pbs_disconnect( session->pbs_conn ); 162 163 sleep( sleep_time++ ); 164 165 session->pbs_conn = pbs_connect( session->super.contact ); 166 167 if (session->pbs_conn < 0) 168 goto retry_connect; 169 170 fsd_log_info(( "pbs_connect(%s) =%d", session->super.contact, session->pbs_conn )); 171 } 172 else /* PBSE_INTERNAL */ 173 { 174 /* 175 * In PBS Pro pbs_sigjob raises internal server error (PBSE_INTERNAL) 176 * when job just changed its state to running. 
177 */ 178 sleep( sleep_time++ ); 179 } 180 fsd_log_debug(( "repeating request (%d of %d)", tries_left, session->max_retries_count)); 164 181 } 165 182 else 166 183 pbsdrmaa_exc_raise_pbs( apicall ); 167 } /* end for*/184 } /* end while */ 168 185 } 169 186 FINALLY … … 184 201 struct batch_status *volatile status = NULL; 185 202 pbsdrmaa_session_t *session = (pbsdrmaa_session_t*)self->session; 203 int tries_left = session->max_retries_count; 204 int sleep_time = 1; 186 205 187 206 fsd_log_enter(( "({job_id=%s})", self->job_id )); … … 229 248 if ( session->pbs_conn >= 0 ) 230 249 pbs_disconnect( session->pbs_conn ); 231 sleep(1); 250 retry_connect: 251 sleep(sleep_time++); 232 252 session->pbs_conn = pbs_connect( session->super.contact ); 233 253 if( session->pbs_conn < 0 ) 234 pbsdrmaa_exc_raise_pbs( "pbs_connect" ); 254 { 255 if (tries_left--) 256 goto retry_connect; 257 else 258 pbsdrmaa_exc_raise_pbs( "pbs_connect" ); 259 } 235 260 else 236 261 { 237 fsd_log_error(("retry:"));238 262 goto retry; 239 263 }
Note: See TracChangeset for help on using the changeset viewer.