Changeset 85 for trunk/pbs_drmaa/session.c
- Timestamp: 01/17/13 18:44:15 (11 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/pbs_drmaa/session.c
r65 r85 98 98 99 99 self->log_file_initial_size = 0; 100 self->pbs_conn = -1;101 100 self->pbs_home = NULL; 102 101 … … 123 122 self->super.missing_jobs = FSD_IGNORE_MISSING_JOBS; 124 123 125 { 126 int tries_left = self->max_retries_count; 127 int sleep_time = 1; 128 /*ignore SIGPIPE - otheriwse pbs_disconnect cause the program to exit */ 129 signal(SIGPIPE, SIG_IGN); 130 retry_connect: /* Life... */ 131 self->pbs_conn = pbs_connect( self->super.contact ); 132 fsd_log_info(( "pbs_connect(%s) =%d", self->super.contact, self->pbs_conn )); 133 if( self->pbs_conn < 0 && tries_left-- ) 134 { 135 sleep(sleep_time++); 136 goto retry_connect; 137 } 138 139 if( self->pbs_conn < 0 ) 140 pbsdrmaa_exc_raise_pbs( "pbs_connect" ); 141 } 124 self->pbs_connection = pbsdrmaa_pbs_conn_new( (fsd_drmaa_session_t *)self, contact ); 125 self->connection_max_lifetime = 30; /* 30 seconds */ 142 126 143 127 } … … 162 146 pbsdrmaa_session_t *pbsself = (pbsdrmaa_session_t*)self; 163 147 self->stop_wait_thread( self ); 164 if( pbsself->pbs_conn >= 0 ) 165 pbs_disconnect( pbsself->pbs_conn ); 148 pbsdrmaa_pbs_conn_destroy(pbsself->pbs_connection); 166 149 fsd_free( pbsself->status_attrl ); 167 150 fsd_free( pbsself->job_exit_status_file_prefix ); … … 229 212 fsd_conf_option_t *max_retries_count = NULL; 230 213 fsd_conf_option_t *user_state_dir = NULL; 214 fsd_conf_option_t *connection_max_lifetime = NULL; 215 231 216 232 217 pbs_home = fsd_conf_dict_get(self->configuration, "pbs_home" ); … … 234 219 max_retries_count = fsd_conf_dict_get(self->configuration, "max_retries_count" ); 235 220 user_state_dir = fsd_conf_dict_get(self->configuration, "user_state_dir" ); 221 connection_max_lifetime = fsd_conf_dict_get(self->configuration, "connection_max_lifetime"); 236 222 237 223 if( pbs_home && pbs_home->type == FSD_CONF_STRING ) … … 274 260 } 275 261 262 if ( connection_max_lifetime && connection_max_lifetime->type == FSD_CONF_INTEGER) 263 { 264 pbsself->connection_max_lifetime = 
connection_max_lifetime->val.integer; 265 fsd_log_info(("Max connection lifetime: %d", pbsself->connection_max_lifetime)); 266 } 267 276 268 if ( wait_thread_sleep_time && wait_thread_sleep_time->type == FSD_CONF_INTEGER) 277 269 { … … 315 307 pbsdrmaa_session_update_all_jobs_status( fsd_drmaa_session_t *self ) 316 308 { 317 volatile bool conn_lock = false;318 309 volatile bool jobs_lock = false; 319 310 pbsdrmaa_session_t *pbsself = (pbsdrmaa_session_t*)self; 320 311 fsd_job_set_t *jobs = self->jobs; 321 312 struct batch_status *volatile status = NULL; 322 volatile int tries_left = pbsself->max_retries_count;323 volatile int sleep_time = 1;324 313 325 314 fsd_log_enter(("")); … … 327 316 TRY 328 317 { 329 conn_lock = fsd_mutex_lock( &self->drm_connection_mutex ); 330 retry: 318 331 319 /* TODO: query only for user's jobs pbs_selstat + ATTR_u */ 332 320 #ifdef PBS_PROFESSIONAL 333 status = pbs _statjob( pbsself->pbs_conn, NULL, NULL, NULL);321 status = pbsself->pbs_connection->statjob(pbsself->pbs_connection, NULL, NULL); 334 322 #else 335 status = pbs _statjob( pbsself->pbs_conn, NULL, pbsself->status_attrl, NULL);323 status = pbsself->pbs_connection->statjob(pbsself->pbs_connection, NULL, pbsself->status_attrl); 336 324 #endif 337 fsd_log_info(( "pbs_statjob( fd=%d, job_id=NULL, attribs={...} ) =%p", pbsself->pbs_conn, (void*)status ));338 if( status == NULL && pbs_errno != 0 )339 {340 if (pbs_errno == PBSE_PROTOCOL || pbs_errno == PBSE_EXPIRED || pbs_errno == PBSOLDE_PROTOCOL || pbs_errno == PBSOLDE_EXPIRED)341 {342 if ( pbsself->pbs_conn >= 0)343 pbs_disconnect( pbsself->pbs_conn );344 retry_connect:345 sleep(sleep_time++);346 pbsself->pbs_conn = pbs_connect( pbsself->super.contact );347 if( pbsself->pbs_conn < 0)348 {349 if (tries_left--)350 goto retry_connect;351 else352 pbsdrmaa_exc_raise_pbs( "pbs_connect" );353 }354 else355 goto retry;356 }357 else358 {359 pbsdrmaa_exc_raise_pbs( "pbs_statjob" );360 }361 }362 conn_lock = fsd_mutex_unlock( 
&self->drm_connection_mutex );363 325 364 326 { … … 421 383 { 422 384 if( status != NULL ) 423 pbs_statfree( status ); 424 if( conn_lock ) 425 conn_lock = fsd_mutex_unlock( &self->drm_connection_mutex ); 385 pbsself->pbs_connection->statjob_free(pbsself->pbs_connection, status ); 426 386 if( jobs_lock ) 427 387 jobs_lock = fsd_mutex_unlock( &jobs->mutex );
Note: See TracChangeset for help on using the changeset viewer.