Changeset 85 for trunk/pbs_drmaa/pbs_conn.c
- Timestamp: 01/17/13 18:44:15 (11 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/pbs_drmaa/pbs_conn.c
r84 r85 28 28 #include <drmaa_utils/iter.h> 29 29 #include <drmaa_utils/conf.h> 30 #include <drmaa_utils/session.h>31 30 #include <drmaa_utils/datetime.h> 32 31 32 #include <pbs_drmaa/session.h> 33 33 #include <pbs_drmaa/pbs_conn.h> 34 34 #include <pbs_drmaa/util.h> … … 53 53 static void pbsdrmaa_pbs_holdjob( pbsdrmaa_pbs_conn_t *self, char *job_id ); 54 54 55 static void pbsdrmaa_pbs_reconnect_internal( pbsdrmaa_pbs_conn_t *self, bool reconnect); 56 57 static void pbsdrmaa_pbs_check_connect_internal( pbsdrmaa_pbs_conn_t *self, bool reconnect); 58 55 static void pbsdrmaa_pbs_connection_autoclose_thread_loop( pbsdrmaa_pbs_conn_t *self, bool reconnect); 56 57 58 static void check_reconnect( pbsdrmaa_pbs_conn_t *self, bool reconnect); 59 60 static void start_autoclose_thread( pbsdrmaa_pbs_conn_t *self ); 61 62 static void stop_autoclose_thread( pbsdrmaa_pbs_conn_t *self ); 63 64 65 #if defined PBS_PROFESSIONAL && defined PBSE_HISTJOBID 66 #define IS_MISSING_JOB (pbs_errno == PBSE_UNKJOBID || pbs_errno == PBSE_HISTJOBID) 67 #else 68 #define IS_MISSING_JOB (pbs_errno == PBSE_UNKJOBID) 69 #endif 59 70 #define IS_TRANSIENT_ERROR (pbs_errno == PBSE_PROTOCOL || pbs_errno == PBSE_EXPIRED || pbs_errno == PBSOLDE_PROTOCOL || pbs_errno == PBSOLDE_EXPIRED) 60 71 61 62 72 pbsdrmaa_pbs_conn_t * 63 pbsdrmaa_pbs_conn_new( pbsdrmaa_session_t *session,char *server )73 pbsdrmaa_pbs_conn_new( fsd_drmaa_session_t *session, const char *server ) 64 74 { 65 75 pbsdrmaa_pbs_conn_t *volatile self = NULL; … … 84 94 85 95 self->connection_fd = -1; 86 self->last_usage = time(NULL);87 96 88 97 /*ignore SIGPIPE - otherwise pbs_disconnect cause the program to exit */ 89 98 signal(SIGPIPE, SIG_IGN); 90 99 91 pbsdrmaa_pbs_reconnect_internal(self, false);100 check_reconnect(self, false); 92 101 } 93 102 EXCEPT_DEFAULT … … 100 109 if (self->connection_fd != -1) 101 110 pbs_disconnect(self->connection_fd); 111 stop_autoclose_thread(self); 102 112 } 103 113 … … 110 120 return self; 111 121 } 112 113 122 
114 123 void … … 148 157 TRY 149 158 { 150 conn_lock = fsd_mutex_lock(&self->session-> super.drm_connection_mutex);151 152 pbsdrmaa_pbs_reconnect_internal(self, false);159 conn_lock = fsd_mutex_lock(&self->session->drm_connection_mutex); 160 161 check_reconnect(self, false); 153 162 154 163 retry: … … 162 171 if (IS_TRANSIENT_ERROR && first_try) 163 172 { 164 pbsdrmaa_pbs_reconnect_internal(self, true);173 check_reconnect(self, true); 165 174 first_try = false; 166 175 goto retry; … … 168 177 else 169 178 { 170 pbsdrmaa_exc_raise_pbs( "pbs_submit" );179 pbsdrmaa_exc_raise_pbs( "pbs_submit", self->connection_fd); 171 180 } 172 181 } … … 180 189 { 181 190 if(conn_lock) 182 conn_lock = fsd_mutex_unlock(&self->session-> super.drm_connection_mutex);191 conn_lock = fsd_mutex_unlock(&self->session->drm_connection_mutex); 183 192 } 184 193 END_TRY … … 202 211 TRY 203 212 { 204 conn_lock = fsd_mutex_lock(&self->session-> super.drm_connection_mutex);205 206 pbsdrmaa_pbs_reconnect_internal(self, false);213 conn_lock = fsd_mutex_lock(&self->session->drm_connection_mutex); 214 215 check_reconnect(self, false); 207 216 208 217 retry: … … 213 222 if(status == NULL) 214 223 { 215 fsd_log_error(( "pbs_statjob failed, pbs_errno = %d", pbs_errno )); 216 if (IS_TRANSIENT_ERROR && first_try) 217 { 218 pbsdrmaa_pbs_reconnect_internal(self, true); 224 if (IS_MISSING_JOB) 225 { 226 fsd_log_info(( "missing job = %s (code=%d)", job_id, pbs_errno )); 227 } 228 else if (IS_TRANSIENT_ERROR && first_try) 229 { 230 fsd_log_error(( "pbs_statjob failed, pbs_errno = %d", pbs_errno )); 231 check_reconnect(self, true); 219 232 first_try = false; 220 233 goto retry; … … 222 235 else 223 236 { 224 pbsdrmaa_exc_raise_pbs( "pbs_statjob" );237 pbsdrmaa_exc_raise_pbs( "pbs_statjob", self->connection_fd); 225 238 } 226 239 } … … 236 249 { 237 250 if(conn_lock) 238 conn_lock = fsd_mutex_unlock(&self->session-> super.drm_connection_mutex);251 conn_lock = fsd_mutex_unlock(&self->session->drm_connection_mutex); 
239 252 } 240 253 END_TRY … … 266 279 TRY 267 280 { 268 conn_lock = fsd_mutex_lock(&self->session-> super.drm_connection_mutex);269 270 pbsdrmaa_pbs_reconnect_internal(self, false);281 conn_lock = fsd_mutex_lock(&self->session->drm_connection_mutex); 282 283 check_reconnect(self, false); 271 284 272 285 retry: … … 280 293 if (IS_TRANSIENT_ERROR && first_try) 281 294 { 282 pbsdrmaa_pbs_reconnect_internal(self, true);295 check_reconnect(self, true); 283 296 first_try = false; 284 297 goto retry; … … 286 299 else 287 300 { 288 pbsdrmaa_exc_raise_pbs( "pbs_sigjob" );301 pbsdrmaa_exc_raise_pbs( "pbs_sigjob", self->connection_fd); 289 302 } 290 303 } … … 297 310 { 298 311 if(conn_lock) 299 conn_lock = fsd_mutex_unlock(&self->session-> super.drm_connection_mutex);312 conn_lock = fsd_mutex_unlock(&self->session->drm_connection_mutex); 300 313 } 301 314 END_TRY … … 318 331 TRY 319 332 { 320 conn_lock = fsd_mutex_lock(&self->session-> super.drm_connection_mutex);321 322 pbsdrmaa_pbs_reconnect_internal(self, false);333 conn_lock = fsd_mutex_lock(&self->session->drm_connection_mutex); 334 335 check_reconnect(self, false); 323 336 324 337 retry: … … 332 345 if (IS_TRANSIENT_ERROR && first_try) 333 346 { 334 pbsdrmaa_pbs_reconnect_internal(self, true);347 check_reconnect(self, true); 335 348 first_try = false; 336 349 goto retry; … … 338 351 else 339 352 { 340 pbsdrmaa_exc_raise_pbs( "pbs_deljob" );353 pbsdrmaa_exc_raise_pbs( "pbs_deljob", self->connection_fd); 341 354 } 342 355 } … … 349 362 { 350 363 if(conn_lock) 351 conn_lock = fsd_mutex_unlock(&self->session-> super.drm_connection_mutex);364 conn_lock = fsd_mutex_unlock(&self->session->drm_connection_mutex); 352 365 } 353 366 END_TRY … … 369 382 TRY 370 383 { 371 conn_lock = fsd_mutex_lock(&self->session-> super.drm_connection_mutex);372 373 pbsdrmaa_pbs_reconnect_internal(self, false);384 conn_lock = fsd_mutex_lock(&self->session->drm_connection_mutex); 385 386 check_reconnect(self, false); 374 387 375 388 retry: … … 383 
396 if (IS_TRANSIENT_ERROR && first_try) 384 397 { 385 pbsdrmaa_pbs_reconnect_internal(self, true);398 check_reconnect(self, true); 386 399 first_try = false; 387 400 goto retry; … … 389 402 else 390 403 { 391 pbsdrmaa_exc_raise_pbs( "pbs_rlsjob" );404 pbsdrmaa_exc_raise_pbs( "pbs_rlsjob", self->connection_fd); 392 405 } 393 406 } … … 400 413 { 401 414 if(conn_lock) 402 conn_lock = fsd_mutex_unlock(&self->session-> super.drm_connection_mutex);415 conn_lock = fsd_mutex_unlock(&self->session->drm_connection_mutex); 403 416 } 404 417 END_TRY … … 420 433 TRY 421 434 { 422 conn_lock = fsd_mutex_lock(&self->session-> super.drm_connection_mutex);423 424 pbsdrmaa_pbs_reconnect_internal(self, false);435 conn_lock = fsd_mutex_lock(&self->session->drm_connection_mutex); 436 437 check_reconnect(self, false); 425 438 426 439 retry: … … 434 447 if (IS_TRANSIENT_ERROR && first_try) 435 448 { 436 pbsdrmaa_pbs_reconnect_internal(self, true);449 check_reconnect(self, true); 437 450 first_try = false; 438 451 goto retry; … … 440 453 else 441 454 { 442 pbsdrmaa_exc_raise_pbs( "pbs_holdjob" );455 pbsdrmaa_exc_raise_pbs( "pbs_holdjob", self->connection_fd); 443 456 } 444 457 } … … 451 464 { 452 465 if(conn_lock) 453 conn_lock = fsd_mutex_unlock(&self->session-> super.drm_connection_mutex);466 conn_lock = fsd_mutex_unlock(&self->session->drm_connection_mutex); 454 467 } 455 468 END_TRY … … 460 473 461 474 void 462 pbsdrmaa_pbs_reconnect_internal( pbsdrmaa_pbs_conn_t *self, bool force_reconnect)463 { 464 int tries_left = self->session->max_retries_count;475 check_reconnect( pbsdrmaa_pbs_conn_t *self, bool force_reconnect) 476 { 477 int tries_left = ((pbsdrmaa_session_t *)self->session)->max_retries_count; 465 478 int sleep_time = 1; 466 479 … … 476 489 else 477 490 { 491 stop_autoclose_thread(self); 478 492 pbs_disconnect(self->connection_fd); 479 493 self->connection_fd = -1; 480 494 } 481 495 } 496 497 482 498 483 499 retry_connect: /* Life... 
*/ … … 492 508 493 509 if( self->connection_fd < 0 ) 494 pbsdrmaa_exc_raise_pbs( "pbs_connect" );510 pbsdrmaa_exc_raise_pbs( "pbs_connect", self->connection_fd ); 495 511 496 512 fsd_log_return(("(%d)", self->connection_fd)); 497 513 } 498 514 515 516 static void start_autoclose_thread( pbsdrmaa_pbs_conn_t *self ) 517 { 518 519 520 } 521 522 static void stop_autoclose_thread( pbsdrmaa_pbs_conn_t *self ) 523 { 524 525 526 } 527
Note: See TracChangeset for help on using the changeset viewer.