Ignore:
Timestamp:
11/11/12 15:06:16 (13 years ago)
Author:
mmamonski
Message:

SuperMUC on-site fixes: 1. Polling mode 2. Handling missing jobs 3. monitor -> lldrmaa_monitor 4. Force stderr file creation

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/ll_drmaa/session.c

    r20 r26  
    132132        LL_job job_info; 
    133133 
    134         char *monitor_program = LL_DRMAA_BIN_DIR"/monitor"; 
     134        char *monitor_program = LL_DRMAA_BIN_DIR"/lldrmaa_monitor"; 
    135135 
    136136        TRY 
     
    147147 
    148148                connection_lock = fsd_mutex_lock( &self->drm_connection_mutex ); 
    149                 status = llsubmit(cmd_path, monitor_program, llself->unix_socket_name, &job_info, LL_JOB_VERSION); 
     149                if (self->wait_thread_run_flag)  
     150                  { 
     151                        fsd_log_info(("llsubmit(%s, %s, %s, %p, %d)",cmd_path, monitor_program, llself->unix_socket_name, (void*)&job_info, LL_JOB_VERSION)); 
     152                        status = llsubmit(cmd_path, monitor_program, llself->unix_socket_name, &job_info, LL_JOB_VERSION); 
     153                  } 
     154                else 
     155                  { 
     156                        fsd_log_info(("llsubmit(%s, NULL, NULL, %p, %d)",cmd_path, (void*)&job_info, LL_JOB_VERSION)); 
     157                        status = llsubmit(cmd_path, NULL, NULL, &job_info, LL_JOB_VERSION); 
     158                  } 
    150159                connection_lock = fsd_mutex_unlock( &self->drm_connection_mutex ); 
    151160 
    152                 if(remove(cmd_path) == -1) 
     161                if(getenv("LLDRMAA_KEEP_CMD") == NULL && remove(cmd_path) == -1) 
    153162                        fsd_log_warning(("Can't delete cmd file: %s", cmd_path)); 
    154163 
     
    161170                 } 
    162171                else /* 0 */ 
     172                 { 
    163173                        fsd_log_debug(("llsubmit: %s",lldrmaa_err_submit(status))); 
     174                 } 
    164175 
    165176                if( start != end ) 
     
    170181                         { 
    171182                                job_ids[i] = fsd_asprintf("%s.%d.%d", job_info.step_list[i]->id.from_host, job_info.step_list[i]->id.cluster, job_info.step_list[i]->id.proc); 
    172  
     183                                fsd_log_info((" new array job id: %s", job_ids[i])); 
    173184                                job = lldrmaa_job_new( fsd_strdup(job_ids[i]) ); 
    174185                                job->session = self; 
     
    185196                        job_ids[0] = fsd_asprintf( "%s.%d.0", job_info.step_list[0]->id.from_host, job_info.step_list[0]->id.cluster); 
    186197 
     198                        fsd_log_info((" new job id: %s", job_ids[0])); 
    187199                        job = lldrmaa_job_new( fsd_strdup(job_ids[0]) ); 
    188200                        job->session = self; 
     
    247259         } 
    248260 
    249         if ( !self->wait_thread_started ) 
    250                 fsd_exc_raise_msg(FSD_ERRNO_INTERNAL_ERROR, "DRMAA for LL requires that wait thread is enable. Don't disable it in configuration file!" ); 
    251261} 
    252262 
Note: See TracChangeset for help on using the changeset viewer.