[16] | 1 | #include <stdio.h> |
---|
| 2 | #include <stdlib.h> |
---|
| 3 | |
---|
| 4 | #include <llapi.h> |
---|
| 5 | #include <unistd.h> |
---|
| 6 | #include <fcntl.h> |
---|
| 7 | #include <string.h> |
---|
| 8 | #include <errno.h> |
---|
| 9 | #include <assert.h> |
---|
| 10 | |
---|
/*
 * Status codes that lldrmaa_err_ckpt() recognises in the value returned by
 * ll_ckpt(); success is reported as API_OK and is not redefined here.
 *
 * Negative replacement lists are parenthesized so that each macro always
 * expands to a single operand, whatever expression it is pasted into.
 *
 * NOTE(review): no constant is defined for -5 -- confirm against the
 * LoadLeveler API documentation whether ll_ckpt() can return it.
 */
#define LL_CKPT_UNCLEAR 1          /* no status received; outcome of the checkpoint is unclear */
#define LL_CKPT_ERROR (-1)         /* error occurred attempting to checkpoint */
#define LL_CKPT_JOB_ID_ERR (-2)    /* job step not in the form host.jobid.stepid */
#define LL_CKPT_MALLOC_ERR (-3)    /* cannot allocate memory */
#define LL_CKPT_SOCKET_ERR (-4)    /* API cannot create listen socket */
#define LL_CKPT_CONF_ERR (-6)      /* configuration file errors */
---|
| 17 | |
---|
| 18 | const char * |
---|
| 19 | lldrmaa_err_ckpt(int _llerrno) |
---|
| 20 | { |
---|
| 21 | switch(_llerrno) |
---|
| 22 | { |
---|
| 23 | case API_OK: |
---|
| 24 | return "Checkpoint completed successfully."; |
---|
| 25 | case LL_CKPT_UNCLEAR: |
---|
| 26 | return "Checkpoint event did not receive status and the success or failure of the checkpoint is unclear."; |
---|
| 27 | case LL_CKPT_ERROR: |
---|
| 28 | return "Error occurred attempting to checkpoint."; |
---|
| 29 | case LL_CKPT_JOB_ID_ERR: |
---|
| 30 | return "Format not valid for job step, not in the form host.jobid.stepid."; |
---|
| 31 | case LL_CKPT_MALLOC_ERR: |
---|
| 32 | return "Cannot allocate memory."; |
---|
| 33 | case LL_CKPT_SOCKET_ERR: |
---|
| 34 | return "API cannot create listen socket."; |
---|
| 35 | case LL_CKPT_CONF_ERR: |
---|
| 36 | return "Configuration file errors."; |
---|
| 37 | default: |
---|
| 38 | assert(0); |
---|
| 39 | return "Invalid error code"; |
---|
| 40 | } |
---|
| 41 | } |
---|
| 42 | /* |
---|
| 43 | int |
---|
| 44 | lldrmaa_map_ckpt(int _llerrno) |
---|
| 45 | { |
---|
| 46 | switch(_llerrno) |
---|
| 47 | { |
---|
| 48 | case API_OK: |
---|
| 49 | return FSD_ERRNO_SUCCESS; |
---|
| 50 | case LL_CKPT_UNCLEAR: |
---|
| 51 | return FSD_ERRNO_INTERNAL_ERROR; |
---|
| 52 | case LL_CKPT_ERROR: |
---|
| 53 | return FSD_DRMAA_ERRNO_SUSPEND_INCONSISTENT_STATE; |
---|
| 54 | case LL_CKPT_JOB_ID_ERR: |
---|
| 55 | return FSD_ERRNO_INTERNAL_ERROR; |
---|
| 56 | case LL_CKPT_MALLOC_ERR: |
---|
| 57 | return FSD_ERRNO_NO_MEMORY; |
---|
| 58 | case LL_CKPT_SOCKET_ERR: |
---|
| 59 | return FSD_ERRNO_INTERNAL_ERROR; |
---|
| 60 | case LL_CKPT_CONF_ERR: |
---|
| 61 | return FSD_ERRNO_DENIED_BY_DRM; |
---|
| 62 | default: |
---|
| 63 | fsd_assert(false); |
---|
| 64 | return FSD_ERRNO_INTERNAL_ERROR; |
---|
| 65 | } |
---|
| 66 | } |
---|
| 67 | */ |
---|
| 68 | |
---|
| 69 | void ckpt_job(char * job_id) |
---|
| 70 | { |
---|
| 71 | int rc = 0; |
---|
| 72 | char buf[256]; |
---|
| 73 | cr_error_t cp; |
---|
| 74 | LL_ckpt_info ckpt_info; |
---|
| 75 | ckpt_info.version=LL_API_VERSION; |
---|
| 76 | ckpt_info.step_id=strdup(job_id); |
---|
| 77 | ckpt_info.ckptType=CKPT_AND_TERMINATE; |
---|
| 78 | ckpt_info.waitType=CKPT_NO_WAIT; |
---|
| 79 | ckpt_info.abort_sig=SIGINT; |
---|
| 80 | ckpt_info.cp_error_data=NULL; |
---|
| 81 | ckpt_info.ckpt_rc = 0; |
---|
| 82 | ckpt_info.soft_limit = 0; |
---|
| 83 | ckpt_info.hard_limit = 0; |
---|
| 84 | |
---|
| 85 | rc = ll_ckpt(&ckpt_info); /* TODO: segfault */ |
---|
| 86 | |
---|
| 87 | if(rc) |
---|
| 88 | { |
---|
| 89 | printf("job::control: could to job %s. Error code: %d,%s\n", job_id, rc, lldrmaa_err_ckpt(rc) ); |
---|
| 90 | |
---|
| 91 | printf("Return code from checkpoint: %d\n",ckpt_info.ckpt_rc); |
---|
| 92 | printf("Error data: %s\n",cp.error_data); |
---|
| 93 | strerror_r(cp.Py_error,buf,256); |
---|
| 94 | printf("cp_error_data %d: %s\n",cp.Py_error,buf); |
---|
| 95 | strerror_r(cp.Sy_error,buf,256); |
---|
| 96 | printf("cp_error_data2 %d: %s\n",cp.Sy_error,buf); |
---|
| 97 | } |
---|
| 98 | } |
---|
| 99 | |
---|
/*
 * Program entry point.
 *
 * Expects the job step id as the first command-line argument, prints a
 * progress line, calls ckpt_job() on it and then calls llsubmit().
 *
 * Exit status: 1 when no job id was given, 0 otherwise. The outcome of the
 * checkpoint request itself is not reflected in the exit status.
 */
int main(int argc, char *argv[])
{
	/* Guard clause: a job id is mandatory. */
	if (argc < 2) {
		fputs("./a <job_id>\n", stdout);
		return 1;
	}

	printf("Trying to checkpoint %s\n", argv[1]);
	ckpt_job(argv[1]);

	/*
	 * NOTE(review): llsubmit() is invoked with all-NULL arguments and a
	 * version of 0, and its result is ignored; the purpose of this call is
	 * not evident from this file -- confirm before changing it.
	 */
	llsubmit(NULL, NULL, NULL, NULL, 0);

	puts("After ckpt");
	return 0;
}
---|