[12] | 1 | /* $Id$ */ |
---|
[1] | 2 | /* |
---|
| 3 | * FedStage DRMAA for PBS Pro |
---|
| 4 | * Copyright (C) 2006-2007 FedStage Systems |
---|
| 5 | * |
---|
| 6 | * This program is free software: you can redistribute it and/or modify |
---|
| 7 | * it under the terms of the GNU General Public License as published by |
---|
| 8 | * the Free Software Foundation, either version 3 of the License, or |
---|
| 9 | * (at your option) any later version. |
---|
| 10 | * |
---|
| 11 | * This program is distributed in the hope that it will be useful, |
---|
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
| 14 | * GNU General Public License for more details. |
---|
| 15 | * |
---|
| 16 | * You should have received a copy of the GNU General Public License |
---|
| 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
| 18 | */ |
---|
| 19 | |
---|
| 20 | /** |
---|
| 21 | * @file pbs_drmaa/util.c |
---|
| 22 | * PBS DRMAA utilities. |
---|
| 23 | */ |
---|
| 24 | |
---|
| 25 | #ifdef HAVE_CONFIG_H |
---|
| 26 | # include <config.h> |
---|
| 27 | #endif |
---|
| 28 | |
---|
| 29 | #include <stdlib.h> |
---|
| 30 | #include <string.h> |
---|
| 31 | #include <unistd.h> |
---|
[68] | 32 | #include <sys/types.h> |
---|
| 33 | #include <sys/stat.h> |
---|
[1] | 34 | |
---|
[68] | 35 | |
---|
[1] | 36 | #include <drmaa_utils/common.h> |
---|
| 37 | #include <pbs_drmaa/util.h> |
---|
| 38 | #include <pbs_error.h> |
---|
| 39 | #include <pbs_ifl.h> |
---|
| 40 | |
---|
#ifndef lint
/* Revision-control identification string kept in the object file. */
# ifdef __GNUC__
static char rcsid[] __attribute__ ((unused)) = "$Id$";
# else
static char rcsid[] = "$Id$";
# endif
#endif
---|
| 48 | |
---|
| 49 | |
---|
| 50 | void |
---|
| 51 | pbsdrmaa_dump_attrl( const struct attrl *attribute_list, const char *prefix ) |
---|
| 52 | { |
---|
| 53 | const struct attrl *i; |
---|
| 54 | |
---|
| 55 | if( prefix == NULL ) |
---|
| 56 | prefix = ""; |
---|
| 57 | for( i = attribute_list; i != NULL; i = i->next ) |
---|
| 58 | fsd_log_debug(( "\n %s %s%s%s=%s", |
---|
| 59 | prefix, i->name, |
---|
| 60 | i->resource ? "." : "", i->resource ? i->resource : "", |
---|
| 61 | i->value |
---|
| 62 | )); |
---|
| 63 | } |
---|
| 64 | |
---|
| 65 | |
---|
| 66 | void |
---|
| 67 | pbsdrmaa_free_attrl( struct attrl *attr ) |
---|
| 68 | { |
---|
| 69 | while( attr != NULL ) |
---|
| 70 | { |
---|
| 71 | struct attrl *p = attr; |
---|
| 72 | attr = attr->next; |
---|
| 73 | fsd_free( p->name ); |
---|
| 74 | fsd_free( p->value ); |
---|
| 75 | fsd_free( p->resource ); |
---|
| 76 | fsd_free( p ); |
---|
| 77 | } |
---|
| 78 | } |
---|
| 79 | |
---|
[29] | 80 | struct attrl * |
---|
| 81 | pbsdrmaa_add_attr( struct attrl *head, const char *name, const char *value) |
---|
| 82 | { |
---|
| 83 | struct attrl *p = NULL; |
---|
| 84 | char *resource = NULL; |
---|
[1] | 85 | |
---|
[29] | 86 | fsd_malloc( p, struct attrl ); |
---|
| 87 | memset( p, 0, sizeof(struct attrl) ); |
---|
| 88 | |
---|
| 89 | resource = strchr( name, '.' ); |
---|
| 90 | |
---|
| 91 | if( resource ) |
---|
| 92 | { |
---|
| 93 | p->name = fsd_strndup( name, resource - name ); |
---|
| 94 | p->resource = fsd_strdup( resource+1 ); |
---|
| 95 | } |
---|
| 96 | else |
---|
| 97 | { |
---|
| 98 | p->name = fsd_strdup( name ); |
---|
| 99 | } |
---|
| 100 | |
---|
| 101 | p->value = fsd_strdup(value); |
---|
| 102 | p->op = SET; |
---|
| 103 | |
---|
| 104 | fsd_log_debug(("set attr: %s = %s", name, value)); |
---|
| 105 | |
---|
| 106 | if (head) |
---|
| 107 | p->next = head; |
---|
| 108 | else |
---|
| 109 | p->next = NULL; |
---|
| 110 | |
---|
| 111 | return p; |
---|
| 112 | } |
---|
| 113 | |
---|
| 114 | |
---|
[1] | 115 | void |
---|
| 116 | pbsdrmaa_exc_raise_pbs( const char *function ) |
---|
| 117 | { |
---|
| 118 | int _pbs_errno; |
---|
| 119 | int fsd_errno; |
---|
| 120 | const char *message = NULL; |
---|
| 121 | |
---|
| 122 | _pbs_errno = pbs_errno; |
---|
[16] | 123 | |
---|
| 124 | #ifndef PBS_PROFESSIONAL_NO_LOG |
---|
[1] | 125 | message = pbse_to_txt( pbs_errno ); |
---|
| 126 | #else |
---|
[70] | 127 | message = "PBS error"; |
---|
[1] | 128 | #endif |
---|
| 129 | |
---|
| 130 | fsd_errno = pbsdrmaa_map_pbs_errno( _pbs_errno ); |
---|
| 131 | fsd_log_error(( |
---|
| 132 | "call to %s returned with error %d:%s mapped to %d:%s", |
---|
| 133 | function, |
---|
| 134 | _pbs_errno, message, |
---|
| 135 | fsd_errno, fsd_strerror(fsd_errno) |
---|
| 136 | )); |
---|
[70] | 137 | fsd_exc_raise_fmt( fsd_errno, " %s", function, message ); |
---|
[1] | 138 | } |
---|
| 139 | |
---|
| 140 | |
---|
| 141 | /** Maps PBS error code into DMRAA code. */ |
---|
| 142 | int |
---|
| 143 | pbsdrmaa_map_pbs_errno( int _pbs_errno ) |
---|
| 144 | { |
---|
| 145 | fsd_log_enter(( "(pbs_errno=%d)", _pbs_errno )); |
---|
| 146 | switch( _pbs_errno ) |
---|
| 147 | { |
---|
| 148 | case PBSE_NONE: /* no error */ |
---|
| 149 | return FSD_ERRNO_SUCCESS; |
---|
| 150 | case PBSE_UNKJOBID: /* Unknown Job Identifier */ |
---|
| 151 | return FSD_DRMAA_ERRNO_INVALID_JOB; |
---|
| 152 | case PBSE_NOATTR: /* Undefined Attribute */ |
---|
| 153 | case PBSE_ATTRRO: /* attempt to set READ ONLY attribute */ |
---|
| 154 | case PBSE_IVALREQ: /* Invalid request */ |
---|
| 155 | case PBSE_UNKREQ: /* Unknown batch request */ |
---|
| 156 | return FSD_ERRNO_INTERNAL_ERROR; |
---|
| 157 | case PBSE_PERM: /* No permission */ |
---|
| 158 | case PBSE_BADHOST: /* access from host not allowed */ |
---|
| 159 | return FSD_ERRNO_AUTHZ_FAILURE; |
---|
| 160 | case PBSE_JOBEXIST: /* job already exists */ |
---|
| 161 | case PBSE_SVRDOWN: /* req rejected -server shutting down */ |
---|
| 162 | case PBSE_EXECTHERE: /* cannot execute there */ |
---|
| 163 | case PBSE_NOSUP: /* Feature/function not supported */ |
---|
| 164 | case PBSE_EXCQRESC: /* Job exceeds Queue resource limits */ |
---|
| 165 | case PBSE_QUENODFLT: /* No Default Queue Defined */ |
---|
| 166 | case PBSE_NOTSNODE: /* no time-shared nodes */ |
---|
| 167 | return FSD_ERRNO_DENIED_BY_DRM; |
---|
| 168 | case PBSE_SYSTEM: /* system error occurred */ |
---|
| 169 | case PBSE_INTERNAL: /* internal server error occurred */ |
---|
| 170 | case PBSE_REGROUTE: /* parent job of dependent in rte que */ |
---|
| 171 | case PBSE_UNKSIG: /* unknown signal name */ |
---|
| 172 | return FSD_ERRNO_INTERNAL_ERROR; |
---|
| 173 | case PBSE_BADATVAL: /* bad attribute value */ |
---|
| 174 | case PBSE_BADATLST: /* Bad attribute list structure */ |
---|
| 175 | case PBSE_BADUSER: /* Bad user - no password entry */ |
---|
| 176 | case PBSE_BADGRP: /* Bad Group specified */ |
---|
| 177 | case PBSE_BADACCT: /* Bad Account attribute value */ |
---|
| 178 | case PBSE_UNKQUE: /* Unknown queue name */ |
---|
| 179 | case PBSE_UNKRESC: /* Unknown resource */ |
---|
| 180 | case PBSE_UNKNODEATR: /* node-attribute not recognized */ |
---|
| 181 | case PBSE_BADNDATVAL: /* Bad node-attribute value */ |
---|
| 182 | case PBSE_BADDEPEND: /* Invalid dependency */ |
---|
| 183 | case PBSE_DUPLIST: /* Duplicate entry in List */ |
---|
| 184 | return FSD_ERRNO_INVALID_VALUE; |
---|
| 185 | case PBSE_MODATRRUN: /* Cannot modify attrib in run state */ |
---|
| 186 | case PBSE_BADSTATE: /* request invalid for job state */ |
---|
| 187 | case PBSE_BADCRED: /* Invalid Credential in request */ |
---|
| 188 | case PBSE_EXPIRED: /* Expired Credential in request */ |
---|
| 189 | case PBSE_QUNOENB: /* Queue not enabled */ |
---|
| 190 | return FSD_ERRNO_INTERNAL_ERROR; |
---|
| 191 | case PBSE_QACESS: /* No access permission for queue */ |
---|
| 192 | return FSD_ERRNO_AUTHZ_FAILURE; |
---|
| 193 | case PBSE_HOPCOUNT: /* Max hop count exceeded */ |
---|
| 194 | case PBSE_QUEEXIST: /* Queue already exists */ |
---|
| 195 | case PBSE_ATTRTYPE: /* incompatable queue attribute type */ |
---|
| 196 | return FSD_ERRNO_INTERNAL_ERROR; |
---|
| 197 | # ifdef PBSE_QUEBUSY |
---|
| 198 | case PBSE_QUEBUSY: /* Queue Busy (not empty) */ |
---|
| 199 | # endif |
---|
| 200 | case PBSE_MAXQUED: /* Max number of jobs in queue */ |
---|
| 201 | case PBSE_NOCONNECTS: /* No free connections */ |
---|
| 202 | case PBSE_TOOMANY: /* Too many submit retries */ |
---|
| 203 | case PBSE_RESCUNAV: /* Resources temporarily unavailable */ |
---|
| 204 | return FSD_ERRNO_TRY_LATER; |
---|
| 205 | case 111: |
---|
| 206 | case PBSE_PROTOCOL: /* Protocol (ASN.1) error */ |
---|
| 207 | case PBSE_DISPROTO: /* Bad DIS based Request Protocol */ |
---|
| 208 | return FSD_ERRNO_DRM_COMMUNICATION_FAILURE; |
---|
| 209 | #if 0 |
---|
| 210 | case PBSE_QUENBIG: /* Queue name too long */ |
---|
| 211 | case PBSE_QUENOEN: /* Cannot enable queue,needs add def */ |
---|
| 212 | case PBSE_NOSERVER: /* No server to connect to */ |
---|
| 213 | case PBSE_NORERUN: /* Job Not Rerunnable */ |
---|
| 214 | case PBSE_ROUTEREJ: /* Route rejected by all destinations */ |
---|
| 215 | case PBSE_ROUTEEXPD: /* Time in Route Queue Expired */ |
---|
| 216 | case PBSE_MOMREJECT: /* Request to MOM failed */ |
---|
| 217 | case PBSE_BADSCRIPT: /* (qsub) cannot access script file */ |
---|
| 218 | case PBSE_STAGEIN: /* Stage In of files failed */ |
---|
| 219 | case PBSE_CKPBSY: /* Checkpoint Busy, may be retries */ |
---|
| 220 | case PBSE_EXLIMIT: /* Limit exceeds allowable */ |
---|
| 221 | case PBSE_ALRDYEXIT: /* Job already in exit state */ |
---|
| 222 | case PBSE_NOCOPYFILE: /* Job files not copied */ |
---|
| 223 | case PBSE_CLEANEDOUT: /* unknown job id after clean init */ |
---|
| 224 | case PBSE_NOSYNCMSTR: /* No Master in Sync Set */ |
---|
| 225 | case PBSE_SISREJECT: /* sister rejected */ |
---|
| 226 | case PBSE_SISCOMM: /* sister could not communicate */ |
---|
| 227 | case PBSE_CKPSHORT: /* not all tasks could checkpoint */ |
---|
| 228 | case PBSE_UNKNODE: /* Named node is not in the list */ |
---|
| 229 | case PBSE_NONODES: /* Server has no node list */ |
---|
| 230 | case PBSE_NODENBIG: /* Node name is too big */ |
---|
| 231 | case PBSE_NODEEXIST: /* Node name already exists */ |
---|
| 232 | case PBSE_MUTUALEX: /* State values are mutually exclusive */ |
---|
| 233 | case PBSE_GMODERR: /* Error(s) during global modification of nodes */ |
---|
| 234 | case PBSE_NORELYMOM: /* could not contact Mom */ |
---|
| 235 | return FSD_ERRNO_INTERNAL_ERROR; |
---|
| 236 | #endif |
---|
| 237 | default: |
---|
| 238 | return FSD_ERRNO_INTERNAL_ERROR; |
---|
| 239 | } |
---|
| 240 | } |
---|
| 241 | |
---|
| 242 | |
---|
| 243 | char * |
---|
| 244 | pbsdrmaa_write_tmpfile( const char *content, size_t len ) |
---|
| 245 | { |
---|
| 246 | static const char *tmpfile_template = "/tmp/pbs_drmaa.XXXXXX"; |
---|
| 247 | char *volatile name = NULL; |
---|
| 248 | volatile int fd = -1; |
---|
| 249 | |
---|
| 250 | fsd_log_enter(( "" )); |
---|
| 251 | |
---|
| 252 | TRY |
---|
| 253 | { |
---|
| 254 | name = fsd_strdup( tmpfile_template ); |
---|
| 255 | fd = mkstemp( name ); |
---|
| 256 | if( fd < 0 ) |
---|
| 257 | fsd_exc_raise_sys(0); |
---|
[66] | 258 | |
---|
| 259 | if( fchmod(fd, 0600 ) != 0) |
---|
| 260 | fsd_exc_raise_sys(0); |
---|
| 261 | |
---|
[1] | 262 | while( len > 0 ) |
---|
| 263 | { |
---|
| 264 | size_t written = write( fd, content, len ); |
---|
| 265 | if( written != (size_t)-1 ) |
---|
| 266 | { |
---|
| 267 | content += written; |
---|
| 268 | len -= written; |
---|
| 269 | } |
---|
| 270 | else |
---|
| 271 | fsd_exc_raise_sys(0); |
---|
| 272 | } |
---|
| 273 | } |
---|
| 274 | EXCEPT_DEFAULT |
---|
| 275 | { fsd_free( name ); } |
---|
| 276 | FINALLY |
---|
| 277 | { |
---|
| 278 | if( fd >= 0 ) |
---|
| 279 | { |
---|
| 280 | if( close( fd ) ) |
---|
| 281 | fsd_exc_raise_sys(0); |
---|
| 282 | } |
---|
| 283 | } |
---|
| 284 | END_TRY |
---|
| 285 | |
---|
| 286 | fsd_log_return(( "=%s", name )); |
---|
| 287 | return name; |
---|
| 288 | } |
---|
| 289 | |
---|
| 290 | |
---|