FEATURE: Added RMCFG[] ASYNCJOBSTART=TRUE for asynchronous job starts in PBS.

Problem Ref:	MAUI + TORQUE JOB START RATE
Organization:	NA
Contact:	NA



git-svn-id: svn://opensvn.adaptivecomputing.com/maui/trunk@115 3f5042e3-fb1d-0410-be18-d6ca2573e517
This commit is contained in:
bchristiansen 2009-09-10 17:16:46 +00:00
parent 10fcf052f8
commit 84de353373
6 changed files with 25 additions and 60 deletions

View File

@ -1,4 +1,5 @@
Maui 3.2.6p21
- Added RMCFG[] ASYNCJOBSTART=TRUE for asynchronous job starts in PBS. (Thanks to Bas van der Vlies and the community)
- Added StartTime and CompletionTime to Gold Charge.
- Fixed backfill issue with SINGLEUSER NODEACCESSPOLICY. (Thanks go to Roy Dragseth)
- N->{A|C}Res.Swap is overcommitted with N->CRes.Swap instead of N->CRes.Mem. (Thanks go to Roy Dragseth)

View File

@ -1013,6 +1013,7 @@ enum MLimitAttrType {
enum MRMAttrType {
mrmaNONE = 0,
mrmaAuthType,
mrmaASyncJobStart, /* asynchronous job start */
mrmaConfigFile,
mrmaCSAlgo,
mrmaCSKey,

View File

@ -1136,6 +1136,8 @@ typedef struct {
void *S; /* resource manager specific data */
void *xd;
mbool_t ASyncJobStart; /* asynchronous job start */
} mrm_t;
enum MClassStateEnum {

View File

@ -441,6 +441,7 @@ const char *MFSAttr[] = {
const char *MRMAttr[] = {
NONE,
"AUTHTYPE",
"ASYNCJOBSTART",
"CONFIGFILE",
"CSALGO",
"CSKEY",

View File

@ -1796,41 +1796,6 @@ int MPBSJobStart(
return(FAILURE);
}
if (MPBSJobModify(
J,
R,
ATTR_l,
(char *)(R->Version >= 710 ? "select" : "neednodes"),
HostList,
NULL,
NULL) == FAILURE)
{
DBG(0,fPBS) DPrint("ERROR: cannot set hostlist for job '%s'\n",
J->Name);
if (R->FailIteration != MSched.Iteration)
{
R->FailIteration = MSched.Iteration;
R->FailCount = 0;
}
R->FailCount++;
if (Msg != NULL)
strcpy(Msg,"job cannot be started - cannot set hostlist");
if (SC != NULL)
*SC = mscRemoteFailure;
return(FAILURE);
}
else
{
DBG(7,fPBS) DPrint("INFO: hostlist for job '%s' set to '%s'\n",
J->Name,
HostList);
}
}
else
{
@ -1909,7 +1874,16 @@ int MPBSJobStart(
MJobGetName(J,NULL,R,tmpJobName,sizeof(tmpJobName),mjnRMName);
rc = pbs_runjob(R->U.PBS.ServerSD,tmpJobName,MasterHost,NULL);
if (R->ASyncJobStart == FALSE )
{
DBG(7,fPBS) DPrint("INFO: use pbs_runjob\n");
rc = pbs_runjob(R->U.PBS.ServerSD,tmpJobName,HostList,NULL);
}
else
{
DBG(7,fPBS) DPrint("INFO: use pbs_asyrun\n");
rc = pbs_asyrunjob(R->U.PBS.ServerSD,tmpJobName,HostList,NULL);
}
if (rc != 0)
{
@ -1933,28 +1907,6 @@ int MPBSJobStart(
JobStartFailed = TRUE;
}
if (J->NeedNodes != NULL)
{
if (MPBSJobModify(
J,
R,
ATTR_l,
(char *)(R->Version >= 710 ? "select" : "neednodes"),
J->NeedNodes,
NULL,
NULL) == FAILURE)
{
DBG(7,fPBS) DPrint("WARNING: cannot reset hostlist for job '%s')\n",
J->Name);
}
else
{
DBG(7,fPBS) DPrint("INFO: hostlist for job '%s' set to '%s'\n",
J->Name,
J->NeedNodes);
}
}
if (JobStartFailed == TRUE)
{
/* job could not be started */

View File

@ -1771,7 +1771,7 @@ int MRMLoadConfig(
MRMSetDefaults(R);
MOLoadPvtConfig((void **)R,mxoRM,NULL,NULL,NULL);
MOLoadPvtConfig((void **)R,mxoRM,NULL,NULL,NULL);
}
/* load RM specific attributes */
@ -2032,6 +2032,8 @@ int MRMSetDefaults(
R->JobCounter = 0;
R->ASyncJobStart = FALSE;
return(SUCCESS);
}
@ -2242,7 +2244,7 @@ int MRMProcessConfig(
ptr,
(const char **)MRMAttr,
&aindex,
NULL,
NULL,
TRUE,
NULL,
ValLine,
@ -2266,6 +2268,12 @@ int MRMProcessConfig(
break;
case mrmaASyncJobStart:
R->ASyncJobStart = MUBoolFromString(ValLine,FALSE);
break;
case mrmaConfigFile:
MUStrCpy(R->U.LL.ConfigFile,ValLine,sizeof(R->U.LL.ConfigFile));