FEATURE: PATCH:Eygene Ryabinkin - Fitting job into partition that works.

Problem Ref:	[MAUIUSERS] PATCH FOR DEFAULT PARTITION HANDLING
Organization:	Eygene
Contact:	Eygene Ryabinkin



git-svn-id: svn://opensvn.adaptivecomputing.com/maui/trunk@99 3f5042e3-fb1d-0410-be18-d6ca2573e517
This commit is contained in:
bchristiansen 2008-10-03 21:02:55 +00:00
parent 479cb2138a
commit 0d182829d2
6 changed files with 515 additions and 405 deletions

View File

@ -292,6 +292,7 @@ int MJobSetState(mjob_t *,enum MJobStateEnum);
int MJobPreempt(mjob_t *,mjob_t **,enum MPreemptPolicyEnum,char *,int *);
int MJobResume(mjob_t *,char *,int *);
int MJobGetPAL(mjob_t *,int *,int *,mpar_t **);
mpar_t *MJobFindDefPart(mjob_t *, mclass_t *, int *);
int MJobRemove(mjob_t *);
int MJobGetAccount(mjob_t *,mgcred_t **);
int MJobSetCreds(mjob_t *,char *,char *,char *);
@ -387,7 +388,7 @@ int MQueueDiagnose(mjob_t **,int *,int,mpar_t *,char *,int);
int MQueueCheckStatus(void);
int MQueueGetRequeueValue(int *,long,long,double *);
int MQueueSelectAllJobs(mjob_t **,int,mpar_t *,int *,int,int,int,char *);
int MQueueSelectJobs(int *,int *,int,int,int,unsigned long,int,int *,mbool_t);
int MQueueSelectJobs(int *,int *,int,int,int,unsigned long,int,int *,mbool_t,mbool_t);
int MQueueAddAJob(mjob_t *);
int MQueueRemoveAJob(mjob_t *,int);
int MQueueBackFill(int *,int,mpar_t *);

View File

@ -239,52 +239,11 @@ int MJobGetPAL(
if (PAL != NULL)
MUBMCopy(PAL,tmpPAL,MAX_MPAR);
/* determine allowed partition default (precedence: U,G,A,C,S,0) */
/* determine allowed partition default */
if (PDef != NULL)
{
if ((J->Cred.U->F.PDef != NULL) &&
(J->Cred.U->F.PDef != &MPar[0]) &&
MUBMCheck(((mpar_t *)J->Cred.U->F.PDef)->Index,tmpPAL))
{
*PDef = (mpar_t *)J->Cred.U->F.PDef;
}
else if ((J->Cred.G->F.PDef != NULL) &&
(J->Cred.G->F.PDef != &MPar[0]) &&
MUBMCheck(((mpar_t *)J->Cred.G->F.PDef)->Index,tmpPAL))
{
*PDef = (mpar_t *)J->Cred.G->F.PDef;
}
else if ((J->Cred.A != NULL) &&
(J->Cred.A->F.PDef != NULL) &&
(J->Cred.A->F.PDef != &MPar[0]) &&
MUBMCheck(((mpar_t *)J->Cred.A->F.PDef)->Index,tmpPAL))
{
*PDef = (mpar_t *)J->Cred.A->F.PDef;
}
else if ((C != NULL) &&
(C->F.PDef != NULL) &&
(C->F.PDef != &MPar[0]) &&
MUBMCheck(((mpar_t *)C->F.PDef)->Index,tmpPAL))
{
*PDef = (mpar_t *)C->F.PDef;
}
else if ((J->Cred.Q != NULL) &&
(J->Cred.Q->F.PDef != NULL) &&
(J->Cred.Q->F.PDef != &MPar[0]) &&
MUBMCheck(((mpar_t *)J->Cred.Q->F.PDef)->Index,tmpPAL))
{
*PDef = (mpar_t *)J->Cred.Q->F.PDef;
}
else if ((MPar[0].F.PDef != NULL) &&
(MPar[0].F.PDef != &MPar[0]))
{
*PDef = (mpar_t *)MPar[0].F.PDef;
}
else
{
*PDef = &MPar[MDEF_SYSPDEF];
}
*PDef = MJobFindDefPart(J, C, tmpPAL);
/* verify access to default partition */
@ -331,7 +290,70 @@ int MJobGetPAL(
return(SUCCESS);
} /* END MJobGetPAL() */
/*
 * MJobFindDefPart() - pick the default partition for job 'J'.
 *
 * Candidates are examined in a fixed order and the first usable one wins:
 *   J->Cred.U, J->Cred.G, J->Cred.A, class 'C', J->Cred.Q.
 * A candidate is usable when its F.PDef is set, is not the global partition
 * &MPar[0] and, if 'PAL' is non-NULL, its partition index passes MUBMCheck()
 * against 'PAL'.  J->Cred.A, 'C' and J->Cred.Q are skipped when NULL;
 * J->Cred.U and J->Cred.G are dereferenced unconditionally.
 *
 * If no credential or class supplies a usable default, MPar[0].F.PDef is
 * returned when it is set and differs from &MPar[0]; otherwise
 * &MPar[MDEF_SYSPDEF] is returned.  These two fallbacks are NOT checked
 * against 'PAL'.
 */

mpar_t *MJobFindDefPart(

  mjob_t   *J,    /* I: job whose credentials are consulted */
  mclass_t *C,    /* I: job class (optional, may be NULL) */
  int      *PAL)  /* I: partition access list (optional, may be NULL) */

{
  mpar_t *Cand;

  /* J->Cred.U default */

  Cand = (mpar_t *)J->Cred.U->F.PDef;

  if ((Cand != NULL) &&
      (Cand != &MPar[0]) &&
      ((PAL == NULL) || MUBMCheck(Cand->Index,PAL)))
    {
    return(Cand);
    }

  /* J->Cred.G default */

  Cand = (mpar_t *)J->Cred.G->F.PDef;

  if ((Cand != NULL) &&
      (Cand != &MPar[0]) &&
      ((PAL == NULL) || MUBMCheck(Cand->Index,PAL)))
    {
    return(Cand);
    }

  /* J->Cred.A default (credential is optional) */

  if (J->Cred.A != NULL)
    {
    Cand = (mpar_t *)J->Cred.A->F.PDef;

    if ((Cand != NULL) &&
        (Cand != &MPar[0]) &&
        ((PAL == NULL) || MUBMCheck(Cand->Index,PAL)))
      {
      return(Cand);
      }
    }

  /* class default (class is optional) */

  if (C != NULL)
    {
    Cand = (mpar_t *)C->F.PDef;

    if ((Cand != NULL) &&
        (Cand != &MPar[0]) &&
        ((PAL == NULL) || MUBMCheck(Cand->Index,PAL)))
      {
      return(Cand);
      }
    }

  /* J->Cred.Q default (credential is optional) */

  if (J->Cred.Q != NULL)
    {
    Cand = (mpar_t *)J->Cred.Q->F.PDef;

    if ((Cand != NULL) &&
        (Cand != &MPar[0]) &&
        ((PAL == NULL) || MUBMCheck(Cand->Index,PAL)))
      {
      return(Cand);
      }
    }

  /* default configured on the global partition; not checked against PAL */

  Cand = (mpar_t *)MPar[0].F.PDef;

  if ((Cand != NULL) && (Cand != &MPar[0]))
    {
    return(Cand);
    }

  /* last resort: compiled-in default partition */

  return(&MPar[MDEF_SYSPDEF]);
} /* END MJobFindDefPart() */
int MParFind(

View File

@ -39,11 +39,21 @@ extern mres_t *MRes[];
*/
/* forward declaration: per-job eligibility check called from MQueueSelectJobs();
 * the definition follows MQueueSelectJobs() later in this file */
static int MQueueCheckSingleJob(
mjob_t *J, /* I: job to evaluate */
int *Reason, /* O: rejection counters, indexed by mar* reason values */
mpar_t *P, /* I: partition the job is evaluated against */
mpar_t *GP, /* I: partition whose JobPrioAccrualPolicy is consulted */
int PLevel, /* I: policy level handed to the limit/policy checks */
int MaxNC, /* I: compared against MAX_MNODE only */
int MaxPC, /* I: largest proc count accepted */
unsigned long MaxWCLimit, /* I: longest wallclock limit accepted */
int OrigPIndex, /* I: -1 disables the partition access checks */
mbool_t UpdateStats); /* I: (boolean) update job/idle statistics */
/* NYI: must handle effqduration */
int MQueueSelectJobs(
int *SrcQ, /* I */
@ -54,7 +64,8 @@ int MQueueSelectJobs(
unsigned long MaxWCLimit, /* I */
int OrigPIndex, /* I */
int *FReason, /* O */
mbool_t UpdateStats) /* I: (boolean) */
mbool_t UpdateStats, /* I: (boolean) */
mbool_t OnlyDefPart) /* I: (boolean) */
{
int index;
@ -63,27 +74,14 @@ int MQueueSelectJobs(
mjob_t *J;
char DValue[MAX_MNAME];
enum MJobDependEnum DType;
mpar_t *P;
mpar_t *GP;
long PS;
int LReason[MAX_MREJREASON];
int PReason;
int *Reason;
int PIndex;
int PReq;
mreq_t *RQ;
double PE;
char tmpLine[MAX_MLINE];
const char *FName = "MQueueSelectJobs";
@ -159,324 +157,28 @@ int MQueueSelectJobs(
continue;
}
RQ = J->Req[0]; /* FIXME */
/* if job removed */
if (J->Name[0] == '\0')
if (OnlyDefPart == TRUE && MJobFindDefPart(J, NULL, NULL) != P)
{
Reason[marCorruption]++;
continue;
}
if (UpdateStats == TRUE)
{
J->BlockReason = 0;
if (J->State == mjsIdle)
MStat.IdleJobs++;
}
PReq = MJobGetProcCount(J);
MJobGetPE(J,P,&PE);
PS = (long)PReq * J->SpecWCLimit[0];
/* check partition */
if (OrigPIndex != -1)
{
if ((P->Index == 0) && !(J->Flags & (1 << mjfSpan)))
{
/* why? what does partition '0' mean in partition mode? */
DBG(3,fSCHED) DPrint("INFO: job %s not considered for spanning\n",
J->Name);
Reason[marPartitionAccess]++;
continue;
}
else if ((P->Index != 0) && (J->Flags & (1 << mjfSpan)))
{
DBG(3,fSCHED) DPrint("INFO: spanning job %s not considered for partition scheduling\n",
J->Name);
Reason[marPartitionAccess]++;
continue;
}
if ((P->Index > 0) && (MUBMCheck(P->Index,J->PAL) == FAILURE))
{
DBG(7,fSCHED) DPrint("INFO: job %s not considered for partition %s (allowed %s)\n",
J->Name,
P->Name,
MUListAttrs(ePartition,J->PAL[0]));
Reason[marPartitionAccess]++;
continue;
}
} /* END if (OrigPIndex != -1) */
/* check job state */
if ((J->State != mjsIdle) && (J->State != mjsSuspended))
{
DBG(6,fSCHED) DPrint("INFO: job %s rejected (job in non-idle state '%s')\n",
DBG(7,fSCHED) DPrint("INFO: skipping job[%d] '%s', only default partition check requested (and current partition is %s)\n",
jindex,
J->Name,
MJobState[J->State]);
Reason[marState]++;
if ((MaxNC == MAX_MNODE) &&
(MaxWCLimit == MAX_MTIME) &&
(J->R != NULL))
{
if ((J->State != mjsStarting) && (J->State != mjsRunning))
MResDestroy(&J->R);
}
P->Name);
continue;
}
/* check if job has been previously scheduled or deferred */
if ((J->EState != mjsIdle) && (J->EState != mjsSuspended))
{
DBG(6,fSCHED) DPrint("INFO: job %s rejected (job in non-idle expected state: '%s')\n",
J->Name,
MJobState[J->EState]);
Reason[marEState]++;
if ((MaxNC == MAX_MNODE) && (MaxWCLimit == MAX_MTIME) && (J->R != NULL))
{
if ((J->EState != mjsStarting) && (J->EState != mjsRunning))
MResDestroy(&J->R);
}
continue;
}
/* check available procs */
if (PReq > P->CRes.Procs)
{
DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds configured procs: %d > %d)\n",
J->Name,
P->Name,
PReq,
P->CRes.Procs);
Reason[marNodeCount]++;
if (P->Index <= 0)
{
if (J->R != NULL)
MResDestroy(&J->R);
if (J->Hold == 0)
{
MJobSetHold(
J,
(1 << mhDefer),
MSched.DeferTime,
mhrNoResources,
"exceeds partition configured procs");
}
}
continue;
}
/* check partition specific limits */
if (MJobCheckLimits(
J,
if (MQueueCheckSingleJob(
J,
Reason,
P,
GP,
PLevel,
P,
(1 << mlSystem),
tmpLine) == FAILURE)
{
DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (%s)\n",
J->Name,
P->Name,
tmpLine);
Reason[marSystemLimits]++;
if (P->Index <= 0)
{
if (J->R != NULL)
MResDestroy(&J->R);
MJobSetHold(
J,
(1 << mhDefer),
MSched.DeferTime,
mhrSystemLimits,
"exceeds system proc/job limit");
}
MaxNC,
MaxPC,
MaxWCLimit,
OrigPIndex,
UpdateStats) == FAILURE)
continue;
} /* END if (MJobCheckLimits() == FAILURE) */
/* check job size */
if (PReq > MaxPC)
{
DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds window size: %d > %d)\n",
J->Name,
P->Name,
PReq,
MaxPC);
Reason[marNodeCount]++;
continue;
}
/* check job duration */
if (J->SpecWCLimit[0] > MaxWCLimit)
{
DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds window time: %ld > %ld)\n",
J->Name,
P->Name,
J->SpecWCLimit[0],
MaxWCLimit);
Reason[marTime]++;
continue;
}
/* check partition class support */
if (P->Index > 0)
{
if (MUNumListGetCount(J->StartPriority,RQ->DRes.PSlot,P->CRes.PSlot,0,NULL) == FAILURE)
{
DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (classes not supported '%s')\n",
J->Name,
P->Name,
MUCAListToString(RQ->DRes.PSlot,P->CRes.PSlot,NULL));
Reason[marClass]++;
if (J->R != NULL)
MResDestroy(&J->R);
continue;
}
} /* END if (PIndex) */
if (MJobCheckDependency(J,&DType,DValue) == FAILURE)
{
DBG(6,fSCHED) DPrint("INFO: job %s rejected (dependent on job '%s' %s)\n",
J->Name,
DValue,
MJobDependType[DType]);
if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
{
J->SystemQueueTime = MSched.Time;
}
Reason[marDepend]++;
if ((MaxNC == MAX_MNODE) &&
(MaxWCLimit == MAX_MTIME) &&
(J->R != NULL))
{
MResDestroy(&J->R);
}
continue;
} /* END if (MJobCheckDependency(J,&JDepend) == FAILURE) */
/* check partition active job policies */
if (MJobCheckPolicies(
J,
PLevel,
(1 << mlActive),
P, /* NOTE: may set to &MPar[0] */
&PReason,
NULL,
MAX_MTIME) == FAILURE)
{
DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (policy failure: '%s')\n",
J->Name,
P->Name,
MPolicyRejection[PReason]);
if (PLevel == ptHARD)
{
if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
{
J->SystemQueueTime = MSched.Time;
}
}
Reason[marPolicy]++;
if ((MaxNC == MAX_MNODE) &&
(MaxWCLimit == MAX_MTIME) &&
(J->R != NULL))
{
MResDestroy(&J->R);
}
continue;
}
J->Cred.U->MTime = MSched.Time;
J->Cred.G->MTime = MSched.Time;
if (J->Cred.A != NULL)
J->Cred.A->MTime = MSched.Time;
if (MPar[0].FSC.FSPolicy != fspNONE)
{
int OIndex;
if (MFSCheckCap(NULL,J,P,&OIndex) == FAILURE)
{
DBG(5,fSCHED) DPrint("INFO: job '%s' exceeds %s FS cap\n",
J->Name,
(OIndex > 0) ? MXO[OIndex] : "NONE");
if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
{
J->SystemQueueTime = MSched.Time;
}
Reason[marFairShare]++;
continue;
}
} /* END if (FS[0].FSPolicy != fspNONE) */
/* NOTE: idle queue policies handled in MQueueSelectAllJobs() */
if (MLocalCheckFairnessPolicy(J,MSched.Time,NULL) == FAILURE)
{
DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (violates local fairness policy)\n",
J->Name,
P->Name);
if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
{
J->SystemQueueTime = MSched.Time;
}
Reason[marPolicy]++;
continue;
}
/* NOTE: effective queue duration not yet properly supported */
@ -522,6 +224,363 @@ int MQueueSelectJobs(
return(SUCCESS);
} /* END MQueueSelectJobs() */
/*
 * MQueueCheckSingleJob() - helper for MQueueSelectJobs(): evaluate one job.
 *
 * Runs the eligibility checks below in order and stops at the first failure:
 *   removed job, partition access (only when OrigPIndex != -1), job state,
 *   expected state, configured procs, system limits, window size (MaxPC),
 *   window time (MaxWCLimit), partition class support (only when
 *   P->Index > 0), dependencies, active-job policies, fairshare cap (only
 *   when MPar[0].FSC.FSPolicy != fspNONE) and the local fairness policy.
 *
 * On each failure the matching Reason[mar*] counter is incremented.  Some
 * failures also have side effects on the job: J->R may be destroyed via
 * MResDestroy(), a defer hold may be set via MJobSetHold(), and
 * J->SystemQueueTime may be reset to MSched.Time when
 * GP->JobPrioAccrualPolicy is jpapFullPolicy.
 *
 * Parameters:
 *   J           (I, modified) job to evaluate
 *   Reason      (O) rejection counters indexed by mar* values; must not be
 *               NULL, it is written unconditionally
 *   P           (I) partition the job is evaluated against
 *   GP          (I) partition whose JobPrioAccrualPolicy is consulted
 *   PLevel      (I) policy level passed to MJobCheckLimits() and
 *               MJobCheckPolicies()
 *   MaxNC       (I) only compared against MAX_MNODE (see NOTE below)
 *   MaxPC       (I) largest proc count accepted
 *   MaxWCLimit  (I) longest wallclock limit accepted
 *   OrigPIndex  (I) -1 disables the partition access checks
 *   UpdateStats (I) when TRUE, clears J->BlockReason and counts idle jobs
 *
 * Returns SUCCESS if the job can be queued and FAILURE otherwise.
 *
 * NOTE(review): (MaxNC == MAX_MNODE) && (MaxWCLimit == MAX_MTIME) gates
 * several MResDestroy() calls; presumably it identifies an evaluation with
 * no backfill window restriction - TODO confirm.
 */

static int MQueueCheckSingleJob(

  mjob_t        *J,
  int           *Reason,
  mpar_t        *P,
  mpar_t        *GP,
  int            PLevel,
  int            MaxNC,
  int            MaxPC,
  unsigned long  MaxWCLimit,
  int            OrigPIndex,
  mbool_t        UpdateStats)

{
  char DValue[MAX_MNAME];
  enum MJobDependEnum DType;

  int PReason;
  int PReq;

  mreq_t *RQ;

  double PE;

  char tmpLine[MAX_MLINE];

  const char *FName = "MQueueCheckSingleJob";

  RQ = J->Req[0];  /* FIXME */

  /* if job removed */

  if (J->Name[0] == '\0')
    {
    Reason[marCorruption]++;

    return(FAILURE);
    }

  /* NOTE: statistics are updated before the job has passed any check */

  if (UpdateStats == TRUE)
    {
    J->BlockReason = 0;

    if (J->State == mjsIdle)
      MStat.IdleJobs++;
    }

  PReq = MJobGetProcCount(J);

  /* XXX: PE is unused?  call retained in case MJobGetPE() has side effects */

  MJobGetPE(J,P,&PE);

  /* check partition */

  if (OrigPIndex != -1)
    {
    if ((P->Index == 0) && !(J->Flags & (1 << mjfSpan)))
      {
      /* why?  what does partition '0' mean in partition mode? */

      DBG(3,fSCHED) DPrint("INFO: job %s not considered for spanning\n",
        J->Name);

      Reason[marPartitionAccess]++;

      return(FAILURE);
      }
    else if ((P->Index != 0) && (J->Flags & (1 << mjfSpan)))
      {
      DBG(3,fSCHED) DPrint("INFO: spanning job %s not considered for partition scheduling\n",
        J->Name);

      Reason[marPartitionAccess]++;

      return(FAILURE);
      }

    if ((P->Index > 0) && (MUBMCheck(P->Index,J->PAL) == FAILURE))
      {
      DBG(7,fSCHED) DPrint("INFO: job %s not considered for partition %s (allowed %s)\n",
        J->Name,
        P->Name,
        MUListAttrs(ePartition,J->PAL[0]));

      Reason[marPartitionAccess]++;

      return(FAILURE);
      }
    }  /* END if (OrigPIndex != -1) */

  /* check job state */

  if ((J->State != mjsIdle) && (J->State != mjsSuspended))
    {
    DBG(6,fSCHED) DPrint("INFO: job %s rejected (job in non-idle state '%s')\n",
      J->Name,
      MJobState[J->State]);

    Reason[marState]++;

    if ((MaxNC == MAX_MNODE) &&
        (MaxWCLimit == MAX_MTIME) &&
        (J->R != NULL))
      {
      /* starting/running jobs keep J->R */

      if ((J->State != mjsStarting) && (J->State != mjsRunning))
        MResDestroy(&J->R);
      }

    return(FAILURE);
    }

  /* check if job has been previously scheduled or deferred */

  if ((J->EState != mjsIdle) && (J->EState != mjsSuspended))
    {
    DBG(6,fSCHED) DPrint("INFO: job %s rejected (job in non-idle expected state: '%s')\n",
      J->Name,
      MJobState[J->EState]);

    Reason[marEState]++;

    if ((MaxNC == MAX_MNODE) && (MaxWCLimit == MAX_MTIME) && (J->R != NULL))
      {
      if ((J->EState != mjsStarting) && (J->EState != mjsRunning))
        MResDestroy(&J->R);
      }

    return(FAILURE);
    }

  /* check available procs */

  if (PReq > P->CRes.Procs)
    {
    DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds configured procs: %d > %d)\n",
      J->Name,
      P->Name,
      PReq,
      P->CRes.Procs);

    Reason[marNodeCount]++;

    /* the job is only deferred when P->Index <= 0 */

    if (P->Index <= 0)
      {
      if (J->R != NULL)
        MResDestroy(&J->R);

      if (J->Hold == 0)
        {
        MJobSetHold(
          J,
          (1 << mhDefer),
          MSched.DeferTime,
          mhrNoResources,
          "exceeds partition configured procs");
        }
      }

    return(FAILURE);
    }

  /* check partition specific limits */

  if (MJobCheckLimits(
        J,
        PLevel,
        P,
        (1 << mlSystem),
        tmpLine) == FAILURE)
    {
    DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (%s)\n",
      J->Name,
      P->Name,
      tmpLine);

    Reason[marSystemLimits]++;

    if (P->Index <= 0)
      {
      if (J->R != NULL)
        MResDestroy(&J->R);

      /* NOTE: unlike the proc check above, the hold is set even if J->Hold != 0 */

      MJobSetHold(
        J,
        (1 << mhDefer),
        MSched.DeferTime,
        mhrSystemLimits,
        "exceeds system proc/job limit");
      }

    return(FAILURE);
    }  /* END if (MJobCheckLimits() == FAILURE) */

  /* check job size */

  if (PReq > MaxPC)
    {
    DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds window size: %d > %d)\n",
      J->Name,
      P->Name,
      PReq,
      MaxPC);

    Reason[marNodeCount]++;

    return(FAILURE);
    }

  /* check job duration */

  if (J->SpecWCLimit[0] > MaxWCLimit)
    {
    /* MaxWCLimit is unsigned long, hence %lu */

    DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds window time: %ld > %lu)\n",
      J->Name,
      P->Name,
      J->SpecWCLimit[0],
      MaxWCLimit);

    Reason[marTime]++;

    return(FAILURE);
    }

  /* check partition class support */

  if (P->Index > 0)
    {
    if (MUNumListGetCount(J->StartPriority,RQ->DRes.PSlot,P->CRes.PSlot,0,NULL) == FAILURE)
      {
      DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (classes not supported '%s')\n",
        J->Name,
        P->Name,
        MUCAListToString(RQ->DRes.PSlot,P->CRes.PSlot,NULL));

      Reason[marClass]++;

      if (J->R != NULL)
        MResDestroy(&J->R);

      return(FAILURE);
      }
    }  /* END if (P->Index > 0) */

  /* check job dependencies */

  if (MJobCheckDependency(J,&DType,DValue) == FAILURE)
    {
    DBG(6,fSCHED) DPrint("INFO: job %s rejected (dependent on job '%s' %s)\n",
      J->Name,
      DValue,
      MJobDependType[DType]);

    if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
      {
      J->SystemQueueTime = MSched.Time;
      }

    Reason[marDepend]++;

    if ((MaxNC == MAX_MNODE) &&
        (MaxWCLimit == MAX_MTIME) &&
        (J->R != NULL))
      {
      MResDestroy(&J->R);
      }

    return(FAILURE);
    }  /* END if (MJobCheckDependency() == FAILURE) */

  /* check partition active job policies */

  if (MJobCheckPolicies(
        J,
        PLevel,
        (1 << mlActive),
        P,    /* NOTE:  may set to &MPar[0] */
        &PReason,
        NULL,
        MAX_MTIME) == FAILURE)
    {
    DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (policy failure: '%s')\n",
      J->Name,
      P->Name,
      MPolicyRejection[PReason]);

    /* the queue time is only reset for hard policy violations */

    if (PLevel == ptHARD)
      {
      if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
        {
        J->SystemQueueTime = MSched.Time;
        }
      }

    Reason[marPolicy]++;

    if ((MaxNC == MAX_MNODE) &&
        (MaxWCLimit == MAX_MTIME) &&
        (J->R != NULL))
      {
      MResDestroy(&J->R);
      }

    return(FAILURE);
    }

  /* stamp the job's credentials with the current scheduler time */

  J->Cred.U->MTime = MSched.Time;
  J->Cred.G->MTime = MSched.Time;

  if (J->Cred.A != NULL)
    J->Cred.A->MTime = MSched.Time;

  /* check fairshare cap */

  if (MPar[0].FSC.FSPolicy != fspNONE)
    {
    int OIndex;

    if (MFSCheckCap(NULL,J,P,&OIndex) == FAILURE)
      {
      DBG(5,fSCHED) DPrint("INFO: job '%s' exceeds %s FS cap\n",
        J->Name,
        (OIndex > 0) ? MXO[OIndex] : "NONE");

      if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
        {
        J->SystemQueueTime = MSched.Time;
        }

      Reason[marFairShare]++;

      return(FAILURE);
      }
    }  /* END if (MPar[0].FSC.FSPolicy != fspNONE) */

  /* NOTE:  idle queue policies handled in MQueueSelectAllJobs() */

  if (MLocalCheckFairnessPolicy(J,MSched.Time,NULL) == FAILURE)
    {
    DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (violates local fairness policy)\n",
      J->Name,
      P->Name);

    if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
      {
      J->SystemQueueTime = MSched.Time;
      }

    Reason[marPolicy]++;

    return(FAILURE);
    }

  return(SUCCESS);
} /* END MQueueCheckSingleJob() */

View File

@ -338,6 +338,7 @@ int MQueueBackFill(
AdjBFTime,
P->Index,
NULL,
FALSE,
FALSE) == FAILURE)
{
DBG(5,fSCHED) DPrint("INFO: no jobs meet BF window criteria in partition %s\n",
@ -1408,6 +1409,7 @@ int MQueueCheckStatus()
MAX_MTIME,
-1,
ReasonList,
FALSE,
FALSE) == FAILURE)
{
strcpy(DeferMessage,"SCHED_INFO: job cannot run. Reason: cannot select job\n");

View File

@ -6737,6 +6737,52 @@ int MJobDistributeTasks(
} /* END MJobDistributeTasks() */
/*
 * m_schedule_on_partitions() - helper routine for MSchedProcessJobs():
 * run one scheduling pass over the partitions.
 *
 * For every partition index in [0,MAX_MPAR) that is not skipped (see below),
 * jobs are selected from 'CurrentQ' into a temporary queue with
 * MQueueSelectJobs() using 'soft' policy limits and no window restriction
 * (MAX_MNODE/MAX_MTASK/MAX_MTIME), then handed to MQueueScheduleIJobs().
 * When 'DoBackfill' is TRUE and the partition's BFPolicy is not ptOFF, the
 * same temporary queue is also passed to MQueueBackFill().
 *
 * A partition is skipped when it has no configured nodes.  Partition 0 is
 * additionally skipped when MPar[2] has no configured nodes.
 * NOTE(review): presumably partition 0 is only scheduled when more than one
 * real partition exists - TODO confirm.
 *
 * Parameters:
 *   OnlyDefPart (I) forwarded to MQueueSelectJobs() as its 'OnlyDefPart'
 *               argument
 *   DoBackfill  (I) TRUE to backfill after priority scheduling
 *   CurrentQ    (I) source queue handed to MQueueSelectJobs()
 */

static void m_schedule_on_partitions(

  int  OnlyDefPart,  /* I */
  int  DoBackfill,   /* I */
  int *CurrentQ)     /* I */

{
  int PIndex;

  int tmpQ[MAX_MJOB];

  for (PIndex = 0;PIndex < MAX_MPAR;PIndex++)
    {
    if (((PIndex == 0) && (MPar[2].ConfigNodes == 0)) ||
         (MPar[PIndex].ConfigNodes == 0))
      {
      continue;
      }

    MOQueueInitialize(tmpQ);

    if (MQueueSelectJobs(
          CurrentQ,
          tmpQ,
          ptSOFT,
          MAX_MNODE,
          MAX_MTASK,
          MAX_MTIME,
          PIndex,
          NULL,
          TRUE,
          OnlyDefPart) == SUCCESS)
      {
      MQueueScheduleIJobs(tmpQ,&MPar[PIndex]);

      if (DoBackfill == TRUE && MPar[PIndex].BFPolicy != ptOFF)
        {
        /* backfill jobs using 'soft' policy constraints */

        MQueueBackFill(tmpQ,ptSOFT,&MPar[PIndex]);
        }
      }

    /* tmpQ is torn down every iteration, whether or not selection succeeded */

    MOQueueDestroy(tmpQ,FALSE);
    }  /* END for (PIndex) */
} /* END m_schedule_on_partitions() */
int MSchedProcessJobs(
@ -6841,6 +6887,7 @@ int MSchedProcessJobs(
MAX_MTIME,
-1,
NULL,
FALSE,
FALSE) == SUCCESS)
{
memcpy(MFQ,tmpQ,sizeof(MFQ));
@ -6863,45 +6910,20 @@ int MSchedProcessJobs(
MAX_MTIME,
-1,
NULL,
TRUE);
TRUE,
FALSE);
/* schedule priority jobs */
if (CurrentQ[0] != -1)
{
for (PIndex = 0;PIndex < MAX_MPAR;PIndex++)
{
if (((PIndex == 0) && (MPar[2].ConfigNodes == 0)) ||
(MPar[PIndex].ConfigNodes == 0))
{
continue;
}
/* schedule jobs on their default partitions; skip backfilling */
MOQueueInitialize(tmpQ);
m_schedule_on_partitions(TRUE, FALSE, CurrentQ);
if (MQueueSelectJobs(
CurrentQ,
tmpQ,
ptSOFT,
MAX_MNODE,
MAX_MTASK,
MAX_MTIME,
PIndex,
NULL,
TRUE) == SUCCESS)
{
MQueueScheduleIJobs(tmpQ,&MPar[PIndex]);
/* schedule jobs on all partitions; do backfilling */
if (MPar[PIndex].BFPolicy != ptOFF)
{
/* backfill jobs using 'soft' policy constraints */
MQueueBackFill(tmpQ,ptSOFT,&MPar[PIndex]);
}
}
MOQueueDestroy(tmpQ,FALSE);
} /* END for (PIndex) */
m_schedule_on_partitions(FALSE, TRUE, CurrentQ);
} /* END if (GlobalSQ[0] != -1) */
MOQueueDestroy(CurrentQ,TRUE);
@ -6915,7 +6937,8 @@ int MSchedProcessJobs(
MAX_MTIME,
-1,
NULL,
TRUE);
TRUE,
FALSE);
if (CurrentQ[0] != -1)
{
@ -6947,7 +6970,8 @@ int MSchedProcessJobs(
MAX_MTIME,
PIndex,
NULL,
TRUE) == SUCCESS)
TRUE,
FALSE) == SUCCESS)
{
MQueueBackFill(tmpQ,ptHARD,&MPar[PIndex]);
}
@ -6989,7 +7013,8 @@ int MSchedProcessJobs(
MAX_MTIME,
-1,
NULL,
TRUE);
TRUE,
FALSE);
/* must sort/order MUIQ */

View File

@ -1775,6 +1775,7 @@ int UIJobShow(
MAX_MTIME,
P->Index,
Reason,
FALSE,
FALSE) == FAILURE) || (DstQ[0] == -1))
{
for (index = 0;index < MAX_MREJREASON;index++)