maui/src/moab/MSys.c
dev e3a3e5501e improve silver compatibility
add garrick node MAM interface


git-svn-id: svn://opensvn.adaptivecomputing.com/maui/trunk@6 3f5042e3-fb1d-0410-be18-d6ca2573e517
2005-07-12 23:22:01 +00:00

2709 lines
50 KiB
C

/* HEADER */
#include "moab.h"
#include "moab-proto.h"
extern mlog_t mlog;
/* NOTE: must sync with declaration */
mjob_t *MJob[MAX_MJOB];
mnode_t *MNode[MAX_MNODE];
mgcred_t *MUser[MAX_MUSER + MAX_MHBUF];
mgcred_t MGroup[MAX_MGROUP + MAX_MHBUF];
mgcred_t MAcct[MAX_MACCT + MAX_MHBUF];
mres_t *MRes[MAX_MRES];
srsv_t SRes[MAX_MSRES];
srsv_t OSRes[MAX_MSRES];
mjobl_t MJobName[MAX_MJOB + MAX_MHBUF];
mam_t MAM[MAX_MAM];
mrange_t MRange[MAX_MRANGE];
mrclass_t MRClass[MAX_MRCLASS]; /* available resource classes */
int MAQ[MAX_MJOB]; /* terminated by '-1' value */
mre_t MRE[MAX_MRES << 2];
mclass_t MClass[MAX_MCLASS];
msched_t MSched;
mckpt_t MCP;
mpar_t MPar[MAX_MPAR];
mrm_t MRM[MAX_MRM];
mqos_t MQOS[MAX_MQOS];
mframe_t MFrame[MAX_MFRAME];
mattrlist_t MAList; /* dynamic scheduling attributes */
mstat_t MStat;
mclient_t MClient[MAX_MCLIENT];
mjob_t *MJobTraceBuffer;
mrmfunc_t MRMFunc[MAX_MRMTYPE];
msim_t MSim;
msys_t MSys; /* cluster layout */
mx_t X;
int MFQ[MAX_MJOB]; /* terminated by '-1' value */
int MUIQ[MAX_MJOB];
char *MParam[MAX_MCFG];
extern mjob_t *MJobTraceBuffer;
extern const mcfg_t MCfg[];
extern const char *MAMOType[];
extern const char *MJobFlags[];
extern const char *MSysNodeAttr[];
extern const char *MNodeState[];
extern const char *MRMType[];
extern const char *MS3CName[];
extern const char *MSchedAttr[];
extern const char *MCredCfgParm[];
extern const char *MSchedMode[];
mrclass_t MRClass[MAX_MRCLASS]; /* resource classes */
/* local prototpyes */
int __MSysTestRLMerge();
/* END local prototypes */
#include "__MGridStub.c"
int MSysInitialize(mbool_t DoInit)
{
int index;
int srindex;
msched_t *S;
time_t tmpTime;
const char *FName = "MSysInitialize";
DBG(5,fALL) DPrint("%s()\n",
FName);
/* initialize all data structures */
S = &MSched;
memset(S,0,sizeof(MSched));
time(&tmpTime);
S->Time = (long)tmpTime;
S->X = (void *)&X;
MOSSyslogInit(S);
MUBuildPList((mcfg_t *)MCfg,MParam);
S->T[mxoAcct] = &MAcct[0];
S->S[mxoAcct] = sizeof(mgcred_t);
S->M[mxoAcct] = MAX_MACCT + MAX_MHBUF;
S->E[mxoAcct] = &MAcct[MAX_MACCT + MAX_MHBUF - 1];
S->T[mxoAM] = &MAM[0];
S->S[mxoAM] = sizeof(mam_t);
S->M[mxoAM] = MAX_MAM;
S->E[mxoAM] = &MAM[MAX_MAM - 1];
S->T[mxoClass] = &MClass[0];
S->S[mxoClass] = sizeof(mclass_t);
S->M[mxoClass] = MAX_MCLASS;
S->E[mxoClass] = &MClass[MAX_MCLASS - 1];
S->T[mxoCP] = &MCP;
S->S[mxoCP] = sizeof(mckpt_t);
S->M[mxoCP] = 1;
S->E[mxoCP] = &MCP;
S->T[mxoGroup] = &MGroup[0];
S->S[mxoGroup] = sizeof(mgcred_t);
S->M[mxoGroup] = MAX_MGROUP + MAX_MHBUF;
S->E[mxoGroup] = &MGroup[MAX_MGROUP + MAX_MHBUF - 1];
S->T[mxoJob] = &MJob[0];
S->S[mxoJob] = sizeof(mjob_t);
S->M[mxoJob] = MAX_MJOB;
S->E[mxoJob] = &MJob[MAX_MJOB - 1];
S->T[mxoNode] = &MNode[0];
S->S[mxoNode] = sizeof(mnode_t);
S->M[mxoNode] = MAX_MNODE;
S->E[mxoNode] = &MNode[MAX_MNODE - 1];
S->T[mxoPar] = &MPar[0];
S->S[mxoPar] = sizeof(mpar_t);
S->M[mxoPar] = MAX_MPAR;
S->E[mxoPar] = &MPar[MAX_MPAR - 1];
S->T[mxoQOS] = &MQOS[0];
S->S[mxoQOS] = sizeof(mqos_t);
S->M[mxoQOS] = MAX_MQOS;
S->E[mxoQOS] = &MQOS[MAX_MQOS - 1];
S->T[mxoRsv] = &MRes[0];
S->S[mxoRsv] = sizeof(mres_t);
S->M[mxoRsv] = MAX_MRES;
S->E[mxoRsv] = &MRes[MAX_MRES - 1];
S->T[mxoRM] = &MRM[0];
S->S[mxoRM] = sizeof(mrm_t);
S->M[mxoRM] = MAX_MRM;
S->E[mxoRM] = &MRM[MAX_MRM - 1];
S->T[mxoSched] = &MSched;
S->S[mxoSched] = sizeof(msched_t);
S->M[mxoSched] = 1;
S->E[mxoSched] = &MSched;
S->T[mxoSRes] = &SRes[0];
S->S[mxoSRes] = sizeof(srsv_t);
S->M[mxoSRes] = MAX_MSRES;
S->E[mxoSRes] = &SRes[MAX_MSRES - 1];
S->T[mxoUser] = &MUser[0];
S->S[mxoUser] = sizeof(mgcred_t *);
S->M[mxoUser] = MAX_MUSER + MAX_MHBUF;
S->E[mxoUser] = &MUser[MAX_MUSER + MAX_MHBUF - 1];
memset(MJob,0,sizeof(MJob));
memset(MNode,0,sizeof(MNode));
memset(MJobName,0,sizeof(MJobName));
memset(MRes,0,sizeof(MRes));
memset(SRes,0,sizeof(SRes));
memset(MRE,0,sizeof(MRE));
memset(MAList,0,sizeof(MAList));
memset(MPar,0,sizeof(MPar));
memset(MRClass,0,sizeof(MRClass));
memset(&MCP,0,sizeof(MCP));
strcpy(MCP.SVersionList,MCKPT_SVERSIONLIST);
strcpy(MCP.WVersion,MCKPT_VERSION);
memset(MRM,0,sizeof(MRM));
MRMLoadModules();
memset(MAQ,0,sizeof(MAQ));
MAQ[0] = -1;
memset(MRange,0,sizeof(MRange));
MFUCacheInitialize(&S->Time);
MQueueInitialize(&MJob[0],DEFAULT);
MSchedSetDefaults(S);
MSimSetDefaults();
MStatSetDefaults();
MCredSetDefaults();
/* initialize standing reservations */
for (srindex = 0;srindex < MAX_MSRES;srindex++)
{
OSRes[srindex].TaskCount = 0;
OSRes[srindex].StartTime = 0;
OSRes[srindex].EndTime = 0;
OSRes[srindex].A = NULL;
} /* END for (srindex) */
MNodeInitialize(S->GN,MDEF_GNNAME);
/* load state attribute values */
strcpy(MAList[eNodeState][0],NONE);
for (index = 1;MNodeState[index] != NULL;index++)
{
MUMAGetIndex(eNodeState,(char *)MNodeState[index],mAdd);
}
strcpy(MAList[eJobState][0],NONE);
for (index = 1;MJobState[index] != NULL;index++)
{
MUMAGetIndex(eJobState,(char *)MJobState[index],mAdd);
}
strcpy(MAList[eSysAttr][0],NONE);
for (index = 1;MSysNodeAttr[index] != NULL;index++)
{
MUMAGetBM(eSysAttr,(char *)MSysNodeAttr[index],mAdd);
} /* END for (index) */
strcpy(MAList[eJFeature][0],NONE);
for (index = 1;MJobFlags[index] != NULL;index++)
{
MUMAGetBM(eJFeature,(char *)MJobFlags[index],mAdd);
}
strcpy(MAList[eFeature][0],NONE);
strcpy(MAList[eNetwork][0],NONE);
strcpy(MAList[eOpsys][0],NONE);
strcpy(MAList[eArch][0],NONE);
strcpy(MAList[eSysAttr][0],NONE);
MParAdd(GLOBAL_MPARNAME,NULL);
MParAdd(DEFAULT_MPARNAME,NULL);
MClassAdd(ALL,NULL);
/* initialize frame array */
for (index = 0;index < MAX_MFRAME;index++)
{
MFrame[index].Index = index;
} /* END for (index) */
MSUIPCInitialize();
MLocalInitialize();
return(SUCCESS);
} /* END MSysInitialize() */
int MSysLoadConfig(
char *Directory, /* I */
char *ConfigFile, /* I */
int Mode) /* I */
{
char FileName[MAX_MLINE + 1];
int count;
int SC;
mnode_t *N;
mnode_t *GN;
const char *FName = "MSysLoadConfig";
DBG(3,fCONFIG) DPrint("%s(%s,%s,%d)\n",
FName,
Directory,
ConfigFile,
Mode);
if ((MSched.ConfigBuffer == NULL) || (Mode & (1 << mcmForce)))
{
if (!strstr(ConfigFile,Directory))
{
if (Directory[strlen(Directory) - 1] == '/')
{
sprintf(FileName,"%s%s",
Directory,
ConfigFile);
}
else
{
sprintf(FileName,"%s/%s",
Directory,
ConfigFile);
}
}
else
{
MUStrCpy(FileName,ConfigFile,sizeof(FileName));
}
if ((MSched.ConfigBuffer = MFULoad(FileName,1,macmWrite,&count,&SC)) == NULL)
{
DBG(2,fCONFIG) DPrint("WARNING: cannot load configuration file '%s' (using internal defaults)\n",
FileName);
return(FAILURE);
}
MCfgAdjustBuffer(&MSched.ConfigBuffer,TRUE);
} /* END if ((MSched.ConfigBuffer == NULL) || ...) */
strcpy(FileName + strlen(FileName) - strlen(".cfg"),"-private.cfg");
/* load general client config */
MOLoadPvtConfig(NULL,-1,NULL,NULL,FileName);
/* load peer service config */
MOLoadPvtConfig(NULL,mxoNONE,"EM",&MSched.EM,NULL);
MSched.EM.Type = mpstEM;
MSched.EM.S = (msocket_t *)calloc(1,sizeof(msocket_t));
MOLoadPvtConfig(NULL,mxoNONE,"DS",&MSched.DS,NULL);
MSched.DS.Type = mpstSD;
MSched.DS.S = (msocket_t *)calloc(1,sizeof(msocket_t));
GN = NULL;
if (MNodeCreate(&GN) == SUCCESS)
{
strcpy(GN->Name,MDEF_GNNAME);
}
N = &MSched.DefaultN;
/* allocate sub-structures/initialize default node */
if (MNodeCreate(&N) == SUCCESS)
{
strcpy(MSched.DefaultN.Name,"DEFAULT");
}
MSchedLoadConfig(NULL);
MCfgProcessBuffer(MSched.ConfigBuffer);
MCredLoadConfig(mxoSys,NULL,NULL,NULL);
MCredLoadConfig(mxoQOS,"DEFAULT",NULL,NULL);
MCredLoadConfig(mxoQOS,NULL,NULL,NULL);
MCredLoadConfig(mxoUser,"DEFAULT",NULL,NULL);
MCredLoadConfig(mxoUser,NULL,NULL,NULL);
MCredLoadConfig(mxoGroup,"DEFAULT",NULL,NULL);
MCredLoadConfig(mxoAcct,"DEFAULT",NULL,NULL);
MCredLoadConfig(mxoClass,"DEFAULT",NULL,NULL);
MCredLoadConfig(mxoGroup,NULL,NULL,NULL);
MCredLoadConfig(mxoAcct,NULL,NULL,NULL);
MCredLoadConfig(mxoClass,NULL,NULL,NULL);
MSRLoadConfig(NULL);
MRMLoadConfig(NULL);
MAMLoadConfig(NULL,NULL);
/* NOTE: two copies of global node */
MNodeLoadConfig(GN,NULL);
if (MUMemCCmp(
(char *)&GN->CRes,
'\0',
sizeof(GN->CRes)) == FAILURE)
{
/* global resources are configured */
MNodeAdd(GN->Name,&N);
MNodeSetState(N,mnsIdle,0);
memcpy(&N->CRes,&GN->CRes,sizeof(N->CRes));
MSched.GN = N;
} /* END if (MUMemCCmp() == FAILURE) */
MNodeDestroy(&GN);
/* NOTE: must free core node structure */
MUFree((char **)&GN);
MNodeLoadConfig(&MSched.DefaultN,NULL);
sprintf(MSched.KeyFile,"%s/%s",
MSched.HomeDir,
MSCHED_KEYFILE);
MSched.UID = MOSGetEUID();
MUCheckAuthFile(&MSched,MSched.DefaultCSKey,NULL,TRUE);
return(SUCCESS);
} /* END MSysLoadConfig() */
int MSysMemCheck()
{
const char *FName = "MSysMemCheck";
DBG(3,fCORE) DPrint("%s()\n",
FName);
DBG(2,fCORE) DPrint("MNode[%d] %6.2f\n",
MAX_MNODE,
(double)(sizeof(MNode)) / 1048576);
DBG(2,fCORE) DPrint("MJob[%d] %6.2f\n",
MAX_MJOB,
(double)(sizeof(MJob)) / 1048576);
DBG(2,fCORE) DPrint("MJobTraceBuffer[%d] %6.2f\n",
MAX_MJOB_TRACE,
(double)(sizeof(MJobTraceBuffer)) / 1048576);
DBG(2,fCORE) DPrint("MUser[%d] %6.2f\n",
MAX_MUSER + MAX_MHBUF,
(double)(sizeof(MUser)) / 1048576);
DBG(2,fCORE) DPrint("MGroup[%d] %6.2f\n",
MAX_MGROUP + MAX_MHBUF,
(double)(sizeof(MGroup)) / 1048576);
DBG(2,fCORE) DPrint("MAcct[%d] %6.2f\n",
MAX_MACCT + MAX_MHBUF,
(double)(sizeof(MAcct)) / 1048576);
DBG(2,fCORE) DPrint("MRes[%d] %6.2f\n",
MAX_MRES,
(double)(sizeof(MRes)) / 1048576);
DBG(2,fCORE) DPrint("SRes[%4d] %6.2f\n",
MAX_MSRES,
(double)(sizeof(SRes)) / 1048576);
return(SUCCESS);
} /* END MSysMemCheck() */
int __MSysTestXML(
char *XMLString) /* I */
{
char *tail;
mxml_t *E = NULL;
fprintf(stdout,"XMLString: '%s'\n\n",
XMLString);
if (MXMLFromString(&E,XMLString,&tail,NULL) == FAILURE)
{
exit(1);
}
if (tail != NULL)
{
fprintf(stdout,"tail: '%s'\n\n",
XMLString);
}
exit(0);
/*NOTREACHED*/
return(SUCCESS);
} /* END __MSysTestXML() */
int __MSysTestRLMerge()
{
mrange_t R1[] = {
{956601443,956612113,22,22},
{956612113,956631180,30,30},
{956631180,956635459,33,33},
{956712600,956766449,17,17},
{956766449,956770944,119,119},
{956770944,956810762,131,131},
{956810762,956981059,133,133},
{956981059,957198515,183,183},
{957198515,2139740800,187,187},
{0,0,0,0}
};
mrange_t R2[] = {
{956635459,956635459,1,1},
{956981059,2139740800,1,1},
{0,0,0,0}
};
/*
mrange_t R1[] = {
{888709099,888736699,4,4},
{888736699,888741199,7,7},
{888741199,888763698,19,19},
{888763709,888763709,19,19},
{888763709,888763711,171,171},
{888763711,2139999999,190,190},
{0,0,0,0}
};
mrange_t R2[] = {
{888736699,888763698,1,1},
{888763709,888763709,1,1},
{888763711,2139999999,1,1},
{0,0,0,0}
};
*/
MRLMerge(R1,R2,1,NULL);
exit(0);
/*NOTREACHED*/
return(SUCCESS);
} /* END __MSysTestRLMerge() */
int __MSysTestJobSelectFRL()
{
mjob_t tmpJ;
int RCount;
mrange_t R1[] = {
{ 1022172521, 1022173705, 476, 119 },
{ 1022173705, 1022176944, 496, 124 },
{ 1022176944, 1022179074, 500, 125 },
{ 1022179074, 1022182393, 516, 129 },
{ 1022182393, 1022185130, 520, 130 },
{ 1022185130, 1022188330, 536, 134 },
{ 1022188330, 1022189431, 600, 150 },
{ 1022189431, 1022208469, 664, 166 },
{ 1022208469, 1022211377, 680, 170 },
{ 1022211377, 1022215080, 728, 182 },
{ 1022215080, 1022250175, 744, 186 },
{ 1022250175, 1022252002, 748, 187 },
{ 1022252002, 1022252096, 752, 188 },
{ 1022252096, 1022253053, 756, 189 },
{ 1022253053, 1022255920, 804, 201 },
{ 1022255920, 2139992800, 808, 202 },
{ 0, 0, 0, 0 } };
memset(&tmpJ,0,sizeof(tmpJ));
strcpy(tmpJ.Name,"test");
tmpJ.Request.TC = 512;
tmpJ.Request.NC = 128;
MJobSelectFRL(&tmpJ,R1,1,&RCount);
exit(0);
/*NOTREACHED*/
return(SUCCESS);
} /* END __MSysTestJobSelectFRL() */
int __MSysTestNPrioF()
{
mnode_t tmpN;
memset(&tmpN,0,sizeof(tmpN));
MNodeProcessPrioF(&tmpN,"6*LOAD + -.01 * CMEM - JOBCOUNT");
exit(0);
/*NOTREACHED*/
return(SUCCESS);
} /* END __MSysTestNPrioF() */
int __MSysTestRLAND()
{
mrange_t R1[] = {
{ 1016704575, 2139856000, 4, 1 },
/*
{1,3,1,1},
{5,6,1,1},
{6,7,2,2},
{11,13,2,2},
{13,14,1,1},
{15,16,1,1},
{17,18,1,1},
*/
{0,0,0,0}
};
mrange_t R2[] = {
{1016704575, 2139856000, 4, 1 },
/*
{0,2,1,1},
{4,6,1,1},
{8,9,1,1},
{10,11,1,1},
{13,14,1,1},
{14,15,1,1},
{15,16,1,1},
{16,17,1,1},
*/
{0,0,0,0}
};
mrange_t C[MAX_MRANGE];
/*
mrange_t R1[] = {
{888709099,888736699,4,4},
{888736699,888741199,7,7},
{888741199,888763698,19,19},
{888763709,888763709,19,19},
{888763709,888763711,171,171},
{888763711,2139999999,190,190},
{0,0,0,0}
};
mrange_t R2[] = {
{888736699,888763698,1,1},
{888763709,888763709,1,1},
{888763711,2139999999,1,1},
{0,0,0,0}
};
*/
MRLAND(C,R1,R2);
exit(0);
/*NOTREACHED*/
return(SUCCESS);
} /* END __MSysTestRLAND() */
int __MSysTestJobGetSNRange()
{
mjob_t tmpJ;
mnode_t tmpN;
mre_t RE[MAX_MRES_DEPTH << 1];
mres_t *R[MAX_MRES_DEPTH];
short RC[MAX_MRES_DEPTH];
mreq_t tmpRQ;
mrange_t GRange[MAX_MRANGE];
mrange_t ARange[MAX_MRANGE];
char Affinity;
char NAvailPolicy[MAX_MRESOURCETYPE];
memset(&tmpJ,0,sizeof(tmpJ));
memset(&tmpN,0,sizeof(tmpN));
memset(&tmpRQ,0,sizeof(tmpRQ));
memset(NAvailPolicy,0,sizeof(NAvailPolicy));
/* configure general */
MSched.Time = 1025;
MSched.ResDepth = 8;
mlog.logfp = stderr;
mlog.Threshold = 8;
/* configure reservation */
#ifdef __MNOT1
MResInitialize(&MRes[0],"META");
MRes[0]->Type = 2;
MRes[0]->StartTime = 10;
MRes[0]->EndTime = 3600;
MRes[0]->DRes.Procs = 1;
MRes[0]->Flags = (1 << mrfByName);
/* configure node */
tmpN.CRes.Procs = 2;
tmpN.ARes.Procs = 2;
/* link node */
tmpN.R = R;
tmpN.RE = RE;
tmpN.RC = RC;
tmpN.R[0] = MRes[0];
tmpN.RC[0] = 1;
tmpN.RE[0].Type = mreStart;
tmpN.RE[0].DRes.Procs = 1;
tmpN.RE[1].Type = mreEnd;
tmpN.RE[1].DRes.Procs = 1;
tmpN.RE[2].Type = mreNONE;
tmpN.RE[0].Time = tmpN.R[0]->StartTime;
tmpN.RE[1].Time = tmpN.R[0]->EndTime;
tmpN.RE[0].Index = 0;
tmpN.RE[1].Index = 0;
/* configure job */
tmpJ.Req[0] = &tmpRQ;
tmpJ.WCLimit = 1800;
tmpJ.SpecWCLimit[0] = 1800;
tmpRQ.DRes.Procs = 1;
/* configure range requirements */
GRange[0].TaskCount = 1;
GRange[0].StartTime = 0;
GRange[0].EndTime = MAX_MTIME;
GRange[1].EndTime = 0;
#else /* __MNOT1 */
MResInitialize(&MRes[0],"1041");
MRes[0]->Type = mrtJob;
MRes[0]->StartTime = 1025;
MRes[0]->EndTime = 2825;
MRes[0]->DRes.Procs = 1;
MRes[0]->Flags = 0;
MResInitialize(&MRes[1],"1040");
MRes[1]->Type = mrtJob;
MRes[1]->StartTime = 1000;
MRes[1]->EndTime = 2800;
MRes[1]->DRes.Procs = 1;
MRes[1]->Flags = 0;
/* configure node */
strcpy(tmpN.Name,"rocky4");
tmpN.CRes.Procs = 2;
tmpN.ARes.Procs = 2;
tmpN.NAvailPolicy = NAvailPolicy;
tmpN.NAvailPolicy[mrProc] = mrapDedicated;
/* link node */
tmpN.R = R;
tmpN.RE = RE;
tmpN.RC = RC;
tmpN.R[0] = MRes[0];
tmpN.RC[0] = 1;
tmpN.R[1] = MRes[1];
tmpN.RC[1] = 1;
tmpN.RE[0].Type = mreStart;
tmpN.RE[0].DRes.Procs = 1;
tmpN.RE[0].Time = tmpN.R[1]->StartTime;
tmpN.RE[0].Index = 1;
tmpN.RE[1].Type = mreStart;
tmpN.RE[1].DRes.Procs = 1;
tmpN.RE[1].Time = tmpN.R[0]->StartTime;
tmpN.RE[1].Index = 0;
tmpN.RE[2].Type = mreEnd;
tmpN.RE[2].DRes.Procs = 1;
tmpN.RE[2].Time = tmpN.R[1]->EndTime;
tmpN.RE[2].Index = 1;
tmpN.RE[3].Type = mreEnd;
tmpN.RE[3].DRes.Procs = 1;
tmpN.RE[3].Time = tmpN.R[0]->EndTime;
tmpN.RE[3].Index = 0;
tmpN.RE[4].Type = mreNONE;
MNode[0] = &tmpN;
/* configure job */
strcpy(tmpJ.Name,"1042");
tmpJ.Req[0] = &tmpRQ;
tmpJ.WCLimit = 1800;
tmpJ.SpecWCLimit[0] = 1800;
tmpRQ.DRes.Procs = 1;
/* configure range requirements */
GRange[0].TaskCount = 1;
GRange[0].NodeCount = 0;
GRange[0].StartTime = 2800;
GRange[0].EndTime = MAX_MTIME;
GRange[1].EndTime = 0;
#endif /* __MNOT1 */
MJobGetSNRange(
&tmpJ,
&tmpRQ,
&tmpN,
GRange,
MAX_MRANGE,
&Affinity,
NULL,
ARange,
NULL,
NULL);
exit(0);
/*NOTREACHED*/
return(SUCCESS);
} /* __MSysTestJobGetSNRange() */
int __MSysTestFeatureSub()
{
int NFMap[4] = { 58720258, 0, 0, 0 };
int RFMap[4] = {301989888, 0, 0, 0};
if (MAttrSubset(NFMap,RFMap,sizeof(NFMap),0) != SUCCESS)
{
/* test succeeded */
exit(0);
}
exit(1);
/*NOTREACHED*/
return(SUCCESS);
} /* __MSysTestFeatureSub() */
int __MSysTestResParse(
char *RString) /* I */
{
mjob_t tmpJ;
mreq_t tmpRQ;
int RMType = mrmtLSF;
memset(&tmpJ,0,sizeof(tmpJ));
memset(&tmpRQ,0,sizeof(tmpRQ));
if (MReqRResFromString(&tmpJ,&tmpRQ,RString,RMType,FALSE) == SUCCESS)
{
/* test succeeded */
exit(0);
}
exit(1);
/*NOTREACHED*/
return(SUCCESS);
} /* END __MSysTestResParse() */
int __MSysTestSubmit()
{
char *SubmitString = "#PBS -l nodes=2,walltime=100\n/bin/sleep 60;hostname\n";
char JobName[MAX_MNAME];
int SC;
char Output[MAX_MLINE];
mjob_t tmpJ;
mjob_t *J = NULL;
/* NOTE: launch environment passed via job (user, group, iwd, env) */
int rc;
memset(&tmpJ,0,sizeof(tmpJ));
J = &tmpJ;
MUserAdd("heather",&tmpJ.Cred.U);
MGroupAdd("heather",&tmpJ.Cred.G);
MUStrDup(&J->E.IWD,"/tmp");
if (X.XRMJobSubmit != (int (*)())0)
{
rc = (*X.XRMJobSubmit)(
X.xd,
SubmitString,
&MRM[0],
&J,
JobName,
Output,
&SC);
if (rc == SUCCESS)
{
fprintf(stdout,"NOTE: job '%s' launched\n",
JobName);
exit(0);
}
else
{
fprintf(stdout,"NOTE: submission failed '%s'\n",
Output);
exit(1);
}
}
fprintf(stdout,"NOTE: submission disabled\n");
exit(1);
/*NOTREACHED*/
return(SUCCESS);
} /* END __MSysTestSubmit() */
int MSysDoTest()
{
char *tptr;
char *aptr;
char *ptr;
int aindex;
const char *TName[] = {
NONE,
"SCHED",
"XML",
"RANGEAND",
"RANGECOLLAPSE",
"RANGEMERGE",
"GETSNRANGE",
"FEATURESUB",
"NODEPRIO",
"RESPARSE",
"SUBMIT",
"WIKINODE",
"WIKIJOB",
"RMX",
"JOBNAME",
"JOBDIST",
NULL };
enum {
mirtNONE = 0,
mirtSched,
mirtXML,
mirtRLAND,
mirtJobSelectFRL,
mirtRLMerge,
mirtJobGetSNRange,
mirtFeatureSubset,
mirtNodePrio,
mirtResParse,
mirtSubmit,
mirtWikiNode,
mirtWikiJob,
mirtRMExtension,
mirtJobName,
mirtJobDist };
if ((tptr = getenv(MSCHED_ENVTESTVAR)) == NULL)
{
return(SUCCESS);
}
aindex = MUGetIndex(tptr,TName,TRUE,0);
aptr = NULL;
if ((ptr = strchr(tptr,':')) != NULL)
{
aptr = ptr + 1;
}
switch(aindex)
{
case mirtSched:
MSchedTest();
break;
case mirtXML:
__MSysTestXML(aptr);
break;
case mirtRLAND:
__MSysTestRLAND();
break;
case mirtJobSelectFRL:
__MSysTestJobSelectFRL();
break;
case mirtRLMerge:
__MSysTestRLMerge();
break;
case mirtJobGetSNRange:
__MSysTestJobGetSNRange();
break;
case mirtFeatureSubset:
__MSysTestFeatureSub();
break;
case mirtNodePrio:
__MSysTestNPrioF();
break;
case mirtResParse:
__MSysTestResParse(aptr);
break;
case mirtSubmit:
__MSysTestSubmit();
break;
case mirtWikiNode:
MWikiTestNode(aptr);
break;
case mirtWikiJob:
MWikiTestJob(aptr);
break;
case mirtRMExtension:
MJobTestRMExtension(aptr);
break;
case mirtJobName:
MJobTestName(aptr);
break;
case mirtJobDist:
MJobTestDist();
break;
default:
/* cannot determine test */
fprintf(stderr,"ERROR: invalid test specified (%s)\n",
tptr);
exit(1);
/*NOTREACHED*/
break;
} /* END switch(aindex) */
return(SUCCESS);
} /* END MSysDoTest() */
int MSysRegExtEvent(
char *Message, /* I */
int AType, /* I: action type */
long EFlags, /* I: event flags */
int Prio) /* I: event priority */
{
time_t T;
char tmpLine[MAX_MLINE];
const char *XHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\">";
const char *ReqO = "event-manager-requests";
char CName[MAX_MNAME];
char TString[MAX_MNAME];
char DString[MAX_MNAME];
const char *FName = "MSysRegExtEvent";
DBG(2,fCORE) DPrint("%s(%s,%d,%ld,%d)\n",
FName,
(Message != NULL) ? Message : "NULL",
AType,
EFlags,
Prio);
time(&T);
strncpy(TString,ctime(&T),24);
TString[24] = '\0';
strcpy(CName,MSCHED_SNAME);
strcpy(DString,NONE);
sprintf(tmpLine,"%s<%s><event component=\"%s\" time=\"%s msg=\"%s\" data=\"%s\"/></%s>",
XHeader,
ReqO,
CName,
TString,
Message,
DString,
ReqO);
/* send message */
/* NYI */
#ifdef __MSSSLIB
/* NYI */
#endif /* __MSSSLIB */
return(SUCCESS);
} /* END MSysRegExtEvent() */
int MSysRegEvent(
char *Message, /* I */
int AType, /* I: action type */
long EFlags, /* I: event flags */
int Prio) /* I: event priority */
{
char *ASList[32];
const char *FName = "MSysRegEvent";
DBG(2,fCORE) DPrint("%s(%s,%d,%ld,%d)\n",
FName,
(Message != NULL) ? Message : "NULL",
AType,
EFlags,
Prio);
if (EFlags & (1 << mefExternal))
{
MSysRegExtEvent(Message,AType,EFlags,Prio);
}
if (Prio > 0)
{
ASList[0] = NULL;
ASList[1] = Message;
ASList[2] = NULL;
MSysLaunchAction(
ASList,
(AType != 0) ? AType : mactAdminEvent);
}
return(SUCCESS);
} /* END MSysRegEvent() */
int MSysLaunchAction(
char **ASList,
int AType)
{
static char Exec[MAX_MLINE];
static char Line[MAX_MBUFFER];
int pid;
int rc;
const char *FName = "MSysLaunchAction";
DBG(2,fCORE) DPrint("%s(ASList,%d)\n",
FName,
AType);
if (MSched.Action[AType][0] == '\0')
{
DBG(5,fCORE) DPrint("INFO: scheduler action %d disabled\n",
AType);
return(SUCCESS);
}
if (MSched.Action[AType][0] == '/')
{
strcpy(Exec,MSched.Action[AType]);
}
else if (MSched.ToolsDir[strlen(MSched.ToolsDir) - 1] == '/')
{
sprintf(Exec,"%s%s",
MSched.ToolsDir,
MSched.Action[AType]);
}
else
{
sprintf(Exec,"%s/%s",
MSched.ToolsDir,
MSched.Action[AType]);
}
if (ASList[1] == NULL)
{
sprintf(Line,"\"%s %s\"",
MLogGetTime(),
"NODATA");
DBG(7,fCORE) DPrint("INFO: launching '%s' (AString: '%s')\n",
Exec,
Line);
}
else
{
DBG(7,fCORE) DPrint("INFO: launching '%s' (AString: '%s', ...)\n",
Exec,
ASList[1]);
}
ASList[0] = Exec;
/* fork process */
if ((pid = fork()) == -1)
{
DBG(0,fCORE) DPrint("ALERT: cannot fork for action '%s', errno: %d (%s)",
MSched.Action[AType],
errno,
strerror(errno));
return(FAILURE);
}
if (pid == 0)
{
/* if child */
if (ASList[1] == NULL)
{
rc = execl(Exec,Exec,Line,NULL);
}
else
{
rc = execv(Exec,ASList);
}
if (rc == -1)
{
/* child has failed */
DBG(1,fCORE) DPrint("ALERT: cannot exec action '%s', rc: %d, errno: %d (%s)\n",
Exec,
rc,
errno,
strerror(errno));
}
exit(0);
} /* END if (pid == 0) */
DBG(2,fCORE) DPrint("INFO: action '%s' launched with message '%s'\n",
MSched.Action[AType],
ASList[1]);
return(SUCCESS);
} /* END MSysLaunchAction() */
int MSysDSQuery(
char *SName, /* I: service name */
char *CName, /* I: cluster name */
char *HostName, /* O: service hostname */
int *Port, /* O: service port */
char *WProtocol, /* O: service wire protocol */
char *SProtocol) /* O: service socket protocol */
{
char *RspPtr = NULL;
char CmdString[MAX_MLINE];
char tmpLine[MAX_MLINE];
mxml_t *E = NULL;
mxml_t *RE = NULL;
mxml_t *LE = NULL;
int ReqID;
const char *FName = "MSysDSQuery";
DBG(4,fCORE) DPrint("%s(%s,%s,HostName,Port,WProtocol,SProtocol)\n",
FName,
(SName != NULL) ? SName : "NULL",
(CName != NULL) ? CName : "NULL");
if (SName == NULL)
{
return(FAILURE);
}
/* create request string */
MXMLCreateE(&E,"DirectoryRequests");
MXMLCreateE(&RE,"DirectoryRequest");
MXMLSetAttr(RE,"service",(void *)SName,mdfString);
if (CName != NULL)
MXMLSetAttr(RE,"cluster",(void *)CName,mdfString);
ReqID = 1;
MXMLSetAttr(RE,"reqid",(void *)&ReqID,mdfInt);
MXMLAddE(E,RE);
MXMLToString(E,CmdString,sizeof(CmdString),NULL,TRUE);
MXMLDestroyE(&E);
if (MS3DoCommand(&MSched.DS,CmdString,&RspPtr,NULL,NULL,NULL) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot query service '%s'\n",
SName);
return(FAILURE);
}
if ((MXMLFromString(&E,RspPtr,NULL,NULL) == FAILURE) ||
(MXMLGetChild(E,"DirectoryResponse",NULL,&RE) == FAILURE) ||
(MXMLGetChild(RE,"Location",NULL,&LE) == FAILURE))
{
DBG(2,fCORE) DPrint("ALERT: cannot process DS response '%s'\n",
RspPtr);
return(FAILURE);
}
if ((MXMLGetAttr(RE,"outcome",NULL,tmpLine,0) == FAILURE) ||
strcmp(tmpLine,"success"))
{
DBG(2,fCORE) DPrint("ALERT: DS query failed '%s'\n",
RspPtr);
return(FAILURE);
}
/* query succeeded, extract data */
if (HostName != NULL)
{
if (MXMLGetAttr(LE,"host",NULL,tmpLine,0) == SUCCESS)
{
MUStrCpy(HostName,tmpLine,MAX_MNAME);
}
else
{
HostName[0] = '\0';
}
}
if (Port != NULL)
{
if (MXMLGetAttr(LE,"port",NULL,tmpLine,0) == SUCCESS)
{
*Port = (int)strtol(tmpLine,NULL,0);
}
else
{
*Port = -1;
}
}
if (SProtocol != NULL)
{
if (MXMLGetAttr(LE,"protocol",NULL,tmpLine,0) == SUCCESS)
{
MUStrCpy(SProtocol,tmpLine,MAX_MNAME);
}
else
{
SProtocol[0] = '\0';
}
}
/* NOTE: wire protocol not supported */
MXMLDestroyE(&E);
DBG(2,fCORE) DPrint("INFO: information for service '%s' successfully queried\n",
SName);
return(SUCCESS);
} /* END MSysDSQuery() */
int MSysSynchronize()
{
return(SUCCESS);
if (MSched.Sync.UpdateTime <= 0)
{
char tmpBuf[MAX_MLINE];
char tmpLine[MAX_MLINE];
char tmpHost[MAX_MLINE];
char *RspPtr = NULL;
mpsi_t tmpP;
memset(&tmpP,0,sizeof(tmpP));
sprintf(tmpLine,"%s/%s",
DEFAULT_MHSYNCLOCATION,
MRMType[MRM[0].Type]);
tmpP.Type = mpstWWW;
tmpP.Data = (void *)tmpLine;
tmpBuf[0] = '\0';
strcpy(tmpHost,DEFAULT_MHSERVER);
tmpP.HostName = tmpHost;
tmpP.Port = DEFAULT_MHPORT;
tmpP.Timeout = 3;
if (MS3DoCommand(&tmpP,tmpBuf,&RspPtr,NULL,NULL,NULL) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot sync\n");
return(FAILURE);
}
/* process updates */
DBG(4,fCORE) DPrint("INFO: received update '%s'\n",
RspPtr);
/* NYI */
MUFree(&RspPtr);
MSched.Sync.UpdateTime = MSched.Time;
} /* END if (MSched.Sync.UpdateTime <= 0) */
return(SUCCESS);
} /* END MSysSynchronize() */
int MSysEMSubmit(
mpsi_t *EM, /* I */
char *SName, /* I: service name */
char *EName, /* I: event name */
char *Message) /* I: event message */
{
char tmpLine[MAX_MLINE];
char *RspPtr = NULL;
char CmdString[MAX_MLINE];
mxml_t *E = NULL;
mxml_t *RE = NULL;
const char *FName = "MSysEMSubmit";
DBG(4,fCORE) DPrint("%s(EM,%s,%s,%s)\n",
FName,
(SName != NULL) ? SName : "NULL",
(EName != NULL) ? EName : "NULL",
(Message != NULL) ? Message : "NULL");
if ((EM == NULL) || (SName == NULL) || (EName == NULL))
{
return(FAILURE);
}
if ((EM->HostName == NULL) || (EM->HostName[0] == '\0'))
{
/* EM disabled */
DBG(6,fCORE) DPrint("INFO: EM disabled\n");
return(SUCCESS);
}
MXMLCreateE(&E,"event-manager-requests");
MXMLCreateE(&RE,"event");
MXMLSetAttr(RE,"component",(void *)SName,mdfString);
MXMLSetAttr(RE,"msg",(void *)EName,mdfString);
strcpy(tmpLine,ctime((time_t *)&MSched.Time));
MXMLSetAttr(RE,"time",(void *)tmpLine,mdfString);
if (Message != NULL)
MXMLSetAttr(RE,"data",(void *)Message,mdfString);
MXMLAddE(E,RE);
MXMLToString(E,CmdString,sizeof(CmdString),NULL,TRUE);
MXMLDestroyE(&E);
if (MS3DoCommand(EM,CmdString,&RspPtr,NULL,NULL,NULL) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot submit event '%s'\n",
EName);
return(FAILURE);
}
if (MXMLFromString(&E,RspPtr,NULL,NULL) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot process EM response '%s'\n",
RspPtr);
return(FAILURE);
}
if (MXMLGetChild(E,"event-ok",NULL,&RE) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: EM submission failed '%s'\n",
RspPtr);
return(FAILURE);
}
MXMLDestroyE(&E);
DBG(4,fCORE) DPrint("INFO: event '%s' successfully submitted\n",
EName);
return(SUCCESS);
} /* END MSysEMSubmit() */
int MSysEMRegister(
mpsi_t *EM, /* I */
char *SName, /* I: service name */
char *EName, /* I: event name */
char *EData, /* I: specific event data required (optional) */
char *Dst) /* I: event destination */
{
char *RspPtr = NULL;
char CmdString[MAX_MLINE];
char tmpLine[MAX_MLINE];
mxml_t *E = NULL;
mxml_t *RE = NULL;
const char *FName = "MSysEMRegister";
DBG(4,fCORE) DPrint("%s(EM,%s,%s,%s,%s)\n",
FName,
(SName != NULL) ? SName : "NULL",
(EName != NULL) ? EName : "NULL",
(EData != NULL) ? EData : "NULL",
(Dst != NULL) ? Dst : "NULL");
if ((EM == NULL) || (SName == NULL) || (EName == NULL))
{
return(FAILURE);
}
if ((EM->HostName == NULL) || (EM->HostName[0] == '\0'))
{
/* EM disabled */
DBG(6,fCORE) DPrint("INFO: EM disabled\n");
return(SUCCESS);
}
MXMLCreateE(&E,"event-manager-requests");
MXMLCreateE(&RE,"notification");
MXMLSetAttr(RE,"component",(void *)SName,mdfString);
MXMLSetAttr(RE,"msg",(void *)EName,mdfString);
if (EData != NULL)
MXMLSetAttr(RE,"data",(void *)EData,mdfString);
else
MXMLSetAttr(RE,"data",(void *)"*",mdfString);
if (Dst != NULL)
MXMLSetAttr(RE,"respond_to",(void *)Dst,mdfString);
MXMLAddE(E,RE);
MXMLToString(E,CmdString,sizeof(CmdString),NULL,TRUE);
MXMLDestroyE(&E);
if (MS3DoCommand(EM,CmdString,&RspPtr,NULL,NULL,NULL) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot register for event '%s'\n",
EName);
return(FAILURE);
}
if (MXMLFromString(&E,RspPtr,NULL,NULL) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot process EM response '%s'\n",
RspPtr);
return(FAILURE);
}
if ((MXMLGetAttr(E,"outcome",NULL,tmpLine,0) == FAILURE) ||
strcmp(tmpLine,"success"))
{
DBG(2,fCORE) DPrint("ALERT: EM registration failed '%s'\n",
RspPtr);
return(FAILURE);
}
MXMLDestroyE(&E);
DBG(4,fCORE) DPrint("INFO: event '%s' subscription successfully registered\n",
EName);
return(SUCCESS);
} /* END MSysEMRegister() */
int MSysDSUnregister(
char *SName, /* I: service name */
char *CName, /* I: cluster name */
char *HostName, /* I */
int Port, /* I */
char *WProtocol, /* I */
char *SProtocol) /* I */
{
char *RspPtr = NULL;
char CmdString[MAX_MLINE];
char tmpLine[MAX_MLINE];
mxml_t *E = NULL;
mxml_t *RE = NULL;
int ReqID;
const char *FName = "MSysDSUnregister";
DBG(4,fCORE) DPrint("%s(%s,%s,%s,Port,WProtocol,SProtocol)\n",
FName,
(SName != NULL) ? SName : "NULL",
(CName != NULL) ? CName : "NULL",
(HostName != NULL) ? HostName : "NULL");
if (SName == NULL)
{
return(FAILURE);
}
if ((MSched.DS.HostName == NULL) || (MSched.DS.HostName[0] == '\0'))
{
/* DS disabled */
DBG(6,fCORE) DPrint("INFO: DS disabled\n");
return(SUCCESS);
}
/* create request string */
MXMLCreateE(&E,"DirectoryRequests");
MXMLCreateE(&RE,"DirectoryRemove");
MXMLSetAttr(RE,"service",(void *)SName,mdfString);
if (CName != NULL)
MXMLSetAttr(RE,"cluster",(void *)CName,mdfString);
if (SProtocol)
MXMLSetAttr(RE,"protocol",(void *)SProtocol,mdfString);
ReqID = 1;
MXMLSetAttr(RE,"reqid",(void *)&ReqID,mdfInt);
MXMLAddE(E,RE);
MXMLToString(E,CmdString,sizeof(CmdString),NULL,TRUE);
MXMLDestroyE(&E);
if (MS3DoCommand(&MSched.DS,CmdString,&RspPtr,NULL,NULL,NULL) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot un-register service '%s'\n",
CName);
return(FAILURE);
}
if (MXMLFromString(&E,RspPtr,NULL,NULL) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot process DS response '%s'\n",
RspPtr);
return(FAILURE);
}
if (MXMLGetChild(E,"DirectoryStatus",NULL,&RE) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot process DS response '%s'\n",
RspPtr);
MXMLDestroyE(&E);
return(FAILURE);
}
if ((MXMLGetAttr(RE,"outcome",NULL,tmpLine,0) == FAILURE) ||
strcmp(tmpLine,"success"))
{
DBG(2,fCORE) DPrint("ALERT: un-register request failed '%s'\n",
RspPtr);
MXMLDestroyE(&E);
return(FAILURE);
}
MXMLDestroyE(&E);
DBG(2,fCORE) DPrint("INFO: service '%s' successfully registered\n",
SName);
return(SUCCESS);
} /* END MSysDSUnregister() */
int MSysDSRegister(
char *SName, /* I: service name */
char *CName, /* I: cluster name */
char *HostName, /* I */
int Port, /* I */
char *WProtocol, /* I */
char *SProtocol) /* I */
{
char *RspPtr = NULL;
char CmdString[MAX_MLINE];
char tmpLine[MAX_MLINE];
mxml_t *E = NULL;
mxml_t *RE = NULL;
int ReqID;
const char *FName = "MSysDSRegister";
DBG(4,fCORE) DPrint("%s(%s,%s,%s,Port,WProtocol,SProtocol)\n",
FName,
(SName != NULL) ? SName : "NULL",
(CName != NULL) ? CName : "NULL",
(HostName != NULL) ? HostName : "NULL");
if (SName == NULL)
{
return(FAILURE);
}
if ((MSched.DS.HostName == NULL) || (MSched.DS.HostName[0] == '\0'))
{
/* DS disabled */
DBG(6,fCORE) DPrint("INFO: DS disabled\n");
return(SUCCESS);
}
/* create request string */
MXMLCreateE(&E,"DirectoryRequests");
MXMLCreateE(&RE,"DirectoryRegister");
MXMLSetAttr(RE,"service",(void *)SName,mdfString);
if (CName != NULL)
MXMLSetAttr(RE,"cluster",(void *)CName,mdfString);
if (HostName != NULL)
MXMLSetAttr(RE,"host",(void *)HostName,mdfString);
if (Port != -1)
MXMLSetAttr(RE,"port",(void *)&Port,mdfInt);
if (SProtocol)
MXMLSetAttr(RE,"protocol",(void *)SProtocol,mdfString);
ReqID = 1;
MXMLSetAttr(RE,"reqid",(void *)&ReqID,mdfInt);
MXMLAddE(E,RE);
MXMLToString(E,CmdString,sizeof(CmdString),NULL,TRUE);
MXMLDestroyE(&E);
if (MS3DoCommand(&MSched.DS,CmdString,&RspPtr,NULL,NULL,NULL) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot register service '%s'\n",
CName);
return(FAILURE);
}
if (MXMLFromString(&E,RspPtr,NULL,NULL) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot process DS response '%s'\n",
RspPtr);
return(FAILURE);
}
if (MXMLGetChild(E,"DirectoryStatus",NULL,&RE) == FAILURE)
{
DBG(2,fCORE) DPrint("ALERT: cannot process DS response '%s'\n",
RspPtr);
MXMLDestroyE(&E);
return(FAILURE);
}
if ((MXMLGetAttr(RE,"outcome",NULL,tmpLine,0) == FAILURE) ||
strcmp(tmpLine,"success"))
{
DBG(2,fCORE) DPrint("ALERT: registation request failed '%s'\n",
RspPtr);
MXMLDestroyE(&E);
return(FAILURE);
}
MXMLDestroyE(&E);
DBG(2,fCORE) DPrint("INFO: service '%s' successfully registered\n",
SName);
return(SUCCESS);
} /* END MSysDSUnregister() */
int MSysUpdateTime(
msched_t *S) /* I */
{
const char *FName = "MSysUpdateTime";
DBG(3,fALL) DPrint("%s()\n",
FName);
/* update time, day, iteration, interval, and runtime */
/* update time */
if ((MSched.Mode != msmSim) || (MSched.Iteration != 0))
{
MUGetTime(&MSched.Time,mtmRefresh,S);
}
/* update day */
{
time_t tmpT;
char tmpDay[MAX_MNAME];
tmpT = (time_t)MSched.Time;
MUStrCpy(tmpDay,ctime(&tmpT),4);
/* FORMAT: DDD ... */
if (strcmp(MSched.Day,tmpDay) != 0)
{
/* starting new day */
DBG(2,fALL) DPrint("INFO: starting new day: %s",
MULToDString(&MSched.Time));
strcpy(MSched.Day,tmpDay);
} /* END if (strcmp(MSched.Day,tmpDay) != 0) */
} /* END BLOCK */
/* get exact time (update SchedTime, Interval) */
{
struct timeval tvp;
struct timezone tzp;
long interval;
gettimeofday(&tvp,&tzp);
/* determine time interval in 1/100's of a second */
if ((MSched.Mode == msmSim) && (MSched.TimePolicy != mtpReal))
{
interval = MSched.RMPollInterval * 100;
}
else
{
interval = (tvp.tv_sec - MSched.SchedTime.tv_sec) * 100 +
(tvp.tv_usec - MSched.SchedTime.tv_usec) / 10000;
}
if (interval < 0)
{
DBG(1,fSCHED) fprintf(stderr,"ALERT: negative interval detected (%ld)\n",
interval);
MSched.Interval = 0;
}
else if (MSched.SchedTime.tv_sec == 0)
{
/* first pass, time not yet initialized */
MSched.Interval = 0;
}
else
{
MSched.Interval = interval;
MStat.SchedRunTime += MSched.Interval;
}
memcpy(&MSched.SchedTime,&tvp,sizeof(struct timeval));
} /* END BLOCK */
return(SUCCESS);
} /* END MSysUpdateTime() */
int MSysCheck()
{
int jindex;
int nindex;
mjob_t *J;
mnode_t *N;
mrm_t *RM;
char Message[MAX_MLINE];
const char *FName = "MSysCheck";
DBG(4,fCORE) DPrint("%s()\n",
FName);
MLimitEnforceAll(&MPar[0]);
/* located jobs which have violated wallclock limits */
for (jindex = 0;MAQ[jindex] != -1;jindex++)
{
J = MJob[MAQ[jindex]];
RM = (J->RM != NULL) ? J->RM : &MRM[0];
/* locate jobs which have allocated 'down' nodes */
if ((MSched.Time - J->StartTime) > MPar[0].MaxJobStartTime)
{
if (RM->Type == mrmtPBS)
{
if ((J->State == mjsIdle) &&
((J->EState == mjsStarting) || (J->EState == mjsRunning)))
{
DBG(2,fCORE) DPrint("ALERT: PBS job '%s' in state '%s' was started %s ago. assuming prolog hang and cancelling job\n",
J->Name,
MJobState[J->State],
MULToTString(MSched.Time - J->StartTime));
MRMJobCancel(J,"MAUI_INFO: job cannot start\n",NULL);
}
}
for (nindex = 0;nindex < MAX_MNODE_PER_JOB;nindex++)
{
if (J->NodeList[nindex].N == NULL)
break;
N = J->NodeList[nindex].N;
if (((N->State == mnsIdle) ||
(N->State == mnsDown)) &&
((MSched.Time - N->StateMTime) > 300))
{
DBG(1,fCORE) DPrint("ALERT: job '%s' has been in state '%s' for %ld seconds. node '%s' is in state '%s' (job '%s' will be cancelled)\n",
J->Name,
MJobState[J->State],
MSched.Time - J->StartTime,
N->Name,
MNodeState[N->State],
J->Name);
sprintf(Message,"JOBCORRUPTION: job '%s' (user %s) has been in state '%s' for %ld seconds. node '%s' is in state '%s' (job '%s' will be cancelled)\n",
J->Name,
J->Cred.U->Name,
MJobState[J->State],
MSched.Time - J->StartTime,
N->Name,
MNodeState[N->State],
J->Name);
MSysRegEvent(Message,0,0,1);
if (N->State == mnsDown)
{
MRMJobCancel(J,"MAUI_ERROR: job has 'DOWN' node allocated\n",NULL);
break;
}
} /* END if N->State */
} /* END for nindex */
} /* END if MSched.Time */
} /* END for (jindex) */
/* clear all defunct child processes */
MUClearChild(NULL);
return(SUCCESS);
} /* END MSysCheck() */
int MSysDestroyObjects()
{
int jindex;
int nindex;
mnode_t *N;
const char *FName = "MSysDestroyObjects";
DBG(1,fSTRUCT) DPrint("%s()\n",
FName);
for (jindex = 1;jindex < MAX_MJOB;jindex++)
{
if ((MJob[jindex] == NULL) || (MJob[jindex] == (mjob_t *)1))
continue;
MJobRemove(MJob[jindex]);
} /* END for (jindex) */
for (nindex = 0;nindex < MAX_MNODE;nindex++)
{
N = MNode[nindex];
if (N == NULL)
break;
MNodeRemove(N);
} /* END for (nindex) */
return(SUCCESS);
} /* END MSysDestroyObjects() */
void MSysShutdown(
int Signo) /* I */
{
const char *FName = "MSysShutdown";
DBG(2,fALL) DPrint("%s(%d)\n",
FName,
Signo);
DBG(0,fALL) DPrint("INFO: received signal %d. shutting down server\n",
Signo);
if (MSysDSUnregister(
(char *)MS3CName[mpstSC],
MRM[0].Name,
MSched.ServerHost,
MSched.ServerPort,
NULL,
NULL) == FAILURE)
{
DBG(1,fRM) DPrint("ALERT: cannot unregister with directory service\n");
}
MAMShutdown(&MAM[0]);
MFSShutdown(&MPar[0].FSC);
if ((MSched.Mode != msmSim) ||
(getenv(MSCHED_ENVCKTESTVAR) != NULL))
{
MCPCreate(MCP.CPFileName);
}
MSUDisconnect(&MSched.ServerS);
MStatShutdown();
MQOSFreeTable();
MJobFreeTable();
MNodeFreeTable();
MUserFreeTable();
MResFreeTable();
MLogShutdown();
exit(0);
} /* END MSysShutdown() */
int MSchedLoadConfig(
char *Buf) /* I (optional) */
{
char IndexName[MAX_MNAME];
char Value[MAX_MLINE];
char *ptr;
char *head;
/* FORMAT: <KEY>=<VAL>[<WS><KEY>=<VAL>]... */
/* <VAL> -> <ATTR>=<VAL>[:<ATTR>=<VAL>]... */
/* load all/specified AM config info */
head = (Buf != NULL) ? Buf : MSched.ConfigBuffer;
if (head == NULL)
{
return(FAILURE);
}
/* load all sched config info */
ptr = head;
IndexName[0] = '\0';
while (MCfgGetSVal(
head,
&ptr,
MCredCfgParm[mxoSched],
IndexName,
NULL,
Value,
sizeof(Value),
0,
NULL) != FAILURE)
{
if (IndexName[0] != '\0')
{
/* set scheduler name */
MSchedSetAttr(&MSched,msaName,(void *)IndexName,mdfString,mSet);
}
/* load sys specific attributes */
MSchedProcessConfig(&MSched,Value);
IndexName[0] = '\0';
} /* END while (MCfgGetSVal() != FAILURE) */
return(SUCCESS);
} /* END MSchedLoadConfig() */
int MSchedProcessConfig(
msched_t *S, /* I (modified) */
char *Value) /* I */
{
int aindex;
char *ptr;
char *TokPtr;
char ValLine[MAX_MLINE];
char *ValList[2];
if ((S == NULL) ||
(Value == NULL) ||
(Value[0] == '\0'))
{
return(FAILURE);
}
/* process value line */
ptr = MUStrTok(Value," \t\n",&TokPtr);
while(ptr != NULL)
{
/* parse name-value pairs */
/* FOAMAT: <VALUE>[,<VALUE>] */
if (MUGetPair(
ptr,
(const char **)MSchedAttr,
&aindex,
NULL,
TRUE,
NULL,
ValLine,
MAX_MNAME) == FAILURE)
{
/* cannot parse value pair */
ptr = MUStrTok(NULL," \t\n",&TokPtr);
continue;
}
ValList[0] = ValLine;
ValList[1] = NULL;
switch(aindex)
{
case msaFBServer:
MUURLParse(ValLine,NULL,S->FBServerHost,NULL,0,&S->FBServerPort,TRUE);
break;
case msaServer:
MUURLParse(ValLine,NULL,S->ServerHost,NULL,0,&S->ServerPort,TRUE);
break;
case msaMode:
S->Mode = MUGetIndex(ValLine,MSchedMode,FALSE,S->Mode);
S->SpecMode = S->Mode;
break;
default:
DBG(4,fAM) DPrint("WARNING: sys attribute '%s' not handled\n",
MSchedAttr[aindex]);
break;
} /* END switch(aindex) */
ptr = MUStrTok(NULL," \t\n",&TokPtr);
} /* END while (ptr != NULL) */
return(SUCCESS);
} /* END MSchedProcessConfig() */
int MSysToPrimary()
{
const char *FName = "MSysToPrimary";
DBG(2,fALL) DPrint("%s()\n",
FName);
/* load checkpointed state */
/* update service directory */
/* change mode */
MSched.Mode = MSched.SpecMode;
/* enable user interface */
/* NYI */
return(SUCCESS);
} /* END MSysToPrimary() */
int MSysToSecondary()
{
const char *FName = "MSysToSecondary";
DBG(2,fALL) DPrint("%s()\n",
FName);
/* disable user interface */
if (MSched.ServerS.sd > 0)
{
MSUDisconnect(&MSched.ServerS);
}
/* change mode */
MSched.Mode = msmTest;
return(SUCCESS);
} /* END MSysToSecondary() */
int MSysCheckPrimary()
{
static int FBFailureCount = 0;
char tmpBuf[MAX_MBUFFER];
static char *CmdString = "0 [NONE] 0";
int rc;
const char *FName = "MSysCheckPrimary";
DBG(2,fALL) DPrint("%s()\n",
FName);
/* contact primary */
rc = MCDoCommand(
MSched.FBServerHost,
MSched.FBServerPort,
svcShowQ,
CmdString,
tmpBuf);
DBG(2,fALL) DPrint("INFO: connection to FBServer %s:%d %s\n",
MSched.FBServerHost,
MSched.FBServerPort,
(rc == SUCCESS) ? "succeeded" : "failed");
if ((MSched.Mode == msmTest) && (MSched.FBActive == TRUE))
{
if (rc == FAILURE)
{
FBFailureCount++;
if (FBFailureCount >= MSched.FBFailureCount)
{
/* if multiple failed connection attempts, become primary */
MSysToPrimary();
}
}
else
{
FBFailureCount = 0;
}
sleep(MSched.FBPollInterval);
}
else
{
if (rc == FAILURE)
{
/* if single successful connection, become secondary */
MSysToSecondary();
}
}
return(SUCCESS);
} /* END MSysCheckPrimary() */
int MSysDiagnose(
char *SBuf, /* O */
int SBufSize, /* I */
long Flags) /* I */
{
if (SBuf == NULL)
{
return(FAILURE);
}
SBuf[0] = '\0';
sprintf(SBuf,"%sInitialized: S:%s/I:%s CCount: %d FCount: %d QCount: %d JCount: %d RCount: %d\n",
SBuf,
(MSched.G.SIsInitialized == TRUE) ? "TRUE" : "FALSE",
(MSched.G.IIsInitialized == TRUE) ? "TRUE" : "FALSE",
MSched.G.CCount,
MSched.G.FailureCount,
MSched.G.QCount,
MSched.G.JCount,
MSched.G.RCount);
if (MSim.StopIteration == MSched.Iteration)
{
sprintf(SBuf,"%s\nNOTE: scheduler is currently stopped\n",
SBuf);
}
if (MSched.G.Messages != NULL)
{
sprintf(SBuf,"%s\nMessages:\n %s\n",
SBuf,
MSched.G.Messages);
}
return(SUCCESS);
} /* END MSysDiagnose() */
int MSysStartServer(
int IsFBServer) /* I (boolean) */
{
const char *FName = "MSysStartServer";
DBG(3,fALL) DPrint("%s()\n",
FName);
DBG(0,fALL) DPrint("starting %s version %s (PID: %d) on %s",
MSCHED_VERSION,
MSCHED_NAME,
MSched.PID,
MULToDString(&MSched.Time));
MSysMemCheck();
MStatInitialize(&MStat.P);
if (IsFBServer == TRUE)
{
MSysToSecondary();
MSysCheckPrimary();
}
/* set up user interface socket */
MSUInitialize(
&MSched.ServerS,
NULL,
MSched.ServerPort,
MSched.ClientTimeout,
(1 << TCP));
if (MSUListen(&MSched.ServerS) == FAILURE)
{
DBG(0,fALL) DPrint("ERROR: cannot open user interface socket on port %d\n",
MSched.ServerPort);
fprintf(stderr,"ERROR: cannot open user interface socket on port %d\n",
MSched.ServerPort);
exit(1);
}
/* enable extension interface */
MSUInitialize(
&MSched.ServerSH,
NULL,
MSched.ServerPort + 1,
MSched.ClientTimeout,
(1 << TCP));
if (MSUListen(&MSched.ServerSH) == FAILURE)
{
DBG(7,fALL) DPrint("ERROR: cannot open extension interface socket on port %d\n",
MSched.ServerPort);
}
MFSInitialize(&MPar[0].FSC);
if (MAM[0].Type != mamtNONE)
MAMActivate(&MAM[0]);
MCPLoad(MCP.CPFileName,mckptResOnly);
if (MSched.Mode == msmSim)
{
/* initialize simulation */
MSimInitialize();
} /* END if (MSched.Mode == msmSim) */
else
{
/* initialize resource manager */
MRMInitialize();
/* initialize allocation manager */
MAMInitialize(NULL);
} /* END else (MSched.Mode == msmSim) */
return(SUCCESS);
} /* END MSysStartServer() */
/* END MSys.c */