#!/usr/bin/env perl # IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html package xCAT_plugin::xen; my $libvirtsupport; $libvirtsupport = eval { require Sys::Virt; }; BEGIN { $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat'; } use lib "$::XCATROOT/lib/perl"; use xCAT::GlobalDef; use xCAT_monitoring::monitorctrl; #use Net::SNMP qw(:snmp INTEGER); use xCAT::Table; use XML::Simple qw(XMLout); use Thread qw(yield); use IO::Socket; use IO::Select; use SNMP; use strict; #use warnings; my %vm_comm_pids; my $vmhash; my $hmhash; use XML::Simple; if ($^O =~ /^linux/i) { $XML::Simple::PREFERRED_PARSER='XML::Parser'; } use Data::Dumper; use POSIX "WNOHANG"; use Storable qw(freeze thaw); use IO::Select; use IO::Handle; use Time::HiRes qw(gettimeofday sleep); use Net::Telnet; use xCAT::DBobjUtils; use Getopt::Long; my %runningstates; my $vmmaxp; my $mactab; my $nrtab; my $machash; my $status_noop="XXXno-opXXX"; sub handled_commands { unless ($libvirtsupport) { return {}; } return { rpower => 'nodehm:power,mgt', rmigrate => 'nodehm:mgt', getxencons => 'nodehm:mgt', #rvitals => 'nodehm:mgt', #rinv => 'nodehm:mgt', rbeacon => 'nodehm:mgt', #rspreset => 'nodehm:mgt', #rspconfig => 'nodehm:mgt', #rbootseq => 'nodehm:mgt', #reventlog => 'nodehm:mgt', }; } my $virsh; my $vmhash; my $hypconn; my $hyp; my $doreq; my %hyphash; my $node; my $hmtab; my $vmtab; sub waitforack { my $sock = shift; my $select = new IO::Select; $select->add($sock); my $str; if ($select->can_read(10)) { # Continue after 10 seconds, even if not acked... if ($str = <$sock>) { } else { $select->remove($sock); #Block until parent acks data } } } sub build_oshash { my %rethash; $rethash{type}->{content}='hvm'; $rethash{loader}->{content}='/usr/lib/xen/boot/hvmloader'; $rethash{boot}->[0]->{dev}='network'; $rethash{boot}->[1]->{dev}='hd'; return \%rethash; } sub build_diskstruct { my @returns=(); my $diskhash; my $currdev; my @suffixes=('a'..'z'); my $suffidx=0; if (defined $vmhash->{$node}->[0]->{storage}) { my $disklocs=$vmhash->{$node}->[0]->{storage}; my @locations=split /\|/,$disklocs; foreach (@locations) { my $disk = $_; my $disktype = substr($disk, 0, 3); $currdev='hd'.$suffixes[$suffidx++]; if ($disktype eq "phy") { my $diskdev = substr($disk, 4); $diskhash->{type}='block'; $diskhash->{source}->{dev}=$diskdev; } else { $diskhash->{type}='file'; $diskhash->{source}->{file}=$_; } $diskhash->{target}->{dev}=$currdev; push @returns,$diskhash; } } return \@returns; } sub getNodeUUID { my $node = shift; return xCAT::Utils::genUUID(); } sub build_nicstruct { my $rethash; my $node = shift; my @macs=(); if ($machash->{$node}->[0]->{mac}) { my $macdata=$machash->{$node}->[0]->{mac}; foreach my $macaddr (split /\|/,$macdata) { $macaddr =~ s/\!.*//; push @macs,$macaddr; } } unless (scalar(@macs)) { my $allbutmult = 65279; # & mask for bitwise clearing of the multicast bit of mac my $localad=512; # | to set the bit for locally admnistered mac address my $leading=int(rand(65535)); $leading=$leading|512; $leading=$leading&65279; my $n=inet_aton($node); my $tail; if ($n) { $tail=unpack("N",$n); } unless ($tail) { $tail=int(rand(4294967295)); } my $macstr = sprintf("%04x%08x",$leading,$tail); $macstr =~ s/(..)(..)(..)(..)(..)(..)/$1:$2:$3:$4:$5:$6/; $mactab->setNodeAttribs($node,{mac=>$macstr}); $nrtab->setNodeAttribs($node,{netboot=>'pxe'}); $doreq->({command=>['makedhcp'],node=>[$node]}); push @macs,$macstr; } my @rethashes; foreach (@macs) { my $rethash; $rethash->{type}='bridge'; $rethash->{mac}->{address}=$_; push @rethashes,$rethash; } return \@rethashes; } sub getUnits { my $amount = shift; my $defunit = shift; my $divisor=shift; unless ($divisor) { $divisor = 1; } if ($amount =~ /(\D)$/) { #If unitless, add unit $defunit=$1; chop $amount; } if ($defunit =~ /k/i) { return $amount*1024/$divisor; } elsif ($defunit =~ /m/i) { return $amount*1048576/$divisor; } elsif ($defunit =~ /g/i) { return $amount*1073741824/$divisor; } } sub build_xmldesc { my $node = shift; my %xtree=(); $xtree{type}='xen'; $xtree{name}->{content}=$node; $xtree{uuid}->{content}=getNodeUUID($node); $xtree{os} = build_oshash(); if (defined $vmhash->{$node}->[0]->{memory}) { $xtree{memory}->{content}=getUnits($vmhash->{$node}->[0]->{memory},"M",1024); } else { $xtree{memory}->{content}=524288; } $xtree{vcpu}->{content}=1; $xtree{features}->{pae}={}; $xtree{features}->{acpi}={}; $xtree{features}->{apic}={}; $xtree{features}->{content}="\n"; $xtree{devices}->{emulator}->{content}='/usr/lib64/xen/bin/qemu-dm'; $xtree{devices}->{disk}=build_diskstruct(); $xtree{devices}->{interface}=build_nicstruct($node); $xtree{devices}->{graphics}->{type}='vnc'; $xtree{devices}->{console}->{type}='pty'; $xtree{devices}->{console}->{target}->{port}='1'; return XMLout(\%xtree,RootName=>"domain"); } sub refresh_vm { my $dom = shift; my $newxml=XMLin($dom->get_xml_description()); my $vncport=$newxml->{devices}->{graphics}->{port}; my $stty=$newxml->{devices}->{console}->{tty}; $vmtab->setNodeAttribs($node,{vncport=>$vncport,textconsole=>$stty}); return {vncport=>$vncport,textconsole=>$stty}; } sub getvmcons { my $node = shift(); my $type = shift(); my $dom; eval { $dom = $hypconn->get_domain_by_name($node); }; unless ($dom) { return 1,"Unable to query running VM"; } my $consdata=refresh_vm($dom); my $hyper=$vmhash->{$node}->[0]->{host}; if ($type eq "text") { my $serialspeed; if ($hmhash) { $serialspeed=$hmhash->{$node}->[0]->{serialspeed}; } my $sconsparms = {node=>[{name=>[$node]}]}; $sconsparms->{node}->[0]->{sshhost}=[$hyper]; $sconsparms->{node}->[0]->{psuedotty}=[$consdata->{textconsole}]; $sconsparms->{node}->[0]->{baudrate}=[$serialspeed]; return (0,$sconsparms); } elsif ($type eq "vnc") { return (0,'ssh+vnc@'.$hyper.": ".$consdata->{vncport}); } } sub migrate { my $node = shift(); my $targ = shift(); my $prevhyp; my $target = "xen+ssh://".$targ; my $currhyp="xen+ssh://"; if ($vmhash->{$node}->[0]->{host}) { $prevhyp=$vmhash->{$node}->[0]->{host}; $currhyp.=$prevhyp; } else { return (1,"Unable to find current location of $node"); } my $sock = IO::Socket::INET->new(Proto=>'udp'); my $ipa=inet_aton($node); my $pa=sockaddr_in(7,$ipa); #UDP echo service, not needed to be actually #serviced, we just want to trigger an arp query my $rc=system("virsh -c $currhyp migrate --live $node $target"); system("arp -d $node"); #Make ethernet fabric take note of change send($sock,"dummy",0,$pa); my $newhypconn= Sys::Virt->new(uri=>"xen+ssh://".$targ); my $dom; eval { $dom = $newhypconn->get_domain_by_name($node); }; if ($dom) { refresh_vm($dom); } if ($rc) { return (1,"Failed migration from $prevhyp to $targ"); } else { $vmtab->setNodeAttribs($node,{host=>$targ}); return (0,"migrated to $targ"); } } sub getpowstate { my $dom = shift; my $vmstat; if ($dom) { $vmstat = $dom->get_info; } if ($vmstat and $runningstates{$vmstat->{state}}) { return "on"; } else { return "off"; } } sub power { my $subcommand = shift; my $retstring; my $dom; eval { $dom = $hypconn->get_domain_by_name($node); }; if ($subcommand eq "boot") { my $currstate=getpowstate($dom); $retstring=$currstate." "; if ($currstate eq "off") { $subcommand="on"; } elsif ($currstate eq "on") { $subcommand="reset"; } } if ($subcommand eq 'on') { unless ($dom) { my $xml=build_xmldesc($node); my $errstr; eval { $dom=$hypconn->create_domain($xml); }; if ($@) { $errstr = $@; } if (ref $errstr) { $errstr = ":".$errstr->{message}; } if ($errstr) { return (1,$errstr); } if ($dom) { refresh_vm($dom); } } else { $retstring .= " $status_noop"; } } elsif ($subcommand eq 'off') { if ($dom) { $dom->destroy(); } else { $retstring .= " $status_noop"; } } elsif ($subcommand eq 'softoff') { if ($dom) { $dom->shutdown(); } else { $retstring .= " $status_noop"; } } elsif ($subcommand eq 'reset') { if ($dom) { $dom->reboot(1); #TODO: Sys::Virt *nor* libvirt have meaningful flags, #but require it $retstring.="reset"; } else { $retstring .= " $status_noop"; } } else { unless ($subcommand =~ /^stat/) { return (1,"Unsupported power directive '$subcommand'"); } } unless ($retstring =~ /reset/) { $retstring=getpowstate($dom).$retstring; } return (0,$retstring); } sub guestcmd { $hyp = shift; $node = shift; my $command = shift; my @args = @_; my $error; if ($command eq "rpower") { return power(@args); } elsif ($command eq "rmigrate") { return migrate($node,@args); } elsif ($command eq "getxencons") { return getvmcons($node,@args); } =cut } elsif ($command eq "rvitals") { return vitals(@args); } elsif ($command =~ /r[ms]preset/) { return resetmp(@args); } elsif ($command eq "rspconfig") { return mpaconfig($mpa,$user,$pass,$node,$slot,@args); } elsif ($command eq "rbootseq") { return bootseq(@args); } elsif ($command eq "switchblade") { return switchblade(@args); } elsif ($command eq "getmacs") { return getmacs(@args); } elsif ($command eq "rinv") { return inv(@args); } elsif ($command eq "reventlog") { return eventlog(@args); } elsif ($command eq "rscan") { return rscan(\@args); } =cut return (1,"$command not a supported command by xen method"); } sub preprocess_request { my $request = shift; if ($request->{_xcatdest}) { return [$request]; } #exit if preprocessed my $callback=shift; unless ($libvirtsupport) { #Try to see if conditions changed since last check (no xCATd restart for it to take effect) $libvirtsupport = eval { require Sys::Virt; }; } unless ($libvirtsupport) { #Still no Sys::Virt module $callback->({error=>"Sys::Virt perl module missing, unable to fulfill Xen plugin requirements",errorcode=>[42]}); return []; } require Sys::Virt::Domain; %runningstates = (&Sys::Virt::Domain::STATE_NOSTATE=>1,&Sys::Virt::Domain::STATE_RUNNING=>1,&Sys::Virt::Domain::STATE_BLOCKED=>1); my @requests; my $noderange = $request->{node}; #Should be arrayref my $command = $request->{command}->[0]; my $extrargs = $request->{arg}; my @exargs=($request->{arg}); if (ref($extrargs)) { @exargs=@$extrargs; } my $usage_string=xCAT::Usage->parseCommand($command, @exargs); if ($usage_string) { $callback->({data=>$usage_string}); $request = {}; return; } if (!$noderange) { $usage_string=xCAT::Usage->getUsage($command); $callback->({data=>$usage_string}); $request = {}; return; } #print "noderange=@$noderange\n"; # find service nodes for requested nodes # build an individual request for each service node my $service = "xcat"; my $sn = xCAT::Utils->get_ServiceNode($noderange, $service, "MN"); # build each request for each service node foreach my $snkey (keys %$sn) { #print "snkey=$snkey\n"; my $reqcopy = {%$request}; $reqcopy->{node} = $sn->{$snkey}; $reqcopy->{'_xcatdest'} = $snkey; push @requests, $reqcopy; } return \@requests; } sub adopt { #TODO: adopt orphans into suitable homes if possible return 0; } sub grab_table_data{ my $noderange=shift; my $callback=shift; $vmtab = xCAT::Table->new("vm"); $hmtab = xCAT::Table->new("nodehm"); if ($hmtab) { $hmhash = $hmtab->getNodesAttribs($noderange,['serialspeed']); } unless ($vmtab) { $callback->({data=>["Cannot open vm table"]}); return; } $vmhash = $vmtab->getNodesAttribs($noderange,['node','host','migrationdest','storage','memory','cpu','nics','bootorder','virtflags']); $mactab = xCAT::Table->new("mac",-create=>1); $nrtab= xCAT::Table->new("noderes",-create=>1); $machash = $mactab->getNodesAttribs($noderange,['mac']); } sub process_request { $SIG{INT} = $SIG{TERM} = sub { foreach (keys %vm_comm_pids) { kill 2, $_; } exit 0; }; my $request = shift; my $callback = shift; $doreq = shift; my $level = shift; my $noderange = $request->{node}; my $command = $request->{command}->[0]; my @exargs; unless ($command) { return; #Empty request } if (ref($request->{arg})) { @exargs = @{$request->{arg}}; } else { @exargs = ($request->{arg}); } grab_table_data($noderange,$callback); my $sitetab = xCAT::Table->new('site'); my $tmp; if ($sitetab) { ($tmp)=$sitetab->getAttribs({'key'=>'vmmaxp'},'value'); if (defined($tmp)) { $vmmaxp=$tmp->{value}; } } my $children = 0; $SIG{CHLD} = sub { my $cpid; while ($cpid = waitpid(-1, WNOHANG) > 0) { delete $vm_comm_pids{$cpid}; $children--; } }; my $inputs = new IO::Select;; my $sub_fds = new IO::Select; %hyphash=(); my %orphans=(); foreach (keys %{$vmhash}) { if ($vmhash->{$_}->[0]->{host}) { $hyphash{$vmhash->{$_}->[0]->{host}}->{nodes}->{$_}=1; } else { $orphans{$_}=1; } } if (keys %orphans) { if ($command eq "rpower" and (grep /^on$/,@exargs or grep /^boot$/,@exargs)) { unless (adopt(\%orphans,\%hyphash)) { $callback->({error=>"Can't find ".join(",",keys %orphans),errorcode=>[1]}); return 1; } } elsif ($command eq "rmigrate") { $callback->({error=>"Can't find ".join(",",keys %orphans),errorcode=>[1]}); return; } else { $callback->({error=>"Can't find ".join(",",keys %orphans),errorcode=>[1]}); return; } } if ($command eq "rbeacon") { my %req=(); $req{command}=['rbeacon']; $req{arg}=\@exargs; $req{node}=[keys %hyphash]; $doreq->(\%req,$callback); return; } #get new node status my %nodestat=(); my $check=0; my $newstat; if ($command eq 'rpower') { my $subcommand=$exargs[0]; if (($subcommand ne 'stat') && ($subcommand ne 'status')) { $check=1; my @allnodes=@$noderange; if ($subcommand eq 'off') { $newstat=$::STATUS_POWERING_OFF; } else { $newstat=$::STATUS_BOOTING;} foreach (@allnodes) { $nodestat{$_}=$newstat; } if ($subcommand ne 'off') { #get the current nodeset stat if (@allnodes>0) { my $nsh={}; my ($ret, $msg)=xCAT::Utils->getNodesetStates(\@allnodes, $nsh); if (!$ret) { foreach (keys %$nsh) { my $currstate=$nsh->{$_}; $nodestat{$_}=xCAT_monitoring::monitorctrl->getNodeStatusFromNodesetState($currstate, "rpower"); } } } } } } #foreach (keys %nodestat) { print "node=$_,status=" . $nodestat{$_} ."\n"; } #Ling:remove foreach $hyp (sort (keys %hyphash)) { while ($children > $vmmaxp) { my $errornodes={}; forward_data($callback,$sub_fds,$errornodes); #update the node status to the nodelist.status table if ($check) { updateNodeStatus(\%nodestat, $errornodes); } } $children++; my $cfd; my $pfd; socketpair($pfd, $cfd,AF_UNIX,SOCK_STREAM,PF_UNSPEC) or die "socketpair: $!"; $cfd->autoflush(1); $pfd->autoflush(1); my $cpid = xCAT::Utils->xfork; unless (defined($cpid)) { die "Fork error"; } unless ($cpid) { close($cfd); dohyp($pfd,$hyp,$command,-args=>\@exargs); exit(0); } $vm_comm_pids{$cpid} = 1; close ($pfd); $sub_fds->add($cfd); } while ($sub_fds->count > 0 or $children > 0) { my $errornodes={}; forward_data($callback,$sub_fds,$errornodes); #update the node status to the nodelist.status table if ($check) { updateNodeStatus(\%nodestat, $errornodes); } } #Make sure they get drained, this probably is overkill but shouldn't hurt my $rc=1; while ( $rc>0 ) { my $errornodes={}; $rc=forward_data($callback,$sub_fds,$errornodes); #update the node status to the nodelist.status table if ($check) { updateNodeStatus(\%nodestat, $errornodes); } } } sub updateNodeStatus { my $nodestat=shift; my $errornodes=shift; my %node_status=(); foreach my $node (keys(%$errornodes)) { if ($errornodes->{$node} == -1) { next;} #has error, not updating status my $stat=$nodestat->{$node}; if (exists($node_status{$stat})) { my $pa=$node_status{$stat}; push(@$pa, $node); } else { $node_status{$stat}=[$node]; } } xCAT_monitoring::monitorctrl::setNodeStatusAttributes(\%node_status, 1); } sub forward_data { my $callback = shift; my $fds = shift; my $errornodes=shift; my @ready_fds = $fds->can_read(1); my $rfh; my $rc = @ready_fds; foreach $rfh (@ready_fds) { my $data; if ($data = <$rfh>) { while ($data !~ /ENDOFFREEZE6sK4ci/) { $data .= <$rfh>; } print $rfh "ACK\n"; my $responses=thaw($data); foreach (@$responses) { #save the nodes that has errors and the ones that has no-op for use by the node status monitoring my $no_op=0; if ($_->{node}->[0]->{errorcode}) { $no_op=1; } else { my $text=$_->{node}->[0]->{data}->[0]->{contents}->[0]; #print "data:$text\n"; if (($text) && ($text =~ /$status_noop/)) { $no_op=1; #remove the symbols that meant for use by node status $_->{node}->[0]->{data}->[0]->{contents}->[0] =~ s/ $status_noop//; } } #print "data:". $_->{node}->[0]->{data}->[0]->{contents}->[0] . "\n"; if ($no_op) { if ($errornodes) { $errornodes->{$_->{node}->[0]->{name}->[0]}=-1; } } else { if ($errornodes) { $errornodes->{$_->{node}->[0]->{name}->[0]}=1; } } $callback->($_); } } else { $fds->remove($rfh); close($rfh); } } yield(); #Try to avoid useless iterations as much as possible return $rc; } sub dohyp { my $out = shift; $hyp = shift; my $command=shift; my %namedargs=@_; my @exargs=@{$namedargs{-args}}; my $node; my $args = \@exargs; $hypconn= Sys::Virt->new(uri=>"xen+ssh://".$hyp); unless ($hypconn) { my %err=(node=>[]); foreach (keys %{$hyphash{$hyp}->{nodes}}) { push (@{$err{node}},{name=>[$_],error=>["Cannot communicate via libvirt to $hyp"],errorcode=>[1]}); } print $out freeze([\%err]); print $out "\nENDOFFREEZE6sK4ci\n"; yield(); waitforack($out); return 1,"General error establishing libvirt communication"; } foreach $node (sort (keys %{$hyphash{$hyp}->{nodes}})) { my ($rc,@output) = guestcmd($hyp,$node,$command,@$args); foreach(@output) { my %output; if (ref($_)) { print $out freeze([$_]); print $out "\nENDOFFREEZE6sK4ci\n"; yield(); waitforack($out); next; } (my $desc,my $text) = split (/:/,$_,2); unless ($text) { $text=$desc; } else { $desc =~ s/^\s+//; $desc =~ s/\s+$//; if ($desc) { $output{node}->[0]->{data}->[0]->{desc}->[0]=$desc; } } $text =~ s/^\s+//; $text =~ s/\s+$//; $output{node}->[0]->{errorcode} = $rc; $output{node}->[0]->{name}->[0]=$node; $output{node}->[0]->{data}->[0]->{contents}->[0]=$text; print $out freeze([\%output]); print $out "\nENDOFFREEZE6sK4ci\n"; yield(); waitforack($out); } yield(); } #my $msgtoparent=freeze(\@outhashes); # = XMLout(\%output,RootName => 'xcatresponse'); #print $out $msgtoparent; #$node.": $_\n"; } 1;