xcat-core/xCAT-client/bin/getxcatdocs
2012-04-14 17:34:04 +00:00

172 lines
6.0 KiB
Perl
Executable File

#!/usr/bin/perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
# Downloads/converts the xCAT docs on the sourceforge wiki to local HTML and PDF.
# This script is not dependent on other xCAT code, so you can copy it to a machine
# that has internet access to run it. Before running this command, you must have
# wget, python, and pisa installed. See: http://sourceforge.net/apps/mediawiki/xcat/index.php?title=Editing_xCAT_Documentation_Pages#Converting_Wiki_Pages_to_HTML_and_PDFs .
# Note: do not use the --upload option, unless your machine has authority to write to http://xcat.sourceforge.net/doc/ .
# You also need to set $UPLOADUSER to your sourceforge user.
#BEGIN
#{
# $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr';
#}
use strict;
#use lib "$::XCATROOT/lib/perl";
#use xCAT::Utils;
use Getopt::Long;
use File::Path;
use Cwd;
#use Data::Dumper;
# Wiki "index" pages used as the starting points of the download.
# Update this list if you group any xcat docs on a separate page such that they are no longer
# linked from the main doc page.
my @indexdocs = qw(
    XCAT_Documentation
    Power_775_Cluster_Documentation
    Highly_Available_Management_Node
    Mixed_Cluster_Support
);
#my $VERSION;
# Command line flags; each one stays undefined unless its option is given.
my ($HELP, $UPLOAD, $UPLOADONLY, $VERBOSE);
# Print the one-line usage summary and terminate the script.
#   argument - the exit status to terminate with
my $usage = sub {
    my ($status) = @_;
    print "Usage: getxcatdocs [-?|-h|--help] [-v|--verbose] [-u|--upload] [--uploadonly] [<destination-dir>]\n";
    exit $status;
};
# Process the cmd line args. Single-letter flags may be bundled (e.g. -vu), and
# unrecognized options are treated as errors rather than being left in @ARGV.
#Getopt::Long::Configure("pass_through");
Getopt::Long::Configure("bundling", "no_pass_through");
GetOptions(
    'h|?|help'   => \$HELP,
    'v|verbose'  => \$VERBOSE,
    'u|upload'   => \$UPLOAD,
    'uploadonly' => \$UPLOADONLY,
) or $usage->(1);
$usage->(0) if $HELP;
#if ($VERSION) {
#print xCAT::Utils->Version(), "\n";
# exit;
#}
die "Error: this command is not yet supported on AIX.\n" if $^O =~ /^aix/i;
# The first remaining argument, if any, is the directory to work in; default is the cwd.
my $destdir = @ARGV ? $ARGV[0] : '.';
chdir($destdir) or die "Can not cd to $destdir: $!\n";
#my $docdir = $ENV{'PWD'};
# Download the HTML docs and convert them all to pdfs. With --uploadonly this step is
# skipped and whatever html and pdf directories already exist are packaged instead.
my @dir;
if (!$UPLOADONLY) {
    @dir = gethtmldocs('html');
    convert2pdf('pdf', \@dir);
}

# tar/compress into a tarball named after the current date and time
my $date = `date +%Y%m%d%H%M`;
chomp $date;    # remove only the trailing newline (chop would drop any last character)
my $docname = "xcat-docs-snap$date.tar.gz";
#system('pwd');
my $cmd = "tar -zcf $docname html pdf 2>&1";
verbose($cmd);
# Stop here if the tarball could not be built, instead of uploading a missing or partial file.
if (system($cmd) != 0) {
    die "Error: could not create $docname: '$cmd' failed (wait status $?).\n";
}

# Optionally upload the tarball to sourceforge
if ($UPLOAD || $UPLOADONLY) {
    my $UPLOADUSER = 'bp-sawyers';
    my $maxtries = 5;    # the upload is attempted at most this many times
    #my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:htdocs/doc/';
    my $rsynccmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:/home/frs/project/x/xc/xcat/doc/';
    print "$rsynccmd\n";
    my $uploaded = 0;
    foreach my $try (1 .. $maxtries) {
        if (system("$rsynccmd 2>&1") == 0) {
            $uploaded = 1;
            last;
        }
    }
    # Report a failed upload through the exit status rather than exiting 0 silently.
    if (!$uploaded) {
        print STDERR "Error: upload of $docname failed after $maxtries attempts.\n";
        exit 1;
    }
}
exit 0;
# Print the given message followed by a newline, but only when -v|--verbose was specified.
sub verbose {
    my ($msg) = @_;
    print $msg, "\n" if $VERBOSE;
}
# Download all of the html docs from several "index" docs (the pages listed in @indexdocs).
#   $dir    - directory to download into; it is created if needed and every file
#             already in it is deleted first
#   returns - the names of the files in $dir that start with "index.php?title="
# Dies if $dir can not be entered or read, or if the original directory can not be restored.
sub gethtmldocs {
    my $dir = shift;
    my $savedir = getcwd();
    defined($savedir) or die "Error: could not determine the current directory: $!\n";
    File::Path::make_path($dir);
    # This must succeed: the unlink below deletes every file in whatever the cwd is.
    chdir($dir) or die "Error: could not cd to $dir: $!\n";
    unlink <*>;    # delete all the files in the dir, in case they previously ran this

    # Build the space-separated list of shell-quoted index URLs to hand to wget
    my $indexes = '';
    foreach my $index (@indexdocs) {
        $indexes .= qq('http://sourceforge.net/apps/mediawiki/xcat/index.php?title=$index&printable=yes' );
    }
    print "Downloading the xCAT wiki documentation to $dir, from: $indexes ...\n";
    runwget($indexes);

    # Encode the '?' in links to other docs as %3F (presumably so a browser treats the
    # link as a local file name instead of a query string).
    #my $sedcmd = q(sed -i 's/<a href="\/apps\/mediawiki\/xcat\/index.php?title/<a href="index.php%3Ftitle/' *);
    my $sedcmd = q(sed -i 's/<a href="index.php?title/<a href="index.php%3Ftitle/g' *);
    print "$sedcmd\n";
    system($sedcmd);

    # get the list of docs
    opendir(my $dh, '.') or die "Error: could not read the just created html directory.\n";
    my @docs = grep { /^index\.php\?title=/ } readdir($dh);
    closedir($dh);    # directory handles are closed with closedir, not close

    chdir($savedir) or die "Error: could not cd back to $savedir: $!\n";
    return @docs;
}
# Convert to pdf: run xhtml2pdf against the printable wiki URL of each downloaded page.
#   $dir   - directory to write the pdf files into; it is created if needed and
#            every file already in it is deleted first
#   $files - reference to an array of file names of the form "index.php?title=<page>"
# MediaWiki: pages and the XCAT_Documentation index page are skipped.
# Dies if xhtml2pdf is not installed or a directory can not be entered.
sub convert2pdf {
    my ($dir, $files) = @_;
    # Check for the converter first, so that nothing is created or deleted when it is missing.
    if (system('which xhtml2pdf >/dev/null 2>&1')) { die "xhtml2pdf is not installed. See http://sourceforge.net/apps/mediawiki/xcat/index.php?title=Editing_xCAT_Documentation_Pages#Converting_Wiki_Pages_to_HTML_and_PDFs .\n"; }
    my $savedir = getcwd();
    defined($savedir) or die "Error: could not determine the current directory: $!\n";
    File::Path::make_path($dir);
    # This must succeed: the unlink below deletes every file in whatever the cwd is.
    chdir($dir) or die "Error: could not cd to $dir: $!\n";
    unlink <*>;    # delete all the files in the dir, in case they previously ran this

    foreach my $file (@$files) {
        next if $file =~ /^index\.php\?title=MediaWiki:/ || $file eq 'index.php?title=XCAT_Documentation';
        my ($docname) = $file =~ /^index\.php\?title=(.+)$/;
        next unless defined $docname;    # not a wiki page file name, nothing to convert
        print "Converting $docname to PDF format...\n";
        my $url = "http://sourceforge.net/apps/mediawiki/xcat/$file&printable=yes";
        my $destfile = "$docname.pdf";
        # Both values go inside single quotes on a shell command line, so a single
        # quote in a page name has to be written as '\'' to keep the quoting intact.
        s/'/'\\''/g foreach ($url, $destfile);
        my $cmd = "xhtml2pdf '$url' '$destfile' ";
        runh2p($cmd);
    }

    chdir($savedir) or die "Error: could not cd back to $savedir: $!\n";
}
# Run the wget cmd and filter out some of the silly output.
#   $index - the URL(s) to start from, already shell-quoted and separated by spaces
# Each "URL:... -> "file" [" line wget reports is shortened to "Downloaded file.";
# all other output is printed unchanged.
# Dies if wget can not be started, or if closing the pipe reports a failure (which
# includes wget exiting with a non-zero status).
sub runwget {
    my $index = shift;
    # options we might consider: --html-extension --restrict-file-names=windows --cut-dirs=3
    # options that do not work: --relative
    my $rejectlist = q('*title=Special:*,*title=Talk:*,*title=-&*,*title=HowTos,*title=Main_Page,*title=MediaWiki:*,*title=Release_Notes,*title=Wish_List_for_xCAT_2,*&action=edit*,*&action=history*,*&printable=yes*,*&oldid=*,index.html,opensearch_desc.php,xcat,login.php,support');
    my $cmd = qq(wget --recursive --convert-links --no-verbose --progress=bar --level=1 --page-requisites --no-parent --no-host-directories --no-directories --no-clobber --execute robots=off --post-data='printable=yes' --reject $rejectlist $index);
    verbose($cmd);
    # Lexical handle and 3-arg open; the single command string is still run by the
    # shell, which is what makes the 2>&1 redirection work.
    open(my $out, '-|', "$cmd 2>&1") or die "can't fork $cmd: $!\n";
    while (my $line = <$out>) {
        if ($line =~ m{URL:https?://sourceforge\.net.+\s+->\s+"(\S+)"\s+\[}) { print "Downloaded $1.\n"; }
        else { print $line; }
    }
    close($out) or die "Error running $cmd: $! $?";
}
# Run the xhtml2pdf cmd and filter out some of the silly output.
#   $cmd - the complete xhtml2pdf command line to run through the shell
# Lines about the deprecated python sets module and the "Converting sourceforge.net"
# progress lines are dropped; all other output is printed unchanged.
# Dies if the command can not be started, or if closing the pipe reports a failure
# (which includes the command exiting with a non-zero status).
sub runh2p {
    my $cmd = shift;
    verbose($cmd);
    # Lexical handle and 3-arg open; the single command string is still run by the
    # shell, which is what makes the 2>&1 redirection work.
    open(my $out, '-|', "$cmd 2>&1") or die "can't fork $cmd: $!\n";
    while (my $line = <$out>) {
        next if $line =~ /DeprecationWarning:\sthe sets module is deprecated/;
        next if $line =~ /from sets import ImmutableSet/;
        next if $line =~ /^Converting\ssourceforge\.net/;
        print $line;
    }
    close($out) or die "Error running $cmd: $! $?";
}