#!/usr/bin/perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html

# Downloads/converts the xCAT docs on the sourceforge wiki to local HTML and PDF.
# This script is not dependent on other xCAT code, so you can copy it to a machine
# that has internet access to run it.  Before running this command, you must have
# wget, python, and pisa installed.  See:
# http://sourceforge.net/apps/mediawiki/xcat/index.php?title=Editing_xCAT_Documentation_Pages#Converting_Wiki_Pages_to_HTML_and_PDFs .
#
# Note: do not use the --upload option, unless your machine has authority to
# write to http://xcat.sourceforge.net/doc/ .  You also need to set $UPLOADUSER
# to your sourceforge user.
#
# Flow: cd to the destination dir (first positional argument, default '.'),
# download the wiki pages into ./html, convert each one into ./pdf, tar both
# directories into a time-stamped tarball, and optionally rsync the tarball to
# sourceforge.
#
# NOTE(review): the copy of this file that was reviewed had lost its line
# breaks and every "<...>" span.  The tail of gethtmldocs() and the head of
# convert2pdf() were reconstructed from how the rest of the script uses them;
# confirm both against the upstream copy.

#BEGIN
#{
#    $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr';
#}
use strict;
use warnings;

#use lib "$::XCATROOT/lib/perl";
#use xCAT::Utils;
use Getopt::Long;
use File::Path;
use Cwd;

#use Data::Dumper;

# Update this list if you group any xcat docs on a separate page such that they
# are no longer linked from the main doc page.
my @indexdocs = (
    'XCAT_Documentation',
    'Power_775_Cluster_Documentation',
    'Highly_Available_Management_Node',
    'Mixed_Cluster_Support',
);

#my $VERSION;
my $HELP;
my $UPLOAD;
my $UPLOADONLY;
my $VERBOSE;

# Print the usage line and exit with the given exit code.
my $usage = sub {
    my $exitcode = shift @_;
    print "Usage: getxcatdocs [-?|-h|--help] [-v|--verbose] [-u|--upload] [--uploadonly] [<destination-dir>]\n";
    exit $exitcode;
};

# Process the cmd line args
Getopt::Long::Configure("bundling");

#Getopt::Long::Configure("pass_through");
Getopt::Long::Configure("no_pass_through");
if (!GetOptions(
        'h|?|help'   => \$HELP,
        'v|verbose'  => \$VERBOSE,
        'u|upload'   => \$UPLOAD,
        'uploadonly' => \$UPLOADONLY,
    ))
{
    $usage->(1);
}

if ($HELP) { $usage->(0); }

#if ($VERSION) {
#    print xCAT::Utils->Version(), "\n";
#    exit;
#}

if ($^O =~ /^aix/i) { die "Error: this command is not yet supported on AIX.\n"; }

my $destdir = scalar(@ARGV) ? $ARGV[0] : '.';
chdir($destdir) or die "Can not cd to $destdir: $!\n";

#my $docdir = $ENV{'PWD'};

# Download the HTML docs and convert them all to pdfs
my @docs;
if (!$UPLOADONLY) {
    @docs = gethtmldocs('html');
    convert2pdf('pdf', \@docs);
}

# tar/compress
my $date = `date +%Y%m%d%H%M`;
chomp $date;    # strip only the trailing newline (chop would eat a digit if there were none)
my $docname = "xcat-docs-snap$date.tar.gz";

#system('pwd');
my $cmd = "tar -zcf $docname html pdf 2>&1";
verbose($cmd);
if (system($cmd) != 0) {
    # Report, but keep going: the original script ignored tar's exit status.
    warn "Warning: '$cmd' failed with status $?\n";
}

# Optionally upload the tarball to sourceforge
if ($UPLOAD || $UPLOADONLY) {
    my $UPLOADUSER = 'bp-sawyers';

    #my $rsync = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:htdocs/doc/';
    my $rsync = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:/home/frs/project/x/xc/xcat/doc/';
    print "$rsync\n";

    # Try the upload up to 5 times, stopping at the first success.
    my $uploaded = 0;
    foreach my $attempt (1 .. 5) {
        if (system("$rsync 2>&1") == 0) {
            $uploaded = 1;
            last;
        }
    }
    if (!$uploaded) {
        # The exit status of the script stays 0, as before; only tell the user.
        warn "Warning: upload of $docname failed after 5 attempts.\n";
    }
}
exit 0;


# Print the given message followed by a newline, but only in --verbose mode.
sub verbose {
    my $msg = shift;
    if ($VERBOSE) { print $msg, "\n"; }
    return;
}


# Download all of the html docs from several "index" docs
#   $dir - directory (relative to the cwd) to download into; created if needed
#          and emptied of any non-hidden files first.
# Returns the list of downloaded file names of the form index.php?title=<doc>.
# Dies if the directory can not be entered or read, or if wget fails.
sub gethtmldocs {
    my $dir     = shift;
    my $savedir = getcwd();
    File::Path::make_path($dir);

    # Must not fall through on failure: the unlink below would otherwise
    # delete the files of whatever directory we happen to be in.
    chdir($dir) or die "Can not cd to $dir: $!\n";

    #system('pwd');
    unlink <*>;    # delete all the files in the dir, in case they previously ran this

    #system('ls');

    my $indexes = '';
    foreach my $index (@indexdocs) {
        $indexes .= qq('http://sourceforge.net/apps/mediawiki/xcat/index.php?title=$index&printable=yes' );
    }
    print "Downloading the xCAT wiki documentation to $dir, from: $indexes ...\n";
    runwget($indexes);

    # NOTE(review): the original had a (commented-out) sed command here that
    # removed the dir portion of links to other docs; its text was lost, as was
    # everything up to the end of this sub.  The rest of this sub is a
    # reconstruction based on what convert2pdf() expects: file names starting
    # with "index.php?title=".  Confirm against the upstream copy.
    opendir(my $dh, '.') or die "Error: could not read the just created html directory: $!\n";
    my @files = sort grep { /^index\.php\?title=/ } readdir($dh);
    closedir($dh);

    chdir($savedir) or die "Can not cd back to $savedir: $!\n";
    return @files;
}


# Convert the downloaded wiki docs to pdf
#   $dir   - directory (relative to the cwd) to write the pdfs into; created if
#            needed and emptied of any non-hidden files first.
#   $files - ref to the list of html file names returned by gethtmldocs().
# Each doc is converted from its printable wiki URL, not from the local html.
# Dies if xhtml2pdf is not installed or a conversion command fails.
sub convert2pdf {
    # NOTE(review): everything in this sub above the xhtml2pdf check is a
    # reconstruction (the original text was lost); it mirrors gethtmldocs().
    my ($dir, $files) = @_;
    my $savedir = getcwd();
    File::Path::make_path($dir);

    # As in gethtmldocs(): never reach the unlink below from the wrong dir.
    chdir($dir) or die "Can not cd to $dir: $!\n";

    if (system('which xhtml2pdf >/dev/null 2>&1')) {
        die "xhtml2pdf is not installed. See http://sourceforge.net/apps/mediawiki/xcat/index.php?title=Editing_xCAT_Documentation_Pages#Converting_Wiki_Pages_to_HTML_and_PDFs .\n";
    }
    unlink <*>;    # delete all the files in the dir, in case they previously ran this

    foreach my $file (@$files) {
        # Skip the wiki's own pages and the top-level index page.
        if ($file =~ /^index\.php\?title=MediaWiki:/ || $file eq 'index.php?title=XCAT_Documentation') { next; }

        my ($docname) = $file =~ /^index\.php\?title=(.+)$/;
        next if !defined $docname;    # not a wiki doc file name; nothing to convert

        print "Converting $docname to PDF format...\n";
        my $url      = "http://sourceforge.net/apps/mediawiki/xcat/$file&printable=yes";
        my $destfile = "$docname.pdf";
        my $cmd      = "xhtml2pdf '$url' '$destfile' ";
        runh2p($cmd);
    }
    chdir($savedir);
    return;
}


# Run the wget cmd and filter out some of the silly output
#   $index - string of (already shell-quoted) URLs to start the download from.
# Dies if wget can not be started or exits with a non-zero status.
sub runwget {
    my $index = shift;

    # options we might consider: --html-extension --restrict-file-names=windows --cut-dirs=3
    # options that do not work: --relative
    my $rejectlist = q('*title=Special:*,*title=Talk:*,*title=-&*,*title=HowTos,*title=Main_Page,*title=MediaWiki:*,*title=Release_Notes,*title=Wish_List_for_xCAT_2,*&action=edit*,*&action=history*,*&printable=yes*,*&oldid=*,index.html,opensearch_desc.php,xcat,login.php,support');
    my $cmd = qq(wget --recursive --convert-links --no-verbose --progress=bar --level=1 --page-requisites --no-parent --no-host-directories --no-directories --no-clobber --execute robots=off --post-data='printable=yes' --reject $rejectlist $index);
    verbose($cmd);

    # The command needs the shell (quoting and 2>&1), so it is passed as one string.
    open(my $out, '-|', "$cmd 2>&1") or die "can't fork $cmd: $!\n";
    while (my $line = <$out>) {
        # Shorten wget's per-file report to just the name of the saved file.
        if ($line =~ /URL:https*:\/\/sourceforge\.net.+\s+->\s+\"(\S+)\"\s+\[/) {
            print "Downloaded $1.\n";
        }
        else {
            print $line;
        }
    }

    # close() on a pipe also fails when the command exited non-zero.
    close($out) or die "Error running $cmd: $! $?";
    return;
}


# Run the xhtml2pdf cmd and filter out some of the silly output
#   $cmd - the complete xhtml2pdf command line.
# Dies if the command can not be started or exits with a non-zero status.
sub runh2p {
    my $cmd = shift;
    verbose($cmd);

    # The command needs the shell (quoting and 2>&1), so it is passed as one string.
    open(my $out, '-|', "$cmd 2>&1") or die "can't fork $cmd: $!\n";
    while (my $line = <$out>) {
        next if $line =~ /DeprecationWarning:\sthe sets module is deprecated/;
        next if $line =~ /from sets import ImmutableSet/;
        next if $line =~ /^Converting\ssourceforge.net/;
        print $line;
    }

    # close() on a pipe also fails when the command exited non-zero.
    close($out) or die "Error running $cmd: $! $?";
    return;
}