#!/usr/bin/perl

# Downloads/converts the xCAT docs on the SourceForge Allura wiki to local HTML and PDF.
# This script is not dependent on other xCAT code, so you can copy it to a machine
# that has internet access to run it. Before running this command, you must have
# curl, pandoc, and latex installed. See:
# http://sourceforge.net/p/xcat/wiki/Editing_and_Downloading_xCAT_Documentation/#converting-wiki-pages-to-html-and-pdfs

# Note: do not use the --upload option unless your machine has authority to write
# to http://xcat.sourceforge.net/doc/ . You also need to set $UPLOADUSER to your
# sourceforge user:
my $UPLOADUSER = 'mellor';

use strict;
use Getopt::Long;
use Cwd;
use JSON;

# URL for the xCAT Allura wiki API markdown on SourceForge
my $sf_url   = 'http://sourceforge.net/rest';
my $wiki_url = $sf_url . '/p/xcat/wiki/';

# Update this list if you group any xcat docs on a separate page such that they
# are no longer linked from the main doc page:
my @indexdocs = (
    'XCAT_Documentation',
    'Power_775_Cluster_Documentation',
    'Highly_Available_Management_Node',
    'Mixed_Cluster_Support',
    'IBM_HPC_Stack_in_an_xCAT_Cluster',
);

my $HELP;
my $UPLOAD;
my $UPLOADONLY;
my $IGNOREERRORS;
my $SINGLE_DOC;
my $VERBOSE;

sub verbose { if ($VERBOSE) { print shift, "\n"; } }

my $usage = sub {
    my $exitcode = shift @_;
    print "Usage: getxcatdocs [-?|-h|--help]\n";
    print "Usage: getxcatdocs [-v|--verbose] [-u|--upload] [--uploadonly] [-i|--ignoreerrors] [destination-dir]\n";
    print "Usage: getxcatdocs [-v|--verbose] [-d|--doc single_doc] [-i|--ignoreerrors] [destination-dir]\n";
    exit $exitcode;
};

# Main processing

# Process the cmd line args
Getopt::Long::Configure("bundling");
#Getopt::Long::Configure("pass_through");
Getopt::Long::Configure("no_pass_through");
if (!GetOptions(
        'h|?|help'       => \$HELP,
        'v|verbose'      => \$VERBOSE,
        'u|upload'       => \$UPLOAD,
        'uploadonly'     => \$UPLOADONLY,
        'i|ignoreerrors' => \$IGNOREERRORS,
        'd|doc=s'        => \$SINGLE_DOC))
{
    $usage->(1);
}

if ($HELP) { $usage->(0); }

if ($^O =~ /^aix/i) { die "Error: this command is not yet supported on AIX.\n"; }

my $destdir = scalar(@ARGV) ? $ARGV[0] : '.';
chdir($destdir) or die "Can not cd to $destdir: $!\n";

my $json = JSON->new();

# With --doc, download and convert just the one specified wiki page into the
# current directory
if ($SINGLE_DOC) {
    my $scurlcmd = "curl -X GET $wiki_url$SINGLE_DOC";
    verbose($scurlcmd);
    my $pagecontent = `$scurlcmd`;
    if ($? && !$IGNOREERRORS) { die "error encountered in $scurlcmd \n"; }
    my $pageout = $json->decode($pagecontent);
    # Download any attachments (images) referenced by the page
    foreach my $pageatt (@{$pageout->{attachments}}) {
        my $swgetcmd = "wget $pageatt->{url}";
        verbose($swgetcmd);
        system($swgetcmd);
        if ($? && !$IGNOREERRORS) { die "error encountered in $swgetcmd \n"; }
    }
    convert_doc($SINGLE_DOC, $pageout->{text}, '.', '.', '.', '.');
    exit;
}

# Download the HTML docs and convert them all to pdfs
if (!$UPLOADONLY) { gethtmldocs('md', 'html', 'pdf', 'images'); }

# tar/compress
my $date = `date +%Y%m%d%H%M`;
chomp $date;
my $docname = "xcat-docs-snap$date.tar.gz";
chdir($destdir) or die "Can not cd to $destdir: $!\n";
my $cmd = "tar -zcf $docname html pdf images 2>&1";
verbose($cmd);
system($cmd) == 0 or die "Error running $cmd: $!, rc=$?";

# Optionally upload the tarball to sourceforge, retrying up to 5 times
if ($UPLOAD || $UPLOADONLY) {
    my $count = 1;
    #my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:htdocs/doc/';
    my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:/home/frs/project/x/xc/xcat/doc/';
    print "$cmd\n";
    while ($count <= 5 && system("$cmd 2>&1")) { $count++; }
}
exit 0;
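
# The code above and the subroutines below all repeat the same idiom: build a
# shell command string, run it, and die unless --ignoreerrors was given. A
# helper like the following could consolidate that pattern. This is only a
# sketch: 'run_or_die' is not part of the original script and nothing in this
# file calls it.
sub run_or_die {
    my $runcmd = shift;
    verbose($runcmd);
    system($runcmd);
    if ($? && !$IGNOREERRORS) { die "error encountered in $runcmd \n"; }
}
# Hypothetical usage: run_or_die("wget -P images/ $url");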
sub gethtmldocs {
    my $mddir    = shift;
    my $htmldir  = shift;
    my $pdfdir   = shift;
    my $imagedir = shift;
    my $savedir  = getcwd();
    mkdir($mddir);
    mkdir($htmldir);
    mkdir($pdfdir);
    mkdir($imagedir);
    # delete all the files in the dirs in case this script was run previously
    unlink <$mddir/*>;
    unlink <$htmldir/*>;
    unlink <$pdfdir/*>;
    unlink <$imagedir/*>;

    print "\nDownloading and converting the xCAT wiki document list from $wiki_url ...\n";
    my @doclist;
    my %donelist;
    # First process the index pages, collecting the list of related docs as we go
    foreach my $index (@indexdocs) {
        if ($donelist{$index}) { next; }
        my $indexcmd = "curl -X GET $wiki_url$index";
        verbose($indexcmd);
        my $indexmd = `$indexcmd`;
        if ($? && !$IGNOREERRORS) { die "error encountered in $indexcmd \n"; }
        my $jsout = $json->decode($indexmd);
        push @doclist, @{$jsout->{related_artifacts}};
        foreach my $att (@{$jsout->{attachments}}) {
            my $iwgetcmd = "wget -P $imagedir/ $att->{url}";
            verbose($iwgetcmd);
            system($iwgetcmd);
            if ($? && !$IGNOREERRORS) { die "error encountered in $iwgetcmd \n"; }
        }
        convert_doc($index, $jsout->{text}, $mddir, $htmldir, $pdfdir, $imagedir);
        $donelist{$index} = 1;
    }

    print "\nDownloading and converting the xCAT wiki documentation to $savedir ...\n";
    foreach my $doc (@doclist) {
        # Each related_artifacts entry is a path like /p/xcat/wiki/Doc_Name/ ;
        # strip everything but the doc name itself
        my $doc_name = $doc;
        $doc_name =~ s/\/.*\/(.+)\/$/$1/;
        if ($donelist{$doc_name}) { next; }
        verbose("processing $doc");
        my $doc_url = $sf_url . $doc;
        my $curlcmd = "curl -X GET $doc_url";
        verbose($curlcmd);
        my $pagecontent = `$curlcmd`;
        if ($? && !$IGNOREERRORS) { die "error encountered in $curlcmd \n"; }
        my $pageout = $json->decode($pagecontent);
        foreach my $pageatt (@{$pageout->{attachments}}) {
            my $wgetcmd = "wget -P $imagedir/ $pageatt->{url}";
            verbose($wgetcmd);
            system($wgetcmd);
            if ($? && !$IGNOREERRORS) { die "error encountered in $wgetcmd \n"; }
        }
        convert_doc($doc_name, $pageout->{text}, $mddir, $htmldir, $pdfdir, $imagedir);
        $donelist{$doc_name} = 1;
    }
    chdir($savedir);
}

sub convert_doc {
    my $doc_name = shift;
    my $doc_text = shift;
    my $mddir    = shift;
    my $htmldir  = shift;
    my $pdfdir   = shift;
    my $imagedir = shift;

    ## Make image refs local, pointing at the downloaded copies in $imagedir
    $doc_text =~ s/\!\[\]\(.+\/(.+)\.png\)/\!\[\]\(\.\.\/$imagedir\/$1\.png\)/g;
    $doc_text =~ s/\!\[\]\(.+\/(.+)\.PNG\)/\!\[\]\(\.\.\/$imagedir\/$1\.PNG\)/g;
    $doc_text =~ s/\!\[\]\(.+\/(.+)\.jpg\)/\!\[\]\(\.\.\/$imagedir\/$1\.jpg\)/g;
    open(MDFILE, ">$mddir/${doc_name}.md") or die "Can not open $mddir/${doc_name}.md for writing: $!\n";
    print MDFILE $doc_text;
    close MDFILE;

    my $pandoccmd = "pandoc -s --toc $mddir/${doc_name}.md -o $htmldir/${doc_name}.html";
    verbose($pandoccmd);
    system($pandoccmd);
    if ($? && !$IGNOREERRORS) { die "error encountered in $pandoccmd \n"; }
    # This rename is probably a hack, but I didn't want to take the time to
    # figure out what was going on:
    # pandoc does different processing if the target filetype is html,
    # but all internal refs only work in the browser when there is no html filetype
    rename "$htmldir/${doc_name}.html", "$htmldir/${doc_name}";

    # For the PDF, rewrite the image refs to be relative to the current dir instead
    $doc_text =~ s/\!\[\]\(\.\.\/$imagedir\/(.+)\.png\)/\!\[\]\(\.\/$imagedir\/$1\.png\)/g;
    $doc_text =~ s/\!\[\]\(\.\.\/$imagedir\/(.+)\.PNG\)/\!\[\]\(\.\/$imagedir\/$1\.PNG\)/g;
    $doc_text =~ s/\!\[\]\(\.\.\/$imagedir\/(.+)\.jpg\)/\!\[\]\(\.\/$imagedir\/$1\.jpg\)/g;
    open(MDFILE, ">$mddir/${doc_name}.md") or die "Can not open $mddir/${doc_name}.md for writing: $!\n";
    print MDFILE $doc_text;
    close MDFILE;

    my $pandoccmd2 = "pandoc --toc $mddir/${doc_name}.md -o $pdfdir/${doc_name}.pdf";
    verbose($pandoccmd2);
    system($pandoccmd2);
    if ($? && !$IGNOREERRORS) { die "error encountered in $pandoccmd2 \n"; }
}
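
# The header comment requires curl, pandoc, and latex (plus this script also
# shells out to wget and tar). A minimal sketch of an up-front check, assuming
# the tools are findable via 'which' in PATH; 'check_prereqs' is not part of
# the original script and is never called, and 'pdflatex' is used here only as
# a stand-in for the latex install pandoc needs for PDF output.
sub check_prereqs {
    foreach my $tool (qw(curl wget tar pandoc pdflatex)) {
        system("which $tool >/dev/null 2>&1") == 0
            or die "Error: required command '$tool' not found in PATH.\n";
    }
}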