#!/usr/bin/perl

# Downloads/converts the xCAT docs on the sourceforge Allura wiki to local HTML and PDF.
# This script is not dependent on other xCAT code, so you can copy it to a machine
# that has internet access to run it.  Before running this command, you must have
# curl, pandoc, and latex installed (wget is also invoked to fetch page attachments).
# See: http://sourceforge.net/p/xcat/wiki/Editing_and_Downloading_xCAT_Documentation/#converting-wiki-pages-to-html-and-pdfs

# Note: do not use the --upload option, unless your machine has authority to write to http://xcat.sourceforge.net/doc/ .
#       You also need to set $UPLOADUSER to your sourceforge user:
my $UPLOADUSER = 'mellor';

use strict;
use warnings;
use Getopt::Long;
use Cwd;
use JSON;
use List::Util qw[max];
use POSIX qw(strftime);

# URL for the xCAT Allura wiki API markdown on SourceForge
my $SF_URL   = 'http://sourceforge.net/rest';
my $WIKI_URL = $SF_URL . '/p/xcat/wiki/';

# Update this list if you group any xcat docs on a separate page such that they
# are no longer linked from the main doc page:
my @INDEXDOCS = ('XCAT_Documentation',
                 'Power_775_Cluster_Documentation',
                 'Highly_Available_Management_Node',
                 'Mixed_Cluster_Support',
                 'IBM_HPC_Stack_in_an_xCAT_Cluster');

# Command line option flags
my $HELP;
my $UPLOAD;
my $UPLOADONLY;
my $IGNOREERRORS;
my $CONTINUE;
my $SINGLE_DOC;
my $VERBOSE;

# Working directories (relative to the destination dir); set by the main flow
# for single-doc mode or by gethtmldocs() for a full run.
my $MDDIR;
my $HTMLDIR;
my $PDFDIR;
my $IMAGEDIR;

# Names of the wiki pages that have already been downloaded (name => 1).
my %LOADEDDOCS;

# Print a message followed by a newline, but only when --verbose was given.
sub verbose {
    if ($VERBOSE) { print shift, "\n"; }
}

# Print the usage text and exit with the given exit code.
# The "[<destination-dir>]" placeholder is the optional positional argument
# read from $ARGV[0] below (defaults to the current directory).
my $usage = sub {
    my $exitcode = shift @_;
    print "Usage: getxcatdocs [-?|-h|--help] \n";
    print "Usage: getxcatdocs [-v|--verbose] [-u|--upload] [--uploadonly] [-i|--ignoreerrors] [<destination-dir>]\n";
    print "Usage: getxcatdocs [-v|--verbose] [-c|--continue] [-d|--doc single_doc] [-i|--ignoreerrors] [<destination-dir>]\n";
    exit $exitcode;
};


# Main processing

# Process the cmd line args
Getopt::Long::Configure("bundling");
#Getopt::Long::Configure("pass_through");
Getopt::Long::Configure("no_pass_through");
if (!GetOptions(
        'h|?|help'       => \$HELP,
        'v|verbose'      => \$VERBOSE,
        'u|upload'       => \$UPLOAD,
        'uploadonly'     => \$UPLOADONLY,
        'c|continue'     => \$CONTINUE,
        'i|ignoreerrors' => \$IGNOREERRORS,
        'd|doc=s'        => \$SINGLE_DOC )) {
    $usage->(1);
}

if ($HELP) { $usage->(0); }

if ($^O =~ /^aix/i) { die "Error: this command is not yet supported on AIX.\n"; }

my $DESTDIR = scalar(@ARGV) ? $ARGV[0] : '.';
chdir($DESTDIR) or die "Can not cd to $DESTDIR: $!\n";

my $json = JSON->new();

# Single-doc mode: download and convert just one page, with all output going
# into the destination directory itself.
if ($SINGLE_DOC) {
    $MDDIR    = '.';
    $HTMLDIR  = '.';
    $PDFDIR   = '.';
    $IMAGEDIR = '.';
    download_doc($SINGLE_DOC);
    convert_doc($SINGLE_DOC);
    exit;
}

# Download the HTML docs and convert them all to pdfs
if (!$UPLOADONLY) { gethtmldocs(); }

# tar/compress
my $date    = strftime('%Y%m%d%H%M', localtime);
my $docname = "xcat-docs-snap$date.tar.gz";

my $cmd = "tar -zcf $docname html pdf images 2>&1";
verbose($cmd);
system($cmd) == 0 or die "Error running $cmd: $!, rc=$?";

# Optionally upload the tarball to sourceforge
if ($UPLOAD || $UPLOADONLY) {
    my $count = 1;
    #my $rsynccmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:htdocs/doc/';
    my $rsynccmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:/home/frs/project/x/xc/xcat/doc/';
    print "$rsynccmd\n";
    # Try the upload up to 5 times; system() returns non-zero on failure.
    while ($count <= 5 && system("$rsynccmd 2>&1")) { $count++; }
    if ($count > 5) {
        print STDERR "Error: upload failed after 5 attempts: $rsynccmd\n";
    }
}
exit 0;


# Read a whole file and return its contents as one string.  Dies if the file
# cannot be opened.
sub _read_file {
    my $path = shift;
    open(my $fh, '<', $path) or die "Could not open <$path: $!";
    my $text = join('', <$fh>);
    close($fh);
    return $text;
}

# Replace the contents of a file with the given text.  Dies if the file cannot
# be opened or if the write cannot be completed.
sub _write_file {
    my ($path, $text) = @_;
    open(my $fh, '>', $path) or die "Could not open >$path: $!";
    print {$fh} $text;
    close($fh) or die "Could not write $path: $!";
    return;
}

# Run an external command given as a list (so no shell is involved and the
# arguments need no quoting).  Dies on a non-zero status unless
# --ignoreerrors was given.
sub _run_cmd {
    my @cmd    = @_;
    my $cmdstr = join(' ', @cmd);
    verbose($cmdstr);
    system(@cmd);
    if ($? && !$IGNOREERRORS) { die "error encountered in $cmdstr \n"; }
    return;
}


# Download every index page in @INDEXDOCS plus the pages they list as related
# artifacts, then convert all downloaded pages to HTML and PDF.
# With --continue the markdown files already present in the md dir are treated
# as downloaded; otherwise the md, html, pdf and images dirs are emptied first.
sub gethtmldocs {
    $MDDIR    = 'md';
    $HTMLDIR  = 'html';
    $PDFDIR   = 'pdf';
    $IMAGEDIR = 'images';

    foreach my $dir ($MDDIR, $HTMLDIR, $PDFDIR, $IMAGEDIR) {
        next if -d $dir;
        mkdir($dir) or die "Can not create directory $dir: $!\n";
    }

    if ($CONTINUE) {
        print "CONTINUING with files already in $MDDIR";
        foreach my $mdf (glob "$MDDIR/*.md") {
            $mdf =~ s/^\Q$MDDIR\E\///;
            $mdf =~ s/\.md$//;    # only strip the file extension, not an embedded ".md"
            $LOADEDDOCS{$mdf} = 1;
        }
    } else {
        #delete all the files in the dirs in case they previously ran this
        unlink glob("$MDDIR/*");
        unlink glob("$HTMLDIR/*");
        unlink glob("$PDFDIR/*");
        unlink glob("$IMAGEDIR/*");
    }

    print "\nDownloading and converting the xCAT wiki document list from $WIKI_URL ...\n";

    foreach my $index (@INDEXDOCS) {
        my @related_docs = download_doc($index);
        foreach my $docref (@related_docs) {
            # Reduce an artifact reference of the form /.../<name>/ to <name>
            my $docref_name = $docref;
            $docref_name =~ s{/.*/(.+)/$}{$1};
            download_doc($docref_name);
        }
    }

    foreach my $doc (keys %LOADEDDOCS) {
        convert_doc($doc);
    }
    return;
}


# Download one wiki page through the REST API, fetch its attachments into the
# images dir and save its markdown text as <MDDIR>/<doc_name>.md.
# Arguments: the wiki page name.
# Returns:   the page's related_artifacts list; an empty list when the page was
#            already loaded or (with --ignoreerrors) could not be retrieved.
sub download_doc {
    my $doc_name = shift;

    if ($LOADEDDOCS{$doc_name}) { return; }
    verbose("processing $doc_name");
    $LOADEDDOCS{$doc_name} = 1;

    # $WIKI_URL already ends in '/', so the name is appended directly.
    my @curlcmd = ('curl', '--retry', '5', '-X', 'GET', $WIKI_URL . $doc_name);
    my $curlstr = join(' ', @curlcmd);
    verbose($curlstr);

    # List-form pipe open: the page name never passes through a shell.
    my $docjson = '';
    my $curl_ok = 0;
    if (open(my $curl, '-|', @curlcmd)) {
        local $/;
        my $response = <$curl>;
        if (defined $response) { $docjson = $response; }
        $curl_ok = close($curl);    # false when curl exits non-zero
    }
    if (!$curl_ok && !$IGNOREERRORS) { die "error encountered in $curlstr \n"; }

    my $jsout      = eval { $json->decode($docjson) };
    my $decode_err = $@;
    if (ref($jsout) ne 'HASH') {
        my $reason = $decode_err ? $decode_err : "unexpected response\n";
        if (!$IGNOREERRORS) { die "error decoding response of $curlstr: $reason"; }
        warn "skipping $doc_name, could not decode response of $curlstr: $reason";
        # Forget the page so nothing later tries to convert a file that was never written.
        delete $LOADEDDOCS{$doc_name};
        return;
    }

    foreach my $att (@{ $jsout->{attachments} || [] }) {
        _run_cmd('wget', '-P', "$IMAGEDIR/", $att->{url});
    }

    _write_file("$MDDIR/${doc_name}.md", defined $jsout->{text} ? $jsout->{text} : '');

    return @{ $jsout->{related_artifacts} || [] };
}


# Convert the downloaded markdown of one page to HTML and PDF with pandoc.
# Includes are expanded, xcat tables are converted, image references are made
# local and [TOC] markers are removed.  The markdown file is rewritten in place
# before each pandoc run.
# Arguments: the wiki page name (its .md file must exist in the md dir).
sub convert_doc {
    my $doc_name = shift;
    my $mdfile   = "$MDDIR/${doc_name}.md";

    my $doc_text = process_includes(_read_file($mdfile), 0);

    if ($doc_text =~ /begin_xcat_table/) {
        # convert_tables() works on the file, so hand the text over via the file
        _write_file($mdfile, $doc_text);
        convert_tables($doc_name);
        $doc_text = _read_file($mdfile);
    }

    ## Make image refs local
    # The character classes keep each match inside a single reference, so
    # several images on one line are rewritten individually.
    $doc_text =~ s{!\[\]\([^)\n]+/([^)/\n]+)\.(png|PNG|jpg)\)}{![](../$IMAGEDIR/$1.$2)}g;
    $doc_text =~ s{\[img src=([^\]\n]+)\.(png|PNG|jpg)\]}{![](../$IMAGEDIR/$1.$2)}g;

    ## Remove [TOC] entries
    $doc_text =~ s/\[TOC\]//g;

    _write_file($mdfile, $doc_text);

    _run_cmd('pandoc', '-s', '--toc', $mdfile, '-o', "$HTMLDIR/${doc_name}.html");

    # This rename is probably a hack, but I didn't want to take the time to
    # figure out what was going on:
    #   pandoc does different processing if target filetype is html
    #   but all internal refs only work in browser when there is no html filetype
    rename("$HTMLDIR/${doc_name}.html", "$HTMLDIR/${doc_name}")
        or warn "Could not rename $HTMLDIR/${doc_name}.html: $!\n";

    # For the PDF run the image refs are rewritten from ../<imagedir>/ to ./<imagedir>/
    $doc_text =~ s{!\[\]\(\.\./\Q$IMAGEDIR\E/([^)\n]+)\.(png|PNG|jpg)\)}{![](./$IMAGEDIR/$1.$2)}g;
    _write_file($mdfile, $doc_text);

    _run_cmd('pandoc', '--toc', $mdfile, '-o', "$PDFDIR/${doc_name}.pdf");
    return;
}


# Expand every [[include ref=<page>]] marker in the text with the (recursively
# expanded) markdown of the referenced page, downloading that page if needed.
# Arguments: the document text, and the current include nesting depth.
# Returns:   the text with all includes expanded.
# Dies when the nesting depth exceeds 10 or an included file cannot be opened.
sub process_includes {
    my $doc_text     = shift;
    my $include_nest = shift;

    if ($include_nest++ > 10) { die "nested include processing greater than 10.  Infinite recursion???"; }

    # The non-greedy capture stops at the first "]]" and leaves out trailing blanks.
    while ($doc_text =~ /\[\[\s*include \s*ref=\s*([^\]\n]+?)\s*\]\]/) {
        my $next_include = $1;
        download_doc($next_include);
        my $include_text = _read_file("$MDDIR/${next_include}.md");
        $include_text = process_includes($include_text, $include_nest);
        # \Q..\E: the page name is literal text, not a pattern.  This guarantees
        # the marker found above is replaced, so the loop always makes progress.
        $doc_text =~ s/\[\[\s*include \s*ref=\s*\Q$next_include\E\s*\]\]/$include_text/g;
    }

    return $doc_text;
}


# Render the collected rows of one xcat table as text lines: rows boxed with
# '+', '-' and '|', and '=' used for the separator that replaces a row starting
# with "--".
# Arguments: the number of columns, a ref to the column widths, and a ref to
#            the raw '|' separated table lines.
# Returns:   the list of output lines (each ending in a newline).
sub _render_xcat_table {
    my ($numcols, $colwidths, $tablines) = @_;

    my $separator = '+';
    foreach my $c (@$colwidths) {
        if ($c > 0) { $separator .= '-' x $c; }
        $separator .= '+';
    }
    $separator .= "\n";
    my $headsep = $separator;
    $headsep =~ s/-/=/g;

    my @out;
    my $nosep = 0;
    foreach my $tabline (@$tablines) {
        my $row = $tabline;
        if ($row =~ /^\s*$/) { next; }
        if ($row =~ /^\-\-/) {
            push(@out, $headsep);
            $nosep = 1;    # the header separator replaces the next row separator
            next;
        }
        if ($nosep) { $nosep = 0; } else { push(@out, $separator); }

        $row =~ s/^\s*\|//;
        my @vals = split(/\|/, $row);

        # Wrap each cell value into chunks of the column width;
        # $tabrow[line][column] holds the chunk for that physical output line.
        my $last_cell_line = 0;
        my $colnum         = 0;
        my @tabrow;
        foreach my $c (@$colwidths) {
            if ($c > 0) {
                my $colval = defined $vals[$colnum] ? $vals[$colnum] : '';
                $colval =~ s/(\s*)$//;
                my $vallen    = length($colval);
                my $cell_line = 0;
                while ($vallen > $c) {
                    $tabrow[$cell_line++][$colnum] = substr($colval, 0, $c);
                    $vallen -= $c;
                    $colval = substr($colval, $c, $vallen);
                }
                $tabrow[$cell_line][$colnum] = substr($colval, 0, $vallen);
                if ($vallen < $c) {
                    $tabrow[$cell_line][$colnum] .= " " x ($c - $vallen);
                }
                $last_cell_line = max($cell_line, $last_cell_line);
            }
            $colnum++;
        }

        my @rowlines;
        for (my $i = 0; $i <= $last_cell_line; $i++) {
            for (my $j = 0; $j <= $numcols - 1; $j++) {
                $rowlines[$i] .= "|";
                # Test definedness, not truth: a chunk consisting of "0" is real content.
                if (defined $tabrow[$i][$j]) {
                    $rowlines[$i] .= $tabrow[$i][$j];
                } else {
                    my $width = defined $colwidths->[$j] ? $colwidths->[$j] : 0;
                    $rowlines[$i] .= " " x $width;
                }
            }
            $rowlines[$i] .= "|\n";
        }
        push(@out, @rowlines);
    }
    push(@out, $separator);

    return @out;
}


# Rewrite <MDDIR>/<doc_name>.md in place, replacing each xcat table with the
# text table produced by _render_xcat_table().
# The control markers live inside "<!---" ... "-->" comments: begin_xcat_table,
# numcols=<n>, colwidths=<w1,w2,...> and end_xcat_table.  Every line inside such
# a comment is dropped from the output; the lines between the begin and end
# markers that are outside the comments are the table rows.
# Arguments: the wiki page name.
sub convert_tables {
    my $doc_name = shift;
    my $infile   = "$MDDIR/${doc_name}.md";
    my $outfile  = $infile;

    open(my $in, '<', $infile) or die "Could not open <$infile: $!";
    my @inlines = <$in>;
    close($in);

    my @outlines;
    my @tablines;
    my $in_comment = 0;
    my $xcat_table = 0;
    my $numcols    = 1;
    my @colwidths  = (0);

    verbose("converting tables in $doc_name");
    foreach my $line (@inlines) {
        if ($line =~ /\<\!---/) { $in_comment = 1; next; }
        if ($in_comment) {
            if ($line =~ /begin_xcat_table/) { $xcat_table = 1; next; }
            if ($xcat_table) {
                if ($line =~ /numcols=(\d+)/) { $numcols = $1; next; }
                if ($line =~ /colwidths=([\d,]+)/) {
                    # An empty entry (e.g. "10,,20") counts as width 0
                    @colwidths = map { length($_) ? $_ : 0 } split(',', $1);
                    next;
                }
            }
            if ($line =~ /end_xcat_table/) {
                push(@outlines, _render_xcat_table($numcols, \@colwidths, \@tablines));
                # reset to process next table
                @tablines   = ();
                $xcat_table = 0;
                $numcols    = 1;
                @colwidths  = (0);
                next;
            }
            if ($line =~ /--\>/) { $in_comment = 0; next; }
            next;
        }
        if ($xcat_table) { push(@tablines, $line); next; }
        push(@outlines, $line);
    }

    open(my $out, '>', $outfile) or die "Could not open >$outfile: $!";
    print {$out} @outlines;
    close($out) or die "Could not write $outfile: $!";
    return;
}