xcat-core/xCAT-client/bin/getxcatdocs

#!/usr/bin/perl


# Downloads/converts the xCAT docs on the sourceforge Allura wiki to local HTML and PDF.
# This script is not dependent on other xCAT code, so you can copy it to a machine
# that has internet access to run it.  Before running this command, you must have
# curl, pandoc, and latex installed.  See: http://sourceforge.net/p/xcat/wiki/Editing_and_Downloading_xCAT_Documentation/#converting-wiki-pages-to-html-and-pdfs


# Note: do not use the --upload option, unless your machine has authority to write to http://xcat.sourceforge.net/doc/ .
#       You also need to set $UPLOADUSER to your sourceforge user:
my $UPLOADUSER = 'bp-sawyers';

use strict;
use Getopt::Long;
use Cwd;
use JSON;
use List::Util qw[max];


# URL for the xCAT Allura wiki API markdown on SourceForge
my $SF_URL='http://sourceforge.net/rest';
my $WIKI_URL=$SF_URL.'/p/xcat/wiki/';

# Update this list if you group any xcat docs on a separate page such that they
# are no longer linked from the main doc page:
my @INDEXDOCS = ('XCAT_Documentation',
                 'Power_775_Cluster_Documentation',
                 'Highly_Available_Management_Node',
                 'Mixed_Cluster_Support',
                 'IBM_HPC_Stack_in_an_xCAT_Cluster');


# Command line option values, filled in by GetOptions() in the main processing below.
my $HELP;           # -h|-?|--help: print the usage text and exit 0
my $UPLOAD;         # -u|--upload: rsync the tarball to sourceforge after creating it
my $UPLOADONLY;     # --uploadonly: skip the download/convert step, only tar and upload
my $IGNOREERRORS;   # -i|--ignoreerrors: do not die when curl, wget or pandoc returns nonzero
my $CONTINUE;       # -c|--continue: keep existing files; docs already in $MDDIR count as downloaded
my $SINGLE_DOC;     # -d|--doc: name of a single wiki doc to download and convert
my $VERBOSE;        # -v|--verbose: make verbose() print its messages

# Working directories (relative to the destination dir) and download bookkeeping.
my $MDDIR;          # where the downloaded markdown (.md) files are written
my $HTMLDIR;        # where pandoc writes the HTML output
my $PDFDIR;         # where pandoc writes the PDF output
my $IMAGEDIR;       # where wget stores the wiki page attachments
my %LOADEDDOCS;     # doc name => 1 for every doc that download_doc() has already handled

# Print the given message followed by a newline, but only when the
# -v|--verbose option was given.
sub verbose {
    my ($message) = @_;
    print $message, "\n" if $VERBOSE;
}

# Print the command syntax to stdout and terminate the script.
#   $exitcode - the exit status to end the process with
my $usage = sub {
    my ($exitcode) = @_;
    print
        "Usage: getxcatdocs [-?|-h|--help] \n",
        "Usage: getxcatdocs [-v|--verbose] [-u|--upload] [--uploadonly] [-U|--uploaduser sourceforge_id] [-i|--ignoreerrors] [<destination-dir>]\n",
        "Usage: getxcatdocs [-v|--verbose] [-c|--continue] [-d|--doc single_doc] [-i|--ignoreerrors] [<destination-dir>]\n";
    exit $exitcode;
};


# Main processing
#
# Flow: parse the command line and cd to the destination directory.  Then either
#   (a) with -d: download and convert that one doc into the current directory
#       and exit, or
#   (b) download and convert all docs (skipped with --uploadonly), tar up the
#       html, pdf and images directories, and optionally rsync the tarball to
#       sourceforge.

    # Process the cmd line args
    Getopt::Long::Configure("bundling");
    #Getopt::Long::Configure("pass_through");
    Getopt::Long::Configure("no_pass_through");
    if (!GetOptions(
         'h|?|help'       => \$HELP,
         'v|verbose'      => \$VERBOSE,
         'u|upload'       => \$UPLOAD,
         'uploadonly'     => \$UPLOADONLY,
         # Takes the sourceforge id as its value (and has the -U short form), as the
         # usage text documents.  With bundling enabled, single-character options
         # are case sensitive, so -U does not clash with -u.
         'U|uploaduser=s' => \$UPLOADUSER,
         'c|continue'     => \$CONTINUE,
         'i|ignoreerrors' => \$IGNOREERRORS,
         'd|doc=s'        => \$SINGLE_DOC ))
          { $usage->(1); }

    if ($HELP) { $usage->(0); }

    if ($^O =~ /^aix/i) { die "Error: this command is not yet supported on AIX.\n"; }

    my $DESTDIR = scalar(@ARGV) ? $ARGV[0] : '.';
    chdir($DESTDIR) or die "Can not cd to $DESTDIR: $!\n";

    # Shared JSON decoder, used by download_doc()
    my $json = JSON->new();

    if ($SINGLE_DOC) {
      # Single doc mode: everything is read from and written to the current directory
      $MDDIR = '.';
      $HTMLDIR = '.';
      $PDFDIR = '.';
      $IMAGEDIR = '.';
      download_doc($SINGLE_DOC);
      convert_doc($SINGLE_DOC);
      exit;
    }

    # Download the HTML docs and convert them all to pdfs
    if (!$UPLOADONLY) { gethtmldocs(); }

    # tar/compress.  The tarball name carries a local-time YYYYMMDDhhmm stamp.
    my @now = localtime();
    my $date = sprintf('%04d%02d%02d%02d%02d',
                       $now[5] + 1900, $now[4] + 1, $now[3], $now[2], $now[1]);
    my $docname="xcat-docs-snap$date.tar.gz";

    my $cmd = "tar -zcf $docname html pdf images 2>&1";
    verbose($cmd);
    system($cmd) == 0 or die "Error running $cmd: $!, rc=$?";

    # Optionally upload the tarball to sourceforge
    if ($UPLOAD || $UPLOADONLY) {
        my $max_attempts = 5;
        #my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:htdocs/doc/';
        my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:/home/frs/project/x/xc/xcat/doc/';
        print "$cmd\n";

        # Retry the upload a few times, stopping at the first success.
        my $uploaded = 0;
        foreach my $attempt (1 .. $max_attempts) {
            if (system("$cmd 2>&1") == 0) { $uploaded = 1; last; }
        }

        # Do not report success when every attempt failed.
        if (!$uploaded) {
            warn "Error: upload failed after $max_attempts attempts: $cmd\n";
            exit 1;
        }
    }
    exit 0;


# Download all the xCAT wiki docs and convert each of them to HTML and PDF.
#
# Sets the global working directories ($MDDIR, $HTMLDIR, $PDFDIR, $IMAGEDIR) to
# md, html, pdf and images under the current directory and creates them if
# needed.  Every doc in @INDEXDOCS is downloaded, followed by every doc that
# download_doc() reports as related to it; then convert_doc() is run for every
# doc recorded in %LOADEDDOCS.
#
# With --continue the files from a previous run are kept and every doc that
# already has a .md file is marked as loaded.  download_doc() returns an empty
# list for a doc that is already marked, so the related docs of such a doc are
# not followed again.  Without --continue the four directories are emptied first.
#
# Dies if one of the working directories can not be created.
sub gethtmldocs {

    $MDDIR = 'md';
    $HTMLDIR = 'html';
    $PDFDIR = 'pdf';
    $IMAGEDIR = 'images';
    my @workdirs = ($MDDIR, $HTMLDIR, $PDFDIR, $IMAGEDIR);

    foreach my $dir (@workdirs) {
        next if -d $dir;
        mkdir($dir) or die "Could not create directory $dir: $!\n";
    }

    if ($CONTINUE) {
        print "CONTINUING with files already in $MDDIR\n";
        my @mdfiles = glob "$MDDIR/*.md";
        foreach my $mdf (@mdfiles) {
            # Reduce md/<name>.md to <name>
            $mdf =~ s/^\Q$MDDIR\E\///;
            $mdf =~ s/\.md$//;
            $LOADEDDOCS{$mdf} = 1;
        }
    } else {
        # delete all the files in the dirs in case they previously ran this
        foreach my $dir (@workdirs) {
            unlink glob("$dir/*");
        }
    }

    print "\nDownloading and converting the xCAT wiki document list from $WIKI_URL ...\n";
    foreach my $index (@INDEXDOCS) {
        my @related_docs = download_doc($index);
        foreach my $docref (@related_docs) {
            # Keep only the last component of a reference of the form /.../<name>/
            my $docref_name = $docref;
            $docref_name =~ s/\/.*\/(.+)\/$/$1/;
            download_doc($docref_name);
        }
    }

    foreach my $doc (keys %LOADEDDOCS) {
        convert_doc($doc);
    }
    return;
}


# Download one wiki doc through the wiki REST API and store its markdown text
# in $MDDIR/<doc_name>.md.  The attachments listed in the API response are
# downloaded into $IMAGEDIR with wget.
#
#   $doc_name - name of the wiki page
#
# Returns the list found under "related_artifacts" in the API response, or an
# empty list when the doc was already handled (see %LOADEDDOCS) or when the
# response could not be used and --ignoreerrors is in effect.
#
# Dies when curl or wget fails or the response is not a JSON object (unless
# --ignoreerrors was given), and when the .md file can not be written.
#
# curl and wget are started without a shell, because the doc names and
# attachment URLs come from wiki content and must not be interpreted by a shell.
sub download_doc {
    my $doc_name = shift;

    if ( $LOADEDDOCS{$doc_name} ) { return; }
    verbose("processing $doc_name");
    $LOADEDDOCS{$doc_name}=1;

    my @curl = ('curl', '--retry', '5', '-X', 'GET', "$WIKI_URL/$doc_name");
    my $curlcmd = join(' ', @curl);
    verbose($curlcmd);

    my $docjson = '';
    my $curl_failed = 1;
    if (open(my $curl_out, '-|', @curl)) {
        local $/;    # read the whole response at once
        $docjson = <$curl_out>;
        $docjson = '' unless defined $docjson;
        # close() on a pipe returns false when the command exited nonzero
        $curl_failed = close($curl_out) ? 0 : 1;
    }
    if ($curl_failed && !$IGNOREERRORS) { die "error encountered in $curlcmd \n";}

    my $jsout = eval { $json->decode($docjson) };
    my $decode_error = $@;
    if (ref($jsout) ne 'HASH') {
        my $reason = $decode_error || "the response is not a JSON object\n";
        if (!$IGNOREERRORS) {
            die "error: could not use the wiki API response for $doc_name: $reason";
        }
        warn "warning: skipping $doc_name, could not use the wiki API response: $reason";
        # No .md file was written, so do not let this doc be converted later
        delete $LOADEDDOCS{$doc_name};
        return;
    }

    my $attachments = ref($jsout->{attachments}) eq 'ARRAY' ? $jsout->{attachments} : [];
    foreach my $att (@$attachments) {
        next unless ref($att) eq 'HASH' && defined $att->{url};
        my @wget = ('wget', '-P', "$IMAGEDIR/", $att->{url});
        my $wgetcmd = join(' ', @wget);
        verbose($wgetcmd);
        system(@wget);
        if ($? && !$IGNOREERRORS) { die "error encountered in $wgetcmd \n";}
    }

    my $mdfile = "$MDDIR/${doc_name}.md";
    open(my $md_fh, '>', $mdfile) or die "Could not open >$mdfile: $!\n";
    print {$md_fh} (defined $jsout->{text} ? $jsout->{text} : '');
    close($md_fh) or die "Could not write $mdfile: $!\n";

    # A response without related_artifacts simply has no related docs
    my $related = ref($jsout->{related_artifacts}) eq 'ARRAY' ? $jsout->{related_artifacts} : [];
    return @$related;
}


# Convert one downloaded doc ($MDDIR/<doc_name>.md) to HTML and PDF with pandoc.
#
#   $doc_name - name of the doc; its .md file must already exist in $MDDIR
#
# Steps:
#   1. expand the [[include ref=...]] directives (process_includes)
#   2. convert the xcat tables, if the text contains any (convert_tables)
#   3. remove the [TOC] entries
#   4. point the image references at the local image directory and run pandoc,
#      once for HTML and once for PDF.  The two runs need different image
#      paths: the browser resolves them relative to the HTML file, while the
#      PDF run resolves them relative to the current directory.
#
# The .md file is rewritten along the way and ends up holding the text that was
# used for the PDF run.
#
# Dies when a file can not be read or written, and when pandoc fails (unless
# --ignoreerrors was given).  pandoc is started without a shell, because doc
# names can come from wiki content.
sub convert_doc {
    my $doc_name = shift;
    my $mdfile = "$MDDIR/${doc_name}.md";

    my $doc_text = _read_md_text($mdfile);
    $doc_text = process_includes($doc_text,0);

    if ($doc_text =~ /begin_xcat_table/) {
        # convert_tables() works on the file, so hand the text over through it
        _write_md_text($mdfile, $doc_text);
        convert_tables($doc_name);
        $doc_text = _read_md_text($mdfile);
    }

    ## Remove [TOC] entries
    $doc_text =~ s/\[TOC\]//g;

    ## HTML: image refs are relative to the directory of the HTML file.  When
    ## the HTML is written to the current directory (single doc mode), there is
    ## no directory level to climb out of.
    my $html_imagedir = ($HTMLDIR eq '.') ? $IMAGEDIR : "../$IMAGEDIR";
    _write_md_text($mdfile, _localize_image_refs($doc_text, $html_imagedir));

    my $htmlfile = "$HTMLDIR/${doc_name}.html";
    my @pandoc_html = ('pandoc', '-s', '--toc', $mdfile, '-o', $htmlfile);
    my $pandoccmd = join(' ', @pandoc_html);
    verbose($pandoccmd);
    system(@pandoc_html);
    if ($? && !$IGNOREERRORS) { die "error encountered in $pandoccmd \n";}
    # This rename is probably a hack, but I didn't want to take the time to
    # figure out what was going on:
    #   pandoc does different processing if target filetype is html
    #   but all internal refs only work in browser when there is no html filetype
    rename($htmlfile, "$HTMLDIR/${doc_name}")
        or warn "warning: could not rename $htmlfile to $HTMLDIR/${doc_name}: $!\n";

    ## PDF: image refs are relative to the current directory
    _write_md_text($mdfile, _localize_image_refs($doc_text, "./$IMAGEDIR"));

    my @pandoc_pdf = ('pandoc', '--toc', $mdfile, '-o', "$PDFDIR/${doc_name}.pdf");
    my $pandoccmd2 = join(' ', @pandoc_pdf);
    verbose($pandoccmd2);
    system(@pandoc_pdf);
    if ($? && !$IGNOREERRORS) { die "error encountered in $pandoccmd2 \n";}

    return;
}

# Return the whole content of the file $path as one string.  Dies if the file
# can not be opened.
sub _read_md_text {
    my ($path) = @_;
    open(my $fh, '<', $path) or die "Could not open <$path: $!\n";
    local $/;
    my $text = <$fh>;
    close($fh);
    return defined $text ? $text : '';
}

# Replace the content of the file $path with $text.  Dies if the file can not
# be opened or written.
sub _write_md_text {
    my ($path, $text) = @_;
    open(my $fh, '>', $path) or die "Could not open >$path: $!\n";
    print {$fh} $text;
    close($fh) or die "Could not write $path: $!\n";
    return;
}

# Return a copy of $text in which the .png, .PNG and .jpg image references
# point into the directory $imagedir:
#   ![](<anything>/<name>.<ext>)  becomes  ![](<imagedir>/<name>.<ext>)
#   [img src=<name>.<ext>]        becomes  ![](<imagedir>/<name>.<ext>)
# Each reference is matched on its own (a markdown target ends at the first
# ")" and an img tag at the first "]"), so several references on one line are
# all converted instead of being merged into one.
sub _localize_image_refs {
    my ($text, $imagedir) = @_;
    $text =~ s{!\[\]\([^)\n]+/([^/)\n]+)\.(png|PNG|jpg)\)}{![]($imagedir/$1.$2)}g;
    $text =~ s{\[img src=([^\]\n]+)\.(png|PNG|jpg)\]}{![]($imagedir/$1.$2)}g;
    return $text;
}


# Expand the [[include ref=<doc>]] directives in a doc text.
#
#   $doc_text     - the markdown text to process
#   $include_nest - current include nesting depth; pass 0 for a top level doc
#
# Every included doc is downloaded if needed (download_doc), read from
# $MDDIR/<doc>.md, processed recursively for its own includes, and then
# substituted for all directives in $doc_text that name it.
#
# Returns the text with the include directives replaced.
# Dies when the nesting gets deeper than 10 levels or when the file of an
# included doc can not be opened.
sub process_includes {
    my $doc_text = shift;
    my $include_nest = shift;

    if ($include_nest++ > 10) { die "nested include processing greater than 10.  Infinite recursion???"; }

    # The doc name is a run of non-blank characters, so a match never reaches
    # past the end of its own directive, even with several directives on a line.
    while ($doc_text =~ /\[\[\s*include \s*ref=\s*(\S+?)\s*\]\]/) {
        my $next_include = $1;
        download_doc($next_include);

        my $include_file = "$MDDIR/${next_include}.md";
        open(my $include_fh, '<', $include_file) or die "Could not open <$include_file: $!\n";
        my $include_text = do { local $/; <$include_fh> };
        close($include_fh);
        $include_text = '' unless defined $include_text;

        $include_text = process_includes($include_text,$include_nest);

        # \Q...\E makes the doc name match literally.  Without it a name holding
        # regex metacharacters could fail to match and this loop would never end.
        $doc_text =~ s/\[\[\s*include \s*ref=\s*\Q$next_include\E\s*\]\]/$include_text/g;
    }

    return $doc_text;
}


# Convert the xcat tables in $MDDIR/<doc_name>.md to fixed width grid tables,
# rewriting the file in place.
#
#   $doc_name - name of the doc whose .md file is to be converted
#
# Input format, as recognized by this code (each marker on a line of its own):
#   a line containing <!--- starts a comment, a line containing --> ends it
#   inside a comment:
#     begin_xcat_table      - the lines after the comment are table rows
#     numcols=<n>           - number of columns to output per row
#     colwidths=<w1,w2,...> - width of each column in characters
#     end_xcat_table        - output the collected rows as a grid table
# All comment lines are removed from the output.  Lines outside of comments
# and outside of a table are copied unchanged.
#
# Dies if the file can not be opened or written.
sub convert_tables {
  my $doc_name=shift;
  my $infile="$MDDIR/${doc_name}.md";
  my $outfile=$infile;

  open(my $in_fh, '<', $infile) or die "Could not open <$infile: $!\n";
  my @inlines=<$in_fh>;
  close($in_fh);
  my @outlines;
  my @tablines;

  my $in_comment=0;
  my $xcat_table=0;
  my $numcols=1;
  my @colwidths=(0);

  verbose("converting tables in $doc_name");
  foreach my $line (@inlines) {
      if ($line =~ /\<\!---/) { $in_comment=1; next; }
      if ($in_comment) {
         if ($line =~ /begin_xcat_table/) {$xcat_table=1; next;}
         if ($xcat_table) {
            if ($line =~ /numcols=(\d+)/) { $numcols=$1; next;}
            if ($line =~ /colwidths=([\d,]+)/) { @colwidths=split(',',$1); next;}
         }
         if ($line =~ /end_xcat_table/) {
            push (@outlines, _render_xcat_table(\@tablines,$numcols,\@colwidths));

            # reset to process next table
            @tablines = ();
            $xcat_table=0; $numcols=1; @colwidths=(0);
            next;
         }
         if ($line =~ /--\>/) {$in_comment=0;next;}
         next;
      }
      if ($xcat_table) { push (@tablines,$line); next; }

      push (@outlines,$line);
  }

  # A table that is never closed must not swallow the rest of the document
  if (@tablines) {
      warn "warning: $infile has a begin_xcat_table without end_xcat_table, keeping those lines unconverted\n";
      push (@outlines,@tablines);
  }

  open(my $out_fh, '>', $outfile) or die "Could not open >$outfile: $!\n";
  print {$out_fh} @outlines;
  close($out_fh) or die "Could not write $outfile: $!\n";

  return;
}

# Render the collected rows of one xcat table as grid table lines.
#
#   $tablines  - ref to the array of raw row lines (cells separated by |)
#   $numcols   - number of columns to output per row
#   $colwidths - ref to the array of column widths
#
# Blank row lines are skipped.  A row line starting with -- becomes the header
# separator (made of =).  A cell text longer than its column width is cut
# into pieces of that width, which are output on consecutive lines.
#
# Returns the list of output lines, each ending in a newline.
sub _render_xcat_table {
  my ($tablines, $numcols, $colwidths) = @_;
  my @rendered;

  my $separator = '+';
  foreach my $c (@$colwidths) {
     if ($c > 0) { $separator .= '-' x $c; }
     $separator .= '+';
  }
  $separator .= "\n";
  (my $headsep = $separator) =~ s/-/=/g;

  my $nosep=0;   # set right after a header separator, to not output a second separator
  foreach my $tabline (@$tablines) {
    if ($tabline =~ /^\s*$/) { next;}
    if ($tabline =~ /^\-\-/) {
      push (@rendered,$headsep);
      $nosep = 1;
      next;
    }
    if ($nosep) { $nosep=0;} else {push (@rendered,$separator);}

    (my $row = $tabline) =~ s/^\s*\|//;
    my @vals = split (/\|/,$row);

    # $tabrow[line][column] holds the piece of the cell text for that output line
    my @tabrow;
    my $last_cell_line=0;
    my $colnum=0;
    foreach my $c (@$colwidths) {
      if ($c > 0) {
        my $colval = defined $vals[$colnum] ? $vals[$colnum] : '';
        $colval =~ s/\s*$//;
        my $cell_line=0;
        while (length($colval) > $c) {
          # cut the first $c characters off the front of $colval
          $tabrow[$cell_line++][$colnum] = substr($colval,0,$c,'');
        }
        # the last piece is padded with blanks to the column width
        $tabrow[$cell_line][$colnum] = $colval . (' ' x ($c - length($colval)));
        $last_cell_line = max($cell_line,$last_cell_line);
      }
      $colnum++;
    }

    foreach my $i (0 .. $last_cell_line) {
      my $rowline = '';
      foreach my $j (0 .. $numcols-1) {
        my $cell = $tabrow[$i][$j];
        $rowline .= '|';
        # test for defined, not for truth: a piece can be the string "0"
        $rowline .= defined $cell ? $cell : ' ' x ($colwidths->[$j] || 0);
      }
      push (@rendered, $rowline . "|\n");
    }
  }
  push (@rendered,$separator);

  return @rendered;
}