rewrote getxcatdocs to work with Allura wiki on sourceforge
This commit is contained in:
parent
6bfb39fb7d
commit
77625c7fe9
@ -1,191 +1,214 @@
|
||||
#!/usr/bin/perl
|
||||
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
|
||||
|
||||
# Downloads/converts the xCAT docs on the sourceforge wiki to local HTML and PDF.
|
||||
|
||||
# Downloads/converts the xCAT docs on the sourceforge Allura wiki to local HTML and PDF.
|
||||
# This script is not dependent on other xCAT code, so you can copy it to a machine
|
||||
# that has internet access to run it. Before running this command, you must have
|
||||
# wget, python, and pisa installed. See: http://sourceforge.net/apps/mediawiki/xcat/index.php?title=Editing_xCAT_Documentation_Pages#Converting_Wiki_Pages_to_HTML_and_PDFs .
|
||||
# curl, pandoc, and latex installed. See: http://sourceforge.net/p/xcat/wiki/Editing_and_Downloading_xCAT_Documentation/#converting-wiki-pages-to-html-and-pdfs
|
||||
|
||||
|
||||
# Note: do not use the --upload option, unless your machine has authority to write to http://xcat.sourceforge.net/doc/ .
|
||||
# You also need to set $UPLOADUSER to your sourceforge user.
|
||||
# You also need to set $UPLOADUSER to your sourceforge user:
|
||||
my $UPLOADUSER = 'mellor';
|
||||
|
||||
#BEGIN
|
||||
#{
|
||||
# $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr';
|
||||
#}
|
||||
use strict;
|
||||
#use lib "$::XCATROOT/lib/perl";
|
||||
#use xCAT::Utils;
|
||||
use Getopt::Long;
|
||||
#use File::Path;
|
||||
use Cwd;
|
||||
#use Data::Dumper;
|
||||
use JSON;
|
||||
|
||||
# URL for the xCAT Allura wiki API markdown on SourceForge
|
||||
my $sf_url='http://sourceforge.net/rest';
|
||||
my $wiki_url=$sf_url.'/p/xcat/wiki/';
|
||||
|
||||
# Update this list if you group any xcat docs on a separate page such that they
|
||||
# are no longer linked from the main doc page:
|
||||
my @indexdocs = ('XCAT_Documentation',
|
||||
'Power_775_Cluster_Documentation',
|
||||
'Highly_Available_Management_Node',
|
||||
'Mixed_Cluster_Support',
|
||||
'IBM_HPC_Stack_in_an_xCAT_Cluster');
|
||||
|
||||
# Update this list if you group any xcat docs on a separate page such that they are no longer linked from the
|
||||
# main doc page.
|
||||
my @indexdocs = ('XCAT_Documentation', 'Power_775_Cluster_Documentation', 'Highly_Available_Management_Node', 'Mixed_Cluster_Support', 'IBM_HPC_Stack_in_an_xCAT_Cluster');
|
||||
|
||||
#my $VERSION;
|
||||
my $HELP;
|
||||
my $UPLOAD;
|
||||
my $UPLOADONLY;
|
||||
my $IGNOREERRORS;
|
||||
my $SINGLE_DOC;
|
||||
my $VERBOSE;
|
||||
|
||||
# Print the one-line command synopsis on stdout, then terminate the script
# using the exit status handed in by the caller.
my $usage = sub {
    my ($status) = @_;
    print "Usage: getxcatdocs [-?|-h|--help] [-v|--verbose] [-u|--upload] [--uploadonly] [<destination-dir>]\n";
    exit $status;
};
|
||||
|
||||
# Process the cmd line args
|
||||
Getopt::Long::Configure("bundling");
|
||||
#Getopt::Long::Configure("pass_through");
|
||||
Getopt::Long::Configure("no_pass_through");
|
||||
if (!GetOptions('h|?|help' => \$HELP, 'v|verbose' => \$VERBOSE, 'u|upload' => \$UPLOAD, 'uploadonly' => \$UPLOADONLY )) { $usage->(1); }
|
||||
|
||||
if ($HELP) { $usage->(0); }
|
||||
|
||||
#if ($VERSION) {
|
||||
#print xCAT::Utils->Version(), "\n";
|
||||
# exit;
|
||||
#}
|
||||
|
||||
if ($^O =~ /^aix/i) { die "Error: this command is not yet supported on AIX.\n"; }
|
||||
|
||||
my $destdir = scalar(@ARGV) ? $ARGV[0] : '.';
|
||||
chdir($destdir) or die "Can not cd to $destdir: $!\n";
|
||||
#my $docdir = $ENV{'PWD'};
|
||||
|
||||
# Download the HTML docs and convert them all to pdfs
|
||||
my @dir;
|
||||
if (!$UPLOADONLY) {
|
||||
@dir = gethtmldocs('html');
|
||||
convert2pdf('pdf', \@dir);
|
||||
}
|
||||
|
||||
|
||||
# tar/compress
|
||||
my $date=`date +%Y%m%d%H%M`;
|
||||
chop $date;
|
||||
my $docname="xcat-docs-snap$date.tar.gz";
|
||||
#system('pwd');
|
||||
my $cmd = "tar -zcf $docname html pdf 2>&1";
|
||||
verbose($cmd);
|
||||
system($cmd) == 0 or die "Error running $cmd: $!, rc=$?";
|
||||
|
||||
# Optionally upload the tarball to sourceforge
|
||||
if ($UPLOAD || $UPLOADONLY) {
|
||||
my $UPLOADUSER = 'bp-sawyers';
|
||||
my $count = 1;
|
||||
#my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:htdocs/doc/';
|
||||
my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:/home/frs/project/x/xc/xcat/doc/';
|
||||
print "$cmd\n";
|
||||
while ($count<=5 && system("$cmd 2>&1")) { $count++; }
|
||||
}
|
||||
exit 0;
|
||||
|
||||
|
||||
# Print the given message followed by a newline, but only when the
# -v|--verbose flag was set on the command line.
sub verbose {
    my $message = shift;
    print $message, "\n" if $VERBOSE;
}
|
||||
|
||||
# Show the three supported invocation forms on stdout, then end the script
# with the exit status passed in by the caller.
my $usage = sub {
    my ($status) = @_;
    print
        "Usage: getxcatdocs [-?|-h|--help] \n",
        "Usage: getxcatdocs [-v|--verbose] [-u|--upload] [--uploadonly] [-i|--ignoreerrors] [<destination-dir>]\n",
        "Usage: getxcatdocs [-v|--verbose] [-d|--doc single_doc] [-i|--ignoreerrors] [<destination-dir>]\n";
    exit $status;
};
|
||||
|
||||
|
||||
# Main processing
|
||||
|
||||
# Process the cmd line args
|
||||
Getopt::Long::Configure("bundling");
|
||||
#Getopt::Long::Configure("pass_through");
|
||||
Getopt::Long::Configure("no_pass_through");
|
||||
if (!GetOptions(
|
||||
'h|?|help' => \$HELP,
|
||||
'v|verbose' => \$VERBOSE,
|
||||
'u|upload' => \$UPLOAD,
|
||||
'uploadonly' => \$UPLOADONLY,
|
||||
'i|ignoreerrors' => \$IGNOREERRORS,
|
||||
'd|doc=s' => \$SINGLE_DOC ))
|
||||
{ $usage->(1); }
|
||||
|
||||
if ($HELP) { $usage->(0); }
|
||||
|
||||
if ($^O =~ /^aix/i) { die "Error: this command is not yet supported on AIX.\n"; }
|
||||
|
||||
my $destdir = scalar(@ARGV) ? $ARGV[0] : '.';
|
||||
chdir($destdir) or die "Can not cd to $destdir: $!\n";
|
||||
|
||||
my $json = JSON->new();
|
||||
|
||||
# Single-document mode (-d|--doc): fetch one wiki page through the REST URL
# in $wiki_url, download its attachments into the current directory, convert
# the page with convert_doc(), and exit without building the tarball.
if ($SINGLE_DOC) {
    my $page_url = $wiki_url . $SINGLE_DOC;
    my $scurlcmd = "curl -X GET $page_url";    # only used in messages
    verbose($scurlcmd);

    # List-form pipe open: the page name is passed to curl as one argument
    # and never goes through a shell.
    open(my $curl, '-|', 'curl', '-X', 'GET', $page_url)
        or die "error encountered in $scurlcmd \n";
    my $pagecontent = do { local $/; <$curl> };
    close($curl);    # sets $? to curl's exit status
    if ($? && !$IGNOREERRORS) { die "error encountered in $scurlcmd \n"; }

    # decode() croaks on malformed input; turn that (and any non-object
    # answer) into a message that names the command that produced it.
    my $pageout = eval { $json->decode($pagecontent) };
    if (ref($pageout) ne 'HASH') {
        my $why = $@ || "unexpected top-level value\n";
        die "error: response from $scurlcmd is not a JSON object: $why";
    }

    foreach my $pageatt (@{ $pageout->{attachments} || [] }) {
        my $att_url  = $pageatt->{url};
        my $swgetcmd = "wget $att_url";    # only used in messages
        verbose($swgetcmd);
        # The URL is data received from the server: list-form system()
        # bypasses the shell, so metacharacters in it are harmless.
        system('wget', $att_url);
        if ($? && !$IGNOREERRORS) { die "error encountered in $swgetcmd \n"; }
    }

    # All four working directories (md, html, pdf, images) are the cwd.
    convert_doc($SINGLE_DOC, $pageout->{text}, '.', '.', '.', '.');
    exit;
}
|
||||
|
||||
# Download the HTML docs and convert them all to pdfs
|
||||
if (!$UPLOADONLY) { gethtmldocs('md','html','pdf','images'); }
|
||||
|
||||
# tar/compress
|
||||
# tar/compress
# Build a timestamped tarball of the html, pdf and images directories.
my $date=`date +%Y%m%d%H%M`;
chomp $date;    # strips only a trailing newline, unlike chop
my $docname="xcat-docs-snap$date.tar.gz";

# No second chdir($destdir) here: the script already changed into $destdir
# right after option parsing, so repeating it with a relative path would try
# to enter $destdir/$destdir and die.
my $cmd = "tar -zcf $docname html pdf images 2>&1";
verbose($cmd);
system($cmd) == 0 or die "Error running $cmd: $!, rc=$?";
|
||||
|
||||
# Optionally upload the tarball to sourceforge
|
||||
# Optionally upload the tarball to sourceforge with rsync, retrying up to
# five times. A failure of every attempt is reported on stderr; the script
# still continues to its normal exit.
if ($UPLOAD || $UPLOADONLY) {
    my $max_tries = 5;
    #my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:htdocs/doc/';
    my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:/home/frs/project/x/xc/xcat/doc/';
    print "$cmd\n";

    my $uploaded = 0;
    foreach my $try (1 .. $max_tries) {
        if (system("$cmd 2>&1") == 0) {
            $uploaded = 1;
            last;
        }
    }
    if (!$uploaded) {
        warn "Error: upload of $docname failed after $max_tries attempts, rc=$?\n";
    }
}
|
||||
exit 0;
|
||||
|
||||
|
||||
|
||||
# Download all of the html docs from several "index" docs
|
||||
sub gethtmldocs {
|
||||
my $dir = shift;
|
||||
my $savedir = getcwd();
|
||||
#File::Path::make_path($dir);
|
||||
mkdir($dir);
|
||||
chdir($dir);
|
||||
#system('pwd');
|
||||
unlink <*>; # delete all the files in the dir, in case they previously ran this
|
||||
#system('ls');
|
||||
|
||||
my $indexes = '';
|
||||
foreach my $index (@indexdocs) {
|
||||
$indexes .= qq('http://sourceforge.net/apps/mediawiki/xcat/index.php?title=$index&printable=yes' );
|
||||
}
|
||||
print "Downloading the xCAT wiki documentation to $dir, from: $indexes ...\n";
|
||||
runwget($indexes);
|
||||
|
||||
# Remove the funny chars from the links to other docs and rename the docs
|
||||
#my $sedcmd = q(sed -i 's/<a href="\/apps\/mediawiki\/xcat\/index.php?title/<a href="index.php%3Ftitle/' *);
|
||||
# sed -i 's/href="index.php%3Ftitle=/href="/g' index.php\?title\=
|
||||
# sed -i 's/<a href="\([^"]*\)"/<a href="\1.html"/'
|
||||
# This searches for '<a href="index.php?title=' and then all text before a '"' or '#', and then removes the front part and add .html on the end
|
||||
# Note: this does not convert the 'MediaWiki:*' files because they are used in <link> tags, but converting them does not seem to do any good anyway.
|
||||
my $cmd = q(sed -i 's/<a href="index.php?title=\\([^"#]*\\)\\("\|#\\)/<a href="\1.html\2/g' *);
|
||||
verbose($cmd);
|
||||
system($cmd) == 0 or die "Error running $cmd: $!, rc=$?";
|
||||
# get the list of docs
|
||||
opendir(DIR, '.') or die "Error: could not read the just created html directory.\n";
|
||||
#my @docs = grep /^index.php\?title=/, readdir(DIR); # /
|
||||
my @docs;
|
||||
foreach my $f (readdir(DIR)) {
|
||||
if ($f !~ /^index.php\?title=/ || $f =~ /^index.php\?title=MediaWiki:/) { next; }
|
||||
my $newf = $f;
|
||||
$newf =~ s/^index.php\?title=//;
|
||||
if ($newf !~ /\./) { $newf .= '.html'; }
|
||||
verbose("Renaming $f to $newf");
|
||||
rename($f, $newf);
|
||||
push @docs, $newf;
|
||||
}
|
||||
close(DIR);
|
||||
chdir($savedir);
|
||||
return @docs;
|
||||
|
||||
my $mddir = shift;
|
||||
my $htmldir = shift;
|
||||
my $pdfdir = shift;
|
||||
my $imagedir = shift;
|
||||
my $savedir = getcwd();
|
||||
mkdir($mddir);
|
||||
mkdir($htmldir);
|
||||
mkdir($pdfdir);
|
||||
mkdir($imagedir);
|
||||
#delete all the files in the dirs in case they previously ran this
|
||||
unlink <$mddir/*>;
|
||||
unlink <$htmldir/*>;
|
||||
unlink <$pdfdir/*>;
|
||||
unlink <$imagedir/*>;
|
||||
|
||||
print "\nDownloading and converting the xCAT wiki document list from $wiki_url ...\n";
|
||||
my @doclist;
|
||||
my %donelist;
|
||||
foreach my $index (@indexdocs) {
|
||||
if ( $donelist{$index} ) { next; }
|
||||
my $indexcmd = "curl -X GET $wiki_url/$index";
|
||||
verbose($indexcmd);
|
||||
my $indexmd = `$indexcmd`;
|
||||
if ($? && !$IGNOREERRORS) { die "error encountered in $indexcmd \n";}
|
||||
my $jsout = $json->decode($indexmd);
|
||||
push @doclist,@{$jsout->{related_artifacts}};
|
||||
foreach my $att (@{$jsout->{attachments}}) {
|
||||
my $iwgetcmd = "wget -P $imagedir/ $att->{url}";
|
||||
verbose($iwgetcmd);
|
||||
system($iwgetcmd);
|
||||
if ($? && !$IGNOREERRORS) { die "error encountered in $iwgetcmd \n";}
|
||||
}
|
||||
convert_doc($index,$jsout->{text},$mddir,$htmldir,$pdfdir,$imagedir);
|
||||
$donelist{$index}=1;
|
||||
}
|
||||
print "\nDownloading and converting the xCAT wiki documentation to $savedir ...\n";
|
||||
|
||||
foreach my $doc (@doclist) {
|
||||
my $doc_name = $doc;
|
||||
$doc_name =~ s/\/.*\/(.+)\/$/$1/;
|
||||
if ( $donelist{$doc_name} ) { next; }
|
||||
verbose("processing $doc");
|
||||
my $doc_url=$sf_url.$doc;
|
||||
my $curlcmd = "curl -X GET $doc_url";
|
||||
verbose($curlcmd);
|
||||
my $pagecontent = `$curlcmd`;
|
||||
my $pageout = $json->decode($pagecontent);
|
||||
foreach my $pageatt (@{$pageout->{attachments}}) {
|
||||
my $wgetcmd = "wget -P $imagedir/ $pageatt->{url}";
|
||||
system($wgetcmd);
|
||||
if ($? && !$IGNOREERRORS) { die "error encountered in $wgetcmd \n";}
|
||||
}
|
||||
convert_doc($doc_name,$pageout->{text},$mddir,$htmldir,$pdfdir,$imagedir);
|
||||
$donelist{$doc_name}=1;
|
||||
}
|
||||
|
||||
chdir($savedir);
|
||||
}
|
||||
|
||||
sub convert_doc {
|
||||
my $doc_name = shift;
|
||||
my $doc_text = shift;
|
||||
my $mddir = shift;
|
||||
my $htmldir = shift;
|
||||
my $pdfdir = shift;
|
||||
my $imagedir = shift;
|
||||
|
||||
## Make image refs local
|
||||
$doc_text =~ s/\!\[\]\(.+\/(.+)\.png\)/\!\[\]\(\.\.\/$imagedir\/$1\.png\)/g;
|
||||
$doc_text =~ s/\!\[\]\(.+\/(.+)\.PNG\)/\!\[\]\(\.\.\/$imagedir\/$1\.PNG\)/g;
|
||||
$doc_text =~ s/\!\[\]\(.+\/(.+)\.jpg\)/\!\[\]\(\.\.\/$imagedir\/$1\.jpg\)/g;
|
||||
open(MDFILE, ">$mddir/${doc_name}.md") or die;
|
||||
print MDFILE $doc_text;
|
||||
close MDFILE;
|
||||
|
||||
my $pandoccmd = "pandoc -s --toc $mddir/${doc_name}.md -o $htmldir/${doc_name}.html";
|
||||
verbose($pandoccmd);
|
||||
system($pandoccmd);
|
||||
if ($? && !$IGNOREERRORS) { die "error encountered in $pandoccmd \n";}
|
||||
# This rename is probably a hack, but I didn't want to take the time to
|
||||
# figure out what was going on:
|
||||
# pandoc does different processing if target filetype is html
|
||||
# but all internal refs only work in browser when there is no html filetype
|
||||
rename "$htmldir/${doc_name}.html","$htmldir/${doc_name}";
|
||||
|
||||
$doc_text =~ s/\!\[\]\(\.\.\/$imagedir\/(.+)\.png\)/\!\[\]\(\.\/$imagedir\/$1\.png\)/g;
|
||||
$doc_text =~ s/\!\[\]\(\.\.\/$imagedir\/(.+)\.PNG\)/\!\[\]\(\.\/$imagedir\/$1\.PNG\)/g;
|
||||
$doc_text =~ s/\!\[\]\(\.\.\/$imagedir\/(.+)\.jpg\)/\!\[\]\(\.\/$imagedir\/$1\.jpg\)/g;
|
||||
open(MDFILE, ">$mddir/${doc_name}.md") or die;
|
||||
print MDFILE $doc_text;
|
||||
close MDFILE;
|
||||
my $pandoccmd2 = "pandoc --toc $mddir/${doc_name}.md -o $pdfdir/${doc_name}.pdf";
|
||||
verbose($pandoccmd2);
|
||||
system($pandoccmd2);
|
||||
if ($? && !$IGNOREERRORS) { die "error encountered in $pandoccmd2 \n";}
|
||||
|
||||
# Convert to pdf
|
||||
# Convert the wiki documents to PDF by running xhtml2pdf on the printable
# view of each page.
#   $dir   - directory to create (if needed) and work in; every file already
#            in it is deleted first
#   $files - array ref of document names; a trailing ".html" is ignored
# Dies if $dir cannot be entered or xhtml2pdf is not installed.
# The caller's working directory is restored before returning.
sub convert2pdf {
    my ($dir, $files) = @_;
    my $savedir = getcwd();
    #File::Path::make_path($dir);
    mkdir($dir);    # failure is expected when the directory already exists
    # Must not carry on if this fails: the unlink below would otherwise
    # delete the files of whatever directory we are currently in.
    chdir($dir) or die "Can not cd to $dir: $!\n";
    if (system('which xhtml2pdf >/dev/null 2>&1')) { die "xhtml2pdf is not installed.  See http://sourceforge.net/apps/mediawiki/xcat/index.php?title=Editing_xCAT_Documentation_Pages#Converting_Wiki_Pages_to_HTML_and_PDFs .\n"; }
    unlink <*>;    # delete all the files in the dir, in case they previously ran this

    foreach my $file (@$files) {
        # Work on a copy: foreach aliases the caller's array elements.
        my $docname = $file;
        $docname =~ s/\.html$//;
        # Skip the top-level index page. The test is done after stripping
        # the suffix so that "XCAT_Documentation.html" is recognised too.
        if ($docname eq 'XCAT_Documentation') { next; }
        print "Converting $docname to PDF format...\n";
        my $url = 'http://sourceforge.net/apps/mediawiki/xcat/index.php?title=' . $docname . '&printable=yes';
        my $destfile = "$docname.pdf";
        my $cmd = "xhtml2pdf '$url' '$destfile' ";
        runh2p($cmd);
    }
    chdir($savedir);
}
|
||||
|
||||
|
||||
# Run the wget cmd and filter out some of the silly output
|
||||
# Run wget recursively (one level deep) on the given URL(s) and echo its
# output, shortening the per-file "URL: ... -> "file" [" lines to
# "Downloaded file.".
#   $index - text appended to the end of the wget command line (the URL or
#            URLs to fetch, already quoted for the shell)
# Dies if the command cannot be started; a failing exit status is only
# reported on stdout.
sub runwget {
    my $index = shift;
    # options we might consider: --html-extension --restrict-file-names=windows --cut-dirs=3
    # options that do not work: --relative
    my $rejectlist = q('*title=Special:*,*title=Talk:*,*title=-&*,*title=HowTos,*title=Main_Page,*title=Release_Notes,*title=Wish_List_for_xCAT_2,*&action=edit*,*&action=history*,*&printable=yes*,*&oldid=*,index.html,opensearch_desc.php,xcat,login.php,support');
    my $cmd = qq(wget --recursive --convert-links --no-verbose --progress=bar --level=1 --page-requisites --no-parent --no-host-directories --no-directories --no-clobber --execute robots=off --post-data='printable=yes' --reject $rejectlist $index);
    verbose($cmd);

    # Lexical handle + 3-arg open instead of the global bareword OUT. The
    # command is still one string, so the shell handles the quoting and the
    # 2>&1 redirection.
    open(my $out, '-|', "$cmd 2>&1") || die "can't fork $cmd: $!\n";
    while (my $line = <$out>) {
        if ($line =~ /URL:https?:\/\/sourceforge\.net.+\s+->\s+\"(\S+)\"\s+\[/) { print "Downloaded $1.\n"; }
        else { print $line; }
    }
    close($out) || print "Error running $cmd: $! $?\n";
}
|
||||
|
||||
# Run the xhtml2pdf cmd and filter out some of the silly output
|
||||
# Run an xhtml2pdf command line and echo its output, dropping a few known
# noise lines (Python "sets" deprecation chatter and the "Converting ..."
# banner).
#   $cmd - complete shell command line to run
# Dies if the command cannot be started; a failing exit status is only
# reported on stdout.
sub runh2p {
    my $cmd = shift;
    verbose($cmd);

    # Output lines matching any of these are not passed on to the user.
    my @noise = (
        qr/DeprecationWarning:\sthe sets module is deprecated/,
        qr/from sets import ImmutableSet/,
        qr/^\s*import sets\s*$/,
        qr/^Converting\ssourceforge.net/,
    );

    # Lexical handle + 3-arg open instead of the global bareword OUT. The
    # command is still one string, so the shell applies the 2>&1.
    open(my $out, '-|', "$cmd 2>&1") || die "can't fork $cmd: $!\n";
    LINE: while (my $line = <$out>) {
        foreach my $pattern (@noise) {
            next LINE if $line =~ $pattern;
        }
        print $line;
    }
    close($out) || print "Error running $cmd: $! $?\n";
}
|
||||
|
Loading…
Reference in New Issue
Block a user