mirror of
				https://github.com/xcat2/xcat-core.git
				synced 2025-10-25 00:15:43 +00:00 
			
		
		
		
	rewrote getxcatdocs to work with Allura wiki on sourceforge
This commit is contained in:
		| @@ -1,191 +1,214 @@ | ||||
| #!/usr/bin/perl | ||||
| # IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html | ||||
|  | ||||
| # Downloads/converts the xCAT docs on the sourceforge wiki to local HTML and PDF. | ||||
|  | ||||
| # Downloads/converts the xCAT docs on the sourceforge Allura wiki to local HTML and PDF. | ||||
| # This script is not dependent on other xCAT code, so you can copy it to a machine | ||||
| # that has internet access to run it.  Before running this command, you must have | ||||
| # wget, python, and pisa installed.  See: http://sourceforge.net/apps/mediawiki/xcat/index.php?title=Editing_xCAT_Documentation_Pages#Converting_Wiki_Pages_to_HTML_and_PDFs . | ||||
| # curl, wget, pandoc, latex, and the Perl JSON module installed.  See: http://sourceforge.net/p/xcat/wiki/Editing_and_Downloading_xCAT_Documentation/#converting-wiki-pages-to-html-and-pdfs | ||||
|  | ||||
|  | ||||
| # Note: do not use the --upload option, unless your machine has authority to write to http://xcat.sourceforge.net/doc/ . | ||||
| #       You also need to set $UPLOADUSER to your sourceforge user. | ||||
| #       You also need to set $UPLOADUSER to your sourceforge user: | ||||
| my $UPLOADUSER = 'mellor'; | ||||
|  | ||||
| #BEGIN | ||||
| #{ | ||||
| #    $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr'; | ||||
| #} | ||||
| use strict; | ||||
| #use lib "$::XCATROOT/lib/perl"; | ||||
| #use xCAT::Utils; | ||||
| use Getopt::Long; | ||||
| #use File::Path; | ||||
| use Cwd; | ||||
| #use Data::Dumper; | ||||
| use JSON; | ||||
|  | ||||
| # URL for the xCAT Allura wiki API markdown on SourceForge | ||||
| my $sf_url='http://sourceforge.net/rest'; | ||||
| my $wiki_url=$sf_url.'/p/xcat/wiki/'; | ||||
|  | ||||
| # Update this list if you group any xcat docs on a separate page such that they | ||||
| # are no longer linked from the main doc page: | ||||
| my @indexdocs = ('XCAT_Documentation',  | ||||
|                  'Power_775_Cluster_Documentation',  | ||||
|                  'Highly_Available_Management_Node',  | ||||
|                  'Mixed_Cluster_Support',  | ||||
|                  'IBM_HPC_Stack_in_an_xCAT_Cluster'); | ||||
|  | ||||
| # Update this list if you group any xcat docs on a separate page such that they are no longer linked from the | ||||
| # main doc page. | ||||
| my @indexdocs = ('XCAT_Documentation', 'Power_775_Cluster_Documentation', 'Highly_Available_Management_Node', 'Mixed_Cluster_Support', 'IBM_HPC_Stack_in_an_xCAT_Cluster'); | ||||
|  | ||||
| #my $VERSION; | ||||
| my $HELP; | ||||
| my $UPLOAD; | ||||
| my $UPLOADONLY; | ||||
| my $IGNOREERRORS; | ||||
| my $SINGLE_DOC; | ||||
| my $VERBOSE; | ||||
|   | ||||
| my $usage = sub { | ||||
|    	my $exitcode = shift @_; | ||||
|    	print "Usage: getxcatdocs [-?|-h|--help] [-v|--verbose] [-u|--upload] [--uploadonly] [<destination-dir>]\n"; | ||||
| 	exit $exitcode; | ||||
| }; | ||||
|  | ||||
| # Process the cmd line args | ||||
| Getopt::Long::Configure("bundling"); | ||||
| #Getopt::Long::Configure("pass_through"); | ||||
| Getopt::Long::Configure("no_pass_through"); | ||||
| if (!GetOptions('h|?|help'  => \$HELP, 'v|verbose' => \$VERBOSE, 'u|upload' => \$UPLOAD, 'uploadonly' => \$UPLOADONLY )) { $usage->(1); } | ||||
|  | ||||
| if ($HELP) { $usage->(0); } | ||||
|  | ||||
| #if ($VERSION) { | ||||
|     #print xCAT::Utils->Version(), "\n"; | ||||
| #    exit; | ||||
| #} | ||||
|  | ||||
| if ($^O =~ /^aix/i) { die "Error: this command is not yet supported on AIX.\n"; } | ||||
|  | ||||
| my $destdir = scalar(@ARGV) ? $ARGV[0] : '.'; | ||||
| chdir($destdir) or die "Can not cd to $destdir: $!\n"; | ||||
| #my $docdir = $ENV{'PWD'}; | ||||
|  | ||||
| # Download the HTML docs and convert them all to pdfs | ||||
| my @dir; | ||||
| if (!$UPLOADONLY) { | ||||
| 	@dir = gethtmldocs('html'); | ||||
| 	convert2pdf('pdf', \@dir); | ||||
| } | ||||
|  | ||||
|  | ||||
| # tar/compress | ||||
| my $date=`date +%Y%m%d%H%M`; | ||||
| chop $date; | ||||
| my $docname="xcat-docs-snap$date.tar.gz"; | ||||
| #system('pwd'); | ||||
| my $cmd = "tar -zcf $docname html pdf 2>&1"; | ||||
| verbose($cmd); | ||||
| system($cmd) == 0 or die "Error running $cmd: $!, rc=$?"; | ||||
|  | ||||
| # Optionally upload the tarball to sourceforge | ||||
| if ($UPLOAD || $UPLOADONLY) { | ||||
| 	my $UPLOADUSER = 'bp-sawyers'; | ||||
| 	my $count = 1; | ||||
| 	#my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:htdocs/doc/'; | ||||
| 	my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:/home/frs/project/x/xc/xcat/doc/'; | ||||
| 	print "$cmd\n"; | ||||
| 	while ($count<=5 && system("$cmd 2>&1")) { $count++; } | ||||
| } | ||||
| exit 0; | ||||
|  | ||||
|  | ||||
# Print the given message followed by a newline, but only when the
# file-level $VERBOSE flag (set by -v|--verbose) is true.
sub verbose {
    my $message = shift;
    print $message, "\n" if $VERBOSE;
}
|  | ||||
| my $usage = sub { | ||||
|     my $exitcode = shift @_; | ||||
|     print "Usage: getxcatdocs [-?|-h|--help] \n"; | ||||
|     print "Usage: getxcatdocs [-v|--verbose] [-u|--upload] [--uploadonly] [-i|--ignoreerrors] [<destination-dir>]\n"; | ||||
|     print "Usage: getxcatdocs [-v|--verbose] [-d|--doc single_doc] [-i|--ignoreerrors] [<destination-dir>]\n"; | ||||
|     exit $exitcode; | ||||
| }; | ||||
|  | ||||
|  | ||||
| # Main processing | ||||
|  | ||||
|     # Process the cmd line args | ||||
|     Getopt::Long::Configure("bundling"); | ||||
|     #Getopt::Long::Configure("pass_through"); | ||||
|     Getopt::Long::Configure("no_pass_through"); | ||||
|     if (!GetOptions( | ||||
|          'h|?|help'    => \$HELP,  | ||||
|          'v|verbose'   => \$VERBOSE,  | ||||
|          'u|upload'    => \$UPLOAD,  | ||||
|          'uploadonly'  => \$UPLOADONLY,  | ||||
|          'i|ignoreerrors'    => \$IGNOREERRORS,  | ||||
|          'd|doc=s'     => \$SINGLE_DOC )) | ||||
|           { $usage->(1); } | ||||
|  | ||||
|     if ($HELP) { $usage->(0); } | ||||
|  | ||||
|     if ($^O =~ /^aix/i) { die "Error: this command is not yet supported on AIX.\n"; } | ||||
|  | ||||
|     my $destdir = scalar(@ARGV) ? $ARGV[0] : '.'; | ||||
|     chdir($destdir) or die "Can not cd to $destdir: $!\n"; | ||||
|  | ||||
|     my $json = JSON->new();  | ||||
|  | ||||
|     if ($SINGLE_DOC) { | ||||
|       my $scurlcmd = "curl -X GET $wiki_url$SINGLE_DOC"; | ||||
|       verbose($scurlcmd); | ||||
|       my $pagecontent = `$scurlcmd`; | ||||
|       if ($? && !$IGNOREERRORS) { die "error encountered in $scurlcmd \n";} | ||||
|       my $pageout = $json->decode($pagecontent); | ||||
|       foreach my $pageatt (@{$pageout->{attachments}}) { | ||||
|          my $swgetcmd = "wget $pageatt->{url}"; | ||||
|          verbose($swgetcmd); | ||||
|          system($swgetcmd); | ||||
|          if ($? && !$IGNOREERRORS) { die "error encountered in $swgetcmd \n";} | ||||
|       } | ||||
|       convert_doc($SINGLE_DOC,$pageout->{text},'.','.','.','.'); | ||||
|       exit; | ||||
|     }       | ||||
|     | ||||
|     # Download the HTML docs and convert them all to pdfs | ||||
|     if (!$UPLOADONLY) { gethtmldocs('md','html','pdf','images'); } | ||||
|  | ||||
|     # tar/compress | ||||
|     my $date=`date +%Y%m%d%H%M`; | ||||
|     chop $date; | ||||
|     my $docname="xcat-docs-snap$date.tar.gz"; | ||||
|     chdir($destdir) or die "Can not cd to $destdir: $!\n"; | ||||
|  | ||||
|     my $cmd = "tar -zcf $docname html pdf images 2>&1"; | ||||
|     verbose($cmd); | ||||
|     system($cmd) == 0 or die "Error running $cmd: $!, rc=$?"; | ||||
|  | ||||
|     # Optionally upload the tarball to sourceforge | ||||
|     if ($UPLOAD || $UPLOADONLY) { | ||||
|         my $count = 1; | ||||
|         #my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:htdocs/doc/'; | ||||
|         my $cmd = "rsync -v $docname $UPLOADUSER," . 'xcat@web.sourceforge.net:/home/frs/project/x/xc/xcat/doc/'; | ||||
|         print "$cmd\n"; | ||||
|         while ($count<=5 && system("$cmd 2>&1")) { $count++; } | ||||
|     } | ||||
|     exit 0; | ||||
|  | ||||
|  | ||||
|  | ||||
| # Download all of the html docs from several "index" docs | ||||
| sub gethtmldocs { | ||||
| 	my $dir = shift; | ||||
| 	my $savedir = getcwd(); | ||||
| 	#File::Path::make_path($dir); | ||||
| 	mkdir($dir); | ||||
| 	chdir($dir); | ||||
| 	#system('pwd'); | ||||
| 	unlink <*>;		# delete all the files in the dir, in case they previously ran this | ||||
| 	#system('ls'); | ||||
| 	 | ||||
| 	my $indexes = ''; | ||||
| 	foreach my $index (@indexdocs) { | ||||
| 		$indexes .= qq('http://sourceforge.net/apps/mediawiki/xcat/index.php?title=$index&printable=yes' ); | ||||
| 	} | ||||
| 	print "Downloading the xCAT wiki documentation to $dir, from: $indexes ...\n"; | ||||
| 	runwget($indexes); | ||||
| 	 | ||||
| 	# Remove the funny chars from the links to other docs and rename the docs | ||||
| 	#my $sedcmd = q(sed -i 's/<a href="\/apps\/mediawiki\/xcat\/index.php?title/<a href="index.php%3Ftitle/' *); | ||||
| 	# sed -i 's/href="index.php%3Ftitle=/href="/g' index.php\?title\= | ||||
| 	# sed -i 's/<a href="\([^"]*\)"/<a href="\1.html"/' | ||||
| 	# This searches for '<a href="index.php?title=' and then all text before a '"' or '#', and then removes the front part and add .html on the end | ||||
| 	# Note: this does not convert the 'MediaWiki:*' files because they are used in <link> tags, but converting them does not seem to do any good anyway. | ||||
| 	my $cmd = q(sed -i 's/<a href="index.php?title=\\([^"#]*\\)\\("\|#\\)/<a href="\1.html\2/g' *); | ||||
| 	verbose($cmd); | ||||
| 	system($cmd) == 0 or die "Error running $cmd: $!, rc=$?"; | ||||
| 	# get the list of docs | ||||
| 	opendir(DIR, '.') or die "Error: could not read the just created html directory.\n"; | ||||
| 	#my @docs = grep /^index.php\?title=/, readdir(DIR);		# / | ||||
| 	my @docs; | ||||
| 	foreach my $f (readdir(DIR)) { | ||||
| 		if ($f !~ /^index.php\?title=/ || $f =~ /^index.php\?title=MediaWiki:/) { next; } | ||||
| 		my $newf = $f; | ||||
| 		$newf =~ s/^index.php\?title=//; | ||||
| 		if ($newf !~ /\./) { $newf .= '.html'; } | ||||
| 		verbose("Renaming $f to $newf"); | ||||
| 		rename($f, $newf); | ||||
| 		push @docs, $newf; | ||||
| 	} | ||||
| 	close(DIR); | ||||
| 	chdir($savedir); | ||||
| 	return @docs; | ||||
|  | ||||
|     my $mddir = shift; | ||||
|     my $htmldir = shift; | ||||
|     my $pdfdir = shift; | ||||
|     my $imagedir = shift; | ||||
|     my $savedir = getcwd(); | ||||
|     mkdir($mddir); | ||||
|     mkdir($htmldir); | ||||
|     mkdir($pdfdir); | ||||
|     mkdir($imagedir); | ||||
|     #delete all the files in the dirs in case they previously ran this | ||||
|     unlink <$mddir/*>;    | ||||
|     unlink <$htmldir/*>;    | ||||
|     unlink <$pdfdir/*>;    | ||||
|     unlink <$imagedir/*>;    | ||||
|                                      | ||||
|    print "\nDownloading and converting the xCAT wiki document list from $wiki_url ...\n"; | ||||
|     my @doclist; | ||||
|     my %donelist; | ||||
|     foreach my $index (@indexdocs) { | ||||
|       if ( $donelist{$index} ) { next; } | ||||
|       my $indexcmd = "curl -X GET $wiki_url/$index"; | ||||
|       verbose($indexcmd); | ||||
|       my $indexmd = `$indexcmd`; | ||||
|       if ($? && !$IGNOREERRORS) { die "error encountered in $indexcmd \n";} | ||||
|       my $jsout = $json->decode($indexmd);  | ||||
|       push @doclist,@{$jsout->{related_artifacts}}; | ||||
|       foreach my $att (@{$jsout->{attachments}}) { | ||||
|         my $iwgetcmd = "wget -P $imagedir/ $att->{url}"; | ||||
|         verbose($iwgetcmd); | ||||
|         system($iwgetcmd); | ||||
|         if ($? && !$IGNOREERRORS) { die "error encountered in $iwgetcmd \n";} | ||||
|       } | ||||
|       convert_doc($index,$jsout->{text},$mddir,$htmldir,$pdfdir,$imagedir); | ||||
|       $donelist{$index}=1; | ||||
|     } | ||||
|     print "\nDownloading and converting the xCAT wiki documentation to $savedir ...\n"; | ||||
|  | ||||
|     foreach my $doc (@doclist) { | ||||
|       my $doc_name = $doc; | ||||
|       $doc_name =~ s/\/.*\/(.+)\/$/$1/; | ||||
|       if ( $donelist{$doc_name} ) { next; } | ||||
|       verbose("processing $doc"); | ||||
|       my $doc_url=$sf_url.$doc; | ||||
|       my $curlcmd = "curl -X GET $doc_url"; | ||||
|       verbose($curlcmd); | ||||
|       my $pagecontent = `$curlcmd`; | ||||
|       my $pageout = $json->decode($pagecontent); | ||||
|       foreach my $pageatt (@{$pageout->{attachments}}) { | ||||
|          my $wgetcmd = "wget -P $imagedir/ $pageatt->{url}"; | ||||
|          system($wgetcmd); | ||||
|          if ($? && !$IGNOREERRORS) { die "error encountered in $wgetcmd \n";} | ||||
|       } | ||||
|       convert_doc($doc_name,$pageout->{text},$mddir,$htmldir,$pdfdir,$imagedir); | ||||
|       $donelist{$doc_name}=1; | ||||
|     } | ||||
|    | ||||
|     chdir($savedir); | ||||
| } | ||||
|  | ||||
| sub convert_doc { | ||||
|    my $doc_name = shift; | ||||
|    my $doc_text = shift; | ||||
|    my $mddir = shift; | ||||
|    my $htmldir = shift; | ||||
|    my $pdfdir = shift; | ||||
|    my $imagedir = shift; | ||||
|  | ||||
|   ## Make image refs local | ||||
|    $doc_text =~ s/\!\[\]\(.+\/(.+)\.png\)/\!\[\]\(\.\.\/$imagedir\/$1\.png\)/g; | ||||
|    $doc_text =~ s/\!\[\]\(.+\/(.+)\.PNG\)/\!\[\]\(\.\.\/$imagedir\/$1\.PNG\)/g; | ||||
|    $doc_text =~ s/\!\[\]\(.+\/(.+)\.jpg\)/\!\[\]\(\.\.\/$imagedir\/$1\.jpg\)/g; | ||||
|    open(MDFILE, ">$mddir/${doc_name}.md") or die; | ||||
|    print MDFILE $doc_text; | ||||
|    close MDFILE; | ||||
|  | ||||
|    my $pandoccmd = "pandoc -s --toc $mddir/${doc_name}.md -o $htmldir/${doc_name}.html"; | ||||
|    verbose($pandoccmd); | ||||
|    system($pandoccmd); | ||||
|    if ($? && !$IGNOREERRORS) { die "error encountered in $pandoccmd \n";} | ||||
|    # This rename is probably a hack, but I didn't want to take the time to | ||||
|    # figure out what was going on: | ||||
|    #   pandoc does different processing if target filetype is html | ||||
|    #   but all internal refs only work in browser when there is no html filetype | ||||
|    rename "$htmldir/${doc_name}.html","$htmldir/${doc_name}"; | ||||
|  | ||||
|    $doc_text =~ s/\!\[\]\(\.\.\/$imagedir\/(.+)\.png\)/\!\[\]\(\.\/$imagedir\/$1\.png\)/g; | ||||
|    $doc_text =~ s/\!\[\]\(\.\.\/$imagedir\/(.+)\.PNG\)/\!\[\]\(\.\/$imagedir\/$1\.PNG\)/g; | ||||
|    $doc_text =~ s/\!\[\]\(\.\.\/$imagedir\/(.+)\.jpg\)/\!\[\]\(\.\/$imagedir\/$1\.jpg\)/g; | ||||
|    open(MDFILE, ">$mddir/${doc_name}.md") or die; | ||||
|    print MDFILE $doc_text; | ||||
|    close MDFILE; | ||||
|    my $pandoccmd2 = "pandoc --toc $mddir/${doc_name}.md -o $pdfdir/${doc_name}.pdf"; | ||||
|    verbose($pandoccmd2); | ||||
|    system($pandoccmd2); | ||||
|    if ($? && !$IGNOREERRORS) { die "error encountered in $pandoccmd2 \n";} | ||||
|  | ||||
# Convert the downloaded wiki pages to PDF files using xhtml2pdf.
#   $dir   - directory that receives the PDFs; it is created if needed and
#            every file already in it is deleted first
#   $files - ref to the list of doc names, with or without a trailing .html;
#            the caller's list is left unmodified
# Each page is fetched again from the sourceforge mediawiki in printable form
# and written to <doc>.pdf.  The XCAT_Documentation index page is skipped.
# Dies if $dir can not be entered or if xhtml2pdf is not installed.
sub convert2pdf {
    my ($dir, $files) = @_;
    my $savedir = getcwd();
    #File::Path::make_path($dir);
    mkdir($dir);    # fails harmlessly if the dir is left over from a previous run
    # Never continue after a failed chdir: the unlink below would otherwise
    # delete every file in the directory the caller started in.
    chdir($dir) or die "Can not cd to $dir: $!\n";
    if (system('which xhtml2pdf >/dev/null 2>&1')) { die "xhtml2pdf is not installed.  See http://sourceforge.net/apps/mediawiki/xcat/index.php?title=Editing_xCAT_Documentation_Pages#Converting_Wiki_Pages_to_HTML_and_PDFs .\n"; }
    unlink <*>;     # delete all the files in the dir, in case they previously ran this
    foreach my $entry (@$files) {
        # Work on a copy: the loop variable aliases the caller's array element,
        # so substituting on it directly would rewrite the caller's list.
        my $file = $entry;
        $file =~ s/\.html$//;
        # Compare after stripping the suffix; gethtmldocs hands back names
        # ending in .html, which would never equal the bare page name.
        if ($file eq 'XCAT_Documentation') { next; }
        print "Converting $file to PDF format...\n";
        my $url = 'http://sourceforge.net/apps/mediawiki/xcat/index.php?title=' . $file . '&printable=yes';
        my $destfile = "$file.pdf";
        my $cmd = "xhtml2pdf '$url' '$destfile' ";
        runh2p($cmd);
    }
    chdir($savedir) or die "Can not cd back to $savedir: $!\n";
}
|  | ||||
|  | ||||
# Download wiki pages with wget and print a condensed progress log.
#   $urls - the URL portion of the wget command line, appended verbatim
#           (gethtmldocs passes a string of single-quoted index-page URLs)
# wget's stdout and stderr are read together, line by line.  A line reporting
# a saved sourceforge URL is shortened to "Downloaded <file>."; every other
# line is passed through unchanged.  Dies if the command can not be started;
# a failure reported at close is only printed.
sub runwget {
    my $urls = shift;
    # options we might consider: --html-extension --restrict-file-names=windows  --cut-dirs=3
    # options that do not work:  --relative
    # An earlier version of this list also rejected *title=MediaWiki:*
    my $rejectlist = q('*title=Special:*,*title=Talk:*,*title=-&*,*title=HowTos,*title=Main_Page,*title=Release_Notes,*title=Wish_List_for_xCAT_2,*&action=edit*,*&action=history*,*&printable=yes*,*&oldid=*,index.html,opensearch_desc.php,xcat,login.php,support');
    my $cmd = qq(wget --recursive --convert-links --no-verbose --progress=bar --level=1 --page-requisites --no-parent --no-host-directories --no-directories --no-clobber --execute robots=off --post-data='printable=yes' --reject $rejectlist $urls);
    verbose($cmd);
    open(my $wget_out, '-|', "$cmd 2>&1") or die "can't fork $cmd: $!\n";
    while (my $line = <$wget_out>) {
        if ($line =~ /URL:https*:\/\/sourceforge\.net.+\s+->\s+\"(\S+)\"\s+\[/) {
            print "Downloaded $1.\n";
        }
        else {
            print $line;
        }
    }
    close($wget_out) or print "Error running $cmd: $! $?\n";
}
|  | ||||
# Run an xhtml2pdf command line and echo its output minus known noise.
#   $cmd - the complete shell command to run
# stdout and stderr are read together, line by line.  Lines matching one of
# the noise patterns below (python "sets" deprecation chatter and the
# "Converting sourceforge.net" banner) are dropped; everything else is
# printed.  Dies if the command can not be started; a failure reported at
# close is only printed.
sub runh2p {
    my $cmd = shift;
    verbose($cmd);
    my @noise = (
        qr/DeprecationWarning:\sthe sets module is deprecated/,
        qr/from sets import ImmutableSet/,
        qr/^\s*import sets\s*$/,
        qr/^Converting\ssourceforge.net/,
    );
    open(my $h2p_out, '-|', "$cmd 2>&1") or die "can't fork $cmd: $!\n";
    LINE: while (my $line = <$h2p_out>) {
        foreach my $pattern (@noise) {
            next LINE if $line =~ $pattern;
        }
        print $line;
    }
    close($h2p_out) or print "Error running $cmd: $! $?\n";
}
|   | ||||
		Reference in New Issue
	
	Block a user