From d4483bb59f43b851f9315d698fd021a3a84555cb Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 21 Feb 2020 14:18:15 -0500 Subject: [PATCH] Polish up the osimport concept more --- librarian/{imageimporter.py => osimport} | 98 +++++++++++++++++++++--- 1 file changed, 86 insertions(+), 12 deletions(-) rename librarian/{imageimporter.py => osimport} (50%) diff --git a/librarian/imageimporter.py b/librarian/osimport similarity index 50% rename from librarian/imageimporter.py rename to librarian/osimport index 07578b62..682adc95 100644 --- a/librarian/imageimporter.py +++ b/librarian/osimport @@ -1,9 +1,14 @@ +#!/usr/bin/python import logging logging.getLogger('libarchive').addHandler(logging.NullHandler()) import libarchive import hashlib +import os +import shutil import sys +COPY = 0 +EXTRACT = 1 READFILES = set([ 'media.1/products', 'media.2/products', @@ -12,9 +17,46 @@ READFILES = set([ HEADERSUMS = set([b'\x85\xeddW\x86\xc5\xbdhx\xbe\x81\x18X\x1e\xb4O\x14\x9d\x11\xb7C8\x9b\x97R\x0c-\xb8Ht\xcb\xb3']) HASHPRINTS = { - '69d5f1c5e4474d70b0fb5374bfcb29bf57ba828ff00a55237cd757e61ed71048': ('cumulus-broadcom-amd64-4.0.0', None), + '69d5f1c5e4474d70b0fb5374bfcb29bf57ba828ff00a55237cd757e61ed71048': {'name': 'cumulus-broadcom-amd64-4.0.0', 'method': COPY}, } +from ctypes import byref, c_longlong, c_size_t, c_void_p + +from libarchive.ffi import ( + write_disk_new, write_disk_set_options, write_free, write_header, + read_data_block, write_data_block, write_finish_entry, ARCHIVE_EOF +) + +def extract_entries(entries, flags=0, callback=None, totalsize=None): + """Extracts the given archive entries into the current directory. + """ + buff, size, offset = c_void_p(), c_size_t(), c_longlong() + buff_p, size_p, offset_p = byref(buff), byref(size), byref(offset) + sizedone = 0 + with libarchive.extract.new_archive_write_disk(flags) as write_p: + for entry in entries: + write_header(write_p, entry._entry_p) + read_p = entry._archive_p + while 1: + r = read_data_block(read_p, buff_p, size_p, offset_p) + sizedone += size.value + if callback: + callback({'progress': float(sizedone) / float(totalsize)}) + if r == ARCHIVE_EOF: + break + write_data_block(write_p, buff, size, offset) + write_finish_entry(write_p) + +def extract_file(filepath, flags=0, callback=lambda x: None, imginfo=()): + """Extracts an archive from a file into the current directory.""" + totalsize = 0 + for img in imginfo: + if not imginfo[img]: + continue + totalsize += imginfo[img] + with libarchive.file_reader(filepath) as archive: + extract_entries(archive, flags, callback, totalsize) + def check_centos(isoinfo): ver = None arch = None @@ -30,7 +72,7 @@ def check_centos(isoinfo): break else: return None - return ('centos-{0}-{1}'.format(ver, arch), None) + return {'name': 'centos-{0}-{1}'.format(ver, arch), 'method': EXTRACT} def check_sles(isoinfo): @@ -64,7 +106,8 @@ def check_sles(isoinfo): elif '.2' in medianame: disk = '2' if disk and distro: - return ('{0}-{1}-{2}'.format(distro, ver, arch), disk) + return {'name': '{0}-{1}-{2}'.format(distro, ver, arch), + 'method': EXTRACT, 'subname': disk} return None @@ -83,19 +126,19 @@ def check_rhel(isoinfo): break else: return None - return ('rhel-{0}-{1}'.format(ver, arch), None) + return {'name': 'rhel-{0}-{1}'.format(ver, arch), 'method': EXTRACT} def scan_iso(filename): - filelist = [] + filesizes = {} filecontents = {} with libarchive.file_reader(filename) as reader: for ent in reader: - filelist.append(str(ent)) + filesizes[str(ent)] = ent.size if str(ent) in READFILES: filecontents[str(ent)] = b'' for block in ent.get_blocks(): filecontents[str(ent)] += bytes(block) - return filelist, filecontents + return filesizes, filecontents def fingerprint(filename): with open(sys.argv[1], 'rb') as archive: @@ -109,9 +152,7 @@ def fingerprint(filename): if fun.startswith('check_'): name = globals()[fun](isoinfo) if name: - return name - for file in isoinfo[0]: - print(file) + return name, isoinfo[0] return None else: sum = hashlib.sha256(header) @@ -121,8 +162,41 @@ def fingerprint(filename): while chunk: sum.update(chunk) chunk = archive.read(32768) - return HASHPRINTS.get(sum.hexdigest(), None) + imginfo = HASHPRINTS.get(sum.hexdigest(), None) + if imginfo: + return imginfo, isoinfo[0] + + +def printit(info): + sys.stdout.write(' \r{:.2f}%'.format(100 * info['progress'])) + sys.stdout.flush() + +def import_image(filename): + identity = fingerprint(filename) + if not identity: + return -1 + identity, imginfo = identity + targpath = identity['name'] + if identity.get('subname', None): + targpath += '/' + identity['subname'] + targpath = '/var/lib/confluent/distributions/' + targpath + try: + os.makedirs(targpath) + except OSError as e: + if e.errno != 17: + raise + filename = os.path.abspath(filename) + os.chdir(targpath) + print('Importing OS to ' + targpath + ':') + printit({'progress': 0.0}) + if identity['method'] == EXTRACT: + extract_file(filename, callback=printit, imginfo=imginfo) + elif identity['method'] == COPY: + targpath = os.path.join(targpath, os.path.basename(filename)) + shutil.copyfile(filename, targpath) + printit({'progress': 1.0}) + sys.stdout.write('\n') if __name__ == '__main__': - print(repr(fingerprint(sys.argv[1]))) \ No newline at end of file + sys.exit(import_image(sys.argv[1])) \ No newline at end of file