Update decompression support on Windows (#25185)

Most package installations include compressed source files. This
adds support for common archive types on Windows:

* Add support for using system 7zip functionality to decompress .Z
  files when available (and on Windows, use 7zip for .xz archives)
* Default to using built-in Python support for tar/bz2 decompression
  (note that Python tar documentation mentions preservation of file
  permissions)
* Add tests for decompression support
* Extract logic for handling exploding archives (i.e. compressed
  archives that expand to more than one base file) into an
  exploding_archive_catch context manager in the filesystem module
This commit is contained in:
John W. Parent 2022-06-06 21:14:43 -04:00 committed by GitHub
parent 9d7cc43673
commit 5b45df5269
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 507 additions and 61 deletions

View file

@ -308,6 +308,68 @@ def change_sed_delimiter(old_delim, new_delim, *filenames):
filter_file(double_quoted, '"%s"' % repl, f)
@contextmanager
def exploding_archive_catch(stage):
    """Run an archive extraction inside a dedicated container directory.

    Creates ``<stage.path>/spack-expanded-archive``, makes it the current
    working directory for the body of the ``with`` statement and restores
    the previous working directory afterwards, whether or not the body
    raised. When the body completes without raising, the extracted content
    is handed to ``exploding_archive_handler`` to be moved into place.

    Args:
        stage: Stage object; ``stage.path`` is read here and the object is
            passed on to ``exploding_archive_handler``.
    """
    # Check for an exploding tarball, i.e. one that doesn't expand to
    # a single directory.  If the tarball *didn't* explode, move its
    # contents to the staging source directory & remove the container
    # directory.  If the tarball did explode, just rename the tarball
    # directory to the staging source directory.
    #
    # NOTE: The tar program on Mac OS X will encode HFS metadata in
    # hidden files, which can end up *alongside* a single top-level
    # directory.  We initially ignore presence of hidden files to
    # accommodate these "semi-exploding" tarballs but ensure the files
    # are copied to the source directory.

    # Expand all tarballs in their own directory to contain
    # exploding tarballs.
    tarball_container = os.path.join(stage.path,
                                     "spack-expanded-archive")
    mkdirp(tarball_container)
    orig_dir = os.getcwd()
    os.chdir(tarball_container)
    try:
        yield
    finally:
        # Always return to the previous directory, including for
        # non-Exception exits such as KeyboardInterrupt; any in-flight
        # exception propagates unchanged once this clause finishes.
        os.chdir(orig_dir)
    # Only reached on successful extraction: catch an exploding archive.
    exploding_archive_handler(tarball_container, stage)
@system_path_filter
def exploding_archive_handler(tarball_container, stage):
    """Move an expanded archive from its container into the stage.

    If the container holds exactly one non-hidden entry and that entry is a
    directory, the directory becomes ``stage.source_path``, any remaining
    (hidden) entries are moved next to it under ``stage.path`` and the
    emptied container is removed. In every other case the container itself
    is renamed to ``stage.source_path``.

    Args:
        tarball_container: where the archive was expanded to
        stage: Stage object referencing filesystem location
            where archive is being expanded
    """
    entries = os.listdir(tarball_container)
    visible = [name for name in entries if not name.startswith('.')]

    top_level = None
    if len(visible) == 1:
        top_level = os.path.join(tarball_container, visible[0])

    if top_level is None or not os.path.isdir(top_level):
        # Either the archive exploded, or its single entry is a
        # non-directory (e.g., a patch file): simply rename the tarball
        # container to be the source path.
        shutil.move(tarball_container, stage.source_path)
        return

    stage.srcdir = visible[0]
    shutil.move(top_level, stage.source_path)
    if len(entries) > 1:
        # Hidden files that sat alongside the single directory.
        entries.remove(visible[0])
        for name in entries:
            shutil.move(os.path.join(tarball_container, name),
                        os.path.join(stage.path, name))
    os.rmdir(tarball_container)
@system_path_filter(arg_slice=slice(1))
def get_owner_uid(path, err_msg=None):
if not os.path.exists(path):

View file

@ -35,6 +35,7 @@
import six.moves.urllib.parse as urllib_parse
import llnl.util
import llnl.util.filesystem as fs
import llnl.util.tty as tty
from llnl.util.filesystem import (
get_single_file,
@ -528,50 +529,11 @@ def expand(self):
decompress = decompressor_for(self.archive_file, self.extension)
# Expand all tarballs in their own directory to contain
# exploding tarballs.
tarball_container = os.path.join(self.stage.path,
"spack-expanded-archive")
# Below we assume that the command to decompress expand the
# archive in the current working directory
mkdirp(tarball_container)
with working_dir(tarball_container):
with fs.exploding_archive_catch(self.stage):
decompress(self.archive_file)
# Check for an exploding tarball, i.e. one that doesn't expand to
# a single directory. If the tarball *didn't* explode, move its
# contents to the staging source directory & remove the container
# directory. If the tarball did explode, just rename the tarball
# directory to the staging source directory.
#
# NOTE: The tar program on Mac OS X will encode HFS metadata in
# hidden files, which can end up *alongside* a single top-level
# directory. We initially ignore presence of hidden files to
# accomodate these "semi-exploding" tarballs but ensure the files
# are copied to the source directory.
files = os.listdir(tarball_container)
non_hidden = [f for f in files if not f.startswith('.')]
if len(non_hidden) == 1:
src = os.path.join(tarball_container, non_hidden[0])
if os.path.isdir(src):
self.stage.srcdir = non_hidden[0]
shutil.move(src, self.stage.source_path)
if len(files) > 1:
files.remove(non_hidden[0])
for f in files:
src = os.path.join(tarball_container, f)
dest = os.path.join(self.stage.path, f)
shutil.move(src, dest)
os.rmdir(tarball_container)
else:
# This is a non-directory entry (e.g., a patch file) so simply
# rename the tarball container to be the source path.
shutil.move(tarball_container, self.stage.source_path)
else:
shutil.move(tarball_container, self.stage.source_path)
def archive(self, destination):
"""Just moves this archive to the destination."""
if not self.archive_file:

View file

@ -0,0 +1 @@
TEST

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,98 @@
# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
import shutil
import pytest
from llnl.util.filesystem import working_dir
from spack.paths import spack_root
from spack.util import compression as scomp
from spack.util.executable import CommandNotFoundError
# Directory holding the sample archives used by these tests.
datadir = os.path.join(spack_root, 'lib', 'spack',
                       'spack', 'test', 'data', 'compression')

# Map every allowed archive extension (skipping the upper-case TAR
# spellings) to a sample archive name of the form 'Foo.<ext>'.
ext_archive = {
    ext: '.'.join(['Foo', ext])
    for ext in scomp.ALLOWED_ARCHIVE_TYPES if 'TAR' not in ext
}
def support_stub():
    """Stand-in for a ``*_support`` probe that always reports no support."""
    return False
@pytest.fixture
def compr_support_check(monkeypatch):
    """Replace every native-support probe in ``scomp`` with ``support_stub``."""
    probes = ('lzma_support', 'tar_support', 'gzip_support', 'bz2_support')
    for probe in probes:
        monkeypatch.setattr(scomp, probe, support_stub)
@pytest.fixture
def archive_file(tmpdir_factory, request):
    """Copy example archive to temp directory for test"""
    extension = request.param
    sample = os.path.join(datadir, 'Foo') + '.' + extension
    scratch = str(tmpdir_factory.mktemp('compression'))
    shutil.copy(sample, scratch)
    return os.path.join(scratch, 'Foo.%s' % extension)
@pytest.mark.parametrize('archive_file', ext_archive.keys(), indirect=True)
def test_native_unpacking(tmpdir_factory, archive_file):
    """Decompress each sample archive into an empty directory and check
    that exactly one file containing 'TEST' is produced."""
    decompress = scomp.decompressor_for(archive_file,
                                        scomp.extension(archive_file))
    scratch = tmpdir_factory.mktemp("comp_test")
    with working_dir(str(scratch)):
        assert not os.listdir(os.getcwd())
        decompress(archive_file)
        extracted = os.listdir(os.getcwd())
        assert len(extracted) == 1
        with open(extracted[0], 'r') as handle:
            contents = handle.read()
        assert 'TEST' in contents
@pytest.mark.parametrize('archive_file', ext_archive.keys(), indirect=True)
def test_system_unpacking(tmpdir_factory, archive_file, compr_support_check):
    """Same check as the native test, but with the ``compr_support_check``
    fixture active so the support probes report False."""
    # actually run test
    decompress = scomp.decompressor_for(archive_file,
                                        scomp.extension(archive_file))
    scratch = tmpdir_factory.mktemp("system_comp_test")
    with working_dir(str(scratch)):
        assert not os.listdir(os.getcwd())
        decompress(archive_file)
        extracted = os.listdir(os.getcwd())
        assert len(extracted) == 1
        with open(extracted[0], 'r') as handle:
            contents = handle.read()
        assert 'TEST' in contents
def test_unallowed_extension():
    """Requesting a decompressor for a '.py' file must raise
    CommandNotFoundError."""
    with pytest.raises(CommandNotFoundError):
        scomp.decompressor_for('Foo.py', 'py')
@pytest.mark.parametrize('archive', ext_archive.values())
def test_get_extension(archive):
    """The extension detected for each sample name maps back to that name."""
    detected = scomp.extension(archive)
    assert ext_archive[detected] == archive
def test_get_bad_extension():
    """An unrecognized extension is reported as None."""
    assert scomp.extension('Foo.py') is None
@pytest.mark.parametrize('path', ext_archive.values())
def test_allowed_archvie(path):
    """Every sample archive name is accepted by ``allowed_archive``."""
    is_allowed = scomp.allowed_archive(path)
    assert is_allowed

View file

@ -5,10 +5,11 @@
import os
import re
import shutil
import sys
from itertools import product
from spack.util.executable import which
from spack.util.executable import CommandNotFoundError, which
# Supported archive extensions.
PRE_EXTS = ["tar", "TAR"]
@ -22,35 +23,146 @@
is_windows = sys.platform == 'win32'
def bz2_support():
    """Report whether the Python ``bz2`` module can be imported."""
    try:
        import bz2  # noqa
    except ImportError:
        return False
    else:
        return True
def gzip_support():
    """Report whether the Python ``gzip`` module can be imported."""
    try:
        import gzip  # noqa
    except ImportError:
        return False
    else:
        return True
def lzma_support():
    """Report whether the Python ``lzma`` module can be imported."""
    try:
        import lzma  # noqa # novermin
    except ImportError:
        return False
    else:
        return True
def tar_support():
    """Report whether the Python ``tarfile`` module can be imported."""
    try:
        import tarfile  # noqa
    except ImportError:
        return False
    else:
        return True
def allowed_archive(path):
    """Return True if ``path`` ends with one of ``ALLOWED_ARCHIVE_TYPES``.

    An empty or ``None`` path is never an allowed archive.

    Args:
        path (str): file name, path or bare extension to check
    """
    if not path:
        return False
    return any(path.endswith(t) for t in ALLOWED_ARCHIVE_TYPES)
def _untar(archive_file):
    """Untar archive. Prefer native Python `tarfile`
    but fall back to system utility if there is a failure
    to find the native Python module (tar on Unix).
    Filters archives through native support gzip and xz
    compression formats.

    The system ``tar`` is also used when the extension contains ``Z``, or
    contains ``xz`` while the Python ``lzma`` module is unavailable.

    Args:
        archive_file (str): absolute path to the archive to be extracted.
            Can be one of .tar(.[gz|bz2|xz|Z]) or .(tgz|tbz|tbz2|txz).

    Returns:
        str: base name of ``archive_file`` with its last extension removed
    """
    # Use splitext's root rather than str.strip(ext): strip() removes a
    # *set of characters* from both ends of the string, not a suffix, and
    # so corrupts names that begin or end with those characters.
    stem, ext = os.path.splitext(archive_file)
    outfile = os.path.basename(stem)

    uncompress_required = 'Z' in ext
    lzma_required = 'xz' in ext
    lzma_needed_and_not_available = not lzma_support() and lzma_required
    if tar_support() and not uncompress_required and\
            not lzma_needed_and_not_available:
        import tarfile
        # NOTE(review): extractall() trusts member paths inside the
        # archive; confirm archives are verified before reaching here.
        tar = tarfile.open(archive_file)
        try:
            tar.extractall()
        finally:
            # Release the file handle even if extraction fails.
            tar.close()
    else:
        tar = which('tar', required=True)
        tar.add_default_arg('-oxf')
        tar(archive_file)
    return outfile
def _bunzip2(archive_file):
    """Use Python's bz2 module to decompress bz2 compressed archives
    Fall back to system utility failing to find Python module `bz2`

    The decompressed file is written to the current working directory.
    For the system fallback the archive is first copied there, and
    ``bunzip2 -q`` is run on the copy.

    Args:
        archive_file (str): absolute path to the bz2 archive to be decompressed

    Returns:
        str: path of the decompressed file in the current working directory
    """
    # splitext's root, not str.strip(ext): strip() removes characters from
    # both ends of the string rather than the extension suffix.
    stem, _ = os.path.splitext(archive_file)
    compressed_file_name = os.path.basename(archive_file)
    decompressed_file = os.path.basename(stem)
    working_dir = os.getcwd()
    archive_out = os.path.join(working_dir, decompressed_file)
    copy_path = os.path.join(working_dir, compressed_file_name)
    if bz2_support():
        import bz2
        f_bz = bz2.BZ2File(archive_file, mode='rb')
        try:
            with open(archive_out, 'wb') as ar:
                # Stream in chunks instead of holding the whole
                # decompressed payload in memory.
                shutil.copyfileobj(f_bz, ar)
        finally:
            f_bz.close()
    else:
        shutil.copy(archive_file, copy_path)
        bunzip2 = which('bunzip2', required=True)
        bunzip2.add_default_arg('-q')
        bunzip2(copy_path)
    # Both branches report the output path, matching the other
    # decompression helpers in this module.
    return archive_out
def _gunzip(archive_file):
    """Decompress `.gz` extensions. Prefer native Python `gzip` module.
    Falling back to system utility gunzip.
    Like gunzip, but extracts in the current working directory
    instead of in-place.

    Args:
        archive_file (str): absolute path of the file to be decompressed

    Returns:
        str: path of the decompressed file in the current working directory
    """
    # splitext's root, not str.strip(ext): strip() removes characters from
    # both ends of the string rather than the extension suffix.
    stem, _ = os.path.splitext(archive_file)
    decompressed_file = os.path.basename(stem)
    working_dir = os.getcwd()
    destination_abspath = os.path.join(working_dir, decompressed_file)
    if gzip_support():
        import gzip
        f_in = gzip.open(archive_file, "rb")
        try:
            with open(destination_abspath, "wb") as f_out:
                # Stream in chunks instead of reading everything at once.
                shutil.copyfileobj(f_in, f_out)
        finally:
            f_in.close()
    else:
        _system_gunzip(archive_file)
    return destination_abspath
def _system_gunzip(archive_file):
    """Decompress a file with the system ``gzip -d`` command.

    The archive is copied into the current working directory and
    ``gzip -d`` is run on the copy, so the original file is left untouched.

    Args:
        archive_file (str): absolute path of the file to be decompressed

    Returns:
        str: path of the decompressed file in the current working directory
    """
    # splitext's root, not str.strip(ext): strip() removes characters from
    # both ends of the string rather than the extension suffix.
    stem, _ = os.path.splitext(archive_file)
    decompressed_file = os.path.basename(stem)
    working_dir = os.getcwd()
    destination_abspath = os.path.join(working_dir, decompressed_file)
    compressed_file = os.path.basename(archive_file)
    copy_path = os.path.join(working_dir, compressed_file)
    shutil.copy(archive_file, copy_path)
    # required=True, as for the other system tools in this module, so a
    # missing gzip is reported by `which` instead of failing on a None call.
    gzip = which("gzip", required=True)
    gzip.add_default_arg("-d")
    gzip(copy_path)
    return destination_abspath
def _unzip(archive_file):
"""Try to use Python's zipfile, but extract in the current working
directory instead of in-place.
If unavailable, search for 'unzip' executable on system and use instead
"""
Extract Zipfile, searching for unzip system executable
If unavailable, search for 'tar' executable on system and use instead
Args:
archive_file (str): absolute path of the file to be decompressed
"""
destination_abspath = os.getcwd()
exe = 'unzip'
arg = '-q'
if is_windows:
@ -59,21 +171,122 @@ def _unzip(archive_file):
unzip = which(exe, required=True)
unzip.add_default_arg(arg)
unzip(archive_file)
return destination_abspath
def decompressor_for(path, extension=None):
"""Get the appropriate decompressor for a path."""
if ((extension and re.match(r'\.?zip$', extension)) or
path.endswith('.zip')):
def _unZ(archive_file):
    """Decompress a ``.Z`` file: via 7z on Windows, via the system gzip
    helper everywhere else.

    Args:
        archive_file (str): absolute path of the file to be decompressed
    """
    decompress = _7zip if is_windows else _system_gunzip
    return decompress(archive_file)
def _lzma_decomp(archive_file):
    """Decompress lzma compressed files. Prefer Python native
    lzma module, but fall back on command line tooling when Python
    lzma support is unavailable. This is the xz command
    on Unix and 7z on Windows.

    Args:
        archive_file (str): absolute path of the file to be decompressed

    Returns:
        With native lzma support, the path of the decompressed file in the
        current working directory; otherwise whatever the fallback helper
        returns.
    """
    if not lzma_support():
        if is_windows:
            return _7zip(archive_file)
        return _xz(archive_file)

    import lzma  # novermin
    # splitext's root, not str.strip(ext): strip() removes characters from
    # both ends of the string rather than the extension suffix.
    stem, _ = os.path.splitext(archive_file)
    decompressed_file = os.path.basename(stem)
    archive_out = os.path.join(os.getcwd(), decompressed_file)
    with open(archive_out, 'wb') as ar:
        with lzma.open(archive_file) as lar:
            # Stream in chunks instead of reading everything at once.
            shutil.copyfileobj(lar, ar)
    # Report the output path like the fallback helpers do.
    return archive_out
def _xz(archive_file):
    """Decompress lzma compressed .xz files via xz command line
    tool. Available only on Unix

    The archive is copied into the current working directory and
    ``xz -d`` is run on the copy.

    Args:
        archive_file (str): absolute path of the file to be decompressed

    Returns:
        str: path of the decompressed file in the current working directory

    Raises:
        RuntimeError: when called on Windows
    """
    if is_windows:
        raise RuntimeError('XZ tool unavailable on Windows')
    # splitext's root, not str.strip(ext): strip() removes characters from
    # both ends of the string rather than the extension suffix.
    stem, _ = os.path.splitext(archive_file)
    decompressed_file = os.path.basename(stem)
    working_dir = os.getcwd()
    destination_abspath = os.path.join(working_dir, decompressed_file)
    compressed_file = os.path.basename(archive_file)
    copy_path = os.path.join(working_dir, compressed_file)
    shutil.copy(archive_file, copy_path)
    xz = which('xz', required=True)
    xz.add_default_arg('-d')
    xz(copy_path)
    return destination_abspath
def _7zip(archive_file):
    """Unpack/decompress with 7z executable
    7z is able to handle a number file extensions however
    it may not be available on system.

    Without 7z, Windows users with certain versions of Python may
    be unable to extract .xz files, and all Windows users will be unable
    to extract .Z files. If we cannot find 7z either externally or a
    Spack installed copy, we fail, but inform the user that 7z can
    be installed via `spack install 7zip`

    Args:
        archive_file (str): absolute path of file to be unarchived

    Returns:
        str: base name of ``archive_file`` with its last extension removed

    Raises:
        CommandNotFoundError: when no ``7z`` executable is found
    """
    # splitext's root, not str.strip(ext): strip() removes characters from
    # both ends of the string rather than the extension suffix.
    stem, ext = os.path.splitext(archive_file)
    outfile = os.path.basename(stem)
    _7z = which('7z')
    if not _7z:
        raise CommandNotFoundError(
            "7z unavailable, "
            "unable to extract %s files. 7z can be installed via Spack" % ext)
    _7z.add_default_arg('e')
    _7z(archive_file)
    return outfile
def decompressor_for(path, ext=None):
    """Returns a function pointer to appropriate decompression
    algorithm based on extension type.

    Args:
        path (str): path of the archive file requiring decompression
        ext (str): Extension of archive file; derived from ``path`` via
            ``extension`` when not given

    Returns:
        One of the module's decompression helpers; each takes the archive
        path as its only argument.

    Raises:
        CommandNotFoundError: when the extension is not an allowed
            archive type
    """
    if not ext:
        ext = extension(path)

    if not allowed_archive(ext):
        raise CommandNotFoundError(
            "Cannot extract archive, "
            "unrecognized file extension: '%s'" % ext)

    if re.match(r'\.?zip$', ext) or path.endswith('.zip'):
        return _unzip

    # re.match anchors at the start of ``ext``, so compound extensions
    # such as 'tar.gz' fall through to the tar handling below.
    if re.match(r'gz', ext):
        return _gunzip

    if re.match(r'bz2', ext):
        return _bunzip2

    # Python does not have native support
    # of any kind for .Z files. In these cases,
    # we rely on external tools such as tar,
    # 7z, or uncompressZ
    if re.match(r'Z$', ext):
        return _unZ

    # Python and platform may not have support for lzma
    # compression. If no lzma support, use tools available on systems
    # 7zip on Windows and the xz tool on Unix systems.
    if re.match(r'xz', ext):
        return _lzma_decomp

    if ('xz' in ext or 'Z' in ext) and is_windows:
        return _7zip

    return _untar
def strip_extension(path):

View file

@ -0,0 +1,16 @@
diff --git a/CPP/7zip/UI/Common/Update.cpp b/CPP/7zip/UI/Common/Update.prev.cpp
index 451b12c..3be3781 100644
--- a/CPP/7zip/UI/Common/Update.cpp
+++ b/CPP/7zip/UI/Common/Update.prev.cpp
@@ -1075,11 +1075,7 @@ static HRESULT EnumerateInArchiveItems(
#if defined(_WIN32) && !defined(UNDER_CE)
-#pragma push_macro("WIN_NOEXCEPT")
-#undef WIN_NOEXCEPT
-#define WIN_NOEXCEPT
#include <MAPI.h>
-#pragma pop_macro("WIN_NOEXCEPT")
#endif

View file

@ -0,0 +1,94 @@
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import glob
import os
import platform
import re
import shutil
from spack.package import *
class _7zip(SourceforgePackage, Package):
    """7-Zip is a file archiver for Windows"""

    homepage = "https://sourceforge.net/projects/sevenzip"
    sourceforge_mirror_path = 'sevenzip/files/7z2107-src.tar.xz'

    executables = ['7z']

    version('21.07', sha256='213d594407cb8efcba36610b152ca4921eda14163310b43903d13e68313e1e39')

    variant('link_type', default='shared',
            description='build shared and/or static libraries',
            values=('static', 'shared'), multi=True)

    phases = ['build', 'install']

    conflicts('platform=linux')
    conflicts('platform=darwin')
    conflicts('platform=cray')

    # TODO: Patch on WinSDK version 10.0.20348.0 when SDK is introduced to Spack
    # This patch solves a known bug in that SDK version on the 7zip side
    # right now patch for all versions to prevent build errors
    patch('noexcept_typedef.patch', when='platform=windows')

    @classmethod
    def determine_version(cls, exe):
        """Return the version reported by ``exe --help``, or None if the
        output contains no '7-Zip <major>.<minor>' banner."""
        output = Executable(exe)('--help', output=str, error=str)
        # The dot is escaped so only a literal '.' separates the version
        # components.
        match = re.search(r'7-Zip ([0-9][0-9]*\.[0-9][0-9])', output)
        return match.group(1) if match else None

    def url_version(self, version):
        """Return ``version`` with its dots removed and prefixed by '7z'
        (e.g. 21.07 -> 7z2107)."""
        ver_str = str(version).replace('.', '')
        return '7z' + ver_str

    @property
    def _7z_src_dir(self):
        """Path of the CPP/7zip directory inside the staged sources."""
        return os.path.join(self.stage.source_path, 'CPP', '7zip')

    @property
    def plat_arch(self):
        """
        String referencing platform architecture
        filtered through 7zip's Windows build file
        """
        arch = platform.machine()
        if arch.lower() == 'amd64':
            arch = 'x64'
        elif arch.lower() == 'i386':
            arch = 'x86'
        return arch

    def is_64bit(self):
        """Return True when ``platform.machine()`` ends with '64'."""
        return platform.machine().endswith('64')

    def build(self, spec, prefix):
        """Run nmake in the 7zip source directory with the platform and
        link-type settings derived from the spec."""
        link_type = '1' if 'static' in spec.variants['link_type'].value else '0'
        nmake_args = ['PLATFORM=%s' % self.plat_arch,
                      'MY_STATIC_LINK=%s' % link_type,
                      'NEW_COMPILER=1']
        # 7zips makefile is configured in such as way that if this value is set
        # compiler paths with spaces are incorrectly parsed. Compiler will be
        # inferred from VCVARs on Windows
        os.environ.pop('CC', None)
        with working_dir(self._7z_src_dir):
            nmake(*nmake_args)

    def install(self, spec, prefix):
        """7Zip exports no install target so we must hand install"""
        arch_prefix = 'x64' if self.is_64bit() else 'x86'
        path_roots = ['Bundles', 'UI']
        exts = ['*.exe', '*.dll']
        with working_dir(self._7z_src_dir):
            for root in path_roots:
                pth = os.path.join(root, '*', arch_prefix)
                for ext in exts:
                    # Copy every built artifact matching the pattern
                    # directly into the install prefix.
                    for built in glob.glob(os.path.join(pth, ext)):
                        shutil.copy(
                            os.path.join(self._7z_src_dir, built),
                            os.path.join(prefix, os.path.basename(built)))