From 8fd94e31142e6eb49cf846e3dcedda17eba87cf1 Mon Sep 17 00:00:00 2001 From: Robert Blackwell Date: Thu, 28 Oct 2021 10:39:16 -0400 Subject: [PATCH] YamlFilesystemView: improve file removal performance via batching (#24355) * Drastically improve YamlFilesystemView file removal via batching The `remove_file` routine has to check if the file is owned by multiple packages, so it doesn't remove necessary files. This is done by the `get_all_specs` routine, which walks the entire package tree. With large numbers of packages on shared file systems, this can take seconds per file tree traversal, which adds up extremely quickly. For example, a single deactivate of a largish python package in our software stack on GPFS took approximately 40 minutes. This patch simply replaces `remove_file` with a batch `remove_files` routine. This routine removes a list of files rather than a single file, requiring only one traversal per batch. In practice this means a package can be removed in seconds time, rather than potentially hours, essentially a ~100x speedup (ignoring initial deactivation logic, which takes about 3 minutes in our test setup). --- lib/spack/spack/build_systems/python.py | 6 ++++- lib/spack/spack/filesystem_view.py | 31 ++++++++++++++----------- lib/spack/spack/package.py | 3 +-- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/lib/spack/spack/build_systems/python.py b/lib/spack/spack/build_systems/python.py index fd772acfbb..a308e77cb9 100644 --- a/lib/spack/spack/build_systems/python.py +++ b/lib/spack/spack/build_systems/python.py @@ -393,11 +393,15 @@ def remove_files_from_view(self, view, merge_map): self.spec ) ) + + to_remove = [] for src, dst in merge_map.items(): if ignore_namespace and namespace_init(dst): continue if global_view or not path_contains_subdirectory(src, bin_dir): - view.remove_file(src, dst) + to_remove.append(dst) else: os.remove(dst) + + view.remove_files(to_remove) diff --git a/lib/spack/spack/filesystem_view.py b/lib/spack/spack/filesystem_view.py index 27a8708596..659ae550ae 100644 --- a/lib/spack/spack/filesystem_view.py +++ b/lib/spack/spack/filesystem_view.py @@ -442,11 +442,7 @@ def unmerge(self, spec, ignore=None): # now unmerge the directory tree tree.unmerge_directories(view_dst, ignore_file) - def remove_file(self, src, dest): - if not os.path.lexists(dest): - tty.warn("Tried to remove %s which does not exist" % dest) - return - + def remove_files(self, files): def needs_file(spec, file): # convert the file we want to remove to a source in this spec projection = self.get_projection_for_spec(spec) @@ -465,16 +461,23 @@ def needs_file(spec, file): manifest = {} return test_path in manifest - # remove if dest is not owned by any other package in the view - # This will only be false if two packages are merged into a prefix - # and have a conflicting file + specs = self.get_all_specs() - # check all specs for whether they own the file. That include the spec - # we are currently removing, as we remove files before unlinking the - # metadata directory. - if len([s for s in self.get_all_specs() - if needs_file(s, dest)]) <= 1: - os.remove(dest) + for file in files: + if not os.path.lexists(file): + tty.warn("Tried to remove %s which does not exist" % file) + continue + + # remove if file is not owned by any other package in the view + # This will only be false if two packages are merged into a prefix + # and have a conflicting file + + # check all specs for whether they own the file. That include the spec + # we are currently removing, as we remove files before unlinking the + # metadata directory. + if len([s for s in specs if needs_file(s, file)]) <= 1: + tty.debug("Removing file " + file) + os.remove(file) def check_added(self, spec): assert spec.concrete diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py index 00ba8a38d5..c22898c5a3 100644 --- a/lib/spack/spack/package.py +++ b/lib/spack/spack/package.py @@ -469,8 +469,7 @@ def remove_files_from_view(self, view, merge_map): example if two packages include the same file, it should only be removed when both packages are removed. """ - for src, dst in merge_map.items(): - view.remove_file(src, dst) + view.remove_files(merge_map.values()) def test_log_pathname(test_stage, spec):