YamlFilesystemView: improve file removal performance via batching (#24355)

* Drastically improve YamlFilesystemView file removal via batching

The `remove_file` routine has to check if the file is owned by multiple packages, so it doesn't
remove necessary files. This is done by the `get_all_specs` routine, which walks the entire
package tree. With large numbers of packages on shared file systems, this can take seconds
per file tree traversal, which adds up extremely quickly. For example, a single deactivate
of a largish python package in our software stack on GPFS took approximately 40 minutes.

This patch simply replaces `remove_file` with a batch `remove_files` routine. This routine
removes a list of files rather than a single file, requiring only one traversal per batch. In
practice this means a package can be removed in seconds time, rather than potentially hours,
essentially a ~100x speedup (ignoring initial deactivation logic, which takes about 3 minutes
in our test setup).
This commit is contained in:
Robert Blackwell 2021-10-28 10:39:16 -04:00 committed by GitHub
parent c13f915735
commit 8fd94e3114
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 23 additions and 17 deletions

View file

@ -393,11 +393,15 @@ def remove_files_from_view(self, view, merge_map):
self.spec
)
)
to_remove = []
for src, dst in merge_map.items():
if ignore_namespace and namespace_init(dst):
continue
if global_view or not path_contains_subdirectory(src, bin_dir):
view.remove_file(src, dst)
to_remove.append(dst)
else:
os.remove(dst)
view.remove_files(to_remove)

View file

@ -442,11 +442,7 @@ def unmerge(self, spec, ignore=None):
# now unmerge the directory tree
tree.unmerge_directories(view_dst, ignore_file)
def remove_file(self, src, dest):
if not os.path.lexists(dest):
tty.warn("Tried to remove %s which does not exist" % dest)
return
def remove_files(self, files):
def needs_file(spec, file):
# convert the file we want to remove to a source in this spec
projection = self.get_projection_for_spec(spec)
@ -465,16 +461,23 @@ def needs_file(spec, file):
manifest = {}
return test_path in manifest
# remove if dest is not owned by any other package in the view
# This will only be false if two packages are merged into a prefix
# and have a conflicting file
specs = self.get_all_specs()
# check all specs for whether they own the file. That include the spec
# we are currently removing, as we remove files before unlinking the
# metadata directory.
if len([s for s in self.get_all_specs()
if needs_file(s, dest)]) <= 1:
os.remove(dest)
for file in files:
if not os.path.lexists(file):
tty.warn("Tried to remove %s which does not exist" % file)
continue
# remove if file is not owned by any other package in the view
# This will only be false if two packages are merged into a prefix
# and have a conflicting file
# check all specs for whether they own the file. That include the spec
# we are currently removing, as we remove files before unlinking the
# metadata directory.
if len([s for s in specs if needs_file(s, file)]) <= 1:
tty.debug("Removing file " + file)
os.remove(file)
def check_added(self, spec):
assert spec.concrete

View file

@ -469,8 +469,7 @@ def remove_files_from_view(self, view, merge_map):
example if two packages include the same file, it should only be
removed when both packages are removed.
"""
for src, dst in merge_map.items():
view.remove_file(src, dst)
view.remove_files(merge_map.values())
def test_log_pathname(test_stage, spec):