YamlFilesystemView: improve file removal performance via batching (#24355)
* Drastically improve YamlFilesystemView file removal via batching The `remove_file` routine has to check if the file is owned by multiple packages, so it doesn't remove necessary files. This is done by the `get_all_specs` routine, which walks the entire package tree. With large numbers of packages on shared file systems, this can take seconds per file tree traversal, which adds up extremely quickly. For example, a single deactivate of a largish python package in our software stack on GPFS took approximately 40 minutes. This patch simply replaces `remove_file` with a batch `remove_files` routine. This routine removes a list of files rather than a single file, requiring only one traversal per batch. In practice this means a package can be removed in seconds time, rather than potentially hours, essentially a ~100x speedup (ignoring initial deactivation logic, which takes about 3 minutes in our test setup).
This commit is contained in:
parent
c13f915735
commit
8fd94e3114
3 changed files with 23 additions and 17 deletions
|
@ -393,11 +393,15 @@ def remove_files_from_view(self, view, merge_map):
|
|||
self.spec
|
||||
)
|
||||
)
|
||||
|
||||
to_remove = []
|
||||
for src, dst in merge_map.items():
|
||||
if ignore_namespace and namespace_init(dst):
|
||||
continue
|
||||
|
||||
if global_view or not path_contains_subdirectory(src, bin_dir):
|
||||
view.remove_file(src, dst)
|
||||
to_remove.append(dst)
|
||||
else:
|
||||
os.remove(dst)
|
||||
|
||||
view.remove_files(to_remove)
|
||||
|
|
|
@ -442,11 +442,7 @@ def unmerge(self, spec, ignore=None):
|
|||
# now unmerge the directory tree
|
||||
tree.unmerge_directories(view_dst, ignore_file)
|
||||
|
||||
def remove_file(self, src, dest):
|
||||
if not os.path.lexists(dest):
|
||||
tty.warn("Tried to remove %s which does not exist" % dest)
|
||||
return
|
||||
|
||||
def remove_files(self, files):
|
||||
def needs_file(spec, file):
|
||||
# convert the file we want to remove to a source in this spec
|
||||
projection = self.get_projection_for_spec(spec)
|
||||
|
@ -465,16 +461,23 @@ def needs_file(spec, file):
|
|||
manifest = {}
|
||||
return test_path in manifest
|
||||
|
||||
# remove if dest is not owned by any other package in the view
|
||||
# This will only be false if two packages are merged into a prefix
|
||||
# and have a conflicting file
|
||||
specs = self.get_all_specs()
|
||||
|
||||
# check all specs for whether they own the file. That include the spec
|
||||
# we are currently removing, as we remove files before unlinking the
|
||||
# metadata directory.
|
||||
if len([s for s in self.get_all_specs()
|
||||
if needs_file(s, dest)]) <= 1:
|
||||
os.remove(dest)
|
||||
for file in files:
|
||||
if not os.path.lexists(file):
|
||||
tty.warn("Tried to remove %s which does not exist" % file)
|
||||
continue
|
||||
|
||||
# remove if file is not owned by any other package in the view
|
||||
# This will only be false if two packages are merged into a prefix
|
||||
# and have a conflicting file
|
||||
|
||||
# check all specs for whether they own the file. That include the spec
|
||||
# we are currently removing, as we remove files before unlinking the
|
||||
# metadata directory.
|
||||
if len([s for s in specs if needs_file(s, file)]) <= 1:
|
||||
tty.debug("Removing file " + file)
|
||||
os.remove(file)
|
||||
|
||||
def check_added(self, spec):
|
||||
assert spec.concrete
|
||||
|
|
|
@ -469,8 +469,7 @@ def remove_files_from_view(self, view, merge_map):
|
|||
example if two packages include the same file, it should only be
|
||||
removed when both packages are removed.
|
||||
"""
|
||||
for src, dst in merge_map.items():
|
||||
view.remove_file(src, dst)
|
||||
view.remove_files(merge_map.values())
|
||||
|
||||
|
||||
def test_log_pathname(test_stage, spec):
|
||||
|
|
Loading…
Reference in a new issue