YamlFilesystemView: improve file removal performance via batching (#24355)
* Drastically improve YamlFilesystemView file removal via batching The `remove_file` routine has to check if the file is owned by multiple packages, so it doesn't remove necessary files. This is done by the `get_all_specs` routine, which walks the entire package tree. With large numbers of packages on shared file systems, this can take seconds per file tree traversal, which adds up extremely quickly. For example, a single deactivate of a largish python package in our software stack on GPFS took approximately 40 minutes. This patch simply replaces `remove_file` with a batch `remove_files` routine. This routine removes a list of files rather than a single file, requiring only one traversal per batch. In practice this means a package can be removed in seconds time, rather than potentially hours, essentially a ~100x speedup (ignoring initial deactivation logic, which takes about 3 minutes in our test setup).
This commit is contained in:
parent
c13f915735
commit
8fd94e3114
3 changed files with 23 additions and 17 deletions
|
@ -393,11 +393,15 @@ def remove_files_from_view(self, view, merge_map):
|
||||||
self.spec
|
self.spec
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
to_remove = []
|
||||||
for src, dst in merge_map.items():
|
for src, dst in merge_map.items():
|
||||||
if ignore_namespace and namespace_init(dst):
|
if ignore_namespace and namespace_init(dst):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if global_view or not path_contains_subdirectory(src, bin_dir):
|
if global_view or not path_contains_subdirectory(src, bin_dir):
|
||||||
view.remove_file(src, dst)
|
to_remove.append(dst)
|
||||||
else:
|
else:
|
||||||
os.remove(dst)
|
os.remove(dst)
|
||||||
|
|
||||||
|
view.remove_files(to_remove)
|
||||||
|
|
|
@ -442,11 +442,7 @@ def unmerge(self, spec, ignore=None):
|
||||||
# now unmerge the directory tree
|
# now unmerge the directory tree
|
||||||
tree.unmerge_directories(view_dst, ignore_file)
|
tree.unmerge_directories(view_dst, ignore_file)
|
||||||
|
|
||||||
def remove_file(self, src, dest):
|
def remove_files(self, files):
|
||||||
if not os.path.lexists(dest):
|
|
||||||
tty.warn("Tried to remove %s which does not exist" % dest)
|
|
||||||
return
|
|
||||||
|
|
||||||
def needs_file(spec, file):
|
def needs_file(spec, file):
|
||||||
# convert the file we want to remove to a source in this spec
|
# convert the file we want to remove to a source in this spec
|
||||||
projection = self.get_projection_for_spec(spec)
|
projection = self.get_projection_for_spec(spec)
|
||||||
|
@ -465,16 +461,23 @@ def needs_file(spec, file):
|
||||||
manifest = {}
|
manifest = {}
|
||||||
return test_path in manifest
|
return test_path in manifest
|
||||||
|
|
||||||
# remove if dest is not owned by any other package in the view
|
specs = self.get_all_specs()
|
||||||
# This will only be false if two packages are merged into a prefix
|
|
||||||
# and have a conflicting file
|
|
||||||
|
|
||||||
# check all specs for whether they own the file. That include the spec
|
for file in files:
|
||||||
# we are currently removing, as we remove files before unlinking the
|
if not os.path.lexists(file):
|
||||||
# metadata directory.
|
tty.warn("Tried to remove %s which does not exist" % file)
|
||||||
if len([s for s in self.get_all_specs()
|
continue
|
||||||
if needs_file(s, dest)]) <= 1:
|
|
||||||
os.remove(dest)
|
# remove if file is not owned by any other package in the view
|
||||||
|
# This will only be false if two packages are merged into a prefix
|
||||||
|
# and have a conflicting file
|
||||||
|
|
||||||
|
# check all specs for whether they own the file. That include the spec
|
||||||
|
# we are currently removing, as we remove files before unlinking the
|
||||||
|
# metadata directory.
|
||||||
|
if len([s for s in specs if needs_file(s, file)]) <= 1:
|
||||||
|
tty.debug("Removing file " + file)
|
||||||
|
os.remove(file)
|
||||||
|
|
||||||
def check_added(self, spec):
|
def check_added(self, spec):
|
||||||
assert spec.concrete
|
assert spec.concrete
|
||||||
|
|
|
@ -469,8 +469,7 @@ def remove_files_from_view(self, view, merge_map):
|
||||||
example if two packages include the same file, it should only be
|
example if two packages include the same file, it should only be
|
||||||
removed when both packages are removed.
|
removed when both packages are removed.
|
||||||
"""
|
"""
|
||||||
for src, dst in merge_map.items():
|
view.remove_files(merge_map.values())
|
||||||
view.remove_file(src, dst)
|
|
||||||
|
|
||||||
|
|
||||||
def test_log_pathname(test_stage, spec):
|
def test_log_pathname(test_stage, spec):
|
||||||
|
|
Loading…
Reference in a new issue