commands: add resource stats to spack url stats (#13205)

`spack url stats` now also looks at packages' resources when outputting
statistics.

Example:

```
$ spack url stats
==> URL stats for 3531 packages:
--------------------------------------------------------------
stat                    versions        %   resources        %
--------------------------------------------------------------
url                         8335    89.3%         339    89.0%
    schemes
        https               6489    69.5%          93    24.4%
        ftp                   32     0.3%           8     2.1%
        http                1763    18.9%         237    62.2%
        file                  51     0.5%           1     0.3%
    checksums
        md5                   26     0.3%           0     0.0%
        sha256              8306    89.0%         336    88.2%
        no checksum            3     0.0%           3     0.8%
--------------------------------------------------------------
go                             1     0.0%           0     0.0%
--------------------------------------------------------------
hg                             7     0.1%           0     0.0%
--------------------------------------------------------------
no code                        4     0.0%           0     0.0%
--------------------------------------------------------------
svn                            4     0.0%          16     4.2%
--------------------------------------------------------------
git                          981    10.5%          26     6.8%
    branch                   442     4.7%           4     1.0%
    commit                   362     3.9%          14     3.7%
    no ref                    36     0.4%           2     0.5%
    tag                      141     1.5%           6     1.6%
--------------------------------------------------------------
```
This commit is contained in:
Todd Gamblin 2019-10-14 08:58:01 -07:00 committed by GitHub
parent 8a1e5f68f4
commit 385d2bf3fa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 92 additions and 64 deletions

View file

@@ -239,80 +239,104 @@ def url_summary(args):
def url_stats(args): def url_stats(args):
stats = {} # stats about fetchers in packages. class UrlStats(object):
nvers = 0 # total number of versions def __init__(self):
npkgs = 0 # total number of packages self.total = 0
self.schemes = defaultdict(lambda: 0)
self.checksums = defaultdict(lambda: 0)
self.url_type = defaultdict(lambda: 0)
self.git_type = defaultdict(lambda: 0)
def inc(fstype, category, attr=None): def add(self, fetcher):
"""Increment statistics in the stats dict.""" self.total += 1
categories = stats.setdefault(fstype, {})
if attr: url_type = fetcher.url_attr
cat_stats = categories.setdefault(category, {}) self.url_type[url_type or 'no code'] += 1
val = cat_stats.setdefault(attr, 0)
stats[fstype][category][attr] = val + 1 if url_type == 'url':
digest = getattr(fetcher, 'digest', None)
if digest:
algo = crypto.hash_algo_for_digest(digest)
else: else:
val = categories.setdefault(category, 0) algo = 'no checksum'
stats[fstype][category] = val + 1 self.checksums[algo] += 1
# over all packages
for pkg in spack.repo.path.all_packages():
npkgs += 1
if not pkg.has_code:
for _ in pkg.versions:
inc('No code', 'total')
nvers += 1
continue
# look at each version
for v, args in pkg.versions.items():
# figure out what type of fetcher it is
fetcher = fs.for_package_version(pkg, v)
nvers += 1
fstype = fetcher.url_attr
inc(fstype, 'total')
# put some special stats in for particular types of fetchers.
if fstype == 'git':
if 'commit' in args:
inc('git', 'security', 'commit')
else:
inc('git', 'security', 'no commit')
elif fstype == 'url':
for h in crypto.hashes:
if h in args:
inc('url', 'checksums', h)
break
else:
if 'checksum' in args:
h = crypto.hash_algo_for_digest(args['checksum'])
inc('url', 'checksums', h)
else:
inc('url', 'checksums', 'no checksum')
# parse out the URL scheme (https/http/ftp/etc.) # parse out the URL scheme (https/http/ftp/etc.)
urlinfo = urlparse(fetcher.url) urlinfo = urlparse(fetcher.url)
inc('url', 'schemes', urlinfo.scheme) self.schemes[urlinfo.scheme] += 1
elif url_type == 'git':
if getattr(fetcher, 'commit', None):
self.git_type['commit'] += 1
elif getattr(fetcher, 'branch', None):
self.git_type['branch'] += 1
elif getattr(fetcher, 'tag', None):
self.git_type['tag'] += 1
else:
self.git_type['no ref'] += 1
npkgs = 0
version_stats = UrlStats()
resource_stats = UrlStats()
for pkg in spack.repo.path.all_packages():
npkgs += 1
for v, args in pkg.versions.items():
fetcher = fs.for_package_version(pkg, v)
version_stats.add(fetcher)
for _, resources in pkg.resources.items():
for resource in resources:
resource_stats.add(resource.fetcher)
# print a nice summary table # print a nice summary table
tty.msg("%d total versions for %d packages:" % (nvers, npkgs)) tty.msg("URL stats for %d packages:" % npkgs)
line_width = 36
print("-" * line_width)
for fetcher, fetcher_stats in sorted(stats.items(), reverse=True):
fs_total = fetcher_stats['total']
fs_pct = float(fs_total) / nvers * 100
print("%-22s%5d%8.1f%%" % (fetcher, fs_total, fs_pct))
for category, cat_stats in sorted(fetcher_stats.items(), reverse=True): def print_line():
if category == 'total': print("-" * 62)
continue
print(" %s" % category)
for name, number in sorted(cat_stats.items(), reverse=True): def print_stat(indent, name, stat_name=None):
pct = float(number) / fs_total * 100 width = 20 - indent
print(" %-18s%5d%8.1f%%" % (name, number, pct)) fmt = " " * indent
print("-" * line_width) fmt += "%%-%ds" % width
if stat_name is None:
print(fmt % name)
else:
fmt += "%12d%8.1f%%%12d%8.1f%%"
v = getattr(version_stats, stat_name).get(name, 0)
r = getattr(resource_stats, stat_name).get(name, 0)
print(fmt % (name,
v, v / version_stats.total * 100,
r, r / resource_stats.total * 100))
print_line()
print("%-20s%12s%9s%12s%9s" % ("stat", "versions", "%", "resources", "%"))
print_line()
print_stat(0, "url", "url_type")
print_stat(4, "schemes")
schemes = set(version_stats.schemes) | set(resource_stats.schemes)
for scheme in schemes:
print_stat(8, scheme, "schemes")
print_stat(4, "checksums")
checksums = set(version_stats.checksums) | set(resource_stats.checksums)
for checksum in checksums:
print_stat(8, checksum, "checksums")
print_line()
types = set(version_stats.url_type) | set(resource_stats.url_type)
types -= set(["url", "git"])
for url_type in sorted(types):
print_stat(0, url_type, "url_type")
print_line()
print_stat(0, "git", "url_type")
git_types = set(version_stats.git_type) | set(resource_stats.git_type)
for git_type in sorted(git_types):
print_stat(4, git_type, "git_type")
print_line()
def print_name_and_version(url): def print_name_and_version(url):

View file

@@ -131,4 +131,8 @@ def test_url_stats(capfd):
output = url('stats') output = url('stats')
npkgs = '%d packages' % len(spack.repo.all_package_names()) npkgs = '%d packages' % len(spack.repo.all_package_names())
assert npkgs in output assert npkgs in output
assert 'total versions' in output assert 'url' in output
assert 'git' in output
assert 'schemes' in output
assert 'versions' in output
assert 'resources' in output