'spack urls' debugging command, more consistent URL extrapolation.

- `spack urls` inspects all URLs in packages and prints them in color to show how they are parsed.
- URL extrapolation test added.
- Extrapolation is more consistent now.
- Extrapolation handles more complex URLs.
- More test cases for extrapolation.
This commit is contained in:
Todd Gamblin 2014-11-08 22:08:15 -08:00
parent 57076f6ca4
commit 1da5d12bdd
4 changed files with 315 additions and 84 deletions

View file

@ -0,0 +1,58 @@
##############################################################################
# Copyright (c) 2013, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory.
#
# This file is part of Spack.
# Written by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
# LLNL-CODE-647188
#
# For details, see https://scalability-llnl.github.io/spack
# Please also see the LICENSE file for our notice and the LGPL.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License (as published by
# the Free Software Foundation) version 2.1 dated February 1999.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
# conditions of the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##############################################################################
import sys
import spack
import spack.url
description = "Inspect urls used by packages in spack."
def setup_parser(subparser):
    """Register the options of the ``spack urls`` command on ``subparser``.

    Two boolean flags are added:

    ``-c`` / ``--color``
        Color the parsed version and name of each URL.
    ``-e`` / ``--extrapolation``
        Additionally color the occurrences used for extrapolation.
    """
    subparser.add_argument(
        '-c', '--color', action='store_true',
        help="Color the parsed version and name in the urls shown. "
             "Version will be cyan, name red.")
    subparser.add_argument(
        '-e', '--extrapolation', action='store_true',
        # The trailing space matters: adjacent literals are concatenated,
        # and without it the help read "as well.Additional versions ...".
        help="Color the versions used for extrapolation as well. "
             "Additional versions are green, names magenta.")
def urls(parser, args):
    """Print, in sorted order, every distinct URL declared by known packages.

    For each package returned by ``spack.db.all_packages()`` this collects
    the ``url`` attribute of the package class (if set) and the ``url``
    entry of each per-version parameter dict (if set).  When ``args.color``
    or ``args.extrapolation`` is truthy each URL is passed through
    ``spack.url.color_url`` before printing; otherwise it is printed as is.

    ``parser`` is not used by this command.
    """
    found = set()
    for pkg in spack.db.all_packages():
        # Class-level URL first, then any per-version overrides.
        candidates = [getattr(pkg.__class__, 'url', None)]
        candidates.extend(
            params.get('url', None) for params in pkg.versions.values())
        found.update(candidate for candidate in candidates if candidate)

    for link in sorted(found):
        if args.color or args.extrapolation:
            line = spack.url.color_url(
                link, subs=args.extrapolation, errors=True)
        else:
            line = link
        # Single-argument parenthesized print behaves the same on Python 2.
        print(line)

View file

@ -32,80 +32,69 @@
from spack.test.mock_packages_test import *
class UrlExtrapolateTest(unittest.TestCase):
    # Exercises url.substitute_version() on representative download URLs.
    # These tests work purely on strings, so no mock package database is
    # needed.

    def check_url(self, base, version, new_url):
        """Assert that substituting ``version`` into ``base`` gives ``new_url``."""
        self.assertEqual(url.substitute_version(base, version), new_url)

    def test_libelf_version(self):
        base = "http://www.mr511.de/software/libelf-0.8.13.tar.gz"
        self.check_url(base, '0.8.13', base)
        self.check_url(base, '0.8.12', "http://www.mr511.de/software/libelf-0.8.12.tar.gz")
        self.check_url(base, '0.3.1', "http://www.mr511.de/software/libelf-0.3.1.tar.gz")
        self.check_url(base, '1.3.1b', "http://www.mr511.de/software/libelf-1.3.1b.tar.gz")

    def test_libdwarf_version(self):
        base = "http://www.prevanders.net/libdwarf-20130729.tar.gz"
        self.check_url(base, '20130729', base)
        self.check_url(base, '8.12', "http://www.prevanders.net/libdwarf-8.12.tar.gz")

    def test_dyninst_version(self):
        # Dyninst has a version twice in the URL.
        base = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz"
        self.check_url(base, '8.1.2', base)
        self.check_url(base, '8.2',
                       "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.2/DyninstAPI-8.2.tgz")
        self.check_url(base, '8.3.1',
                       "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.3.1/DyninstAPI-8.3.1.tgz")

    def test_partial_version_prefix(self):
        # Test now with a partial prefix earlier in the URL -- this is
        # hard to figure out so Spack only substitutes the last
        # instance of the version.
        base = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1/DyninstAPI-8.1.2.tgz"
        self.check_url(base, '8.1.2', base)
        self.check_url(base, '8.1.4',
                       "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1/DyninstAPI-8.1.4.tgz")
        self.check_url(base, '8.2',
                       "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1/DyninstAPI-8.2.tgz")
        self.check_url(base, '8.3.1',
                       "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1/DyninstAPI-8.3.1.tgz")

    def test_scalasca_partial_version(self):
        # Note that this probably doesn't actually work, but sites are
        # inconsistent about their directory structure, so it's not
        # clear what is right.  This test is for consistency and to
        # document behavior.  If you figure out a good way to handle
        # this case, fix the tests too.
        #
        # This assertion used to appear twice, byte for byte; the
        # duplicate added no coverage and has been dropped.
        self.check_url('http://apps.fz-juelich.de/scalasca/releases/cube/4.3/dist/cube-4.3-TP1.tar.gz', '8.3.1',
                       'http://apps.fz-juelich.de/scalasca/releases/cube/4.3/dist/cube-8.3.1.tar.gz')

    def test_mpileaks_version(self):
        self.check_url('https://github.com/hpc/mpileaks/releases/download/v1.0/mpileaks-1.0.tar.gz', '2.1.3',
                       'https://github.com/hpc/mpileaks/releases/download/v2.1.3/mpileaks-2.1.3.tar.gz')

    def test_gcc(self):
        self.check_url('http://open-source-box.org/gcc/gcc-4.9.2/gcc-4.9.2.tar.bz2', '4.7',
                       'http://open-source-box.org/gcc/gcc-4.7/gcc-4.7.tar.bz2')
        self.check_url('http://open-source-box.org/gcc/gcc-4.4.7/gcc-4.4.7.tar.bz2', '4.4.7',
                       'http://open-source-box.org/gcc/gcc-4.4.7/gcc-4.4.7.tar.bz2')

View file

@ -295,3 +295,30 @@ def test_hdf5_version(self):
self.check(
'hdf5', '1.8.13',
'http://www.hdfgroup.org/ftp/HDF5/current/src/hdf5-1.8.13.tar.bz2')
def test_scalasca_version(self):
    # Scalasca "cube" tarballs: a plain dotted version and one carrying
    # a "-TP1" suffix.  Each case is handed to self.check() in turn.
    cases = (
        ('4.2.3',
         'http://apps.fz-juelich.de/scalasca/releases/cube/4.2/dist/cube-4.2.3.tar.gz'),
        ('4.3-TP1',
         'http://apps.fz-juelich.de/scalasca/releases/cube/4.3/dist/cube-4.3-TP1.tar.gz'),
    )
    for expected_version, archive in cases:
        self.check('cube', expected_version, archive)
def test_mpileaks_version(self):
    # GitHub release URLs, with and without a leading "v" on the
    # release directory; both are checked against version 1.0.
    archives = (
        'https://github.com/hpc/mpileaks/releases/download/v1.0/mpileaks-1.0.tar.gz',
        'https://github.com/hpc/mpileaks/releases/download/1.0/mpileaks-1.0.tar.gz',
    )
    for archive in archives:
        self.check('mpileaks', '1.0', archive)
def test_gcc_version(self):
    # Same version appears in the directory and in the tarball name.
    archive = 'http://open-source-box.org/gcc/gcc-4.4.7/gcc-4.4.7.tar.bz2'
    self.check('gcc', '4.4.7', archive)
def test_gcc_version_precedence(self):
    # Directory says 4.9.2 but the tarball says 4.4.7: prefer the
    # version in the tarball, not the one in the url prefix.
    archive = 'http://open-source-box.org/gcc/gcc-4.9.2/gcc-4.4.7.tar.bz2'
    self.check('gcc', '4.4.7', archive)

View file

@ -46,6 +46,9 @@
"""
import os
import re
from StringIO import StringIO
from llnl.util.tty.color import *
import spack.error
import spack.util.compression as comp
@ -112,6 +115,10 @@ def parse_version_offset(path):
# e.g. https://github.com/erlang/otp/tarball/OTP_R15B01 (erlang style)
(r'[-_](R\d+[AB]\d*(-\d+)?)', path),
# e.g., https://github.com/hpc/libcircle/releases/download/0.2.1-rc.1/libcircle-0.2.1-rc.1.tar.gz
# e.g., https://github.com/hpc/mpileaks/releases/download/v1.0/mpileaks-1.0.tar.gz
(r'github.com/[^/]+/[^/]+/releases/download/v?([^/]+)/.*$', path),
# e.g. boost_1_39_0
(r'((\d+_)+\d+)$', stem),
@ -126,7 +133,7 @@ def parse_version_offset(path):
(r'-((\d+\.)*\d+)$', stem),
# e.g. foobar-4.5.1b
(r'-((\d+\.)*\d+([a-z]|rc|RC)\d*)$', stem),
(r'-((\d+\.)*\d+\-?([a-z]|rc|RC|tp|TP)\d*)$', stem),
# e.g. foobar-4.5.0-beta1, or foobar-4.50-beta
(r'-((\d+\.)*\d+-beta(\d+)?)$', stem),
@ -153,11 +160,16 @@ def parse_version_offset(path):
(r'\.v(\d+[a-z]?)', stem)]
for i, vtype in enumerate(version_types):
regex, match_string = vtype[:2]
regex, match_string = vtype
match = re.search(regex, match_string)
if match and match.group(1) is not None:
version = match.group(1)
start = offset + match.start(1)
start = match.start(1)
# if we matched from the basename, then add offset in.
if match_string is stem:
start += offset
return version, start, len(version)
raise UndetectableVersionError(path)
@ -171,24 +183,46 @@ def parse_version(path):
return Version(ver)
def parse_name_offset(path, v=None):
    """Find the package name inside ``path``.

    Args:
        path: URL or archive file name to inspect.
        v: Version known to occur in ``path``.  When None it is obtained
           from ``parse_version(path)``.

    Returns:
        A ``(name, start, length)`` tuple, where ``start`` is the index of
        the name within ``path``.

    Raises:
        UndetectableNameError: if none of the known patterns matches.
    """
    if v is None:
        v = parse_version(path)

    # Strip archive extension
    path = comp.strip_extension(path)

    # Allow matching with either path or stem, as with the version.
    stem = os.path.basename(path)
    offset = len(path) - len(stem)

    # The version is literal text, not a pattern: escape it so that '.'
    # only matches a dot and other metacharacters cannot break the regex.
    ver = re.escape('%s' % v)

    name_types = [
        (r'/sourceforge/([^/]+)/', path),
        (r'github.com/[^/]+/[^/]+/releases/download/%s/(.*)-%s$' % (ver, ver), path),
        (r'/([^/]+)/(tarball|zipball)/', path),
        (r'/([^/]+)[_.-](bin|dist|stable|src|sources)[_.-]%s' % ver, path),
        (r'github.com/[^/]+/([^/]+)/archive', path),

        (r'([^/]+)[_.-]v?%s' % ver, stem),   # prefer the stem
        (r'([^/]+)%s' % ver, stem),

        (r'/([^/]+)[_.-]v?%s' % ver, path),  # accept the path if name is not in stem.
        (r'/([^/]+)%s' % ver, path),

        (r'^([^/]+)[_.-]v?%s' % ver, path),
        (r'^([^/]+)%s' % ver, path)]

    for regex, match_string in name_types:
        match = re.search(regex, match_string)
        if match:
            name = match.group(1)
            start = match.start(1)

            # if we matched from the basename, then add offset in.
            # (If stem and path happen to be the same object, offset is 0,
            # so the identity test is harmless.)
            if match_string is stem:
                start += offset

            return name, start, len(name)

    raise UndetectableNameError(path)
@ -204,7 +238,7 @@ def parse_name_and_version(path):
def insensitize(string):
"""Chagne upper and lowercase letters to be case insensitive in
"""Change upper and lowercase letters to be case insensitive in
the provided string. e.g., 'a' because '[Aa]', 'B' becomes
'[bB]', etc. Use for building regexes."""
def to_ins(match):
@ -213,12 +247,53 @@ def to_ins(match):
return re.sub(r'([a-zA-Z])', to_ins, string)
def substitute_version(path, new_version):
"""Given a URL or archive name, find the version in the path and substitute
the new version for it.
def cumsum(elts, init=0, fn=lambda x:x):
    """Return the running totals of ``fn(e)`` *before* each element.

    The i-th entry of the result is ``init`` plus ``fn`` applied to every
    element preceding position i, so the result has the same length as
    ``elts`` and the grand total is not included.  For example,
    ``cumsum(['ab', 'c'], 0, len)`` gives ``[0, 2]``.
    """
    running = init
    partials = []
    for item in elts:
        partials.append(running)
        running += fn(item)
    return partials
def substitution_offsets(path):
    """Return offsets for substituting versions and names in ``path``.

    It is a helper for substitute_version() and color_url().

    Returns:
        An 8-tuple ``(name, ns, nl, name_offsets, ver, vs, vl, ver_offsets)``:

        * ``name``, ``ns``, ``nl``: name found by parse_name_offset(), its
          start index and its length.
        * ``name_offsets``: start index of every (case-insensitive)
          occurrence of the name in the extension-stripped path.
        * ``ver``, ``vs``, ``vl``: version found by parse_version_offset(),
          its start index and its length.
        * ``ver_offsets``: start index of every occurrence of the version
          that lies outside the name occurrences.

        If no name can be detected the name fields are ``None, -1, -1, ()``
        and ``ver_offsets`` is ``(vs,)``.  If no version can be detected,
        every field takes its "not found" value.
    """
    # Get name and version offsets
    try:
        ver, vs, vl = parse_version_offset(path)
        name, ns, nl = parse_name_offset(path, ver)
    except UndetectableNameError:
        # The version was parsed before the name lookup failed.
        return (None, -1, -1, (), ver, vs, vl, (vs,))
    except UndetectableVersionError:
        return (None, -1, -1, (), None, -1, -1, ())

    # protect extensions like bz2 from getting inadvertently
    # considered versions.
    path = comp.strip_extension(path)

    # Construct a case-insensitive regular expression for the package name.
    # Name and version are literal text: escape them so metacharacters
    # ('.', '+', ...) neither over-match nor produce an invalid pattern.
    name_re = '(%s)' % insensitize(re.escape(name))
    ver_re = re.escape(ver)

    # Split the string apart by things that match the name so that if the
    # name contains numbers or things that look like versions, we don't
    # accidentally substitute them with a version.  Because name_re has one
    # capture group, odd indices of name_parts are the name occurrences.
    name_parts = re.split(name_re, path)
    offsets = cumsum(name_parts, 0, len)
    name_offsets = offsets[1::2]

    ver_offsets = []
    for i in range(0, len(name_parts), 2):
        # ver_re has no groups, so vparts holds only the text between
        # versions; each version starts where an odd-position piece would.
        vparts = re.split(ver_re, name_parts[i])
        voffsets = cumsum(vparts, offsets[i], len)
        ver_offsets.extend(voffsets[1::2])

    return (name, ns, nl, tuple(name_offsets),
            ver, vs, vl, tuple(ver_offsets))
def wildcard_version(path):
@ -228,13 +303,13 @@ def wildcard_version(path):
# Get name and version, so we can treat them specially
name, v = parse_name_and_version(path)
# Construct a case-insensitive regular expression for the package name.
name_re = '(%s)' % insensitize(name)
# protect extensions like bz2 from wildcarding.
ext = comp.extension(path)
path = comp.strip_extension(path)
# Construct a case-insensitive regular expression for the package name.
name_re = '(%s)' % insensitize(name)
# Split the string apart by things that match the name so that if the
# name contains numbers or things that look like versions, we don't
# catch them with the version wildcard.
@ -254,6 +329,88 @@ def wildcard_version(path):
return ''.join(name_parts) + '.' + ext
def substitute_version(path, new_version):
    """Return ``path`` with its version replaced by ``new_version``.

    Every occurrence of the detected version is replaced, *except* those
    that overlap with the package name.  The occurrences are the ones
    reported by substitution_offsets().

    Simple example::

      substitute_version('http://www.mr511.de/software/libelf-0.8.13.tar.gz', '2.9.3')
      ->'http://www.mr511.de/software/libelf-2.9.3.tar.gz'

    Complex examples::

      substitute_version('http://mvapich.cse.ohio-state.edu/download/mvapich/mv2/mvapich2-2.0.tar.gz', 2.1)
      -> 'http://mvapich.cse.ohio-state.edu/download/mvapich/mv2/mvapich2-2.1.tar.gz'

      # In this string, the "2" in mvapich2 is NOT replaced.
      substitute_version('http://mvapich.cse.ohio-state.edu/download/mvapich/mv2/mvapich2-2.tar.gz', 2.1)
      -> 'http://mvapich.cse.ohio-state.edu/download/mvapich/mv2/mvapich2-2.1.tar.gz'
    """
    (_name, _ns, _nl, _noffs,
     _ver, _vs, ver_len, ver_starts) = substitution_offsets(path)

    # Alternate untouched text with the new version, then add the tail.
    pieces = []
    cursor = 0
    for begin in ver_starts:
        pieces.append(path[cursor:begin])
        pieces.append(str(new_version))
        cursor = begin + ver_len
    pieces.append(path[cursor:])
    return ''.join(pieces)
def color_url(path, **kwargs):
    """Return ``path`` with color markup showing how Spack parsed it.

    Colors are:
       Cyan: The version found by parse_version_offset().
       Red:  The name found by parse_name_offset().

       Green:   Instances of version string substituted by substitute_version().
       Magenta: Instances of the name (protected from substitution).

    Optional args:
       errors=True    Append parse errors at end of string.
       subs=True      Color substitutions as well as parsed name/version.

    The markup is assembled as ``@``-codes and rendered by colorize().
    """
    show_errors = kwargs.get('errors', False)
    show_subs = kwargs.get('subs', False)

    (name, ns, nl, noffs,
     ver, vs, vl, voffs) = substitution_offsets(path)

    # Index of the last character of every extra name/version occurrence.
    sub_ends = set(no + nl - 1 for no in noffs)
    sub_ends.update(vo + vl - 1 for vo in voffs)

    # Each counter is bumped once when its span opens and once when it
    # closes: 0 means never seen, 1 means opened but never closed.
    name_marks = ver_marks = 0

    pieces = []
    for i, char in enumerate(path):
        # Opening markup; the parsed version/name win over substitutions.
        if i == vs:
            pieces.append('@c')
            ver_marks += 1
        elif i == ns:
            pieces.append('@r')
            name_marks += 1
        elif show_subs and i in voffs:
            pieces.append('@g')
        elif show_subs and i in noffs:
            pieces.append('@m')

        pieces.append(char)

        # Closing markup, with the same precedence as above.
        if i == vs + vl - 1:
            pieces.append('@.')
            ver_marks += 1
        elif i == ns + nl - 1:
            pieces.append('@.')
            name_marks += 1
        elif show_subs and i in sub_ends:
            pieces.append('@.')

    if show_errors:
        if name_marks == 0:
            pieces.append(" @r{[no name]}")
        if ver_marks == 0:
            pieces.append(" @r{[no version]}")
        if name_marks == 1:
            pieces.append(" @r{[incomplete name]}")
        if ver_marks == 1:
            pieces.append(" @r{[incomplete version]}")

    return colorize(''.join(pieces))
class UrlParseError(spack.error.SpackError):
"""Raised when the URL module can't parse something correctly."""
def __init__(self, msg, path):