Added web spider capability for listing versions.
commit 389fa1792d (parent fe7da0dcff)
19 changed files with 321 additions and 59 deletions
@@ -30,6 +30,8 @@ parser.add_argument('-v', '--verbose', action='store_true', dest='verbose',
                     help="print additional output during builds")
 parser.add_argument('-d', '--debug', action='store_true', dest='debug',
                     help="write out debug logs during compile")
+parser.add_argument('-m', '--mock', action='store_true', dest='mock',
+                    help="Use mock packages instead of real ones.")

 # each command module implements a parser() function, to which we pass its
 # subparser for setup.
@@ -46,6 +48,10 @@ args = parser.parse_args()
 # Set up environment based on args.
 spack.verbose = args.verbose
 spack.debug = args.debug
+if args.mock:
+    from spack.util.filesystem import new_path
+    mock_path = new_path(spack.module_path, 'test', 'mock_packages')
+    spack.packages_path = mock_path

 # Try to load the particular command asked for and run it
 command = spack.cmd.get_command(args.command)
lib/spack/spack/cmd/checksum.py (new file, 63 lines)
@@ -0,0 +1,63 @@
+import os
+import re
+import argparse
+from pprint import pprint
+from subprocess import CalledProcessError
+
+import spack.tty as tty
+import spack.packages as packages
+from spack.stage import Stage
+from spack.colify import colify
+from spack.util.crypto import md5
+from spack.version import *
+
+group='foo'
+description ="Checksum available versions of a package, print out checksums for addition to a package file."
+
+def setup_parser(subparser):
+    subparser.add_argument('package', metavar='PACKAGE', help='Package to list versions for')
+    subparser.add_argument('versions', nargs=argparse.REMAINDER, help='Versions to generate checksums for')
+    subparser.add_argument('-n', '--number', dest='number', type=int,
+                           default=10, help='Number of versions to list')
+
+
+def checksum(parser, args):
+    # get the package we're going to generate checksums for
+    pkg = packages.get(args.package)
+
+    # If the user asked for specific versions, use those.
+    # Otherwise get the latest n, where n is from the -n/--number param
+    versions = [ver(v) for v in args.versions]
+
+    if not all(type(v) == Version for v in versions):
+        tty.die("Cannot generate checksums for version lists or " +
+                "version ranges.  Use unambiguous versions.")
+
+    if not versions:
+        versions = pkg.fetch_available_versions()[:args.number]
+        if not versions:
+            tty.die("Could not fetch any available versions for %s."
+                    % pkg.name)
+
+    versions.sort()
+    versions.reverse()
+    urls = [pkg.url_for_version(v) for v in versions]
+
+    tty.msg("Found %s versions to checksum." % len(urls))
+    tty.msg("Downloading...")
+
+    hashes = []
+    for url, version in zip(urls, versions):
+        stage = Stage("checksum-%s-%s" % (pkg.name, version), url)
+        try:
+            stage.fetch()
+            hashes.append(md5(stage.archive_file))
+        finally:
+            stage.destroy()
+
+    dict_string = ["{"]
+    for i, (v, h) in enumerate(zip(versions, hashes)):
+        comma = "" if i == len(hashes) - 1 else ","
+        dict_string.append("    '%s' : '%s'%s" % (str(v), str(h), comma))
+    dict_string.append("}")
+    tty.msg("Checksummed new versions of %s:" % pkg.name, *dict_string)
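For orientation, the command above is meant to print a dictionary that can be pasted into a package file's versions attribute. A rough sketch of that output (the versions and hashes below are made up, not produced by this commit):

    versions = { '1.2.3' : 'd41d8cd98f00b204e9800998ecf8427e',
                 '1.2.2' : '0cc175b9c0f1b6a831c399e269772661' }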
@@ -2,8 +2,10 @@
 import spack.cmd
+
 import spack.tty as tty
+import spack.url as url
 import spack


 description = "parse specs and print them out to the command line."

 def setup_parser(subparser):
@@ -13,7 +15,11 @@ def spec(parser, args):
     specs = spack.cmd.parse_specs(args.specs)
     for spec in specs:
         spec.normalize()
-        print spec.tree()
+        print spec.tree(color=True)

         spec.concretize()
-        print spec.tree()
+        print spec.tree(color=True)
+
+        pkg = spec.package
+        wc = url.wildcard_version(pkg.url)
+        print wc
@@ -2,12 +2,8 @@
 import re
 from subprocess import CalledProcessError

-import spack
 import spack.packages as packages
-import spack.url as url
-import spack.tty as tty
 from spack.colify import colify
-from spack.version import ver

 description ="List available versions of a package"
@@ -17,4 +13,4 @@ def setup_parser(subparser):

 def versions(parser, args):
     pkg = packages.get(args.package)
-    colify(reversed(pkg.available_versions))
+    colify(reversed(pkg.fetch_available_versions()))
@@ -29,6 +29,8 @@
 from multi_function import platform
 from stage import Stage
 from spack.util.lang import memoized, list_modules
+from spack.util.crypto import md5
+from spack.util.web import get_pages


 class Package(object):
@@ -251,6 +253,9 @@ class SomePackage(Package):
     """By default a package has no dependencies."""
     dependencies = {}

+    """List of specs of virtual packages provided by this package."""
+    provided_virtual_packages = {}
+
     #
     # These are default values for instance variables.
     #
@@ -310,6 +315,9 @@ def __init__(self, spec):
         if not hasattr(self, 'list_url'):
             self.list_url = os.path.dirname(self.url)

+        if not hasattr(self, 'list_depth'):
+            self.list_depth = 1
+

     def add_commands_to_module(self):
         """Populate the module scope of install() with some useful functions.
@@ -464,6 +472,11 @@ def url_version(self, version):
         return str(version)


+    def url_for_version(self, version):
+        """Gives a URL that you can download a new version of this package from."""
+        return url.substitute_version(self.url, self.url_version(version))
+
+
     def remove_prefix(self):
         """Removes the prefix for a package along with any empty parent directories."""
         if self.dirty:
@@ -640,37 +653,42 @@ def do_clean_dist(self):
         tty.msg("Successfully cleaned %s" % self.name)


+    def fetch_available_versions(self):
+        # If not, then try to fetch using list_url
+        if not self._available_versions:
+            self._available_versions = VersionList()
+            url_regex = os.path.basename(url.wildcard_version(self.url))
+            wildcard = self.version.wildcard()
+
+            page_map = get_pages(self.list_url, depth=self.list_depth)
+            for site, page in page_map.iteritems():
+                strings = re.findall(url_regex, page)
+
+                for s in strings:
+                    match = re.search(wildcard, s)
+                    if match:
+                        v = match.group(0)
+                        self._available_versions.add(Version(v))
+
+            if not self._available_versions:
+                tty.warn("Found no versions for %s" % self.name,
+                         "Check the list_url and list_depth attribute on the "
+                         + self.name + " package.",
+                         "Use them to tell Spack where to look for versions.")
+        return self._available_versions
+
+
     @property
     def available_versions(self):
         # If the package overrode available_versions, then use that.
         if self.versions is not None:
             return self.versions
-
-        # If not, then try to fetch using list_url
-        if not self._available_versions:
-            self._available_versions = ver([self.version])
-            try:
-                # Run curl but grab the mime type from the http headers
-                listing = spack.curl('-s', '-L', self.list_url, return_output=True)
-                url_regex = os.path.basename(url.wildcard_version(self.url))
-                strings = re.findall(url_regex, listing)
-                wildcard = self.version.wildcard()
-                for s in strings:
-                    match = re.search(wildcard, s)
-                    if match:
-                        self._available_versions.add(Version(match.group(0)))
-
-                if not self._available_versions:
-                    tty.warn("Found no versions for %s" % self.name,
-                             "Packate.available_versions may require adding the list_url attribute",
-                             "to the package to tell Spack where to look for versions.")
-
-            except subprocess.CalledProcessError:
-                tty.warn("Could not connect to %s" % self.list_url,
-                         "Package.available_versions requires an internet connection.",
-                         "Version list may be incomplete.")
-
-        return self._available_versions
+        else:
+            vlist = self.fetch_available_versions()
+            if not vlist:
+                vlist = ver([self.version])
+            return vlist


 class MakeExecutable(Executable):
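A minimal sketch of how a package opts into the new spider-based version listing, using only the list_url and list_depth attributes introduced above (the class name, URLs, and hash here are illustrative, not part of this commit):

    class SomeLib(Package):
        homepage = "http://example.com/somelib"
        url      = "http://example.com/downloads/1.0/somelib-1.0.tar.gz"
        md5      = "0123456789abcdef0123456789abcdef"

        # Page the spider starts from, and how many levels of links to follow.
        # If omitted, list_url defaults to os.path.dirname(url) and list_depth
        # defaults to 1, per the __init__ change above.
        list_url   = "http://example.com/downloads/"
        list_depth = 2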
@@ -19,6 +19,7 @@
 invalid_package_re = r'[_-][_-]+'

 instances = {}
+providers = {}


 def get(pkg_name):
@@ -29,6 +30,24 @@ def get(pkg_name):
     return instances[pkg_name]


+def get_providers(vpkg_name):
+    if not providers:
+        compute_providers()
+
+    if not vpkg_name in providers:
+        raise UnknownPackageError("No such virtual package: %s" % vpkg_name)
+
+    return providers[vpkg_name]
+
+
+def compute_providers():
+    for pkg in all_packages():
+        for vpkg in pkg.provided_virtual_packages:
+            if vpkg not in providers:
+                providers[vpkg] = []
+            providers[vpkg].append(pkg)
+
+
 def valid_package_name(pkg_name):
     return (re.match(valid_package_re, pkg_name) and
             not re.search(invalid_package_re, pkg_name))
@@ -75,6 +94,11 @@ def class_name_for_package_name(pkg_name):
     return class_name


+def exists(pkg_name):
+    """Whether a package is concrete."""
+    return os.path.exists(filename_for_package_name(pkg_name))
+
+
 def get_class_for_package_name(pkg_name):
     file_name = filename_for_package_name(pkg_name)
@@ -149,7 +173,6 @@ def quote(string):
     out.write('}\n')


-
 class InvalidPackageNameError(spack.error.SpackError):
     """Raised when we encounter a bad package name."""
     def __init__(self, name):
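A hypothetical use of the new provider registry (the virtual package name is just an example; per the code above, UnknownPackageError is raised for names that no package provides):

    import spack.packages as packages

    # Packages whose provided_virtual_packages include 'mpi'.
    mpi_providers = packages.get_providers('mpi')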
@@ -4,6 +4,7 @@ class Dyninst(Package):
     homepage = "https://paradyn.org"
     url = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz"
     md5 = "bf03b33375afa66fe0efa46ce3f4b17a"
+    list_url = "http://www.dyninst.org/downloads/dyninst-8.x"

     depends_on("libelf")
     depends_on("libdwarf")
@@ -45,16 +45,28 @@ class Mpileaks(Package):
        spack install mpileaks ^mpich
 """
 import sys
+import inspect
 import spack.spec


+def _caller_locals():
+    """This will return the locals of the *parent* of the caller.
+       This allows a function to insert variables into its caller's
+       scope.
+    """
+    stack = inspect.stack()
+    try:
+        return stack[2][0].f_locals
+    finally:
+        del stack
+
+
 def depends_on(*specs):
     """Adds a dependencies local variable in the locals of
        the calling class, based on args.
     """
     # Get the enclosing package's scope and add deps to it.
-    locals = sys._getframe(1).f_locals
-    dependencies = locals.setdefault("dependencies", {})
+    dependencies = _caller_locals().setdefault("dependencies", {})
     for string in specs:
         for spec in spack.spec.parse(string):
             dependencies[spec.name] = spec
@@ -66,7 +78,6 @@ def provides(*args):
        can use the providing package to satisfy the dependency.
     """
     # Get the enclosing package's scope and add deps to it.
-    locals = sys._getframe(1).f_locals
-    provides = locals.setdefault("provides", [])
+    provides = _caller_locals().setdefault("provides", [])
     for name in args:
         provides.append(name)
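To illustrate how these relations are used inside a package definition (a sketch only: the class name is made up and the import line is an assumption, since the package files in this diff do not show their imports):

    from spack import *   # assumed to bring Package, depends_on, provides into scope

    class Mvapich(Package):
        provides("mpi")          # appended to this class's 'provides' list via _caller_locals()
        depends_on("libelf")     # recorded in this class's 'dependencies' dict via _caller_locals()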
@@ -321,9 +321,15 @@ def package(self):
         return packages.get(self.name)


+    @property
+    def virtual(self):
+        return packages.exists(self.name)
+
+
     @property
     def concrete(self):
-        return bool(self.versions.concrete
+        return bool(not self.virtual
+                    and self.versions.concrete
                     # TODO: support variants
                     and self.architecture
                     and self.compiler and self.compiler.concrete
@@ -5,7 +5,9 @@ class Callpath(Package):
     url = "http://github.com/tgamblin/callpath-0.2.tar.gz"
     md5 = "foobarbaz"

-    versions = [0.8, 0.9, 1.0]
+    versions = { 0.8 : 'bf03b33375afa66fe0efa46ce3f4b17a',
+                 0.9 : 'bf03b33375afa66fe0efa46ce3f4b17a',
+                 1.0 : 'bf03b33375afa66fe0efa46ce3f4b17a' }

     depends_on("dyninst")
     depends_on("mpich")
@@ -5,7 +5,11 @@ class Dyninst(Package):
     url = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz"
     md5 = "bf03b33375afa66fe0efa46ce3f4b17a"

-    versions = '7.0, 7.0.1, 8.0, 8.1.1, 8.1.2'
+    list_url = "http://www.dyninst.org/downloads/dyninst-8.x"
+
+    versions = {
+        '8.1.2' : 'bf03b33375afa66fe0efa46ce3f4b17a',
+        '8.1.1' : '1f8743e3a5662b25ce64a7edf647e77d' }

     depends_on("libelf")
     depends_on("libdwarf")
@@ -11,6 +11,8 @@ class Libdwarf(Package):

     md5 = "64b42692e947d5180e162e46c689dfbf"

+    versions = [20070703, 20111030, 20130207]
+
     depends_on("libelf")

@@ -5,7 +5,10 @@ class Libelf(Package):
     url = "http://www.mr511.de/software/libelf-0.8.13.tar.gz"
     md5 = "4136d7b4c04df68b686570afa26988ac"

-    versions = '0.8.10, 0.8.12, 0.8.13'
+    versions = {
+        '0.8.13' : '4136d7b4c04df68b686570afa26988ac',
+        '0.8.12' : 'e21f8273d9f5f6d43a59878dc274fec7',
+        '0.8.10' : '9db4d36c283d9790d8fa7df1f4d7b4d9' }

     def install(self, prefix):
         configure("--prefix=%s" % prefix,
@@ -3,6 +3,9 @@
 class Mpich(Package):
     homepage = "http://www.mpich.org"
     url = "http://www.mpich.org/static/downloads/3.0.4/mpich-3.0.4.tar.gz"
+
+    list_url   = "http://www.mpich.org/static/downloads/"
+    list_depth = 2
     md5 = "9c5d5d4fe1e17dd12153f40bc5b6dbc0"

     versions = '1.0.3, 1.3.2p1, 1.4.1p1, 3.0.4, 3.1b1'
@@ -5,7 +5,10 @@ class Mpileaks(Package):
     url = "http://www.llnl.gov/mpileaks-1.0.tar.gz"
     md5 = "foobarbaz"

-    versions = [1.0, 2.1, 2.2, 2.3]
+    versions = { 1.0 : None,
+                 2.1 : None,
+                 2.2 : None,
+                 2.3 : None }

     depends_on("mpich")
     depends_on("callpath")
@@ -176,6 +176,8 @@ def wildcard_version(path):
        that will match this path with any version in its place.
     """
     ver, start, end = parse_version_string_with_indices(path)
-    v = Version(ver)

-    return re.escape(path[:start]) + v.wildcard() + re.escape(path[end:])
+    v = Version(ver)
+    parts = list(re.escape(p) for p in path.split(str(v)))
+
+    return v.wildcard().join(parts)
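A self-contained sketch of the idea behind the reworked wildcard_version above. This is a simplification, not the real spack.url or Version API; the version pattern is a stand-in for Version.wildcard():

    import re

    def wildcard_version_sketch(path, version):
        # Escape the literal pieces of the URL around the known version and
        # rejoin them with a pattern that matches any version-like string.
        version_pattern = r'\d+(\.\d+)*'
        parts = [re.escape(p) for p in path.split(version)]
        return version_pattern.join(parts)

    print wildcard_version_sketch(
        "http://www.mr511.de/software/libelf-0.8.13.tar.gz", "0.8.13")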
lib/spack/spack/util/crypto.py (new file, 13 lines)
@@ -0,0 +1,13 @@
+import hashlib
+from contextlib import closing
+
+def md5(filename, block_size=2**20):
+    """Computes the md5 hash of a file."""
+    md5 = hashlib.md5()
+    with closing(open(filename)) as file:
+        while True:
+            data = file.read(block_size)
+            if not data:
+                break
+            md5.update(data)
+    return md5.hexdigest()
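A quick usage sketch of the relocated helper (the archive path here is hypothetical):

    from spack.util.crypto import md5

    print md5('/tmp/libelf-0.8.13.tar.gz')   # prints the file's md5 hex digest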
@@ -30,7 +30,7 @@ def mkdirp(*paths):


 def new_path(prefix, *args):
-    path=str(prefix)
+    path = str(prefix)
     for elt in args:
         path = os.path.join(path, str(elt))

@@ -56,16 +56,3 @@ def stem(path):
     if re.search(suffix, path):
         return re.sub(suffix, "", path)
     return path
-
-
-def md5(filename, block_size=2**20):
-    """Computes the md5 hash of a file."""
-    import hashlib
-    md5 = hashlib.md5()
-    with closing(open(filename)) as file:
-        while True:
-            data = file.read(block_size)
-            if not data:
-                break
-            md5.update(data)
-    return md5.hexdigest()
lib/spack/spack/util/web.py (new file, 113 lines)
@@ -0,0 +1,113 @@
+import re
+import subprocess
+import urllib2
+import urlparse
+from multiprocessing import Pool
+from HTMLParser import HTMLParser
+
+import spack
+import spack.tty as tty
+from spack.util.compression import ALLOWED_ARCHIVE_TYPES
+
+# Timeout in seconds for web requests
+TIMEOUT = 10
+
+
+class LinkParser(HTMLParser):
+    """This parser just takes an HTML page and strips out the hrefs on the
+       links.  Good enough for a really simple spider."""
+    def __init__(self):
+        HTMLParser.__init__(self)
+        self.links = []
+
+    def handle_starttag(self, tag, attrs):
+        if tag == 'a':
+            for attr, val in attrs:
+                if attr == 'href':
+                    self.links.append(val)
+
+
+def _spider(args):
+    """_spider(url, depth, max_depth)
+
+       Fetches URL and any pages it links to up to max_depth.  depth should
+       initially be 1, and max_depth includes the root.  This function will
+       print out a warning only if the root can't be fetched; it ignores
+       errors with pages that the root links to.
+
+       This will return a list of the pages fetched, in no particular order.
+
+       Takes args as a tuple b/c it's intended to be used by a multiprocessing
+       pool.  Firing off all the child links at once makes the fetch MUCH
+       faster for pages with lots of children.
+    """
+    url, depth, max_depth = args
+
+    pages = {}
+    try:
+        # Make a HEAD request first to check the content type.  This lets
+        # us ignore tarballs and gigantic files.
+        # It would be nice to do this with the HTTP Accept header to avoid
+        # one round-trip.  However, most servers seem to ignore the header
+        # if you ask for a tarball with Accept: text/html.
+        req = urllib2.Request(url)
+        req.get_method = lambda: "HEAD"
+        resp = urllib2.urlopen(req, timeout=TIMEOUT)
+
+        if not resp.headers["Content-type"].startswith('text/html'):
+            print "ignoring page " + url + " with content type " + resp.headers["Content-type"]
+            return pages
+
+        # Do the real GET request when we know it's just HTML.
+        req.get_method = lambda: "GET"
+        response = urllib2.urlopen(req, timeout=TIMEOUT)
+        response_url = response.geturl()
+
+        # Read the page and stick it in the map we'll return
+        page = response.read()
+        pages[response_url] = page
+
+        # If we're not at max depth, parse out the links in the page
+        if depth < max_depth:
+            link_parser = LinkParser()
+
+            subcalls = []
+            link_parser.feed(page)
+            while link_parser.links:
+                raw_link = link_parser.links.pop()
+
+                # Skip stuff that looks like an archive
+                if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES):
+                    continue
+
+                # Evaluate the link relative to the page it came from.
+                abs_link = urlparse.urljoin(response_url, raw_link)
+                subcalls.append((abs_link, depth+1, max_depth))
+
+            if subcalls:
+                pool = Pool(processes=len(subcalls))
+                dicts = pool.map(_spider, subcalls)
+                for d in dicts:
+                    pages.update(d)
+
+    except urllib2.HTTPError, e:
+        # Only report it if it's the root page.  We ignore errors when spidering.
+        if depth == 1:
+            tty.warn("Could not connect to %s" % url, e.reason,
+                     "Package.available_versions requires an internet connection.",
+                     "Version list may be incomplete.")
+
+    return pages
+
+
+def get_pages(root_url, **kwargs):
+    """Gets web pages from a root URL.
+       If depth is specified (e.g., depth=2), then this will also fetch pages
+       linked from the root and its children up to depth.
+
+       This will spawn processes to fetch the children, for much improved
+       performance over a sequential fetch.
+    """
+    max_depth = kwargs.setdefault('depth', 1)
+    pages = _spider((root_url, 1, max_depth))
+    return pages
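A rough sketch of how the spider entry point is used, mirroring the fetch_available_versions change earlier in this commit (the URL and depth are just examples):

    from spack.util.web import get_pages

    page_map = get_pages("http://www.mpich.org/static/downloads/", depth=2)
    for page_url, html in page_map.iteritems():   # Python 2, as in the rest of the tree
        print page_url, len(html)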