Added web spider capability for listing versions.
parent fe7da0dcff
commit 389fa1792d
19 changed files with 321 additions and 59 deletions
@@ -30,6 +30,8 @@ parser.add_argument('-v', '--verbose', action='store_true', dest='verbose',
                    help="print additional output during builds")
parser.add_argument('-d', '--debug', action='store_true', dest='debug',
                    help="write out debug logs during compile")
parser.add_argument('-m', '--mock', action='store_true', dest='mock',
                    help="Use mock packages instead of real ones.")

# each command module implements a parser() function, to which we pass its
# subparser for setup.

@@ -46,6 +48,10 @@ args = parser.parse_args()
# Set up environment based on args.
spack.verbose = args.verbose
spack.debug = args.debug
if args.mock:
    from spack.util.filesystem import new_path
    mock_path = new_path(spack.module_path, 'test', 'mock_packages')
    spack.packages_path = mock_path

# Try to load the particular command asked for and run it
command = spack.cmd.get_command(args.command)
lib/spack/spack/cmd/checksum.py (new file, 63 lines)
@@ -0,0 +1,63 @@
import os
import re
import argparse
from pprint import pprint
from subprocess import CalledProcessError

import spack.tty as tty
import spack.packages as packages
from spack.stage import Stage
from spack.colify import colify
from spack.util.crypto import md5
from spack.version import *

group = 'foo'
description = "Checksum available versions of a package, print out checksums for addition to a package file."

def setup_parser(subparser):
    subparser.add_argument('package', metavar='PACKAGE', help='Package to list versions for')
    subparser.add_argument('versions', nargs=argparse.REMAINDER, help='Versions to generate checksums for')
    subparser.add_argument('-n', '--number', dest='number', type=int,
                           default=10, help='Number of versions to list')


def checksum(parser, args):
    # get the package we're going to generate checksums for
    pkg = packages.get(args.package)

    # If the user asked for specific versions, use those.
    # Otherwise get the latest n, where n is from the -n/--number param
    versions = [ver(v) for v in args.versions]

    if not all(type(v) == Version for v in versions):
        tty.die("Cannot generate checksums for version lists or " +
                "version ranges.  Use unambiguous versions.")

    if not versions:
        versions = pkg.fetch_available_versions()[:args.number]
        if not versions:
            tty.die("Could not fetch any available versions for %s."
                    % pkg.name)

    versions.sort()
    versions.reverse()
    urls = [pkg.url_for_version(v) for v in versions]

    tty.msg("Found %s versions to checksum." % len(urls))
    tty.msg("Downloading...")

    hashes = []
    for url, version in zip(urls, versions):
        stage = Stage("checksum-%s-%s" % (pkg.name, version), url)
        try:
            stage.fetch()
            hashes.append(md5(stage.archive_file))
        finally:
            stage.destroy()

    dict_string = ["{"]
    for i, (v, h) in enumerate(zip(versions, hashes)):
        comma = "" if i == len(hashes) - 1 else ","
        dict_string.append("    '%s' : '%s'%s" % (str(v), str(h), comma))
    dict_string.append("}")
    tty.msg("Checksummed new versions of %s:" % pkg.name, *dict_string)
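Aside (illustrative sketch, not part of the diff): the loop above assembles a Python dict literal that can be pasted into a package file's versions attribute. A minimal standalone reproduction of that assembly step, using the version/hash pairs from the dyninst package file later in this commit as placeholder data:

    # Sketch only: reproduces the dict_string assembly with placeholder data
    # borrowed from the dyninst package file elsewhere in this commit.
    versions = ['8.1.2', '8.1.1']
    hashes   = ['bf03b33375afa66fe0efa46ce3f4b17a', '1f8743e3a5662b25ce64a7edf647e77d']

    lines = ["{"]
    for i, (v, h) in enumerate(zip(versions, hashes)):
        comma = "" if i == len(hashes) - 1 else ","
        lines.append("    '%s' : '%s'%s" % (v, h, comma))
    lines.append("}")
    print("\n".join(lines))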
@@ -2,8 +2,10 @@
import spack.cmd

import spack.tty as tty
import spack.url as url
import spack

description = "parse specs and print them out to the command line."

def setup_parser(subparser):

@@ -13,7 +15,11 @@ def spec(parser, args):
    specs = spack.cmd.parse_specs(args.specs)
    for spec in specs:
        spec.normalize()
        print spec.tree()
        print spec.tree(color=True)

        spec.concretize()
        print spec.tree()
        print spec.tree(color=True)

        pkg = spec.package
        wc = url.wildcard_version(pkg.url)
        print wc
@@ -2,12 +2,8 @@
import re
from subprocess import CalledProcessError

import spack
import spack.packages as packages
import spack.url as url
import spack.tty as tty
from spack.colify import colify
from spack.version import ver

description = "List available versions of a package"

@@ -17,4 +13,4 @@ def setup_parser(subparser):

def versions(parser, args):
    pkg = packages.get(args.package)
    colify(reversed(pkg.available_versions))
    colify(reversed(pkg.fetch_available_versions()))
@@ -29,6 +29,8 @@
from multi_function import platform
from stage import Stage
from spack.util.lang import memoized, list_modules
from spack.util.crypto import md5
from spack.util.web import get_pages


class Package(object):

@@ -251,6 +253,9 @@ class SomePackage(Package):
    """By default a package has no dependencies."""
    dependencies = {}

    """List of specs of virtual packages provided by this package."""
    provided_virtual_packages = {}

    #
    # These are default values for instance variables.
    #

@@ -310,6 +315,9 @@ def __init__(self, spec):
        if not hasattr(self, 'list_url'):
            self.list_url = os.path.dirname(self.url)

        if not hasattr(self, 'list_depth'):
            self.list_depth = 1


    def add_commands_to_module(self):
        """Populate the module scope of install() with some useful functions.

@@ -464,6 +472,11 @@ def url_version(self, version):
        return str(version)


    def url_for_version(self, version):
        """Gives a URL that you can download a new version of this package from."""
        return url.substitute_version(self.url, self.url_version(version))


    def remove_prefix(self):
        """Removes the prefix for a package along with any empty parent directories."""
        if self.dirty:
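Aside (illustrative sketch, not part of the diff): for a package that only sets url, those two defaults work out as below, using the dyninst tarball URL that appears later in this commit:

    # Sketch of the list_url/list_depth defaults applied in __init__ above.
    import os

    url = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz"
    list_url = os.path.dirname(url)   # the tarball's parent directory
    list_depth = 1                    # only that one listing page gets searched

    print(list_url)   # .../downloads/dyninst/8.1.2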
@ -640,37 +653,42 @@ def do_clean_dist(self):
|
|||
tty.msg("Successfully cleaned %s" % self.name)
|
||||
|
||||
|
||||
def fetch_available_versions(self):
|
||||
# If not, then try to fetch using list_url
|
||||
if not self._available_versions:
|
||||
self._available_versions = VersionList()
|
||||
url_regex = os.path.basename(url.wildcard_version(self.url))
|
||||
wildcard = self.version.wildcard()
|
||||
|
||||
page_map = get_pages(self.list_url, depth=self.list_depth)
|
||||
for site, page in page_map.iteritems():
|
||||
strings = re.findall(url_regex, page)
|
||||
|
||||
for s in strings:
|
||||
match = re.search(wildcard, s)
|
||||
if match:
|
||||
v = match.group(0)
|
||||
self._available_versions.add(Version(v))
|
||||
|
||||
if not self._available_versions:
|
||||
tty.warn("Found no versions for %s" % self.name,
|
||||
"Check the list_url and list_depth attribute on the "
|
||||
+ self.name + " package.",
|
||||
"Use them to tell Spack where to look for versions.")
|
||||
|
||||
return self._available_versions
|
||||
|
||||
|
||||
@property
|
||||
def available_versions(self):
|
||||
# If the package overrode available_versions, then use that.
|
||||
if self.versions is not None:
|
||||
return self.versions
|
||||
|
||||
# If not, then try to fetch using list_url
|
||||
if not self._available_versions:
|
||||
self._available_versions = ver([self.version])
|
||||
try:
|
||||
# Run curl but grab the mime type from the http headers
|
||||
listing = spack.curl('-s', '-L', self.list_url, return_output=True)
|
||||
url_regex = os.path.basename(url.wildcard_version(self.url))
|
||||
strings = re.findall(url_regex, listing)
|
||||
wildcard = self.version.wildcard()
|
||||
for s in strings:
|
||||
match = re.search(wildcard, s)
|
||||
if match:
|
||||
self._available_versions.add(Version(match.group(0)))
|
||||
|
||||
if not self._available_versions:
|
||||
tty.warn("Found no versions for %s" % self.name,
|
||||
"Packate.available_versions may require adding the list_url attribute",
|
||||
"to the package to tell Spack where to look for versions.")
|
||||
|
||||
except subprocess.CalledProcessError:
|
||||
tty.warn("Could not connect to %s" % self.list_url,
|
||||
"Package.available_versions requires an internet connection.",
|
||||
"Version list may be incomplete.")
|
||||
|
||||
return self._available_versions
|
||||
else:
|
||||
vlist = self.fetch_available_versions()
|
||||
if not vlist:
|
||||
vlist = ver([self.version])
|
||||
return vlist
|
||||
|
||||
|
||||
class MakeExecutable(Executable):
|
||||
|
|
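Aside (illustrative sketch, not part of the diff): the new fetch_available_versions() finds versions by regex. The package URL is turned into a wildcard pattern (see the url.py change further down), each fetched listing page is scanned for archive names matching it, and the bare version is pulled back out of each match. A standalone illustration, with hand-written stand-ins for the patterns the real code generates:

    # Standalone sketch of the version-scraping loop; the two regexes are assumed
    # stand-ins for what url.wildcard_version() and Version.wildcard() produce.
    import re

    page = ('<a href="mpich-3.0.4.tar.gz">mpich-3.0.4.tar.gz</a>\n'
            '<a href="mpich-3.1b1.tar.gz">mpich-3.1b1.tar.gz</a>\n'
            '<a href="README.html">README</a>')

    url_regex = r'mpich-[0-9][0-9a-z.]*\.tar\.gz'       # archive names for this package, any version
    wildcard  = r'[0-9]+(?:\.[0-9]+)*(?:[a-z][0-9]+)?'  # a bare version string

    found = set()
    for s in re.findall(url_regex, page):
        match = re.search(wildcard, s)                  # pull the version back out of the name
        if match:
            found.add(match.group(0))

    print(sorted(found))                                # ['3.0.4', '3.1b1']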
@@ -19,6 +19,7 @@
invalid_package_re = r'[_-][_-]+'

instances = {}
providers = {}


def get(pkg_name):

@@ -29,6 +30,24 @@ def get(pkg_name):
    return instances[pkg_name]


def get_providers(vpkg_name):
    if not providers:
        compute_providers()

    if not vpkg_name in providers:
        raise UnknownPackageError("No such virtual package: %s" % vpkg_name)

    return providers[vpkg_name]


def compute_providers():
    for pkg in all_packages():
        for vpkg in pkg.provided_virtual_packages:
            if vpkg not in providers:
                providers[vpkg] = []
            providers[vpkg].append(pkg)


def valid_package_name(pkg_name):
    return (re.match(valid_package_re, pkg_name) and
            not re.search(invalid_package_re, pkg_name))
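Aside (toy sketch, not part of the diff): compute_providers() inverts each package's provided_virtual_packages into a map from virtual package name to the packages that can stand in for it. With plain strings in place of package objects, and names ('mpi', 'fake_mpi') that are illustrative rather than taken from this commit:

    # Toy sketch of the provider-map inversion built by compute_providers().
    all_pkgs = {
        'mpich'    : ['mpi'],      # stands in for pkg.provided_virtual_packages
        'fake_mpi' : ['mpi'],
        'mpileaks' : [],
    }

    providers = {}
    for pkg, virtuals in all_pkgs.items():
        for vpkg in virtuals:
            providers.setdefault(vpkg, []).append(pkg)

    print(providers)   # e.g. {'mpi': ['mpich', 'fake_mpi']}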
@@ -75,6 +94,11 @@ def class_name_for_package_name(pkg_name):
    return class_name


def exists(pkg_name):
    """Whether a package is concrete."""
    return os.path.exists(filename_for_package_name(pkg_name))


def get_class_for_package_name(pkg_name):
    file_name = filename_for_package_name(pkg_name)

@@ -149,7 +173,6 @@ def quote(string):
    out.write('}\n')


class InvalidPackageNameError(spack.error.SpackError):
    """Raised when we encounter a bad package name."""
    def __init__(self, name):
@@ -4,6 +4,7 @@ class Dyninst(Package):
    homepage = "https://paradyn.org"
    url = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz"
    md5 = "bf03b33375afa66fe0efa46ce3f4b17a"
    list_url = "http://www.dyninst.org/downloads/dyninst-8.x"

    depends_on("libelf")
    depends_on("libdwarf")
@@ -45,16 +45,28 @@ class Mpileaks(Package):
       spack install mpileaks ^mpich
"""
import sys
import inspect
import spack.spec


def _caller_locals():
    """This will return the locals of the *parent* of the caller.
       This allows a function to insert variables into its caller's
       scope.
    """
    stack = inspect.stack()
    try:
        return stack[2][0].f_locals
    finally:
        del stack


def depends_on(*specs):
    """Adds a dependencies local variable in the locals of
       the calling class, based on args.
    """
    # Get the enclosing package's scope and add deps to it.
    locals = sys._getframe(1).f_locals
    dependencies = locals.setdefault("dependencies", {})
    dependencies = _caller_locals().setdefault("dependencies", {})
    for string in specs:
        for spec in spack.spec.parse(string):
            dependencies[spec.name] = spec

@@ -66,7 +78,6 @@ def provides(*args):
       can use the providing package to satisfy the dependency.
    """
    # Get the enclosing package's scope and add deps to it.
    locals = sys._getframe(1).f_locals
    provides = locals.setdefault("provides", [])
    provides = _caller_locals().setdefault("provides", [])
    for name in args:
        provides.append(name)
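Aside (minimal standalone sketch, not part of the diff, using toy names and no Spack imports): the switch from sys._getframe(1) to _caller_locals() is about how far up the stack the helper reaches. stack[2] is the frame of the caller's caller, so depends_on() can plant a dependencies entry directly in the class body that invoked it:

    # Sketch of injecting a variable into the caller's class body via the
    # interpreter stack, as _caller_locals()/depends_on() do above.
    import inspect

    def _caller_locals():
        stack = inspect.stack()
        try:
            return stack[2][0].f_locals   # locals of the *parent* of the caller
        finally:
            del stack                     # avoid reference cycles from frame objects

    def depends_on(name):
        deps = _caller_locals().setdefault("dependencies", [])
        deps.append(name)

    class Toy(object):
        depends_on("libelf")              # runs while the class body executes
        depends_on("libdwarf")

    print(Toy.dependencies)               # ['libelf', 'libdwarf']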
@@ -321,9 +321,15 @@ def package(self):
        return packages.get(self.name)


    @property
    def virtual(self):
        return packages.exists(self.name)


    @property
    def concrete(self):
        return bool(self.versions.concrete
        return bool(not self.virtual
                    and self.versions.concrete
                    # TODO: support variants
                    and self.architecture
                    and self.compiler and self.compiler.concrete
@@ -5,7 +5,9 @@ class Callpath(Package):
    url = "http://github.com/tgamblin/callpath-0.2.tar.gz"
    md5 = "foobarbaz"

    versions = [0.8, 0.9, 1.0]
    versions = { 0.8 : 'bf03b33375afa66fe0efa46ce3f4b17a',
                 0.9 : 'bf03b33375afa66fe0efa46ce3f4b17a',
                 1.0 : 'bf03b33375afa66fe0efa46ce3f4b17a' }

    depends_on("dyninst")
    depends_on("mpich")
@@ -5,7 +5,11 @@ class Dyninst(Package):
    url = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz"
    md5 = "bf03b33375afa66fe0efa46ce3f4b17a"

    versions = '7.0, 7.0.1, 8.0, 8.1.1, 8.1.2'
    list_url = "http://www.dyninst.org/downloads/dyninst-8.x"

    versions = {
        '8.1.2' : 'bf03b33375afa66fe0efa46ce3f4b17a',
        '8.1.1' : '1f8743e3a5662b25ce64a7edf647e77d' }

    depends_on("libelf")
    depends_on("libdwarf")
@@ -11,6 +11,8 @@ class Libdwarf(Package):

    md5 = "64b42692e947d5180e162e46c689dfbf"

    versions = [20070703, 20111030, 20130207]

    depends_on("libelf")
@@ -5,7 +5,10 @@ class Libelf(Package):
    url = "http://www.mr511.de/software/libelf-0.8.13.tar.gz"
    md5 = "4136d7b4c04df68b686570afa26988ac"

    versions = '0.8.10, 0.8.12, 0.8.13'
    versions = {
        '0.8.13' : '4136d7b4c04df68b686570afa26988ac',
        '0.8.12' : 'e21f8273d9f5f6d43a59878dc274fec7',
        '0.8.10' : '9db4d36c283d9790d8fa7df1f4d7b4d9' }

    def install(self, prefix):
        configure("--prefix=%s" % prefix,
@@ -3,6 +3,9 @@
class Mpich(Package):
    homepage = "http://www.mpich.org"
    url = "http://www.mpich.org/static/downloads/3.0.4/mpich-3.0.4.tar.gz"

    list_url = "http://www.mpich.org/static/downloads/"
    list_depth = 2
    md5 = "9c5d5d4fe1e17dd12153f40bc5b6dbc0"

    versions = '1.0.3, 1.3.2p1, 1.4.1p1, 3.0.4, 3.1b1'
@@ -5,7 +5,10 @@ class Mpileaks(Package):
    url = "http://www.llnl.gov/mpileaks-1.0.tar.gz"
    md5 = "foobarbaz"

    versions = [1.0, 2.1, 2.2, 2.3]
    versions = { 1.0 : None,
                 2.1 : None,
                 2.2 : None,
                 2.3 : None }

    depends_on("mpich")
    depends_on("callpath")
@@ -176,6 +176,8 @@ def wildcard_version(path):
       that will match this path with any version in its place.
    """
    ver, start, end = parse_version_string_with_indices(path)
    v = Version(ver)

    return re.escape(path[:start]) + v.wildcard() + re.escape(path[end:])
    v = Version(ver)
    parts = list(re.escape(p) for p in path.split(str(v)))

    return v.wildcard().join(parts)
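Aside (illustrative sketch, not part of the diff): the rewritten wildcard_version() splits the path on the literal version string, escapes the surrounding pieces, and rejoins them with a version wildcard, so every occurrence of the version (directory and file name alike) becomes a wildcard. With a hand-written pattern standing in for Version.wildcard():

    # Rough illustration of the split/escape/rejoin approach above.
    import re

    path = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz"
    version = "8.1.2"
    wildcard = r'[0-9]+(?:\.[0-9]+)*'   # assumed stand-in for Version('8.1.2').wildcard()

    parts = [re.escape(p) for p in path.split(version)]
    pattern = wildcard.join(parts)

    # The pattern now matches sibling versions of the same tarball.
    print(bool(re.match(pattern, path.replace("8.1.2", "8.1.1"))))   # True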
lib/spack/spack/util/crypto.py (new file, 13 lines)
@@ -0,0 +1,13 @@
import hashlib
from contextlib import closing

def md5(filename, block_size=2**20):
    """Computes the md5 hash of a file."""
    md5 = hashlib.md5()
    with closing(open(filename)) as file:
        while True:
            data = file.read(block_size)
            if not data:
                break
            md5.update(data)
    return md5.hexdigest()
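Aside (usage sketch, not part of the diff; the file name is a placeholder): reading in 2**20-byte (1 MB) blocks keeps memory use flat even for large tarballs, which is what the checksum command above feeds through this helper.

    # Usage sketch for the relocated helper; assumes a spack checkout on the
    # Python path and a downloaded tarball named as below.
    from spack.util.crypto import md5

    print(md5("DyninstAPI-8.1.2.tgz"))   # prints the 32-character hex digest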
@@ -56,16 +56,3 @@ def stem(path):
    if re.search(suffix, path):
        return re.sub(suffix, "", path)
    return path


def md5(filename, block_size=2**20):
    """Computes the md5 hash of a file."""
    import hashlib
    md5 = hashlib.md5()
    with closing(open(filename)) as file:
        while True:
            data = file.read(block_size)
            if not data:
                break
            md5.update(data)
    return md5.hexdigest()
lib/spack/spack/util/web.py (new file, 113 lines)
@@ -0,0 +1,113 @@
import re
import subprocess
import urllib2
import urlparse
from multiprocessing import Pool
from HTMLParser import HTMLParser

import spack
import spack.tty as tty
from spack.util.compression import ALLOWED_ARCHIVE_TYPES

# Timeout in seconds for web requests
TIMEOUT = 10


class LinkParser(HTMLParser):
    """This parser just takes an HTML page and strips out the hrefs on the
       links.  Good enough for a really simple spider."""
    def __init__(self):
        HTMLParser.__init__(self)
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for attr, val in attrs:
                if attr == 'href':
                    self.links.append(val)


def _spider(args):
    """_spider(url, depth, max_depth)

       Fetches URL and any pages it links to, up to max_depth.  depth should
       initially be 1, and max_depth includes the root.  This function will
       print out a warning only if the root can't be fetched; it ignores
       errors with pages that the root links to.

       This will return a dict of the pages fetched, in no particular order.

       Takes args as a tuple b/c it's intended to be used by a multiprocessing
       pool.  Firing off all the child links at once makes the fetch MUCH
       faster for pages with lots of children.
    """
    url, depth, max_depth = args

    pages = {}
    try:
        # Make a HEAD request first to check the content type.  This lets
        # us ignore tarballs and gigantic files.
        # It would be nice to do this with the HTTP Accept header to avoid
        # one round-trip.  However, most servers seem to ignore the header
        # if you ask for a tarball with Accept: text/html.
        req = urllib2.Request(url)
        req.get_method = lambda: "HEAD"
        resp = urllib2.urlopen(req, timeout=TIMEOUT)

        if not resp.headers["Content-type"].startswith('text/html'):
            print "ignoring page " + url + " with content type " + resp.headers["Content-type"]
            return pages

        # Do the real GET request when we know it's just HTML.
        req.get_method = lambda: "GET"
        response = urllib2.urlopen(req, timeout=TIMEOUT)
        response_url = response.geturl()

        # Read the page and stick it in the map we'll return
        page = response.read()
        pages[response_url] = page

        # If we're not at max depth, parse out the links in the page
        if depth < max_depth:
            link_parser = LinkParser()

            subcalls = []
            link_parser.feed(page)
            while link_parser.links:
                raw_link = link_parser.links.pop()

                # Skip stuff that looks like an archive
                if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES):
                    continue

                # Evaluate the link relative to the page it came from.
                abs_link = urlparse.urljoin(response_url, raw_link)
                subcalls.append((abs_link, depth+1, max_depth))

            if subcalls:
                pool = Pool(processes=len(subcalls))
                dicts = pool.map(_spider, subcalls)
                for d in dicts:
                    pages.update(d)

    except urllib2.HTTPError, e:
        # Only report it if it's the root page.  We ignore errors when spidering.
        if depth == 1:
            tty.warn("Could not connect to %s" % url, e.reason,
                     "Package.available_versions requires an internet connection.",
                     "Version list may be incomplete.")

    return pages


def get_pages(root_url, **kwargs):
    """Gets web pages from a root URL.
       If depth is specified (e.g., depth=2), then this will also fetch pages
       linked from the root and its children, up to that depth.

       This will spawn processes to fetch the children, for much improved
       performance over a sequential fetch.
    """
    max_depth = kwargs.setdefault('depth', 1)
    pages = _spider((root_url, 1, max_depth))
    return pages
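Aside (hedged usage sketch, not part of the diff): this is roughly how the spider feeds fetch_available_versions() in package.py. The list_url and depth below come from the mpich package earlier in this commit; the regex is a hand-written stand-in for the generated wildcard pattern.

    # Usage sketch only; requires network access and a spack checkout on the path.
    import re
    from spack.util.web import get_pages

    pages = get_pages("http://www.mpich.org/static/downloads/", depth=2)   # {url: html}
    found = set()
    for site, page in pages.iteritems():
        found.update(re.findall(r'mpich-[0-9][0-9a-z.]*\.tar\.gz', page))
    print(sorted(found))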