Cleanup find_versions_of_archive.

- less kludgy matching -- we now extract the version straight from a
  capture group in the original match, rather than by trying to match
  again.

- Avoids problems like finding "2" as the version in mvapich2-1.9, when
  "2" here is part of the package name.
This commit is contained in:
Todd Gamblin 2014-02-09 09:11:12 -08:00
parent be7e0a9604
commit 93e80852f5
4 changed files with 26 additions and 19 deletions

View file

@ -155,7 +155,7 @@ def create(parser, args):
else:
urls = [spack.url.substitute_version(url, v) for v in versions]
if len(urls) > 1:
tty.msg("Found %s versions of %s." % (len(urls), name),
tty.msg("Found %s versions of %s:" % (len(urls), name),
*spack.cmd.elide_list(
["%-10s%s" % (v,u) for v, u in zip(versions, urls)]))
print

View file

@ -394,8 +394,6 @@ def version(self):
@property
def stage(self):
if not self.spec.concrete:
print self.spec
print self.spec.concrete
raise ValueError("Can only get a stage for a concrete package.")
if self._stage is None:
@ -803,8 +801,7 @@ def fetch_available_versions(self):
self._available_versions = find_versions_of_archive(
self.url,
list_url=self.list_url,
list_depth=self.list_depth,
wildcard=self.default_version.wildcard())
list_depth=self.list_depth)
if not self._available_versions:
tty.warn("Found no versions for %s" % self.name,
@ -834,25 +831,30 @@ def available_versions(self):
def find_versions_of_archive(archive_url, **kwargs):
list_url = kwargs.get('list_url', None)
list_depth = kwargs.get('list_depth', 1)
wildcard = kwargs.get('wildcard', None)
if not list_url:
list_url = os.path.dirname(archive_url)
if not wildcard:
wildcard = url.wildcard_version(archive_url)
versions = VersionList()
url_regex = os.path.basename(wildcard)
# This creates a regex from the URL with a capture group for the
# version part of the URL. The capture group is converted to a
# generic wildcard, so we can use this to extract things on a page
# that look like archive URLs.
url_regex = url.wildcard_version(archive_url)
# We'll be a bit more liberal and just look for the archive part,
# not the full path.
archive_regex = os.path.basename(url_regex)
# Grab some web pages to scrape.
page_map = get_pages(list_url, depth=list_depth)
# Build a version list from all the matches we find
versions = VersionList()
for site, page in page_map.iteritems():
strings = set(re.findall(url_regex, page))
for s in strings:
match = re.search(wildcard, s)
if match:
v = match.group(0)
# extract versions from matches.
matches = re.finditer(archive_regex, page)
version_strings = set(m.group(1) for m in matches)
for v in version_strings:
versions.add(Version(v))
return versions

View file

@ -204,4 +204,6 @@ def wildcard_version(path):
v = Version(ver)
parts = list(re.escape(p) for p in path.split(str(v)))
return v.wildcard().join(parts)
# Make a group for the wildcard, so it will be captured by the regex.
version_group = '(%s)' % v.wildcard()
return version_group.join(parts)

View file

@ -163,7 +163,10 @@ def a_or_n(seg):
wc = seg_res[0]
for i in xrange(1, len(sep_res)):
wc += '(?:' + sep_res[i] + seg_res[i]
wc += ')?' * (len(seg_res) - 1)
# Add possible alpha or beta indicator at the end of each segment
# We treat these specially b/c they're so common.
wc += '[ab]?)?' * (len(seg_res) - 1)
return wc