Cleanup find_versions_of_archive.

- less kludgy matching -- we now extract the version straight from a
  capture group in the original match, rather than by trying to match
  again.

- Avoids problems like finding "2" as the version in mvapich2-1.9, when
  "2" here is part of the package name.
This commit is contained in:
Todd Gamblin 2014-02-09 09:11:12 -08:00
parent be7e0a9604
commit 93e80852f5
4 changed files with 26 additions and 19 deletions

View file

@ -155,7 +155,7 @@ def create(parser, args):
else:
urls = [spack.url.substitute_version(url, v) for v in versions]
if len(urls) > 1:
tty.msg("Found %s versions of %s." % (len(urls), name),
tty.msg("Found %s versions of %s:" % (len(urls), name),
*spack.cmd.elide_list(
["%-10s%s" % (v,u) for v, u in zip(versions, urls)]))
print

View file

@ -394,8 +394,6 @@ def version(self):
@property
def stage(self):
if not self.spec.concrete:
print self.spec
print self.spec.concrete
raise ValueError("Can only get a stage for a concrete package.")
if self._stage is None:
@ -803,8 +801,7 @@ def fetch_available_versions(self):
self._available_versions = find_versions_of_archive(
self.url,
list_url=self.list_url,
list_depth=self.list_depth,
wildcard=self.default_version.wildcard())
list_depth=self.list_depth)
if not self._available_versions:
tty.warn("Found no versions for %s" % self.name,
@ -834,25 +831,30 @@ def available_versions(self):
def find_versions_of_archive(archive_url, **kwargs):
list_url = kwargs.get('list_url', None)
list_depth = kwargs.get('list_depth', 1)
wildcard = kwargs.get('wildcard', None)
if not list_url:
list_url = os.path.dirname(archive_url)
if not wildcard:
wildcard = url.wildcard_version(archive_url)
versions = VersionList()
url_regex = os.path.basename(wildcard)
# This creates a regex from the URL with a capture group for the
# version part of the URL. The capture group is converted to a
# generic wildcard, so we can use this to extract things on a page
# that look like archive URLs.
url_regex = url.wildcard_version(archive_url)
# We'll be a bit more liberal and just look for the archive part,
# not the full path.
archive_regex = os.path.basename(url_regex)
# Grab some web pages to scrape.
page_map = get_pages(list_url, depth=list_depth)
# Build a version list from all the matches we find
versions = VersionList()
for site, page in page_map.iteritems():
strings = set(re.findall(url_regex, page))
for s in strings:
match = re.search(wildcard, s)
if match:
v = match.group(0)
# extract versions from matches.
matches = re.finditer(archive_regex, page)
version_strings = set(m.group(1) for m in matches)
for v in version_strings:
versions.add(Version(v))
return versions

View file

@ -204,4 +204,6 @@ def wildcard_version(path):
v = Version(ver)
parts = list(re.escape(p) for p in path.split(str(v)))
return v.wildcard().join(parts)
# Make a group for the wildcard, so it will be captured by the regex.
version_group = '(%s)' % v.wildcard()
return version_group.join(parts)

View file

@ -163,7 +163,10 @@ def a_or_n(seg):
wc = seg_res[0]
for i in xrange(1, len(sep_res)):
wc += '(?:' + sep_res[i] + seg_res[i]
wc += ')?' * (len(seg_res) - 1)
# Add possible alpha or beta indicator at the end of each segment
# We treat these specially b/c they're so common.
wc += '[ab]?)?' * (len(seg_res) - 1)
return wc