view @ 112:8ea43e4f0eba

Ignore whitelist when --strict is specified
author mr
date Wed, 13 Sep 2017 13:17:02 -0700
parents b71592d49d67
line wrap: on
line source
# Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.

# JDK changeset checker

# Quick configuration: Add the following to your ~/.hgrc:
#   [extensions]
#   jcheck = /path/to/jcheck.py
#   # Omit these lines if you use Mercurial Queues
#   [hooks]
#   pretxnchangegroup.jcheck = python:jcheck.hook
#   pretxncommit.jcheck = python:jcheck.hook
#   # Include this if you use the (deprecated) Mercurial "fetch" extension
#   [defaults]
#   fetch = -m Merge
# For more information: http://openjdk.java.net/projects/code-tools/jcheck/

# Version and date of this jcheck build.  Both values are printed in the
# "[jcheck ...]" banner emitted by checker.error().
# NOTE(review): the @VERSION@ / @DATE@ tokens look like placeholders that are
# substituted when jcheck is packaged -- confirm against the build script
# before changing their spelling or layout.
_version = "@VERSION@"
_date = "@DATE@"

import sys, os, re, urllib, urllib2
from mercurial.node import *
from mercurial import cmdutil, patch, util, context, templater

# Result codes shared by every check, hook and command in this file.
# Pass is falsy and Fail is truthy; checker.rv starts out as Pass and is
# latched to Fail by the first reported error.
Pass, Fail = False, True

def datestr(ctx):
    """Return the date of changeset `ctx` formatted as "YYYY-MM-DD HH:MM"."""
    # NOTE(review): the first argument of util.datestr() was missing in the
    # reviewed copy of this file (the call read "util.datestr(, format=...)").
    # ctx.date() is restored here -- confirm against the upstream source.
    # Mercurial 0.9.5 and earlier append a time zone; strip it.
    return util.datestr(ctx.date(), format="%Y-%m-%d %H:%M")[:16]

def oneline(ctx):
    """Format changeset `ctx` as a single newline-terminated summary line.

    The line holds the revision number, the short node hash, the author,
    the date (see datestr) and a one-line summary of the description.
    """
    # NOTE(review): the fifth format argument and the closing parentheses
    # were missing in the reviewed copy.  The first line of the commit
    # description is the most plausible value -- confirm against upstream.
    lines = ctx.description().splitlines()
    summary = ""
    if lines:
        # Guard against an empty description rather than raising IndexError.
        summary = lines[0]
    return ("%5d:%s  %-12s  %s  %s\n"
            % (ctx.rev(), short(ctx.node()), ctx.user(), datestr(ctx),
               summary))

def is_merge(repo, rev):
    """Report whether revision number `rev` of `repo` is a merge.

    A revision counts as a merge when none of the parent revisions reported
    by the changelog is -1.
    """
    parents = repo.changelog.parentrevs(rev)
    return -1 not in parents

# Locate matchall(): use cmdutil.matchall when this Mercurial provides it,
# otherwise try scmutil.matchall.
# NOTE(review): the "try:" line and the body of the except clause were
# missing in the reviewed copy; they are restored here in the only form the
# surviving indentation allows.
_matchall = getattr(cmdutil, 'matchall', None)
if not _matchall:
    try:
        from mercurial import scmutil
        _matchall = scmutil.matchall
    except ImportError:
        # No scmutil either: _matchall stays None.  Callers wrap their
        # _matchall(repo) call in "except (AttributeError, TypeError)" and
        # fall back to the old walkchangerevs API.
        pass

def repocompat(repo):
    """Patch the class of `repo` so that newer-style operations work.

    Adds, only where the class does not already provide them:
      * __len__      -> self.changelog.count()
      * __getitem__  -> context.changectx(self, arg)
      * branchmap    -> alias of the class's branchtags

    The shims are installed on type(repo), so every instance of that class
    is affected.
    """
    cls = type(repo)

    def _count(self):
        return self.changelog.count()

    def _lookup(self, arg):
        return context.changectx(self, arg)

    # Modern mercurial versions use len(repo) and repo[cset_id]; enable those
    # operations with older versions.
    for attr, shim in (('__len__', _count), ('__getitem__', _lookup)):
        if not getattr(cls, attr, None):
            setattr(cls, attr, shim)

    # Similarly, use branchmap instead of branchtags; enable it if needed.
    if not getattr(cls, 'branchmap', None):
        setattr(cls, 'branchmap', cls.branchtags)

# Configuration-file parsing

def load_conf(root):
    """Parse <root>/.jcheck/conf and return its properties as a dict.

    Every line is stripped first.  Lines starting with "#" are skipped; any
    other line must have the form "name = value", where neither token
    contains whitespace.

    Raises util.Abort when a line does not match that form or when the
    mandatory "project" property is absent.  Errors from open() propagate
    unchanged.
    """
    # NOTE(review): the "try:", "continue", "finally:"/close lines and the
    # operands of the "cf[...] = ..." assignment were missing in the reviewed
    # copy.  They are restored from the surviving structure (indented body
    # after open(), two capture groups in prop_re) -- confirm against
    # upstream.
    cf = { }
    fn = os.path.join(root, ".jcheck/conf")
    f = open(fn)
    try:
        prop_re = re.compile(r"\s*(\S+)\s*=\s*(\S+)\s*$")
        i = 0                           # 1-based line number for messages
        for ln in f.readlines():
            i = i + 1
            ln = ln.strip()
            if ln.startswith("#"):
                continue
            m = prop_re.match(ln)
            if not m:
                raise util.Abort("%s:%d: Invalid configuration syntax: %s"
                                 % (fn, i, ln))
            cf[m.group(1)] = m.group(2)
    finally:
        # Always release the file handle, including on the Abort path.
        f.close()
    for pn in ["project"]:
        if pn not in cf:
            raise util.Abort("%s: Missing property: %s" % (fn, pn))
    return cf

# Author validation

# Names already accepted during this run.  validate_author() consults this
# before doing any lookup and adds every name it accepts.
author_cache = { }                      ## Should really cache more permanently

def validate_author(an, pn):
  # Check author name `an` for project `pn` by fetching a URL built from the
  # two values.  Returns True when the name is accepted (and caches it) and
  # False when the server answers HTTP 404.
  #
  # NOTE(review): this function is incomplete as shown and will not parse.
  #   * The URL format string below is empty although it is formatted with
  #     two arguments; the lookup URL must be restored from upstream.
  #   * The "except" clause and the "if f:" line have no visible enclosing
  #     "try:" / "finally:" and "if f:" has no body.  Presumably a nested
  #     try/except inside a try/finally that closes f belongs here -- verify
  #     against the upstream source before use.
  if author_cache.has_key(an):
    return True
  u = (""
       % (urllib.quote(an), pn))
  f = None
          f = urllib2.urlopen(u)
      except urllib2.HTTPError, e:
          # A 404 means the name is unknown; any other HTTP error is
          # re-raised to the caller.
          if e.code == 404:
              return False
          raise e
      if f:
  author_cache[an] = True
  return True

# Whitespace and comment validation

# Finds a whitespace offence: a tab (group 1), a space or tab at the end of
# a line (group 2), or a carriage return (no group set).
badwhite_re = re.compile("(\t)|([ \t]$)|\r", re.MULTILINE)
# File names whose contents get the whitespace check in checker.c_02_files.
normext_re = re.compile(".*\.(java|c|h|cpp|hpp)$")

# Start of the commit description of a tag-adding changeset; such changesets
# skip the comment grammar in checker.c_01_comment.
tag_desc_re = re.compile("Added tag [^ ]+ for changeset [0-9a-f]{12}")
# Tag names accepted by checker.check_repo: "tip", "jdk-<version>+<build>",
# "jdk<4-9>[u<n>]-b<nn>" and "hs<nn>[.<n>]-b<nn>".
tag_re = re.compile("tip$|jdk-([1-9]([0-9]*)(\.[0-9]){0,3})\+([0-9]+)$|jdk[4-9](u\d{1,3})?-b\d{2,3}$|hs\d\d(\.\d{1,2})?-b\d\d$")

def badwhite_what(m):
    """Return a human-readable name for the problem matched by `m`.

    `m` is a match object produced by badwhite_re.
    """
    # NOTE(review): the two "if" guards were missing in the reviewed copy
    # (only the indented return statements survived).  They are restored from
    # badwhite_re: group 1 captures a tab, group 2 captures trailing
    # whitespace, and a match with neither group set is a carriage return.
    if m.group(1):
        return "Tab character"
    if m.group(2):
        return "Trailing whitespace"
    return "Carriage return (^M)"

# A lower-case e-mail address whose last domain label has 2 to 4 letters.
base_addr_pat = "[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,4}"
# Either a bare address or "Name <address>".
addr_pat = ("(" + base_addr_pat + ")"
            + "|(([-_a-zA-Z0-9][-_ a-zA-Z0-9]+) +<" + base_addr_pat + ">)")

# Each kind of comment line has two patterns: *_ident recognises which kind
# of line this is, and *_check validates its full syntax.  Group 1 of each
# *_check pattern that has groups is what the matching validator consumes.
bug_ident = re.compile("(([A-Z][A-Z0-9]+-)?[0-9]+):")
bug_check = re.compile("([0-9]{7}): \S.*$")
sum_ident = re.compile("Summary:")
sum_check = re.compile("Summary: \S.*")
rev_ident = re.compile("Reviewed-by:")
rev_check = re.compile("Reviewed-by: (([a-z0-9]+)(, [a-z0-9]+)*$)")
con_ident = re.compile("Contributed-by:")
con_check = re.compile("Contributed-by: ((" + addr_pat + ")(, (" + addr_pat + "))*)$")

def bug_validate(ch, ctx, m, pn):
    """Validate one bugid line of a changeset comment.

    ch  -- the checker; errors are reported through ch.error()
    ctx -- the changeset being checked
    m   -- bug_check match for the line; group 1 is the bugid digits
    pn  -- project name (unused here; part of the validator signature)

    Reports an error when the bugid starts with a disallowed digit, when it
    repeats within the changeset, or (unless duplicates are allowed) when an
    earlier revision of the repository already used it.
    """
    # NOTE(review): the right-hand side of this assignment was missing in the
    # reviewed copy; group 1 of bug_check (the digits) is restored.
    bs = m.group(1)
    if not (bs[0] in ['1','2','4','5','6','7','8']):
        ch.error(ctx, "Invalid bugid: %s" % bs)
    b = int(bs)
    if b in ch.cs_bugids:
        ch.error(ctx, "Bugid %d used more than once in this changeset" % b)
    # Record the bugid; nothing else populates cs_bugids, so without this
    # the duplicate test above could never fire.
    ch.cs_bugids.append(b)
    if not ch.bugids_allow_dups and b in ch.repo_bugids:
        r = ch.repo_bugids[b]
        # Only an earlier revision counts as a prior use.
        if r < ctx.rev():
            ch.error(ctx, ("Bugid %d already used in this repository, in revision %d "
                           % (b, r)))

def rev_validate(ch, ctx, m, pn):
    """Validate the reviewer names of a "Reviewed-by:" line.

    ch  -- the checker; errors are reported through ch.error()
    ctx -- the changeset being checked
    m   -- rev_check match; group 1 is the comma-separated reviewer list
    pn  -- project name handed to validate_author()
    """
    # NOTE(review): the second argument of re.split() was missing in the
    # reviewed copy; group 1 of rev_check is restored.
    ans = re.split(", *", m.group(1))
    for an in ans:
        if not validate_author(an, pn):
            ch.error(ctx, "Invalid reviewer name: %s" % an)
    # Record the reviewers; checker.c_01_comment compares cs_reviewers with
    # the author to reject self-reviews, and nothing else assigns it.
    ch.cs_reviewers = ans

def con_validate(ch, ctx, m, pn):
    """Record the contributor list of a "Contributed-by:" line on `ch`.

    m is a con_check match; group 1 is everything after "Contributed-by: ".
    ctx and pn are unused; they are part of the validator signature.
    """
    # NOTE(review): the right-hand side was missing in the reviewed copy;
    # group 1 of con_check is restored.
    ch.cs_contributor = m.group(1)

class State:
    """One production of the changeset-comment grammar.

    name          -- label used in error messages
    ident_pattern -- compiled regex that recognises lines of this kind
    check_pattern -- compiled regex that validates such a line in full
    validator     -- optional callable(checker, ctx, match, project) run on
                     every line that passes check_pattern
    min, max      -- allowed number of consecutive lines of this kind
    """

    def __init__(self, name, ident_pattern, check_pattern,
                 validator=None, min=0, max=1):
        # NOTE(review): in the reviewed copy this assignment was fused into
        # the "def" line as ": = name"; it is restored here.
        self.name = name
        self.ident_pattern = ident_pattern
        self.check_pattern = check_pattern
        self.validator = validator
        self.min = min
        self.max = max

# The changeset-comment grammar, in the order the line kinds must appear:
# one or more bugid lines, an optional summary, exactly one reviewer line
# and an optional contributor line.
# NOTE(review): the closing bracket was missing in the reviewed copy.
comment_grammar = [
    State("bugid line",
          bug_ident, bug_check, validator=bug_validate, min=1, max=1000),
    State("change summary",
          sum_ident, sum_check, min=0, max=1),
    State("reviewer attribution",
          rev_ident, rev_check, validator=rev_validate, min=1, max=1),
    State("contributor attribution",
          con_ident, con_check, validator=con_validate, min=0, max=1)
]

def checked_comment_line(ln):
    """Return True if `ln` is recognised by any entry of comment_grammar.

    Only each entry's ident_pattern is consulted, so a line counts even when
    it would later fail the stricter check_pattern.
    """
    recognisers = [st for st in comment_grammar if st.ident_pattern.match(ln)]
    return len(recognisers) > 0

def repo_bugids(ui, repo):
    """Collect the bugids used in `repo`.

    Walks revisions 0:tip and returns a dict mapping each bugid (int) found
    on a valid bugid line to the revision number of the first changeset in
    walk order that mentions it.

    ui is only needed by the fallback path for old Mercurial versions.
    """
    def addbugids(bugids, ctx):
        # Add every bugid in ctx's description that is not recorded yet.
        for ln in ctx.description().splitlines():
            m = bug_check.match(ln)
            if m:
                # NOTE(review): the argument of int() was missing in the
                # reviewed copy; group 1 of bug_check is restored.
                b = int(m.group(1))
                if b not in bugids:
                    bugids[b] = ctx.rev()

    # Should cache this, eventually
    bugids = { }                        # bugid -> rev
    opts = { 'rev' : ['0:tip'] }
    # NOTE(review): the "try:" line was missing in the reviewed copy; it is
    # restored to pair with the surviving except clause.
    try:
        nop = lambda c, fns: None
        revs = cmdutil.walkchangerevs(repo, _matchall(repo), opts, nop)
        for ctx in revs:
            addbugids(bugids, ctx)
    except (AttributeError, TypeError):
        # AttributeError:  matchall does not exist in hg < 1.1
        # TypeError:  walkchangerevs args differ in hg <= 1.3.1
        get = util.cachefunc(lambda r: repo.changectx(r).changeset())
        changeiter, matchfn = cmdutil.walkchangerevs(ui, repo, [], get, opts)
        for st, rev, fns in changeiter:
            if st == 'add':
                node = repo.changelog.node(rev)
                addbugids(bugids, context.changectx(repo, node))
    return bugids


# Black/white lists
## The black/white lists should really be in the database

# Bogus yet historically-accepted changesets,
# so that jcheck may evolve
# Full hex hashes of changesets that checker.check() skips unless --strict
# is in effect.
# NOTE(review): the closing bracket was missing in the reviewed copy, and
# some entries appear to have been lost (a comment-only line where a hash
# is expected, and empty "consolidated" sections).  Restore the missing
# hashes from the upstream source.
changeset_whitelist = [

    '73a4d5be86497baf74c1fc194c9a0dd4e86d3a31', # jdk6/jdk6/jaxp bad comment
    'a25f15bfd04b46a302b6ca1a298c176344f432dd', # jdk6/jdk6/jdk  bad comment
    'bf87d5af43614d609a5251c43eea44c028500d02', # jdk6/jdk6/jdk  bad comment
    'd77434402021cebc4c25b452db18bbfd2d7ccda1', # jdk6/jdk6/jdk  bad comment
    '931e5f39e365a0d550d79148ff87a7f9e864d2e1', # hotspot dup bugid 7147064
    'd8abc90163a4b58db407a60cba331ab21c9977e7', # hotspot dup bugid 7147064
    '45849c62c298aa8426c9e67599e4e35793d8db13', # pubs executable files
    '38050e6655d8acc220800a28128cef328906e825', # pubs invalid bugid line
    # hotspot/test/closed no Reviewed-by line
    'c667bae72ea8530ef1e055dc25951b991dfd5888', # hotspot dup bugid 8169597 (hs)
    '5a574ef5a4eec3ec3be9352aae3b383202c9a3a6', # hotspot dup bugid 8169597 (dev)
    '38a240fd58a287acb1963920b92ed4d9c2fd39e3', # hotspot dup bugid 8179954 (jdk10)
    'fc8c54b03f821dbc7385ab6b08cb91cc7a3bf3cb', # hotspot dup bugid 8179954 (hs)
    # For duplicate bugids, add the hashes of all related changesets!

    # consolidated/open

    # consolidated/closed
]


# Bad changesets that should never be allowed in
# Full hex hashes of changesets that checker.c_03_hash always rejects.
# NOTE(review): in the reviewed copy EVERY hash in this list was missing
# (only the trailing comments survived) and so was the closing bracket.
# The list is closed here so that the module parses, but it is EMPTY: the
# hashes below must be restored from the upstream source before this file
# is used as a gate.
changeset_blacklist = [
    # hsdev/hotspot/{hotspot,master} dup bugid 7019157
    # hsx/hotspot-comp/jdk dup bugid 7052202 + follow-on cset
    # hsx/hotspot-rt/hotspot wrong bugid 7059288 + associated merge
    # jdk8/awt/jdk dup bugid 7100054
    # jdk7u/jdk7u-dev/jaxp mistaken push
    # jdk8/build/pubs executable file
    # jdk8/2d/jdk/src/closed security fix in wrong forest
    # jdk7u/jdk7u5/jdk bad fix for 6648202
    # hsx/hsx24/hotspot/src/closed bad merge
    # hsx/hsx24/hotspot/test/closed bad merge
    # jdk8/awt/jdk INTJDK-7600365
    # jdk8/tl/jdk/test/closed INTJDK-7600460
    # jdk7u11-dev/jaxp bad fix for 7192390
    # jdk7u11-dev/jdk bad fix for 7192390
    # jdk7u11-dev/jdk/test/closed bad fix for 7192390
    # jdk7u11-dev/jdk redone rmi fix
    # jdk7u11-dev/jdk/test/closed redone rmi fix
    # jdk7u13-dev/jdk bad fix for 8006611
    # jdk8/nashorn unwanted tag jdk8-b78
    # hsx/hotspot-emb/hotspot wrong bugid 8009004
    # jdk7u40/jdk/src/closed mistaken push 8016315
    # jdk7u40/jdk/test/closed mistaken push 8016622
    # hsx/hotspot-gc/hotspot wrong bugid 8024547
    # jdk8/ds/install dup bugid 8024771
    # jdk5u/jdk5.0u55/j2se bad fix 8025034
    # jdk6u/jdk6u65/j2se bad fix 8025034
    # jdk7u/jdk7u45/j2se bad fix 8025034
    # jdk8/build/pubs executable files
    # hsx/jdk7u/hotspot wrong bugid
    # jdk8/tl/jdk erroneous push 7152892
    # jdk8/tl/jdk/test/closed erroneous push 7152892
    # jdk9/jdk9/closed bad tag
    # jdk9/hs-rt/jdk/src/closed dup bugid 8034951
    # jdk9/hs-rt/jdk/test/closed dup bugid 8034951
    # jdk9/client/deploy erroneous push 8041798
]

# Path to file containing additional blacklisted changesets
# (one hash per line; see checker.read_blacklist for the format).
blacklist_file = '/oj/db/hg/blacklist'

# Checker class

class checker(object):
    """Applies the jcheck rules to the changesets of one repository.

    Every method whose name starts with "c_" is a per-changeset check;
    check() runs them in sorted name order.  Problems are reported through
    error(), which also latches self.rv to Fail.  check_repo() performs the
    repository-wide checks (tag names, named branches, multiple heads).
    """

    # NOTE(review): the reviewed copy of this class had lost a number of
    # lines and tokens (bare "return"/"break" statements, "st.name",
    # "self.summarize(ctx)", "fx.data()", "badwhite_re.search(...)",
    # "self.error(None,", ...).  They are restored below from the surviving
    # structure; each restored spot carries a short comment.  Confirm
    # against the upstream source.

    def __init__(self, ui, repo, strict, lax):
        """Set up a checker for `repo` that reports through `ui`.

        strict, lax -- the flags of the same name from the hook or command;
                       lax only takes effect when strict is off.  The
                       repository's .jcheck/conf can independently switch
                       the whitespace, comments, tags and bugids checks to
                       lax.

        Raises util.Abort (via load_conf) on a bad configuration file.
        """
        self.ui = ui
        self.repo = repo
        self.rv = Pass
        # Run the c_NN_ checks in their numbered order: c_01_comment reads
        # cs_author, which c_00_author sets, and dict iteration order is not
        # guaranteed on the Python versions this file supports.
        self.checks = [c for c in checker.__dict__ if c.startswith("c_")]
        self.checks.sort()
        self.summarized = False
        self.repo_bugids = [ ]
        self.cs_bugids = [ ]            # Bugids in current changeset
        self.cs_author = None           # Author of current changeset
        self.cs_reviewers = [ ]         # Reviewers of current changeset
        self.cs_contributor = None      # Contributor of current changeset
        self.strict = strict
        self.conf = load_conf(repo.root)
        self.whitespace_lax = lax and not strict
        if self.conf.get("whitespace") == "lax":
            self.whitespace_lax = True
        self.comments_lax = lax and not strict
        if self.conf.get("comments") == "lax":
            self.comments_lax = True
        self.tags_lax = lax and not strict
        if self.conf.get("tags") == "lax":
            self.tags_lax = True
        self.bugids_allow_dups = self.conf.get("bugids") == "dup"
        self.bugids_lax = lax and not strict
        if self.conf.get("bugids") == "lax":
            self.bugids_lax = True
        self.bugids_ignore = False
        if self.conf.get("bugids") == "ignore":
            self.bugids_ignore = True
        if not self.bugids_ignore:
            # only identify bug ids if we are going to use them
            self.repo_bugids = repo_bugids(ui, repo)
        self.blacklist = dict.fromkeys(changeset_blacklist)
        # Restored: nothing else calls read_blacklist or uses blacklist_file.
        self.read_blacklist(blacklist_file)
        # hg < 1.0 does not have localrepo.tagtype()
        self.tagtype = getattr(self.repo, 'tagtype', lambda k: 'global')

    def read_blacklist(self, fname):
        """Merge the hashes listed in file `fname` into self.blacklist.

        Each line holds a changeset hash, optionally followed by "#" and a
        comment.  Lines without a hash are ignored.  A missing file is not
        an error.
        """
        if not os.path.exists(fname):
            return                      # restored: guard had no body
        self.ui.debug('Reading blacklist file %s\n' % fname)
        f = open(fname)
        try:
            for line in f:
                # Any comment after the changeset hash becomes the dictionary value.
                l = [s.strip() for s in line.split('#', 1)]
                if l and l[0]:
                    self.blacklist[l[0]] = len(l) == 2 and l[1] or None
        finally:
            # Do not leak the file handle.
            f.close()

    def summarize(self, ctx):
        """Print the revision, author, date and description of `ctx`."""
        self.ui.status("> Changeset: %d:%s\n" % (ctx.rev(), short(ctx.node())))
        self.ui.status("> Author:    %s\n" % ctx.user())
        self.ui.status("> Date:      %s\n" % datestr(ctx))
        self.ui.status(">\n> ")
        self.ui.status("\n> ".join(ctx.description().splitlines()))
        # Terminate the quoted description; otherwise the error message that
        # follows would be glued onto its last line.
        self.ui.status("\n\n")

    def error(self, ctx, msg):
        """Report `msg` and mark the overall result as Fail.

        The jcheck banner is printed before the first error of the run and
        the changeset summary before the first error of each changeset.
        ctx may be None for repository-level errors; no summary is printed
        then.
        """
        if self.rv != Fail:
            self.ui.status("[jcheck %s %s]\n" % (_version, _date))
        if not self.summarized:
            if ctx:
                self.summarize(ctx)     # restored: guard had no body
            self.summarized = True
        self.ui.status(msg + "\n")
        self.rv = Fail

    def c_00_author(self, ctx):
        """Check that the changeset author is valid for the project."""
        self.ui.debug("author: %s\n" % ctx.user())
        if not validate_author(ctx.user(), self.conf["project"]):
            self.error(ctx, "Invalid changeset author: %s" % ctx.user())
        self.cs_author = ctx.user()

    def c_01_comment(self, ctx):
        """Check the changeset description against the comment grammar.

        Merge changesets must be described as exactly "Merge".  Tag
        changesets and duke's "Initial load" are exempt from the grammar.
        """
        # Restored: right-hand side of this assignment was missing.
        m = badwhite_re.search(ctx.description())
        if m:
            ln = ctx.description().count("\n", 0, m.start()) + 1
            self.error(ctx, "%s in comment (line %d)" % (badwhite_what(m), ln))

        if is_merge(self.repo, ctx.rev()):
            if ctx.description() != "Merge":
                self.error(ctx, ("Invalid comment for merge changeset"
                                 + " (must be \"Merge\")"))
            return                      # restored

        if tag_desc_re.match(ctx.description()):
            ## Should check tag itself
            return                      # restored

        if ((ctx.rev() == 0 or (ctx.rev() == 1 and self.comments_lax))
            and ctx.user() == "duke"
            and ctx.description().startswith("Initial load")):
            return                      # restored

        lns = ctx.description().splitlines()

        # If lax, filter out non-matching lines
        if self.comments_lax:
            lns = [ln for ln in lns if checked_comment_line(ln)]

        # Walk the description lines and the grammar in lockstep: each
        # grammar state consumes the run of consecutive lines it recognises.
        i = 0                           # Input index
        gi = -1                         # Grammar index
        n = 0                           # Occurrence count
        while i < len(lns):
            gi = gi + 1
            if gi >= len(comment_grammar):
                break                   # restored
            ln = lns[i]
            st = comment_grammar[gi]
            n = 0
            while (st.ident_pattern.match(ln)):
                m = st.check_pattern.match(ln)
                if not m:
                    # Malformed bugid lines are tolerated in lax/ignore mode.
                    if not (st.name == "bugid line" and (self.bugids_lax or self.bugids_ignore)):
                        self.error(ctx, "Invalid %s" % st.name)
                elif st.validator:
                    if not (st.name == "bugid line" and self.bugids_ignore):
                        st.validator(self, ctx, m, self.conf["project"])
                n = n + 1
                i = i + 1
                if i >= len(lns):
                    break               # restored
                ln = lns[i]
            if n < st.min and not self.comments_lax:
                self.error(ctx, "Incomplete comment: Missing %s" % st.name)
            if n > st.max:
                self.error(ctx, "Too many %ss" % st.name)

        if not self.cs_contributor and [self.cs_author] == self.cs_reviewers:
            self.error(ctx, "Self-reviews not permitted")
        if not self.comments_lax:
            if (gi == 0 and n > 0):
                self.error(ctx, "Incomplete comment: Missing bugid line")
            elif gi == 1 or (gi == 2 and n == 0):
                self.error(ctx, "Incomplete comment: Missing reviewer attribution")
            if (i < len(lns)):
                self.error(ctx, "Extraneous text in comment")

    def c_02_files(self, ctx):
        """Check the files modified or added by `ctx`.

        Source files (see normext_re) are checked for bad whitespace unless
        whitespace checking is lax; executable files and symbolic links are
        always rejected.
        """
        changes = self.repo.status(ctx.parents()[0].node(),
                                   ctx.node(), None)[:5]
        modified, added = changes[:2]
        ## Skip files that were renamed but not modified
        files = modified + added
        if self.ui.debugflag:
            self.ui.debug("Checking files: %s\n" % ", ".join(files))
        for f in files:
            if ctx.rev() == 0:
                ## This is loathsome
                if f.startswith("test/java/rmi"): continue
                if f.startswith("test/com/sun/javadoc/test"): continue
                if f.startswith("docs/technotes/guides"): continue
            fx = ctx.filectx(f)
            if normext_re.match(f) and not self.whitespace_lax:
                data = fx.data()        # restored
                # Cheap substring tests first; only run the regex when one
                # of the offending characters is present at all.
                if "\t" in data or "\r" in data or " \n" in data:
                    m = badwhite_re.search(data)    # restored
                    if m:
                        ln = data.count("\n", 0, m.start()) + 1
                        self.error(ctx, "%s:%d: %s" % (f, ln, badwhite_what(m)))
            ## check_file_header(self, fx, data)
            flags = fx.manifest().flags(f)
            if 'x' in flags:
                self.error(ctx, "%s: Executable files not permitted" % f)
            if 'l' in flags:
                self.error(ctx, "%s: Symbolic links not permitted" % f)

    def c_03_hash(self, ctx):
        """Reject changesets whose hash is blacklisted."""
        hash = hex(ctx.node())
        if hash in self.blacklist:
            self.error(ctx, "Blacklisted changeset: " + hash)

    def check(self, node):
        """Run every c_* check on the changeset `node`.

        Whitelisted changesets are skipped (result Pass) unless strict is
        set.  Returns self.rv, i.e. Fail as soon as any changeset checked so
        far has produced an error.
        """
        self.summarized = False
        self.cs_bugids = [ ]
        self.cs_author = None
        self.cs_reviewers = [ ]
        self.cs_contributor = None
        ctx = context.changectx(self.repo, node)
        if not self.strict and hex(node) in changeset_whitelist:
            self.ui.note("%s in whitelist; skipping\n" % hex(node))
            return Pass
        for c in self.checks:
            cf = checker.__dict__[c]
            cf(self, ctx)
        return self.rv

    def check_repo(self):
        """Run the repository-wide checks and return self.rv.

        Checks tag names (unless tags are lax), rejects named branches and,
        in strict mode, rejects multiple heads.
        """

        if not self.tags_lax:
            ts = self.repo.tags().keys()
            ignoredtypes = ['local']
            for t in ts:
                if not tag_re.match(t) and not self.tagtype(t) in ignoredtypes:
                    # restored: "self.error(None," opener was missing
                    self.error(None,
                               "Illegal tag name: %s" % t)

        bs = self.repo.branchmap()
        if len(bs) > 1:
            # Work on a copy so the repository's own map is left untouched.
            bs = bs.copy()
            if "default" in bs:
                del bs["default"]
            self.error(None,
                       "Named branches not permitted; this repository has: %s"
                       % ", ".join(bs.keys()))

        if self.strict:
            nh = len(self.repo.heads())
            if nh > 1:
                self.error(None,
                           "Multiple heads not permitted; this repository has %d"
                           % nh)

        return self.rv

def hook(ui, repo, hooktype, node=None, source=None, **opts):
    """Mercurial hook entry point: check the incoming changesets.

    Checks the repository as a whole and then every changeset from `node`
    (a hex string) up to the end of the changelog.  Returns Pass when jcheck
    is not enabled for the repository or nothing failed, Fail otherwise.

    Raises util.Abort when the repository is not local.
    """
    # NOTE(review): several lines of this function were missing in the
    # reviewed copy (the tail of the "end = ..." expression and the bodies of
    # the for loop and of the final "if").  The restored lines are marked
    # below -- confirm against the upstream source.
    ui.debug("jcheck: node %s, source %s, args %s\n" % (node, source, opts))
    # Restored: repocompat() is otherwise never called, yet check_repo()
    # relies on the branchmap() it provides for old Mercurial versions.
    repocompat(repo)
    if not repo.local():
        raise util.Abort("repository '%s' is not local" % repo.path)
    if not os.path.exists(os.path.join(repo.root, ".jcheck")):
        ui.note("jcheck not enabled (no .jcheck in repository root); skipping\n")
        return Pass
    strict = opts.get("strict", False)
    lax = opts.get("lax", False)
    if strict:
        lax = False
    ch = checker(ui, repo, strict, lax)
    # Restored: check_repo() is otherwise never invoked.
    ch.check_repo()
    firstnode = bin(node)
    start = repo.changelog.rev(firstnode)
    # Old changelogs expose count(); newer repositories support len(repo).
    if hasattr(repo.changelog, 'count'):
        end = repo.changelog.count()
    else:
        end = len(repo)                 # restored
    for rev in xrange(start, end):
        ch.check(repo.changelog.node(rev))      # restored
    if ch.rv == Fail:
        # Restored (presumed): blank line to set the report apart.
        ui.status("\n")
    return ch.rv

# Run this hook in repository gates

def strict_hook(ui, repo, hooktype, node=None, source=None, **opts):
    """Run hook() with the "strict" option forced on.

    Takes the same arguments as hook() and returns its result.
    """
    opts.update(strict=True)
    return hook(ui, repo, hooktype, node=node, source=source, **opts)

# From Mercurial 1.9, the preferred way to define commands is using the @command
# decorator. If this isn't available, fallback on a simple local implementation
# that just adds the data to the cmdtable.
# Table of commands exported by this extension; filled in by @command.
cmdtable = {}
if hasattr(cmdutil, 'command'):
    command = cmdutil.command(cmdtable)
else:
    # NOTE(review): the "else:" line was missing in the reviewed copy.
    # Without it the local fallback below always replaced the real
    # decorator, and "command" was undefined whenever cmdutil.command was
    # absent -- the opposite of what the comment above this block describes.
    def command(name, options, synopsis):
        """Minimal stand-in for cmdutil.command: register func in cmdtable."""
        def decorator(func):
            cmdtable[name] = func, list(options), synopsis
            return func
        return decorator

# Options of the "hg jcheck" command; each entry is a
# (short name, long name, default, help text) tuple.
opts = [
    ("", "lax", False, "Check comments, tags and whitespace laxly"),
    ("r", "rev", [], "check the specified revision or range (default: tip)"),
    ("s", "strict", False, "check everything"),
]

# Argument synopsis appended to "hg jcheck " in the command registration.
help = "[-r rev] [-s]"

@command("jcheck", opts, "hg jcheck " + help)
def jcheck(ui, repo, **opts):
    """check changesets against JDK standards"""
    # Command entry point.  Checks the repository as a whole and then every
    # changeset selected by --rev (default: tip).  Returns Pass when jcheck
    # is not enabled for the repository or nothing failed, Fail otherwise;
    # raises util.Abort when the repository is not local.
    #
    # NOTE(review): several lines of this function were missing in the
    # reviewed copy ("try:", the loop bodies, the displayer calls and the
    # body of the final "if").  The restored lines are marked below --
    # confirm against the upstream source.
    ui.debug("jcheck repo=%s opts=%s\n" % (repo.path, opts))
    # Restored: repocompat() is otherwise never called, yet check_repo()
    # relies on the branchmap() it provides for old Mercurial versions.
    repocompat(repo)
    if not repo.local():
        raise util.Abort("repository '%s' is not local" % repo.path)
    if not os.path.exists(os.path.join(repo.root, ".jcheck")):
        ui.status("jcheck not enabled (no .jcheck in repository root)\n")
        return Pass
    if not opts.get("rev"):
        opts["rev"] = ["tip"]

    strict = opts.get("strict", False)
    lax = opts.get("lax", False)
    if strict:
        lax = False
    ch = checker(ui, repo, strict, lax)
    # Restored: check_repo() is otherwise never invoked.
    ch.check_repo()

    try:                                # restored
        nop = lambda c, fns: None
        revs = cmdutil.walkchangerevs(repo, _matchall(repo), opts, nop)
        for ctx in revs:
            ch.check(ctx.node())        # restored
    except (AttributeError, TypeError):
        # AttributeError:  matchall does not exist in hg < 1.1
        # TypeError:  walkchangerevs args differ in hg <= 1.3.1
        get = util.cachefunc(lambda r: repo.changectx(r).changeset())
        changeiter, matchfn = cmdutil.walkchangerevs(ui, repo, [], get, opts)
        if ui.debugflag:
            displayer = cmdutil.show_changeset(ui, repo, opts, True, matchfn)
        for st, rev, fns in changeiter:
            if st == 'add':
                node = repo.changelog.node(rev)
                if ui.debugflag:
                    # restored: only ", node, copies=False)" survived
                    displayer.show(rev, node, copies=False)
                ch.check(node)          # restored
            elif st == 'iter':
                if ui.debugflag:
                    displayer.flush(rev)        # restored (presumed)

    if ch.rv == Fail:
        # Restored (presumed): blank line to set the report apart.
        ui.status("\n")
    return ch.rv

# This is invoked on servers to check pushkeys; it's not needed on clients.
def prepushkey(ui, repo, hooktype, namespace, key, old=None, new=None, **opts):
    """Allow pushkey operations in the 'phases' namespace only.

    Returns Pass for 'phases'; for any other namespace an error is written
    through ui.write_err and Fail is returned.
    """
    if namespace != 'phases':
        ui.write_err('ERROR:  pushing keys (%s) is disabled\n' % namespace)
        return Fail
    return Pass