src/share/classes/sun/dyn/util/BytecodeName.java
author jrose
Tue May 05 22:40:09 2009 -0700 (6 months ago)
changeset 1193 d201987cb76c
permissions -rw-r--r--
6829144: JSR 292 JVM features need a provisional Java API
Summary: JDK API and runtime (partial) for anonk, meth, indy
Reviewed-by: mr
        1 /*
        2  * Copyright 2007-2009 Sun Microsystems, Inc.  All Rights Reserved.
        3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
        4  *
        5  * This code is free software; you can redistribute it and/or modify it
        6  * under the terms of the GNU General Public License version 2 only, as
        7  * published by the Free Software Foundation.  Sun designates this
        8  * particular file as subject to the "Classpath" exception as provided
        9  * by Sun in the LICENSE file that accompanied this code.
       10  *
       11  * This code is distributed in the hope that it will be useful, but WITHOUT
       12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       14  * version 2 for more details (a copy is included in the LICENSE file that
       15  * accompanied this code).
       16  *
       17  * You should have received a copy of the GNU General Public License version
       18  * 2 along with this work; if not, write to the Free Software Foundation,
       19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       20  *
       21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       22  * CA 95054 USA or visit www.sun.com if you need additional information or
       23  * have any questions.
       24  */
       25 
       26 package sun.dyn.util;
       27 
       28 /**
       29  * Utility routines for dealing with bytecode-level names.
       30  * Includes universal mangling rules for the JVM.
       31  *
       32  * <h3>Avoiding Dangerous Characters </h3>
       33  *
       34  * <p>
       35  * The JVM defines a very small set of characters which are illegal
       36  * in name spellings.  We will slightly extend and regularize this set
       37  * into a group of <cite>dangerous characters</cite>.
       38  * These characters will then be replaced, in mangled names, by escape sequences.
       39  * In addition, accidental escape sequences must be further escaped.
       40  * Finally, a special prefix will be applied if and only if
       41  * the mangling would otherwise fail to begin with the escape character.
       42  * This happens to cover the corner case of the null string,
       43  * and also clearly marks symbols which need demangling.
       44  * </p>
       45  * <p>
       46  * Dangerous characters are the union of all characters forbidden
       47  * or otherwise restricted by the JVM specification,
       48  * plus their mates, if they are brackets
       49  * (<code><big><b>[</b></big></code> and <code><big><b>]</b></big></code>,
       50  * <code><big><b>&lt;</b></big></code> and <code><big><b>&gt;</b></big></code>),
       51  * plus, arbitrarily, the colon character <code><big><b>:</b></big></code>.
       52  * There is no distinction between type, method, and field names.
       53  * This makes it easier to convert between mangled names of different
       54  * types, since they do not need to be decoded (demangled).
       55  * </p>
       56  * <p>
       57  * The escape character is backslash <code><big><b>\</b></big></code>
       58  * (also known as reverse solidus).
       59  * This character is, until now, unheard of in bytecode names,
       60  * but traditional in the proposed role.
       61  *
       62  * </p>
       63  * <h3> Replacement Characters </h3>
       64  *
       65  *
       66  * <p>
       67  * Every escape sequence is two characters
       68  * (in fact, two UTF8 bytes) beginning with
       69  * the escape character and followed by a
       70  * <cite>replacement character</cite>.
       71  * (Since the replacement character is never a backslash,
       72  * iterated manglings do not double in size.)
       73  * </p>
       74  * <p>
       75  * Each dangerous character has some rough visual similarity
       76  * to its corresponding replacement character.
       77  * This makes mangled symbols easier to recognize by sight.
       78  * </p>
       79  * <p>
       80  * The dangerous characters are
       81  * <code><big><b>/</b></big></code> (forward slash, used to delimit package components),
       82  * <code><big><b>.</b></big></code> (dot, also a package delimiter),
       83  * <code><big><b>;</b></big></code> (semicolon, used in signatures),
       84  * <code><big><b>$</b></big></code> (dollar, used in inner classes and synthetic members),
       85  * <code><big><b>&lt;</b></big></code> (left angle),
       86  * <code><big><b>&gt;</b></big></code> (right angle),
       87  * <code><big><b>[</b></big></code> (left square bracket, used in array types),
       88  * <code><big><b>]</b></big></code> (right square bracket, reserved in this scheme for language use),
       89  * and <code><big><b>:</b></big></code> (colon, reserved in this scheme for language use).
       90  * Their replacements are, respectively,
       91  * <code><big><b>|</b></big></code> (vertical bar),
       92  * <code><big><b>,</b></big></code> (comma),
       93  * <code><big><b>?</b></big></code> (question mark),
       94  * <code><big><b>%</b></big></code> (percent),
       95  * <code><big><b>^</b></big></code> (caret),
 * <code><big><b>_</b></big></code> (underscore),
 * <code><big><b>{</b></big></code> (left curly bracket),
 * <code><big><b>}</b></big></code> (right curly bracket), and
 * <code><big><b>!</b></big></code> (exclamation mark).
      100  * In addition, the replacement character for the escape character itself is
      101  * <code><big><b>-</b></big></code> (hyphen),
      102  * and the replacement character for the null prefix is
      103  * <code><big><b>=</b></big></code> (equal sign).
      104  * </p>
      105  * <p>
      106  * An escape character <code><big><b>\</b></big></code>
      107  * followed by any of these replacement characters
      108  * is an escape sequence, and there are no other escape sequences.
      109  * An equal sign is only part of an escape sequence
      110  * if it is the second character in the whole string, following a backslash.
      111  * Two consecutive backslashes do <em>not</em> form an escape sequence.
      112  * </p>
      113  * <p>
      114  * Each escape sequence replaces a so-called <cite>original character</cite>
      115  * which is either one of the dangerous characters or the escape character.
      116  * A null prefix replaces an initial null string, not a character.
      117  * </p>
      118  * <p>
      119  * All this implies that escape sequences cannot overlap and may be
      120  * determined all at once for a whole string.  Note that a spelling
      121  * string can contain <cite>accidental escapes</cite>, apparent escape
      122  * sequences which must not be interpreted as manglings.
      123  * These are disabled by replacing their leading backslash with an
      124  * escape sequence (<code><big><b>\-</b></big></code>).  To mangle a string, three logical steps
      125  * are required, though they may be carried out in one pass:
      126  * </p>
      127  * <ol>
      128  *   <li>In each accidental escape, replace the backslash with an escape sequence
      129  * (<code><big><b>\-</b></big></code>).</li>
      130  *   <li>Replace each dangerous character with an escape sequence
      131  * (<code><big><b>\|</b></big></code> for <code><big><b>/</b></big></code>, etc.).</li>
      132  *   <li>If the first two steps introduced any change, <em>and</em>
      133  * if the string does not already begin with a backslash, prepend a null prefix (<code><big><b>\=</b></big></code>).</li>
      134  * </ol>
      135  *
      136  * To demangle a mangled string that begins with an escape,
      137  * remove any null prefix, and then replace (in parallel)
      138  * each escape sequence by its original character.
      139  * <p>Spelling strings which contain accidental
      140  * escapes <em>must</em> have them replaced, even if those
      141  * strings do not contain dangerous characters.
      142  * This restriction means that mangling a string always
      143  * requires a scan of the string for escapes.
      144  * But then, a scan would be required anyway,
      145  * to check for dangerous characters.
      146  *
      147  * </p>
      148  * <h3> Nice Properties </h3>
      149  *
      150  * <p>
      151  * If a bytecode name does not contain any escape sequence,
      152  * demangling is a no-op:  The string demangles to itself.
      153  * Such a string is called <cite>self-mangling</cite>.
      154  * Almost all strings are self-mangling.
      155  * In practice, to demangle almost any name &ldquo;found in nature&rdquo;,
      156  * simply verify that it does not begin with a backslash.
      157  * </p>
      158  * <p>
      159  * Mangling is a one-to-one function, while demangling
      160  * is a many-to-one function.
      161  * A mangled string is defined as <cite>validly mangled</cite> if
      162  * it is in fact the unique mangling of its spelling string.
      163  * Three examples of invalidly mangled strings are <code><big><b>\=foo</b></big></code>,
      164  * <code><big><b>\-bar</b></big></code>, and <code><big><b>baz\!</b></big></code>, which demangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and
      165  * <code><big><b>baz\!</b></big></code>, but then remangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and <code><big><b>\=baz\-!</b></big></code>.
      166  * If a language back-end or runtime is using mangled names,
      167  * it should never present an invalidly mangled bytecode
      168  * name to the JVM.  If the runtime encounters one,
      169  * it should also report an error, since such an occurrence
      170  * probably indicates a bug in name encoding which
      171  * will lead to errors in linkage.
      172  * However, this note does not propose that the JVM verifier
      173  * detect invalidly mangled names.
      174  * </p>
      175  * <p>
      176  * As a result of these rules, it is a simple matter to
      177  * compute validly mangled substrings and concatenations
      178  * of validly mangled strings, and (with a little care)
      179  * these correspond to corresponding operations on their
      180  * spelling strings.
      181  * </p>
      182  * <ul>
      183  *   <li>Any prefix of a validly mangled string is also validly mangled,
      184  * although a null prefix may need to be removed.</li>
      185  *   <li>Any suffix of a validly mangled string is also validly mangled,
      186  * although a null prefix may need to be added.</li>
      187  *   <li>Two validly mangled strings, when concatenated,
      188  * are also validly mangled, although any null prefix
      189  * must be removed from the second string,
      190  * and a trailing backslash on the first string may need escaping,
      191  * if it would participate in an accidental escape when followed
      192  * by the first character of the second string.</li>
      193  * </ul>
      194  * <p>If languages that include non-Java symbol spellings use this
      195  * mangling convention, they will enjoy the following advantages:
      196  * </p>
      197  * <ul>
      198  *   <li>They can interoperate via symbols they share in common.</li>
      199  *   <li>Low-level tools, such as backtrace printers, will have readable displays.</li>
      200  *   <li>Future JVM and language extensions can safely use the dangerous characters
      201  * for structuring symbols, but will never interfere with valid spellings.</li>
      202  *   <li>Runtimes and compilers can use standard libraries for mangling and demangling.</li>
      203  *   <li>Occasional transliterations and name composition will be simple and regular,
      204  * for classes, methods, and fields.</li>
      205  *   <li>Bytecode names will continue to be compact.
      206  * When mangled, spellings will at most double in length, either in
      207  * UTF8 or UTF16 format, and most will not change at all.</li>
      208  * </ul>
      209  *
      210  *
      211  * <h3> Suggestions for Human Readable Presentations </h3>
      212  *
      213  *
      214  * <p>
      215  * For human readable displays of symbols,
      216  * it will be better to present a string-like quoted
      217  * representation of the spelling, because JVM users
      218  * are generally familiar with such tokens.
      219  * We suggest using single or double quotes before and after
      220  * mangled symbols which are not valid Java identifiers,
      221  * with quotes, backslashes, and non-printing characters
      222  * escaped as if for literals in the Java language.
      223  * </p>
      224  * <p>
      225  * For example, an HTML-like spelling
      226  * <code><big><b>&lt;pre&gt;</b></big></code> mangles to
      227  * <code><big><b>\^pre\_</b></big></code> and could
      228  * display more cleanly as
      229  * <code><big><b>'&lt;pre&gt;'</b></big></code>,
      230  * with the quotes included.
      231  * Such string-like conventions are <em>not</em> suitable
      232  * for mangled bytecode names, in part because
      233  * dangerous characters must be eliminated, rather
      234  * than just quoted.  Otherwise internally structured
      235  * strings like package prefixes and method signatures
      236  * could not be reliably parsed.
      237  * </p>
      238  * <p>
      239  * In such human-readable displays, invalidly mangled
      240  * names should <em>not</em> be demangled and quoted,
      241  * for this would be misleading.  Likewise, JVM symbols
      242  * which contain dangerous characters (like dots in field
      243  * names or brackets in method names) should not be
      244  * simply quoted.  The bytecode names
      245  * <code><big><b>\=phase\,1</b></big></code> and
      246  * <code><big><b>phase.1</b></big></code> are distinct,
      247  * and in demangled displays they should be presented as
      248  * <code><big><b>'phase.1'</b></big></code> and something like
      249  * <code><big><b>'phase'.1</b></big></code>, respectively.
      250  * </p>
      251  *
      252  * @author John Rose
      253  * @version 1.2, 02/06/2008
 * @see <a href="http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm">http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm</a>
      255  */
      256 public class BytecodeName {
      257     private BytecodeName() { }  // static only class
      258 
      259     /** Given a source name, produce the corresponding bytecode name.
      260      * The source name should not be qualified, because any syntactic
      261      * markers (dots, slashes, dollar signs, colons, etc.) will be mangled.
      262      * @param s the source name
      263      * @return a valid bytecode name which represents the source name
      264      */
      265     public static String toBytecodeName(String s) {
      266         String bn = mangle(s);
      267         assert((Object)bn == s || looksMangled(bn)) : bn;
      268         assert(s.equals(toSourceName(bn))) : s;
      269         return bn;
      270     }
      271 
      272     /** Given an unqualified bytecode name, produce the corresponding source name.
      273      * The bytecode name must not contain dangerous characters.
      274      * In particular, it must not be qualified or segmented by colon {@code ':'}.
      275      * @param s the bytecode name
      276      * @return the source name, which may possibly have unsafe characters
      277      * @throws IllegalArgumentException if the bytecode name is not {@link #isSafeBytecodeName safe}
      278      * @see #isSafeBytecodeName(java.lang.String)
      279      */
      280     public static String toSourceName(String s) {
      281         checkSafeBytecodeName(s);
      282         String sn = s;
      283         if (looksMangled(s)) {
      284             sn = demangle(s);
      285             assert(s.equals(mangle(sn))) : s+" => "+sn+" => "+mangle(sn);
      286         }
      287         return sn;
      288     }
      289 
      290     /**
      291      * Given a bytecode name from a classfile, separate it into
      292      * components delimited by dangerous characters.
      293      * Each resulting array element will be either a dangerous character,
      294      * or else a safe bytecode name.
      295      * (The safe name might possibly be mangled to hide further dangerous characters.)
      296      * For example, the qualified class name {@code java/lang/String}
      297      * will be parsed into the array {@code {"java", '/', "lang", '/', "String"}}.
      298      * The name {@code &lt;init&gt;} will be parsed into { '&lt;', "init", '&gt;'}}
      299      * The name {@code foo/bar$:baz} will be parsed into
      300      * {@code {"foo", '/', "bar", '$', ':', "baz"}}.
      301      */
      302     public static Object[] parseBytecodeName(String s) {
      303         int slen = s.length();
      304         Object[] res = null;
      305         for (int pass = 0; pass <= 1; pass++) {
      306             int fillp = 0;
      307             int lasti = 0;
      308             for (int i = 0; i <= slen; i++) {
      309                 int whichDC = -1;
      310                 if (i < slen) {
      311                     whichDC = DANGEROUS_CHARS.indexOf(s.charAt(i));
      312                     if (whichDC < DANGEROUS_CHAR_FIRST_INDEX)  continue;
      313                 }
      314                 // got to end of string or next dangerous char
      315                 if (lasti < i) {
      316                     // normal component
      317                     if (pass != 0)
      318                         res[fillp] = s.substring(lasti, i);
      319                     fillp++;
      320                     lasti = i+1;
      321                 }
      322                 if (whichDC >= DANGEROUS_CHAR_FIRST_INDEX) {
      323                     if (pass != 0)
      324                         res[fillp] = DANGEROUS_CHARS_CA[whichDC];
      325                     fillp++;
      326                 }
      327             }
      328             if (pass != 0)  break;
      329             // between passes, build the result array
      330             res = new String[fillp];
      331             if (fillp <= 1) {
      332                 if (fillp != 0)  res[0] = s;
      333                 break;
      334             }
      335         }
      336         return res;
      337     }
      338 
      339     /**
      340      * Given a series of components, create a bytecode name for a classfile.
      341      * This is the inverse of {@link #parseBytecodeName(java.lang.String)}.
      342      * Each component must either be an interned one-character string of
      343      * a dangerous character, or else a safe bytecode name.
      344      * @param components a series of name components
      345      * @return the concatenation of all components
      346      * @throws IllegalArgumentException if any component contains an unsafe
      347      *          character, and is not an interned one-character string
      348      * @throws NullPointerException if any component is null
      349      */
      350     public static String unparseBytecodeName(Object[] components) {
      351         for (Object c : components) {
      352             if (c instanceof String)
      353                 checkSafeBytecodeName((String) c);  // may fail
      354         }
      355         return appendAll(components);
      356     }
      357     private static String appendAll(Object[] components) {
      358         if (components.length <= 1) {
      359             if (components.length == 1) {
      360                 return String.valueOf(components[0]);
      361             }
      362             return "";
      363         }
      364         int slen = 0;
      365         for (Object c : components) {
      366             if (c instanceof String)
      367                 slen += String.valueOf(c).length();
      368             else
      369                 slen += 1;
      370         }
      371         StringBuilder sb = new StringBuilder(slen);
      372         for (Object c : components) {
      373             sb.append(c);
      374         }
      375         return sb.toString();
      376     }
      377 
      378     /**
      379      * Given a bytecode name, produce the corresponding display name.
      380      * This is the source name, plus quotes if needed.
      381      * If the bytecode name contains dangerous characters,
      382      * assume that they are being used as punctuation,
      383      * and pass them through unchanged.
      384      * @param s the original bytecode name (which may be qualified)
      385      * @return a human-readable presentation
      386      */
      387     public static String toDisplayName(String s) {
      388         Object[] components = parseBytecodeName(s);
      389         for (int i = 0; i < components.length; i++) {
      390             if (!(components[i] instanceof String))
      391                 continue;
      392             String c = (String) components[i];
      393             // pretty up the name by demangling it
      394             String sn = toSourceName(c);
      395             if ((Object)sn != c || !isJavaIdent(sn)) {
      396                 components[i] = quoteDisplay(sn);
      397             }
      398         }
      399         return appendAll(components);
      400     }
      401     private static boolean isJavaIdent(String s) {
      402         int slen = s.length();
      403         if (slen == 0)  return false;
      404         if (!Character.isUnicodeIdentifierStart(s.charAt(0)))
      405             return false;
      406         for (int i = 1; i < slen; i++) {
      407             if (!Character.isUnicodeIdentifierPart(s.charAt(0)))
      408                 return false;
      409         }
      410         return true;
      411     }
      412     private static String quoteDisplay(String s) {
      413         // TO DO:  Replace wierd characters in s by C-style escapes.
      414         return "'"+s.replaceAll("['\\\\]", "\\\\$0")+"'";
      415     }
      416 
      417     private static void checkSafeBytecodeName(String s)
      418             throws IllegalArgumentException {
      419         if (!isSafeBytecodeName(s)) {
      420             throw new IllegalArgumentException(s);
      421         }
      422     }
      423 
      424     /**
      425      * Report whether a simple name is safe as a bytecode name.
      426      * Such names are acceptable in class files as class, method, and field names.
      427      * Additionally, they are free of "dangerous" characters, even if those
      428      * characters are legal in some (or all) names in class files.
      429      * @param s the proposed bytecode name
      430      * @return true if the name is non-empty and all of its characters are safe
      431      */
      432     public static boolean isSafeBytecodeName(String s) {
      433         if (s.length() == 0)  return false;
      434         // check occurrences of each DANGEROUS char
      435         for (char xc : DANGEROUS_CHARS_A) {
      436             if (xc == ESCAPE_C)  continue;  // not really that dangerous
      437             if (s.indexOf(xc) >= 0)  return false;
      438         }
      439         return true;
      440     }
      441 
      442     /**
      443      * Report whether a character is safe in a bytecode name.
      444      * This is true of any unicode character except the following
      445      * <em>dangerous characters</em>: {@code ".;:$[]<>/"}.
      446      * @param s the proposed character
      447      * @return true if the character is safe to use in classfiles
      448      */
      449     public static boolean isSafeBytecodeChar(char c) {
      450         return DANGEROUS_CHARS.indexOf(c) < DANGEROUS_CHAR_FIRST_INDEX;
      451     }
      452 
      453     private static boolean looksMangled(String s) {
      454         return s.charAt(0) == ESCAPE_C;
      455     }
      456 
      457     private static String mangle(String s) {
      458         if (s.length() == 0)
      459             return NULL_ESCAPE;
      460 
      461         // build this lazily, when we first need an escape:
      462         StringBuilder sb = null;
      463 
      464         for (int i = 0, slen = s.length(); i < slen; i++) {
      465             char c = s.charAt(i);
      466 
      467             boolean needEscape = false;
      468             if (c == ESCAPE_C) {
      469                 if (i+1 < slen) {
      470                     char c1 = s.charAt(i+1);
      471                     if ((i == 0 && c1 == NULL_ESCAPE_C)
      472                         || c1 != originalOfReplacement(c1)) {
      473                         // an accidental escape
      474                         needEscape = true;
      475                     }
      476                 }
      477             } else {
      478                 needEscape = isDangerous(c);
      479             }
      480 
      481             if (!needEscape) {
      482                 if (sb != null)  sb.append(c);
      483                 continue;
      484             }
      485 
      486             // build sb if this is the first escape
      487             if (sb == null) {
      488                 sb = new StringBuilder(s.length()+10);
      489                 // mangled names must begin with a backslash:
      490                 if (s.charAt(0) != ESCAPE_C && i > 0)
      491                     sb.append(NULL_ESCAPE);
      492                 // append the string so far, which is unremarkable:
      493                 sb.append(s.substring(0, i));
      494             }
      495 
      496             // rewrite \ to \-, / to \|, etc.
      497             sb.append(ESCAPE_C);
      498             sb.append(replacementOf(c));
      499         }
      500 
      501         if (sb != null)   return sb.toString();
      502 
      503         return s;
      504     }
      505 
      506     private static String demangle(String s) {
      507         // build this lazily, when we first meet an escape:
      508         StringBuilder sb = null;
      509 
      510         int stringStart = 0;
      511         if (s.startsWith(NULL_ESCAPE))
      512             stringStart = 2;
      513 
      514         for (int i = stringStart, slen = s.length(); i < slen; i++) {
      515             char c = s.charAt(i);
      516 
      517             if (c == ESCAPE_C && i+1 < slen) {
      518                 // might be an escape sequence
      519                 char rc = s.charAt(i+1);
      520                 char oc = originalOfReplacement(rc);
      521                 if (oc != rc) {
      522                     // build sb if this is the first escape
      523                     if (sb == null) {
      524                         sb = new StringBuilder(s.length());
      525                         // append the string so far, which is unremarkable:
      526                         sb.append(s.substring(stringStart, i));
      527                     }
      528                     ++i;  // skip both characters
      529                     c = oc;
      530                 }
      531             }
      532 
      533             if (sb != null)
      534                 sb.append(c);
      535         }
      536 
      537         if (sb != null)   return sb.toString();
      538 
      539         return s.substring(stringStart);
      540     }
      541 
      542     static char ESCAPE_C = '\\';
      543     // empty escape sequence to avoid a null name or illegal prefix
      544     static char NULL_ESCAPE_C = '=';
      545     static String NULL_ESCAPE = ESCAPE_C+""+NULL_ESCAPE_C;
      546 
      547     static final String DANGEROUS_CHARS   = "\\/.;:$[]<>"; // \\ must be first
      548     static final String REPLACEMENT_CHARS =  "-|,?!%{}^_";
      549     static final int DANGEROUS_CHAR_FIRST_INDEX = 1; // index after \\
      550     static char[] DANGEROUS_CHARS_A   = DANGEROUS_CHARS.toCharArray();
      551     static char[] REPLACEMENT_CHARS_A = REPLACEMENT_CHARS.toCharArray();
      552     static final Character[] DANGEROUS_CHARS_CA;
      553     static {
      554         Character[] dcca = new Character[DANGEROUS_CHARS.length()];
      555         for (int i = 0; i < dcca.length; i++)
      556             dcca[i] = Character.valueOf(DANGEROUS_CHARS.charAt(i));
      557         DANGEROUS_CHARS_CA = dcca;
      558     }
      559 
      560     static final long[] SPECIAL_BITMAP = new long[2];  // 128 bits
      561     static {
      562         String SPECIAL = DANGEROUS_CHARS + REPLACEMENT_CHARS;
      563         //System.out.println("SPECIAL = "+SPECIAL);
      564         for (char c : SPECIAL.toCharArray()) {
      565             SPECIAL_BITMAP[c >>> 6] |= 1L << c;
      566         }
      567     }
      568     static boolean isSpecial(char c) {
      569         if ((c >>> 6) < SPECIAL_BITMAP.length)
      570             return ((SPECIAL_BITMAP[c >>> 6] >> c) & 1) != 0;
      571         else
      572             return false;
      573     }
      574     static char replacementOf(char c) {
      575         if (!isSpecial(c))  return c;
      576         int i = DANGEROUS_CHARS.indexOf(c);
      577         if (i < 0)  return c;
      578         return REPLACEMENT_CHARS.charAt(i);
      579     }
      580     static char originalOfReplacement(char c) {
      581         if (!isSpecial(c))  return c;
      582         int i = REPLACEMENT_CHARS.indexOf(c);
      583         if (i < 0)  return c;
      584         return DANGEROUS_CHARS.charAt(i);
      585     }
      586     static boolean isDangerous(char c) {
      587         if (!isSpecial(c))  return false;
      588         return (DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX);
      589     }
      590     static int indexOfDangerousChar(String s, int from) {
      591         for (int i = from, slen = s.length(); i < slen; i++) {
      592             if (isDangerous(s.charAt(i)))
      593                 return i;
      594         }
      595         return -1;
      596     }
      597     static int lastIndexOfDangerousChar(String s, int from) {
      598         for (int i = Math.min(from, s.length()-1); i >= 0; i--) {
      599             if (isDangerous(s.charAt(i)))
      600                 return i;
      601         }
      602         return -1;
      603     }
      604 
      605     // test driver
      606     static void main(String[] av) {
      607         // If verbose is enabled, quietly check everything.
      608         // Otherwise, print the output for the user to check.
      609         boolean verbose = false;
      610 
      611         int maxlen = 0;
      612 
      613         while (av.length > 0 && av[0].startsWith("-")) {
      614             String flag = av[0].intern();
      615             av = java.util.Arrays.copyOfRange(av, 1, av.length); // Java 1.6 or later
      616             if (flag == "-" || flag == "--")  break;
      617             else if (flag == "-q")
      618                 verbose = false;
      619             else if (flag == "-v")
      620                 verbose = true;
      621             else if (flag.startsWith("-l"))
      622                 maxlen = Integer.valueOf(flag.substring(2));
      623             else
      624                 throw new Error("Illegal flag argument: "+flag);
      625         }
      626 
      627         if (maxlen == 0)
      628             maxlen = (verbose ? 2 : 4);
      629         if (verbose)  System.out.println("Note: maxlen = "+maxlen);
      630 
      631         switch (av.length) {
      632         case 0: av = new String[] {
      633                     DANGEROUS_CHARS.substring(0) +
      634                     REPLACEMENT_CHARS.substring(0, 1) +
      635                     NULL_ESCAPE + "x"
      636                 }; // and fall through:
      637         case 1:
      638             char[] cv = av[0].toCharArray();
      639             av = new String[cv.length];
      640             int avp = 0;
      641             for (char c : cv) {
      642                 String s = String.valueOf(c);
      643                 if (c == 'x')  s = "foo";  // tradition...
      644                 av[avp++] = s;
      645             }
      646         }
      647         if (verbose)
      648             System.out.println("Note: Verbose output mode enabled.  Use '-q' to suppress.");
      649         Tester t = new Tester();
      650         t.maxlen = maxlen;
      651         t.verbose = verbose;
      652         t.tokens = av;
      653         t.test("", 0);
      654     }
      655 
      656     static class Tester {
      657         boolean verbose;
      658         int maxlen;
      659         java.util.Map<String,String> map = new java.util.HashMap<String,String>();
      660         String[] tokens;
      661 
      662         void test(String stringSoFar, int tokensSoFar) {
      663             test(stringSoFar);
      664             if (tokensSoFar <= maxlen) {
      665                 for (String token : tokens) {
      666                     if (token.length() == 0)  continue;  // skip empty tokens
      667                     if (stringSoFar.indexOf(token) != stringSoFar.lastIndexOf(token))
      668                         continue;   // there are already two occs. of this token
      669                     if (token.charAt(0) == ESCAPE_C && token.length() == 1 && maxlen < 4)
      670                         test(stringSoFar+token, tokensSoFar);  // want lots of \'s
      671                     else if (tokensSoFar < maxlen)
      672                         test(stringSoFar+token, tokensSoFar+1);
      673                 }
      674             }
      675         }
      676 
      677         void test(String s) {
      678             // for small batches, do not test the null string
      679             if (s.length() == 0 && maxlen >=1 && maxlen <= 2)  return;
      680             String bn = testSourceName(s);
      681             if (bn == null)  return;
      682             if (bn == s) {
      683                 //if (verbose)  System.out.println(s+" == id");
      684             } else {
      685                 if (verbose)  System.out.println(s+" => "+bn+" "+toDisplayName(bn));
      686                 String bnbn = testSourceName(bn);
      687                 if (bnbn == null)  return;
      688                 if (verbose)  System.out.println(bn+" => "+bnbn+" "+toDisplayName(bnbn));
      689                 /*
      690                 String bn3 = testSourceName(bnbn);
      691                 if (bn3 == null)  return;
      692                 if (verbose)  System.out.println(bnbn+" => "+bn3);
      693                 */
      694             }
      695         }
      696 
      697         String testSourceName(String s) {
      698             if (map.containsKey(s))  return null;
      699             String bn = toBytecodeName(s);
      700             map.put(s, bn);
      701             String sn = toSourceName(bn);
      702             if (!sn.equals(s)) {
      703                 String bad = (s+" => "+bn+" != "+sn);
      704                 if (!verbose)  throw new Error("Bad mangling: "+bad);
      705                 System.out.println("*** "+bad);
      706                 return null;
      707             }
      708             return bn;
      709         }
      710     }
      711 }