changeset 3913:107d55ecd50d

7026346: URLConnection.guessContentTypeFromStream does not support UTF-8 and UTF-32 xml streams with BOM
Reviewed-by: alanb
Contributed-by: Charles Lee <littlee@linux.vnet.ibm.com>
author chegar
date Fri, 11 Mar 2011 08:57:14 +0000
parents ab13f19ee0ff
children f4d3033b4b65
files src/share/classes/java/net/URLConnection.java test/java/net/URLConnection/GetXmlContentType.java
diffstat 2 files changed, 108 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/classes/java/net/URLConnection.java	Fri Mar 11 08:47:10 2011 +0000
+++ b/src/share/classes/java/net/URLConnection.java	Fri Mar 11 08:57:14 2011 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1995, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1995, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1422,7 +1422,7 @@
         if (!is.markSupported())
             return null;
 
-        is.mark(12);
+        is.mark(16);
         int c1 = is.read();
         int c2 = is.read();
         int c3 = is.read();
@@ -1434,6 +1434,11 @@
         int c9 = is.read();
         int c10 = is.read();
         int c11 = is.read();
+        int c12 = is.read();
+        int c13 = is.read();
+        int c14 = is.read();
+        int c15 = is.read();
+        int c16 = is.read();
         is.reset();
 
         if (c1 == 0xCA && c2 == 0xFE && c3 == 0xBA && c4 == 0xBE) {
@@ -1461,6 +1466,13 @@
             }
         }
 
+        // big and little (identical) endian UTF-8 encodings, with BOM
+        if (c1 == 0xef &&  c2 == 0xbb &&  c3 == 0xbf) {
+            if (c4 == '<' &&  c5 == '?' &&  c6 == 'x') {
+                return "application/xml";
+            }
+        }
+
         // big and little endian UTF-16 encodings, with byte order mark
         if (c1 == 0xfe && c2 == 0xff) {
             if (c3 == 0 && c4 == '<' && c5 == 0 && c6 == '?' &&
@@ -1476,6 +1488,23 @@
             }
         }
 
+        // big and little endian UTF-32 encodings, with BOM
+        if (c1 == 0x00 &&  c2 == 0x00 &&  c3 == 0xfe &&  c4 == 0xff) {
+            if (c5  == 0 && c6  == 0 && c7  == 0 && c8  == '<' &&
+                c9  == 0 && c10 == 0 && c11 == 0 && c12 == '?' &&
+                c13 == 0 && c14 == 0 && c15 == 0 && c16 == 'x') {
+                return "application/xml";
+            }
+        }
+
+        if (c1 == 0xff &&  c2 == 0xfe &&  c3 == 0x00 &&  c4 == 0x00) {
+            if (c5  == '<' && c6  == 0 && c7  == 0 && c8  == 0 &&
+                c9  == '?' && c10 == 0 && c11 == 0 && c12 == 0 &&
+                c13 == 'x' && c14 == 0 && c15 == 0 && c16 == 0) {
+                return "application/xml";
+            }
+        }
+
         if (c1 == 'G' && c2 == 'I' && c3 == 'F' && c4 == '8') {
             return "image/gif";
         }
--- a/test/java/net/URLConnection/GetXmlContentType.java	Fri Mar 11 08:47:10 2011 +0000
+++ b/test/java/net/URLConnection/GetXmlContentType.java	Fri Mar 11 08:57:14 2011 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,8 +23,8 @@
 
 /*
  * @test
- * @bug 4160195
- * @summary Check for correct detection of XML content type
+ * @bug 4160195 7026346
+ * @summary Check for correct detection of XML content type, including BOM streams
  */
 
 import java.io.*;
@@ -34,6 +34,8 @@
 public class GetXmlContentType {
 
     static final String XML_MIME_TYPE = "application/xml";
+    static final String XML_HEADER = "<?xml";
+    static int passed, failed;
 
     // guess type from content and filename
     static final String goodFiles [] = {
@@ -50,52 +52,91 @@
         };
 
     public static void main(String[] args) throws Exception {
-        boolean sawError = false;
+        contentTypeFromFile();
+        contentTypeFromBOMStream();
 
-        //
+        if (failed > 0)
+            throw new RuntimeException (
+                "Test failed; passed = " + passed + ", failed = " + failed);
+    }
+
+    static void contentTypeFromFile() throws Exception {
         // POSITIVE tests:  good data --> good result
-        //
-        for (int i = 0; i < goodFiles.length; i++) {
-            String      result = getUrlContentType (goodFiles [i]);
 
-            if (!XML_MIME_TYPE.equals (result)) {
-                System.out.println ("Wrong MIME type: "
-                    + goodFiles [i]
-                    + " --> " + result
-                    );
-                sawError = true;
+        for (String goodFile : goodFiles) {
+            String result = getUrlContentType(goodFile);
+
+            if (!XML_MIME_TYPE.equals(result)) {
+                System.out.println("Wrong MIME type: " + goodFile + " --> " + result);
+                failed++;
+            } else {
+                passed++;
             }
         }
 
-        //
         // NEGATIVE tests:  bad data --> correct diagnostic
-        //
-        for (int i = 0; i < badFiles.length; i++) {
-            String      result = getUrlContentType (badFiles [i]);
+        for (String badFile : badFiles) {
+            String result = getUrlContentType(badFile);
 
-            if (XML_MIME_TYPE.equals (result)) {
-                System.out.println ("Wrong MIME type: "
-                    + badFiles [i]
-                    + " --> " + result
-                    );
-                sawError = true;
+            if (XML_MIME_TYPE.equals(result)) {
+                System.out.println("Wrong MIME type: " + badFile + " --> " + result);
+                failed++;
+            } else {
+                passed++;
             }
         }
-
-        if (sawError)
-            throw new Exception (
-                "GetXmlContentType Test failed; see diagnostics.");
     }
 
-    static String getUrlContentType (String name) throws IOException {
-        File            file = new File(System.getProperty("test.src", "."), "xml");
-        URL             u = new URL ("file:"
-                            + file.getCanonicalPath()
-                            + file.separator
-                            + name);
-        URLConnection   conn = u.openConnection ();
+    static String getUrlContentType(String name) throws IOException {
+        File file = new File(System.getProperty("test.src", "."), "xml");
+        URL u = new URL("file:"
+                         + file.getCanonicalPath()
+                         + file.separator
+                         + name);
+        URLConnection conn = u.openConnection();
 
-        return conn.getContentType ();
+        return conn.getContentType();
     }
 
+    static void contentTypeFromBOMStream() throws Exception {
+        final String[] encodings = new  String[]
+                {"UTF-8", "UTF-16BE", "UTF-16LE", "UTF-32BE", "UTF-32LE"};
+        for (String encoding : encodings) {
+             try (InputStream is = new ByteArrayInputStream(toBOMBytes(encoding))) {
+                 String mime = URLConnection.guessContentTypeFromStream(is);
+                 if ( !XML_MIME_TYPE.equals(mime) ) {
+                     System.out.println("Wrong MIME type: " + encoding + " --> " + mime);
+                     failed++;
+                 } else {
+                     passed++;
+                 }
+             }
+         }
+    }
+
+    static byte[] toBOMBytes(String encoding) throws Exception {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+
+        switch (encoding) {
+            case "UTF-8" :
+                bos.write(new  byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF });
+                break;
+            case "UTF-16BE" :
+                bos.write(new  byte[] { (byte) 0xFE, (byte) 0xFF });
+                break;
+            case "UTF-16LE" :
+                bos.write(new  byte[] { (byte) 0xFF, (byte) 0xFE });
+                break;
+            case "UTF-32BE" :
+                bos.write(new  byte[] { (byte) 0x00, (byte) 0x00,
+                                        (byte) 0xFE, (byte) 0xFF });
+                break;
+            case "UTF-32LE" :
+                bos.write(new  byte[] { (byte) 0xFF, (byte) 0xFE,
+                                        (byte) 0x00, (byte) 0x00 });
+        }
+
+        bos.write(XML_HEADER.getBytes(encoding));
+        return bos.toByteArray();
+    }
 }