changeset 1336:4839dcbae18d

8145974: XMLStreamWriter produces invalid XML for surrogate pairs on OutputStreamWriter Reviewed-by: joehw
author aefimov
date Fri, 13 May 2016 18:34:27 +0300
parents 573b1773c560
children 664dd7701654
files src/com/sun/xml/internal/stream/writers/XMLStreamWriterImpl.java
diffstat 1 files changed, 27 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/src/com/sun/xml/internal/stream/writers/XMLStreamWriterImpl.java	Fri May 06 13:49:56 2016 +0100
+++ b/src/com/sun/xml/internal/stream/writers/XMLStreamWriterImpl.java	Fri May 13 18:34:27 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1346,6 +1346,15 @@
     }
 
     /**
+     * Writes character reference in hex format.
+     */
+    private void writeCharRef(int codePoint) throws IOException {
+        fWriter.write( "&#x" );
+        fWriter.write( Integer.toHexString(codePoint) );
+        fWriter.write( ';' );
+    }
+
+    /**
      * Writes XML content to underlying writer. Escapes characters unless
      * escaping character feature is turned off.
      */
@@ -1368,10 +1377,14 @@
             if (fEncoder != null && !fEncoder.canEncode(ch)){
                 fWriter.write(content, startWritePos, index - startWritePos );
 
-                // Escape this char as underlying encoder cannot handle it
-                fWriter.write( "&#x" );
-                fWriter.write(Integer.toHexString(ch));
-                fWriter.write( ';' );
+                // Check if current and next characters forms a surrogate pair
+                // and escape it to avoid generation of invalid xml content
+                if ( index != end - 1 && Character.isSurrogatePair(ch, content[index+1])) {
+                    writeCharRef(Character.toCodePoint(ch, content[index+1]));
+                    index++;
+                } else {
+                    writeCharRef(ch);
+                }
                 startWritePos = index + 1;
                 continue;
             }
@@ -1439,10 +1452,15 @@
             if (fEncoder != null && !fEncoder.canEncode(ch)){
                 fWriter.write(content, startWritePos, index - startWritePos );
 
-                // Escape this char as underlying encoder cannot handle it
-                fWriter.write( "&#x" );
-                fWriter.write(Integer.toHexString(ch));
-                fWriter.write( ';' );
+                // Check if current and next characters forms a surrogate pair
+                // and escape it to avoid generation of invalid xml content
+                if ( index != end - 1 && Character.isSurrogatePair(ch, content.charAt(index+1))) {
+                    writeCharRef(Character.toCodePoint(ch, content.charAt(index+1)));
+                    index++;
+                } else {
+                    writeCharRef(ch);
+                }
+
                 startWritePos = index + 1;
                 continue;
             }