Snippet: JAXB and CDATA ContentHandler with CharacterEscapeHandler

Main program:

package com.tutego.jaxb;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.Marshaller;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;

/**
 * Demo entry point: builds a {@link Dog} with a {@link Flea} whose name contains
 * markup characters and marshals it to standard output through a
 * {@link CDATAContentHandler}.
 */
public class App {
  /**
   * Marshals a sample object graph to {@code System.out}.
   *
   * @param args ignored
   * @throws Exception if the JAXB context cannot be created or marshalling fails
   */
  public static void main( String[] args ) throws Exception {
    Dog dog = new Dog();
    dog.name = "Wüffi";
    Flea flea = new Flea();
    flea.name = "<><> Böser Floh <><>";
    dog.flea = flea;

    JAXBContext jaxbContext = JAXBContext.newInstance( Dog.class );
    Marshaller jaxbMarshaller = jaxbContext.createMarshaller();
    jaxbMarshaller.setProperty( Marshaller.JAXB_FORMATTED_OUTPUT, true );

    // The handler is constructed with "UTF-8", so the bytes written to System.out
    // must be UTF-8 too. A bare PrintWriter( System.out ) would use the platform
    // default charset and could mis-encode the non-ASCII characters.
    PrintWriter out = new PrintWriter(
        new OutputStreamWriter( System.out, StandardCharsets.UTF_8 ) );
    jaxbMarshaller.marshal( dog, new CDATAContentHandler( out ) );

    // Flush explicitly so buffered output is not lost; System.out itself stays open.
    out.flush();
  }
}

Dog and Flea:

package com.tutego.jaxb;

import javax.xml.bind.annotation.XmlRootElement;

/**
 * Root object of the marshalled document (annotated with {@code @XmlRootElement}).
 * Its state is exposed as public fields, which {@code App.main} assigns directly.
 */
@XmlRootElement
public class Dog {
  // Name of the dog.
  public String name;
  // The flea attached to this dog; a nested object, not a simple value.
  public Flea flea;
}

/**
 * Package-private value type referenced from {@code Dog.flea}.
 * It carries no JAXB annotations of its own.
 */
class Flea {
  // Name of the flea; the demo sets it to text containing '<' and '>'.
  public String name;
}

CDATAContentHandler:

package com.tutego.jaxb;

import com.sun.xml.txw2.output.CharacterEscapeHandler;
import com.sun.xml.txw2.output.DataWriter;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.io.Writer;

/**
 * A {@link DataWriter} that emits character data containing markup characters
 * ({@code <}, {@code >} or {@code &}) as CDATA sections instead of escaping it.
 * Text without any of these characters is passed to the superclass unchanged.
 */
public class CDATAContentHandler extends DataWriter {
  /**
   * Creates a handler that writes to the given writer, passing {@code "UTF-8"}
   * as the encoding and {@link MinimumEscapeHandler#theInstance} as the escape
   * handler to the superclass.
   *
   * @param writer destination of the generated XML
   */
  public CDATAContentHandler( Writer writer ) {
    super( writer, "UTF-8", MinimumEscapeHandler.theInstance );
  }

  /**
   * Writes the given text, wrapping it in CDATA when it contains {@code <},
   * {@code >} or {@code &}.
   *
   * <p>The sequence {@code ]]>} must not appear inside a CDATA section, so the
   * text is split at every occurrence: {@code ]]} ends one section and
   * {@code >} begins the next. Only sequences fully contained in a single call
   * are detected; one spanning two calls is not.
   *
   * @param ch     buffer holding the characters
   * @param start  index of the first character to write
   * @param length number of characters to write
   * @throws SAXException if the superclass fails to write
   */
  @Override
  public void characters( char[] ch, int start, int length ) throws SAXException {
    int end = start + length;

    if ( ! containsMarkup( ch, start, end ) ) {
      super.characters( ch, start, length );
      return;
    }

    int segmentStart = start;
    for ( int i = start; i + 2 < end; i++ ) {
      if ( ch[ i ] == ']' && ch[ i + 1 ] == ']' && ch[ i + 2 ] == '>' ) {
        // Close the section right after "]]" so that the '>' opens the next one.
        writeCDataSection( ch, segmentStart, i + 2 - segmentStart );
        segmentStart = i + 2;
      }
    }
    writeCDataSection( ch, segmentStart, end - segmentStart );
  }

  /** Returns whether {@code ch[start..end)} contains '<', '>' or '&'. */
  private static boolean containsMarkup( char[] ch, int start, int end ) {
    for ( int i = start; i < end; i++ ) {
      char c = ch[ i ];
      if ( c == '<' || c == '>' || c == '&' )
        return true;
    }
    return false;
  }

  /** Writes {@code ch[start..start+length)} as one complete CDATA section. */
  private void writeCDataSection( char[] ch, int start, int length ) throws SAXException {
    super.startCDATA();
    // Use super.characters to avoid re-entering the overriding method above.
    super.characters( ch, start, length );
    super.endCDATA();
  }
}

/**
 * Escapes only the characters that would otherwise break the markup:
 * {@code &}, {@code <}, {@code >} and carriage return everywhere, plus line
 * feed and double quote inside attribute values. All other characters are
 * written unchanged, so the output encoding must be able to represent them
 * (for example UTF-8).
 *
 * @author Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
 */
class MinimumEscapeHandler implements CharacterEscapeHandler {

  /** The single shared instance. */
  public static final CharacterEscapeHandler theInstance = new MinimumEscapeHandler();

  /** Not instantiable from outside; use {@link #theInstance}. */
  private MinimumEscapeHandler() {
  }

  /**
   * Copies {@code ch[start..start+length)} to {@code out}, replacing the
   * characters that need escaping. Unescaped runs are written in bulk to keep
   * the number of writer calls low.
   *
   * @param ch       source buffer
   * @param start    index of the first character to process
   * @param length   number of characters to process
   * @param isAttVal whether the text is an attribute value (additionally
   *                 escapes line feed and double quote)
   * @param out      destination writer
   * @throws IOException if writing to {@code out} fails
   */
  public void escape( char[] ch, int start, int length, boolean isAttVal, Writer out )
      throws IOException {
    final int end = start + length;
    int pending = start;  // index of the first character not yet written

    for ( int pos = start; pos < end; pos++ ) {
      final char current = ch[ pos ];
      if ( ! needsEscaping( current, isAttVal ) ) {
        continue;
      }

      // Write the unescaped run that precedes this character, if there is one.
      if ( pos != pending ) {
        out.write( ch, pending, pos - pending );
      }
      pending = pos + 1;
      writeEscaped( current, out );
    }

    // Write whatever remains after the last escaped character.
    if ( pending != end ) {
      out.write( ch, pending, end - pending );
    }
  }

  /** Tells whether {@code c} must be replaced in the given context. */
  private static boolean needsEscaping( char c, boolean isAttVal ) {
    if ( c == '&' || c == '<' || c == '>' || c == '\r' ) {
      return true;
    }
    return isAttVal && ( c == '\n' || c == '\"' );
  }

  /** Writes the entity or numeric character reference for {@code c}. */
  private static void writeEscaped( char c, Writer out ) throws IOException {
    if ( c == '&' ) {
      out.write( "&amp;" );
    } else if ( c == '<' ) {
      out.write( "&lt;" );
    } else if ( c == '>' ) {
      out.write( "&gt;" );
    } else if ( c == '\"' ) {
      out.write( "&quot;" );
    } else if ( c == '\n' || c == '\r' ) {
      out.write( "&#" );
      out.write( Integer.toString( c ) );
      out.write( ';' );
    } else {
      throw new IllegalArgumentException( "Cannot escape: '" + c + "'" );
    }
  }
}

Über Christian Ullenboom

Ich bin Christian Ullenboom und Autor der Bücher ›Java ist auch eine Insel. Einführung, Ausbildung, Praxis‹ und ›Java SE 8 Standard-Bibliothek. Das Handbuch für Java-Entwickler‹. Seit 1997 berate ich Unternehmen im Einsatz von Java. Sun ernannte mich 2005 zum ›Java-Champion‹.

Schreibe einen Kommentar

Deine E-Mail-Adresse wird nicht veröffentlicht. Erforderliche Felder sind mit * markiert.