package textbender.d.gene.xhtml; // Copyright 2007, Michael Allan. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Textbender Software"), to deal in the Textbender Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicence, and/or sell copies of the Textbender Software, and to permit persons to whom the Textbender Software is furnished to do so, subject to the following conditions: The preceding copyright notice and this permission notice shall be included in all copies or substantial portions of the Textbender Software. THE TEXTBENDER SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE TEXTBENDER SOFTWARE OR THE USE OR OTHER DEALINGS IN THE TEXTBENDER SOFTWARE. import java.io.*; import java.util.*; import javax.xml.transform.*; import javax.xml.transform.dom.*; import org.w3c.dom.*; import org.w3c.dom.ls.*; import textbender.d.gene.*; import textbender.d.transfer.*; import textbender.g.lang.*; import textbender.g.xml.dom.*; import textbender.g.xml.dom.ls.*; import textbender.g.xml.transform.*; import static textbender._.Textbender.TEXTBENDER_NAMESPACE; import static textbender.d.gene.Gene.EMBEDDABLE_STUB_LOCAL_NAME; /** Recombinant XHTML definitions and utilities. */ public class RecombinantXHTML implements MutantAbstractor { /** The single instance of RecombinantXHTML. */ public static RecombinantXHTML i() { return instance; } private RecombinantXHTML() {} /** Public identifier of Recombinant XHTML documents. */ public static final String RECOMBINANT_XHTML_PUBLIC_ID = "-//textbender//DTD XHTML recombinant//EN"; /** Returns a Recombinant XHTML document's meta-data (tail) element; * if necessary creating it (complete with a 'gg' element). * * @param b string builder to use, overwriting its existing content * * @see DocumentRT#findMetaData(Node) */ public static Element ensureMetaData( final Document document, StringBuilder b ) { Element metaData = DocumentRT.findMetaData( document ); if( metaData == null ) { metaData = document.createElementNS( TEXTBENDER_NAMESPACE, "tail" ); metaData.appendChild( document.createTextNode( "\n " )); final Element html = document.getDocumentElement(); html.appendChild( metaData ); // a before b html.appendChild( document.createTextNode( "\n " )); metaData.setAttributeNS // b after a ( TEXTBENDER_NAMESPACE, DOM.buildAttributePrefix( metaData, TEXTBENDER_NAMESPACE, b ) .append( "document-meta-data" ).toString(), "1" ); metaData.setAttributeNS ( TEXTBENDER_NAMESPACE, DOM.buildAttributePrefix( metaData, TEXTBENDER_NAMESPACE, b ) .append( 'g' ).toString(), "--" ); Gene.ensureGG( metaData, b ); } return metaData; } /** Writes a Recombinant XHTML document. * Convenience method that serializes the document, in standard fashion. * * @param document Recombinant XHTML document to write * @param transformerFactory to use * @param result output format; typically a 'new * {@linkplain javax.xml.transform.stream.StreamResult StreamResult}(file|stream)' * * @throws TransformerException from newTransformer, transform */ public static @ThreadSafe void write( final Document document, final TransformerFactory transformerFactory, final Result result ) throws TransformerException { DOMSource source = new DOMSource( document ); Transformer transformer = transformerFactory.newTransformer(); // identity // transformer.setOutputProperty( OutputKeys.METHOD, "xml" ); // else may default to HTML, depending on content TransformerX.setOutputEncodingFrom( document, transformer ); TransformerX.setOutputDoctypeFrom( document, transformer ); // else no DOCTYPE declaration is output transformer.transform( source, result ); // though it crams XML and DOCTYPE declarations onto first line } // - M u t a n t - A b s t r a c t o r ------------------------------------------------ /** Returns an abstract form of a gene's sequence, * suitable for detecting mutations to record in ancestry. * Suitable for documents in which the significant, * creative content of leaf sequences is human-readable text, * as opposed to data-like elements and attributes. * The latter are stripped away under the assumption * of being style modifiers, of no creative significance. *

* This abstract form is subject to change. * Output sequences are not guaranteed to be accurately comparable * unless generated in the same release of textbender. *

* * @return abstract form of the sequence * as a character sequence (String), if the gene is a * leaf; * or as a locus sequence (List<String&rt;), if the gene is a structural * parent */ public @ThreadSafe Object abstract_transferMutant( final Element embeddableAbstractSequence, final StringBuilder b ) { assert EMBEDDABLE_STUB_LOCAL_NAME.equals("_"): "stub not valid Recombinant XHTML, hence infallible indicator of parent gene"; // Stub cannot be found as the child of a leaf gene. It does not occur in Recombinant XHTML. It is only used to represent genes in embedded forms, and leaves have no child genes. Therefore infallible. (Otherwise, we might be a little more careful in the structural parent test, since leaves might start with elements that look like stubs. Though not critical it be 100% infallible, and 100% may not be possible in some, other doc-types.) // Test for structural parent, first. // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - final Node firstChild = embeddableAbstractSequence.getFirstChild(); ArrayList stringList = null; // lazilly created for( Node child = firstChild;; child = child.getNextSibling() ) { if( child != firstChild && stringList == null ) break; // first child not stub, sequence must be from leaf gene if( child == null ) break; if( !( child instanceof Element )) { assert child == firstChild : "should all be stubs, if first was"; continue; } final Element element = (Element)child; if( !element.getLocalName().equals( EMBEDDABLE_STUB_LOCAL_NAME ) || element.getNamespaceURI() != null ) { assert child == firstChild : "should all be stubs, if first was"; continue; } String locus = element.getAttributeNS( null, "locus" ); if( locus.length() == 0 ) { assert false; continue; } if( stringList == null ) stringList = new ArrayList(); stringList.add( locus ); } if( stringList != null ) return stringList; // structural parent // Must be leaf. // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - b.delete( 0, Integer.MAX_VALUE ); abstract_transferMutant_appendTextOf( embeddableAbstractSequence, b ); boolean wasWhitespaceLast = true; // thus to trim all trailing spaces [cf. votorola.g.lang.StringBuilderX.collapseAndTrim()] for( int c = b.length() - 1; c >= 0; --c ) { final char ch = b.charAt( c ); final boolean isWhitespace = Character.isWhitespace( ch ); if( isWhitespace ) { if( wasWhitespaceLast ) b.deleteCharAt( c ); // collapse all whitespace else if( ch != ' ' ) b.setCharAt( c, ' ' ); // to single space char } wasWhitespaceLast = isWhitespace; } if( b.length() > 0 && wasWhitespaceLast ) b.deleteCharAt( 0 ); // trim leading space, if any (can only be one left) return b.toString(); } private static void abstract_transferMutant_appendTextOf( final Node node, final StringBuilder b ) { if( node instanceof Text ) b.append( ((Text)node).getData() ); else if( node instanceof Element ) { Node child = node.getFirstChild(); for(; child != null; child = child.getNextSibling() ) { abstract_transferMutant_appendTextOf( child, b ); } } } // ==================================================================================== /** A resolver for Recombinant XHTML resources. * *

BUG: properly-nested

*
* http://reluk.ca/var/cache/textbender-javadoc/textbender/d/gene/xhtml/RecombinantXHTML.DOMResourceResolver.html#properly-nested *
*

* Parser in 1.6 fails: * 'The replacement text of parameter entity "%xhtml11.dtd" must include properly nested declarations when the entity reference is used as a complete declaration.' * E.g. on system/host/obsidian/linux.xht. *

*

* Apparent bug in parser, because it used to be OK (1.5), * and it's still OK with xmllint. *

*/ public static final class DOMResourceResolver extends LSResourceResolverCP { /** Constructs a DOMResourceResolver. */ public DOMResourceResolver( DOMImplementationLS dom ) { super( dom ); } /** Overridden to recognize a request for the Recombinant XHTML DTD, * and fulfill it regardless of systemId. */ public @Override LSInput resolveResource( String type, String namespaceURI, String publicId, String systemId, String baseURI ) { // textbender.g.util.logging.LoggerX.i(getClass()).finest( systemId ); if( RECOMBINANT_XHTML_PUBLIC_ID.equals( publicId )) { systemId = "textbender/d/gene/xhtml/xhtml-recombinant.dtd"; // essentially filling in what's missing in typical documents } return super.resolveResource( type, namespaceURI, publicId, systemId, baseURI ); } } // ==================================================================================== /** A minimal resolver for Recombinant XHTML resources. * It resolves the DTD to resource path: * textbender/d/gene/xhtml/xhtml-recombinant-minimal.dtd. *

* Better than a {@linkplain textbender.g.xml.dom.ls.LSResourceResolver0 null resolver}, * which causes the parser to completely ignore * named character entities (LSParser, JDK 1.6). *

*/ public static final class DOMResourceResolverMin extends LSResourceResolverCP { /** Constructs a DOMResourceResolverMin. */ public DOMResourceResolverMin( DOMImplementationLS dom ) { super( dom ); } /** Overridden to recognize a request for the Recombinant XHTML DTD, * and fulfill it regardless of systemId. */ public @Override LSInput resolveResource( String type, String namespaceURI, String publicId, String systemId, String baseURI ) { // textbender.g.util.logging.LoggerX.i(getClass()).finest( systemId ); if( RECOMBINANT_XHTML_PUBLIC_ID.equals( publicId )) { systemId = "textbender/d/gene/xhtml/xhtml-recombinant-minimal.dtd"; // essentially filling in what's missing in typical documents } return super.resolveResource( type, namespaceURI, publicId, systemId, baseURI ); } } ////////////////////////////////////////////////////////////////////////////////////////// // Last, so static fields above initialize, and are available during instantiation below private static final RecombinantXHTML instance = new RecombinantXHTML(); }