diff --git a/imageio/imageio-metadata/src/main/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReader.java b/imageio/imageio-metadata/src/main/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReader.java index a0fc21c3..c02704ab 100644 --- a/imageio/imageio-metadata/src/main/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReader.java +++ b/imageio/imageio-metadata/src/main/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReader.java @@ -30,11 +30,21 @@ package com.twelvemonkeys.imageio.metadata.xmp; -import com.twelvemonkeys.imageio.metadata.Directory; -import com.twelvemonkeys.imageio.metadata.Entry; -import com.twelvemonkeys.imageio.metadata.MetadataReader; -import com.twelvemonkeys.imageio.util.IIOUtil; -import com.twelvemonkeys.lang.Validate; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import javax.imageio.IIOException; +import javax.imageio.stream.ImageInputStream; +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; @@ -43,13 +53,11 @@ import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; -import javax.imageio.IIOException; -import javax.imageio.stream.ImageInputStream; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import java.io.IOException; -import java.util.*; +import com.twelvemonkeys.imageio.metadata.Directory; +import com.twelvemonkeys.imageio.metadata.Entry; +import com.twelvemonkeys.imageio.metadata.MetadataReader; +import com.twelvemonkeys.imageio.util.IIOUtil; +import com.twelvemonkeys.lang.Validate; /** * XMPReader @@ -67,10 +75,9 @@ public final class XMPReader extends MetadataReader { public Directory read(final ImageInputStream input) throws IOException { Validate.notNull(input, "input"); - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - factory.setNamespaceAware(true); - try { + DocumentBuilderFactory factory = createDocumentBuilderFactory(); + // TODO: Consider parsing using SAX? // TODO: Determine encoding and parse using a Reader... // TODO: Refactor scanner to return inputstream? @@ -79,9 +86,6 @@ public final class XMPReader extends MetadataReader { builder.setErrorHandler(new DefaultHandler()); Document document = builder.parse(new InputSource(IIOUtil.createStreamAdapter(input))); -// XMLSerializer serializer = new XMLSerializer(System.err, System.getProperty("file.encoding")); -// serializer.serialize(document); - String toolkit = getToolkit(document); Node rdfRoot = document.getElementsByTagNameNS(XMP.NS_RDF, "RDF").item(0); NodeList descriptions = document.getElementsByTagNameNS(XMP.NS_RDF, "Description"); @@ -92,10 +96,33 @@ public final class XMPReader extends MetadataReader { throw new IIOException(e.getMessage(), e); } catch (ParserConfigurationException e) { - throw new RuntimeException(e); // TODO: Or IOException? + throw new RuntimeException(e); } } + private DocumentBuilderFactory createDocumentBuilderFactory() throws ParserConfigurationException { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setNamespaceAware(true); + + // Security: Disable XInclude & expanding entity references ("bombs"), not needed for XMP + factory.setXIncludeAware(false); + factory.setExpandEntityReferences(false); + + // Security: Enable "secure processing", to prevent DoS attacks + factory.setAttribute(XMLConstants.FEATURE_SECURE_PROCESSING, true); + + // Security: Remove possibility to access external DTDs or Schema, not needed for XMP + factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); + + // Security: Disable loading of external DTD and entities, not needed for XMP + factory.setFeature("http://xml.org/sax/features/external-general-entities", false); + factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + + return factory; + } + private String getToolkit(Document document) { NodeList xmpmeta = document.getElementsByTagNameNS(XMP.NS_X, "xmpmeta"); @@ -109,7 +136,7 @@ public final class XMPReader extends MetadataReader { } private XMPDirectory parseDirectories(final Node pParentNode, NodeList pNodes, String toolkit) { - Map> subdirs = new LinkedHashMap>(); + Map> subdirs = new LinkedHashMap<>(); for (Node desc : asIterable(pNodes)) { if (desc.getParentNode() != pParentNode) { @@ -127,7 +154,7 @@ public final class XMPReader extends MetadataReader { // Lookup List dir = subdirs.get(node.getNamespaceURI()); if (dir == null) { - dir = new ArrayList(); + dir = new ArrayList<>(); subdirs.put(node.getNamespaceURI(), dir); } @@ -139,7 +166,7 @@ public final class XMPReader extends MetadataReader { else { // TODO: This method contains loads of duplication an should be cleaned up... // Support attribute short-hand syntax - Map> subsubdirs = new LinkedHashMap>(); + Map> subsubdirs = new LinkedHashMap<>(); parseAttributesForKnownElements(subsubdirs, node); @@ -161,7 +188,7 @@ public final class XMPReader extends MetadataReader { } } - List entries = new ArrayList(subdirs.size()); + List entries = new ArrayList<>(subdirs.size()); // TODO: Should we still allow asking for a subdirectory by item id? for (Map.Entry> entry : subdirs.entrySet()) { @@ -179,7 +206,7 @@ public final class XMPReader extends MetadataReader { private RDFDescription parseAsResource(Node node) { // See: http://www.w3.org/TR/REC-rdf-syntax/#section-Syntax-parsetype-resource - List entries = new ArrayList(); + List entries = new ArrayList<>(); for (Node child : asIterable(node.getChildNodes())) { if (child.getNodeType() != Node.ELEMENT_NODE) { @@ -204,7 +231,7 @@ public final class XMPReader extends MetadataReader { List dir = subdirs.get(attr.getNamespaceURI()); if (dir == null) { - dir = new ArrayList(); + dir = new ArrayList<>(); subdirs.put(attr.getNamespaceURI(), dir); } @@ -216,7 +243,7 @@ public final class XMPReader extends MetadataReader { for (Node child : asIterable(node.getChildNodes())) { if (XMP.NS_RDF.equals(child.getNamespaceURI()) && "Alt".equals(child.getLocalName())) { // Support for -> return a Map keyed on xml:lang - Map alternatives = new LinkedHashMap(); + Map alternatives = new LinkedHashMap<>(); for (Node alternative : asIterable(child.getChildNodes())) { if (XMP.NS_RDF.equals(alternative.getNamespaceURI()) && "li".equals(alternative.getLocalName())) { NamedNodeMap attributes = alternative.getAttributes(); @@ -230,7 +257,7 @@ public final class XMPReader extends MetadataReader { else if (XMP.NS_RDF.equals(child.getNamespaceURI()) && ("Seq".equals(child.getLocalName()) || "Bag".equals(child.getLocalName()))) { // Support for -> return array // Support for -> return array/unordered collection (how can a serialized collection not have order?) - List seq = new ArrayList(); + List seq = new ArrayList<>(); for (Node sequence : asIterable(child.getChildNodes())) { if (XMP.NS_RDF.equals(sequence.getNamespaceURI()) && "li".equals(sequence.getLocalName())) { diff --git a/imageio/imageio-metadata/src/test/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReaderTest.java b/imageio/imageio-metadata/src/test/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReaderTest.java index d6bee3c6..02be1834 100644 --- a/imageio/imageio-metadata/src/test/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReaderTest.java +++ b/imageio/imageio-metadata/src/test/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReaderTest.java @@ -30,26 +30,32 @@ package com.twelvemonkeys.imageio.metadata.xmp; -import com.twelvemonkeys.imageio.metadata.CompoundDirectory; -import com.twelvemonkeys.imageio.metadata.Directory; -import com.twelvemonkeys.imageio.metadata.Entry; -import com.twelvemonkeys.imageio.metadata.MetadataReaderAbstractTest; +import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; -import org.junit.Test; - -import javax.imageio.ImageIO; -import javax.imageio.stream.ImageInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import static org.hamcrest.CoreMatchers.instanceOf; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; +import javax.imageio.ImageIO; +import javax.imageio.stream.ImageInputStream; + +import org.junit.Test; + +import com.twelvemonkeys.imageio.metadata.CompoundDirectory; +import com.twelvemonkeys.imageio.metadata.Directory; +import com.twelvemonkeys.imageio.metadata.Entry; +import com.twelvemonkeys.imageio.metadata.MetadataReaderAbstractTest; /** * XMPReaderTest @@ -483,4 +489,69 @@ public class XMPReaderTest extends MetadataReaderAbstractTest { assertThat(exif.getEntryById("http://ns.adobe.com/exif/1.0/PixelYDimension"), hasValue("550")); assertThat(exif.getEntryById("http://ns.adobe.com/exif/1.0/NativeDigest"), hasValue("36864,40960,40961,37121,37122,40962,40963,37510,40964,36867,36868,33434,33437,34850,34852,34855,34856,37377,37378,37379,37380,37381,37382,37383,37384,37385,37386,37396,41483,41484,41486,41487,41488,41492,41493,41495,41728,41729,41730,41985,41986,41987,41988,41989,41990,41991,41992,41993,41994,41995,41996,42016,0,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,20,22,23,24,25,26,27,28,30;A7F21D25E2C562F152B2C4ECC9E534DA")); } + + @Test(timeout = 1500L) + public void testNoExternalRequest() throws Exception { + // TODO: Use dynamic port? + try (HTTPServer server = new HTTPServer(7777)) { + try { + createReader().read(getResourceAsIIS("/xmp/xmp-jpeg-xxe.xml")); + } catch (IOException ioe) { + if (ioe.getMessage().contains("501")) { + throw new AssertionError("Reading should not cause external requests", ioe); + } + + // Any other exception is a bug (but might happen if the parser does not support certain features) + throw ioe; + } + } + } + + private static class HTTPServer implements AutoCloseable { + private final ServerSocket server; + private final Thread thread; + + HTTPServer(int port) throws IOException { + server = new ServerSocket(port, 1); + thread = new Thread(new Runnable() { + @Override public void run() { + serve(); + } + }); + thread.start(); + } + + private void serve() { + try { + Socket client = server.accept(); + + // Get the input stream, don't care about the request + try (InputStream inputStream = client.getInputStream()) { + while (inputStream.available() > 0) { + if (inputStream.read() == -1) { + break; + } + } + + // Answer with 501, this will cause the client to throw IOException + try (OutputStream outputStream = client.getOutputStream()) { + outputStream.write("HTTP/1.0 501 Not Implemented\r\n\r\n".getBytes(StandardCharsets.UTF_8)); + } + } + } + catch (IOException e) { + if (server.isClosed() && e instanceof SocketException) { + // Socket closed due to server close, all good + return; + } + + throw new RuntimeException(e); + } + } + + @Override public void close() throws Exception { + server.close(); + thread.join(); // It's advised against throwing InterruptedException here, but this is not production code... + } + } } diff --git a/imageio/imageio-metadata/src/test/resources/xmp/xmp-jpeg-xxe.xml b/imageio/imageio-metadata/src/test/resources/xmp/xmp-jpeg-xxe.xml new file mode 100644 index 00000000..87da4b65 --- /dev/null +++ b/imageio/imageio-metadata/src/test/resources/xmp/xmp-jpeg-xxe.xml @@ -0,0 +1,35 @@ + %ext;]> + + + + + xmp.iid:7EDC21BF-371B-4189-90AF-C83A54A6A190 + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file