From 66eae4ae00ff0a46017ad6fedd19e46e128e7b5d Mon Sep 17 00:00:00 2001 From: Henrik Plate <17928867+henrikplate@users.noreply.github.com> Date: Fri, 17 Jan 2025 08:54:50 +0100 Subject: [PATCH] Downport fix commit (#1080) --- .../imageio/metadata/xmp/XMPReader.java | 85 ++++++++++----- .../imageio/metadata/xmp/XMPReaderTest.java | 103 ++++++++++++++++-- .../src/test/resources/xmp/xmp-jpeg-xxe.xml | 35 ++++++ imageio/pom.xml | 7 ++ 4 files changed, 193 insertions(+), 37 deletions(-) create mode 100644 imageio/imageio-metadata/src/test/resources/xmp/xmp-jpeg-xxe.xml diff --git a/imageio/imageio-metadata/src/main/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReader.java b/imageio/imageio-metadata/src/main/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReader.java index 28abce4f..51fcd8d6 100644 --- a/imageio/imageio-metadata/src/main/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReader.java +++ b/imageio/imageio-metadata/src/main/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReader.java @@ -28,25 +28,34 @@ package com.twelvemonkeys.imageio.metadata.xmp; -import com.twelvemonkeys.imageio.metadata.Directory; -import com.twelvemonkeys.imageio.metadata.Entry; -import com.twelvemonkeys.imageio.metadata.MetadataReader; -import com.twelvemonkeys.imageio.util.IIOUtil; -import com.twelvemonkeys.lang.Validate; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import javax.imageio.IIOException; +import javax.imageio.stream.ImageInputStream; +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; -import javax.imageio.IIOException; -import javax.imageio.stream.ImageInputStream; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import java.io.IOException; -import java.util.*; +import com.twelvemonkeys.imageio.metadata.Directory; +import com.twelvemonkeys.imageio.metadata.Entry; +import com.twelvemonkeys.imageio.metadata.MetadataReader; +import com.twelvemonkeys.imageio.util.IIOUtil; +import com.twelvemonkeys.lang.Validate; /** * XMPReader @@ -64,20 +73,17 @@ public final class XMPReader extends MetadataReader { public Directory read(final ImageInputStream input) throws IOException { Validate.notNull(input, "input"); - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - factory.setNamespaceAware(true); - try { + DocumentBuilderFactory factory = createDocumentBuilderFactory(); + // TODO: Consider parsing using SAX? // TODO: Determine encoding and parse using a Reader... // TODO: Refactor scanner to return inputstream? // TODO: Be smarter about ASCII-NULL termination/padding (the SAXParser aka Xerces DOMParser doesn't like it)... DocumentBuilder builder = factory.newDocumentBuilder(); + builder.setErrorHandler(new DefaultHandler()); Document document = builder.parse(new InputSource(IIOUtil.createStreamAdapter(input))); -// XMLSerializer serializer = new XMLSerializer(System.err, System.getProperty("file.encoding")); -// serializer.serialize(document); - String toolkit = getToolkit(document); Node rdfRoot = document.getElementsByTagNameNS(XMP.NS_RDF, "RDF").item(0); NodeList descriptions = document.getElementsByTagNameNS(XMP.NS_RDF, "Description"); @@ -88,10 +94,33 @@ public final class XMPReader extends MetadataReader { throw new IIOException(e.getMessage(), e); } catch (ParserConfigurationException e) { - throw new RuntimeException(e); // TODO: Or IOException? + throw new RuntimeException(e); } } + private DocumentBuilderFactory createDocumentBuilderFactory() throws ParserConfigurationException { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setNamespaceAware(true); + + // Security: Disable XInclude & expanding entity references ("bombs"), not needed for XMP + factory.setXIncludeAware(false); + factory.setExpandEntityReferences(false); + + // Security: Enable "secure processing", to prevent DoS attacks + factory.setAttribute(XMLConstants.FEATURE_SECURE_PROCESSING, true); + + // Security: Remove possibility to access external DTDs or Schema, not needed for XMP + factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); + + // Security: Disable loading of external DTD and entities, not needed for XMP + factory.setFeature("http://xml.org/sax/features/external-general-entities", false); + factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + + return factory; + } + private String getToolkit(Document document) { NodeList xmpmeta = document.getElementsByTagNameNS(XMP.NS_X, "xmpmeta"); @@ -105,7 +134,7 @@ public final class XMPReader extends MetadataReader { } private XMPDirectory parseDirectories(final Node pParentNode, NodeList pNodes, String toolkit) { - Map> subdirs = new LinkedHashMap>(); + Map> subdirs = new LinkedHashMap<>(); for (Node desc : asIterable(pNodes)) { if (desc.getParentNode() != pParentNode) { @@ -123,7 +152,7 @@ public final class XMPReader extends MetadataReader { // Lookup List dir = subdirs.get(node.getNamespaceURI()); if (dir == null) { - dir = new ArrayList(); + dir = new ArrayList<>(); subdirs.put(node.getNamespaceURI(), dir); } @@ -135,12 +164,12 @@ public final class XMPReader extends MetadataReader { else { // TODO: This method contains loads of duplication an should be cleaned up... // Support attribute short-hand syntax - Map> subsubdirs = new LinkedHashMap>(); + Map> subsubdirs = new LinkedHashMap<>(); parseAttributesForKnownElements(subsubdirs, node); if (!subsubdirs.isEmpty()) { - List entries = new ArrayList(); + List entries = new ArrayList<>(subsubdirs.size()); for (Map.Entry> entry : subsubdirs.entrySet()) { entries.addAll(entry.getValue()); @@ -157,7 +186,7 @@ public final class XMPReader extends MetadataReader { } } - List entries = new ArrayList(); + List entries = new ArrayList<>(subdirs.size()); // TODO: Should we still allow asking for a subdirectory by item id? for (Map.Entry> entry : subdirs.entrySet()) { @@ -175,7 +204,7 @@ public final class XMPReader extends MetadataReader { private RDFDescription parseAsResource(Node node) { // See: http://www.w3.org/TR/REC-rdf-syntax/#section-Syntax-parsetype-resource - List entries = new ArrayList(); + List entries = new ArrayList<>(); for (Node child : asIterable(node.getChildNodes())) { if (child.getNodeType() != Node.ELEMENT_NODE) { @@ -200,7 +229,7 @@ public final class XMPReader extends MetadataReader { List dir = subdirs.get(attr.getNamespaceURI()); if (dir == null) { - dir = new ArrayList(); + dir = new ArrayList<>(); subdirs.put(attr.getNamespaceURI(), dir); } @@ -212,7 +241,7 @@ public final class XMPReader extends MetadataReader { for (Node child : asIterable(node.getChildNodes())) { if (XMP.NS_RDF.equals(child.getNamespaceURI()) && "Alt".equals(child.getLocalName())) { // Support for -> return a Map keyed on xml:lang - Map alternatives = new LinkedHashMap(); + Map alternatives = new LinkedHashMap<>(); for (Node alternative : asIterable(child.getChildNodes())) { if (XMP.NS_RDF.equals(alternative.getNamespaceURI()) && "li".equals(alternative.getLocalName())) { NamedNodeMap attributes = alternative.getAttributes(); @@ -226,7 +255,7 @@ public final class XMPReader extends MetadataReader { else if (XMP.NS_RDF.equals(child.getNamespaceURI()) && ("Seq".equals(child.getLocalName()) || "Bag".equals(child.getLocalName()))) { // Support for -> return array // Support for -> return array/unordered collection (how can a serialized collection not have order?) - List seq = new ArrayList(); + List seq = new ArrayList<>(); for (Node sequence : asIterable(child.getChildNodes())) { if (XMP.NS_RDF.equals(sequence.getNamespaceURI()) && "li".equals(sequence.getLocalName())) { diff --git a/imageio/imageio-metadata/src/test/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReaderTest.java b/imageio/imageio-metadata/src/test/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReaderTest.java index b66b2bd9..4709b798 100644 --- a/imageio/imageio-metadata/src/test/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReaderTest.java +++ b/imageio/imageio-metadata/src/test/java/com/twelvemonkeys/imageio/metadata/xmp/XMPReaderTest.java @@ -28,23 +28,32 @@ package com.twelvemonkeys.imageio.metadata.xmp; -import com.twelvemonkeys.imageio.metadata.CompoundDirectory; -import com.twelvemonkeys.imageio.metadata.Directory; -import com.twelvemonkeys.imageio.metadata.Entry; -import com.twelvemonkeys.imageio.metadata.MetadataReaderAbstractTest; -import org.junit.Test; +import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; -import javax.imageio.ImageIO; -import javax.imageio.stream.ImageInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import static org.hamcrest.CoreMatchers.instanceOf; -import static org.junit.Assert.*; +import javax.imageio.ImageIO; +import javax.imageio.stream.ImageInputStream; + +import org.junit.Test; + +import com.twelvemonkeys.imageio.metadata.CompoundDirectory; +import com.twelvemonkeys.imageio.metadata.Directory; +import com.twelvemonkeys.imageio.metadata.Entry; +import com.twelvemonkeys.imageio.metadata.MetadataReaderAbstractTest; /** * XMPReaderTest @@ -375,6 +384,7 @@ public class XMPReaderTest extends MetadataReaderAbstractTest { // photoshop|http://ns.adobe.com/photoshop/1.0/ Directory photoshop = getDirectoryByNS(compound, XMP.NS_PHOTOSHOP); + assertNotNull(photoshop); assertEquals(3, photoshop.size()); assertThat(photoshop.getEntryById("http://ns.adobe.com/photoshop/1.0/ColorMode"), hasValue("1")); assertThat(photoshop.getEntryById("http://ns.adobe.com/photoshop/1.0/ICCProfile"), hasValue("Dot Gain 20%")); @@ -390,6 +400,8 @@ public class XMPReaderTest extends MetadataReaderAbstractTest { // xapMM|http://ns.adobe.com/xap/1.0/mm/ Directory mm = getDirectoryByNS(compound, XMP.NS_XAP_MM); + + assertNotNull(mm); assertEquals(3, mm.size()); assertThat(directory.getEntryById("http://ns.adobe.com/xap/1.0/mm/DocumentID"), hasValue("uuid:6DCA50CC7D53DD119F20F5A7EA4C9BEC")); assertThat(directory.getEntryById("http://ns.adobe.com/xap/1.0/mm/InstanceID"), hasValue("uuid:6ECA50CC7D53DD119F20F5A7EA4C9BEC")); @@ -414,6 +426,8 @@ public class XMPReaderTest extends MetadataReaderAbstractTest { // dc|http://purl.org/dc/elements/1.1/ Directory dc = getDirectoryByNS(compound, XMP.NS_DC); + + assertNotNull(dc); assertEquals(1, dc.size()); assertThat(dc.getEntryById("http://purl.org/dc/elements/1.1/format"), hasValue("image/jpeg")); @@ -428,6 +442,8 @@ public class XMPReaderTest extends MetadataReaderAbstractTest { // tiff|http://ns.adobe.com/tiff/1.0/ Directory tiff = getDirectoryByNS(compound, XMP.NS_TIFF); + + assertNotNull(tiff); assertEquals(5, tiff.size()); assertThat(tiff.getEntryById("http://ns.adobe.com/tiff/1.0/Orientation"), hasValue("1")); assertThat(tiff.getEntryById("http://ns.adobe.com/tiff/1.0/XResolution"), hasValue("720000/10000")); @@ -445,6 +461,8 @@ public class XMPReaderTest extends MetadataReaderAbstractTest { // xap|http://ns.adobe.com/xap/1.0/ Directory xap = getDirectoryByNS(compound, XMP.NS_XAP); + + assertNotNull(xap); assertEquals(4, xap.size()); assertThat(xap.getEntryById("http://ns.adobe.com/xap/1.0/ModifyDate"), hasValue("2008-07-16T14:44:49-07:00")); assertThat(xap.getEntryById("http://ns.adobe.com/xap/1.0/CreatorTool"), hasValue("Adobe Photoshop CS3 Windows")); @@ -461,10 +479,77 @@ public class XMPReaderTest extends MetadataReaderAbstractTest { // exif|http://ns.adobe.com/exif/1.0/ Directory exif = getDirectoryByNS(compound, XMP.NS_EXIF); + + assertNotNull(exif); assertEquals(4, exif.size()); assertThat(exif.getEntryById("http://ns.adobe.com/exif/1.0/ColorSpace"), hasValue("-1")); // SIC. Same as unsigned short 65535, meaning "uncalibrated"? assertThat(exif.getEntryById("http://ns.adobe.com/exif/1.0/PixelXDimension"), hasValue("426")); assertThat(exif.getEntryById("http://ns.adobe.com/exif/1.0/PixelYDimension"), hasValue("550")); assertThat(exif.getEntryById("http://ns.adobe.com/exif/1.0/NativeDigest"), hasValue("36864,40960,40961,37121,37122,40962,40963,37510,40964,36867,36868,33434,33437,34850,34852,34855,34856,37377,37378,37379,37380,37381,37382,37383,37384,37385,37386,37396,41483,41484,41486,41487,41488,41492,41493,41495,41728,41729,41730,41985,41986,41987,41988,41989,41990,41991,41992,41993,41994,41995,41996,42016,0,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,20,22,23,24,25,26,27,28,30;A7F21D25E2C562F152B2C4ECC9E534DA")); } + + @Test(timeout = 1500L) + public void testNoExternalRequest() throws Exception { + // TODO: Use dynamic port? + try (HTTPServer server = new HTTPServer(7777)) { + try { + createReader().read(getResourceAsIIS("/xmp/xmp-jpeg-xxe.xml")); + } catch (IOException ioe) { + if (ioe.getMessage().contains("501")) { + throw new AssertionError("Reading should not cause external requests", ioe); + } + + // Any other exception is a bug (but might happen if the parser does not support certain features) + throw ioe; + } + } + } + + private static class HTTPServer implements AutoCloseable { + private final ServerSocket server; + private final Thread thread; + + HTTPServer(int port) throws IOException { + server = new ServerSocket(port, 1); + thread = new Thread(new Runnable() { + @Override public void run() { + serve(); + } + }); + thread.start(); + } + + private void serve() { + try { + Socket client = server.accept(); + + // Get the input stream, don't care about the request + try (InputStream inputStream = client.getInputStream()) { + while (inputStream.available() > 0) { + if (inputStream.read() == -1) { + break; + } + } + + // Answer with 501, this will cause the client to throw IOException + try (OutputStream outputStream = client.getOutputStream()) { + outputStream.write("HTTP/1.0 501 Not Implemented\r\n\r\n".getBytes(StandardCharsets.UTF_8)); + } + } + } + catch (IOException e) { + if (server.isClosed() && e instanceof SocketException) { + // Socket closed due to server close, all good + return; + } + + throw new RuntimeException(e); + } + } + + @Override public void close() throws Exception { + server.close(); + thread.join(); // It's advised against throwing InterruptedException here, but this is not production code... + } + } } diff --git a/imageio/imageio-metadata/src/test/resources/xmp/xmp-jpeg-xxe.xml b/imageio/imageio-metadata/src/test/resources/xmp/xmp-jpeg-xxe.xml new file mode 100644 index 00000000..5499801d --- /dev/null +++ b/imageio/imageio-metadata/src/test/resources/xmp/xmp-jpeg-xxe.xml @@ -0,0 +1,35 @@ + %ext;]> + + + + + xmp.iid:7EDC21BF-371B-4189-90AF-C83A54A6A190 + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/imageio/pom.xml b/imageio/pom.xml index 170b9df9..71cc4208 100644 --- a/imageio/pom.xml +++ b/imageio/pom.xml @@ -101,6 +101,13 @@ 1.8.5 test + + + org.hamcrest + hamcrest + 2.2 + test +