Avoid fetching external resources in XMPReader.

This commit is contained in:
Harald Kuhr 2021-12-10 13:41:05 +01:00
parent 6653f4a85d
commit da4efe98bf
3 changed files with 172 additions and 39 deletions

View File

@ -30,11 +30,21 @@
package com.twelvemonkeys.imageio.metadata.xmp;
import com.twelvemonkeys.imageio.metadata.Directory;
import com.twelvemonkeys.imageio.metadata.Entry;
import com.twelvemonkeys.imageio.metadata.MetadataReader;
import com.twelvemonkeys.imageio.util.IIOUtil;
import com.twelvemonkeys.lang.Validate;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javax.imageio.IIOException;
import javax.imageio.stream.ImageInputStream;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
@ -43,13 +53,11 @@ import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import javax.imageio.IIOException;
import javax.imageio.stream.ImageInputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.util.*;
import com.twelvemonkeys.imageio.metadata.Directory;
import com.twelvemonkeys.imageio.metadata.Entry;
import com.twelvemonkeys.imageio.metadata.MetadataReader;
import com.twelvemonkeys.imageio.util.IIOUtil;
import com.twelvemonkeys.lang.Validate;
/**
* XMPReader
@ -67,10 +75,9 @@ public final class XMPReader extends MetadataReader {
public Directory read(final ImageInputStream input) throws IOException {
Validate.notNull(input, "input");
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
try {
DocumentBuilderFactory factory = createDocumentBuilderFactory();
// TODO: Consider parsing using SAX?
// TODO: Determine encoding and parse using a Reader...
// TODO: Refactor scanner to return inputstream?
@ -79,9 +86,6 @@ public final class XMPReader extends MetadataReader {
builder.setErrorHandler(new DefaultHandler());
Document document = builder.parse(new InputSource(IIOUtil.createStreamAdapter(input)));
// XMLSerializer serializer = new XMLSerializer(System.err, System.getProperty("file.encoding"));
// serializer.serialize(document);
String toolkit = getToolkit(document);
Node rdfRoot = document.getElementsByTagNameNS(XMP.NS_RDF, "RDF").item(0);
NodeList descriptions = document.getElementsByTagNameNS(XMP.NS_RDF, "Description");
@ -92,10 +96,33 @@ public final class XMPReader extends MetadataReader {
throw new IIOException(e.getMessage(), e);
}
catch (ParserConfigurationException e) {
throw new RuntimeException(e); // TODO: Or IOException?
throw new RuntimeException(e);
}
}
/**
 * Creates the {@link DocumentBuilderFactory} used for parsing XMP packets.
 * <p>
 * The factory is namespace aware and is configured so that parsing does not
 * expand entity references, process XInclude, or load external DTDs, schemas
 * or entities.
 * </p>
 *
 * @return a newly created and configured factory, never {@code null}
 * @throws ParserConfigurationException if the underlying parser does not support
 *         one of the features set via {@code setFeature}
 */
private DocumentBuilderFactory createDocumentBuilderFactory() throws ParserConfigurationException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    factory.setNamespaceAware(true);

    // Security: Disable XInclude & expanding entity references ("bombs"), not needed for XMP
    factory.setXIncludeAware(false);
    factory.setExpandEntityReferences(false);

    // Security: Enable "secure processing", to prevent DoS attacks.
    // Secure processing is a parser *feature* (not an attribute), so it is set through
    // setFeature, which every JAXP implementation is required to support.
    factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);

    // Security: Remove possibility to access external DTDs or Schema, not needed for XMP
    // (an empty string means "no protocols allowed")
    factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
    factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");

    // Security: Disable loading of external DTD and entities, not needed for XMP
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    return factory;
}
private String getToolkit(Document document) {
NodeList xmpmeta = document.getElementsByTagNameNS(XMP.NS_X, "xmpmeta");
@ -109,7 +136,7 @@ public final class XMPReader extends MetadataReader {
}
private XMPDirectory parseDirectories(final Node pParentNode, NodeList pNodes, String toolkit) {
Map<String, List<Entry>> subdirs = new LinkedHashMap<String, List<Entry>>();
Map<String, List<Entry>> subdirs = new LinkedHashMap<>();
for (Node desc : asIterable(pNodes)) {
if (desc.getParentNode() != pParentNode) {
@ -127,7 +154,7 @@ public final class XMPReader extends MetadataReader {
// Lookup
List<Entry> dir = subdirs.get(node.getNamespaceURI());
if (dir == null) {
dir = new ArrayList<Entry>();
dir = new ArrayList<>();
subdirs.put(node.getNamespaceURI(), dir);
}
@ -139,7 +166,7 @@ public final class XMPReader extends MetadataReader {
else {
// TODO: This method contains loads of duplication and should be cleaned up...
// Support attribute short-hand syntax
Map<String, List<Entry>> subsubdirs = new LinkedHashMap<String, List<Entry>>();
Map<String, List<Entry>> subsubdirs = new LinkedHashMap<>();
parseAttributesForKnownElements(subsubdirs, node);
@ -161,7 +188,7 @@ public final class XMPReader extends MetadataReader {
}
}
List<Directory> entries = new ArrayList<Directory>(subdirs.size());
List<Directory> entries = new ArrayList<>(subdirs.size());
// TODO: Should we still allow asking for a subdirectory by item id?
for (Map.Entry<String, List<Entry>> entry : subdirs.entrySet()) {
@ -179,7 +206,7 @@ public final class XMPReader extends MetadataReader {
private RDFDescription parseAsResource(Node node) {
// See: http://www.w3.org/TR/REC-rdf-syntax/#section-Syntax-parsetype-resource
List<Entry> entries = new ArrayList<Entry>();
List<Entry> entries = new ArrayList<>();
for (Node child : asIterable(node.getChildNodes())) {
if (child.getNodeType() != Node.ELEMENT_NODE) {
@ -204,7 +231,7 @@ public final class XMPReader extends MetadataReader {
List<Entry> dir = subdirs.get(attr.getNamespaceURI());
if (dir == null) {
dir = new ArrayList<Entry>();
dir = new ArrayList<>();
subdirs.put(attr.getNamespaceURI(), dir);
}
@ -216,7 +243,7 @@ public final class XMPReader extends MetadataReader {
for (Node child : asIterable(node.getChildNodes())) {
if (XMP.NS_RDF.equals(child.getNamespaceURI()) && "Alt".equals(child.getLocalName())) {
// Support for <rdf:Alt><rdf:li> -> return a Map<String, Object> keyed on xml:lang
Map<String, Object> alternatives = new LinkedHashMap<String, Object>();
Map<String, Object> alternatives = new LinkedHashMap<>();
for (Node alternative : asIterable(child.getChildNodes())) {
if (XMP.NS_RDF.equals(alternative.getNamespaceURI()) && "li".equals(alternative.getLocalName())) {
NamedNodeMap attributes = alternative.getAttributes();
@ -230,7 +257,7 @@ public final class XMPReader extends MetadataReader {
else if (XMP.NS_RDF.equals(child.getNamespaceURI()) && ("Seq".equals(child.getLocalName()) || "Bag".equals(child.getLocalName()))) {
// Support for <rdf:Seq><rdf:li> -> return array
// Support for <rdf:Bag><rdf:li> -> return array/unordered collection (how can a serialized collection not have order?)
List<Object> seq = new ArrayList<Object>();
List<Object> seq = new ArrayList<>();
for (Node sequence : asIterable(child.getChildNodes())) {
if (XMP.NS_RDF.equals(sequence.getNamespaceURI()) && "li".equals(sequence.getLocalName())) {

View File

@ -30,26 +30,32 @@
package com.twelvemonkeys.imageio.metadata.xmp;
import com.twelvemonkeys.imageio.metadata.CompoundDirectory;
import com.twelvemonkeys.imageio.metadata.Directory;
import com.twelvemonkeys.imageio.metadata.Entry;
import com.twelvemonkeys.imageio.metadata.MetadataReaderAbstractTest;
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import org.junit.Test;
import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.SocketException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import org.junit.Test;
import com.twelvemonkeys.imageio.metadata.CompoundDirectory;
import com.twelvemonkeys.imageio.metadata.Directory;
import com.twelvemonkeys.imageio.metadata.Entry;
import com.twelvemonkeys.imageio.metadata.MetadataReaderAbstractTest;
/**
* XMPReaderTest
@ -483,4 +489,69 @@ public class XMPReaderTest extends MetadataReaderAbstractTest {
assertThat(exif.getEntryById("http://ns.adobe.com/exif/1.0/PixelYDimension"), hasValue("550"));
assertThat(exif.getEntryById("http://ns.adobe.com/exif/1.0/NativeDigest"), hasValue("36864,40960,40961,37121,37122,40962,40963,37510,40964,36867,36868,33434,33437,34850,34852,34855,34856,37377,37378,37379,37380,37381,37382,37383,37384,37385,37386,37396,41483,41484,41486,41487,41488,41492,41493,41495,41728,41729,41730,41985,41986,41987,41988,41989,41990,41991,41992,41993,41994,41995,41996,42016,0,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,20,22,23,24,25,26,27,28,30;A7F21D25E2C562F152B2C4ECC9E534DA"));
}
/**
 * Verifies that reading an XMP document declaring an external entity does not
 * make the parser issue an HTTP request.
 * <p>
 * A local server answering every request with {@code 501} listens on port 7777
 * while the document is read. An {@code IOException} mentioning "501" therefore
 * means the parser contacted the server.
 * </p>
 */
@Test(timeout = 1500L)
public void testNoExternalRequest() throws Exception {
    // TODO: Use dynamic port?
    // NOTE: The port is presumably the one referenced by the entity URL in the test resource; keep them in sync.
    try (HTTPServer server = new HTTPServer(7777)) {
        try {
            createReader().read(getResourceAsIIS("/xmp/xmp-jpeg-xxe.xml"));
        }
        catch (IOException ioe) {
            // The message may be null; guard so a NullPointerException does not mask the real failure
            String message = ioe.getMessage();

            if (message != null && message.contains("501")) {
                throw new AssertionError("Reading should not cause external requests", ioe);
            }

            // Any other exception is a bug (but might happen if the parser does not support certain features)
            throw ioe;
        }
    }
}
/**
 * Minimal, single-connection HTTP server for tests.
 * <p>
 * Listens on the given port on a background thread, accepts one connection,
 * discards whatever request bytes are immediately available and answers with
 * {@code 501 Not Implemented}. Closing the server closes the listening socket
 * and waits for the background thread to finish.
 * </p>
 */
private static class HTTPServer implements AutoCloseable {
    private final ServerSocket server;
    private final Thread thread;

    /**
     * Opens the listening socket and starts the background thread.
     *
     * @param port the local port to listen on
     * @throws IOException if the server socket cannot be opened
     */
    HTTPServer(int port) throws IOException {
        server = new ServerSocket(port, 1);
        thread = new Thread(new Runnable() {
            @Override
            public void run() {
                serve();
            }
        }, "HTTPServer-" + port);
        // Daemon, so a server stuck in accept can never keep the JVM alive
        thread.setDaemon(true);
        thread.start();
    }

    /**
     * Accepts a single client and answers it with a 501 response.
     */
    private void serve() {
        // The client socket is a managed resource too, so it is closed even if getting its streams fails
        try (Socket client = server.accept();
             InputStream inputStream = client.getInputStream()) {
            // Drain the bytes already received, don't care about the request
            while (inputStream.available() > 0) {
                if (inputStream.read() == -1) {
                    break;
                }
            }

            // Answer with 501, this will cause the client to throw IOException
            try (OutputStream outputStream = client.getOutputStream()) {
                outputStream.write("HTTP/1.0 501 Not Implemented\r\n\r\n".getBytes(StandardCharsets.UTF_8));
            }
        }
        catch (IOException e) {
            if (server.isClosed() && e instanceof SocketException) {
                // Socket closed due to server close, all good
                return;
            }

            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the listening socket and waits for the background thread to finish.
     * If the calling thread is interrupted while waiting, the interrupt status is
     * restored and the method returns without waiting further.
     *
     * @throws Exception if closing the server socket fails
     */
    @Override
    public void close() throws Exception {
        server.close();

        try {
            thread.join();
        }
        catch (InterruptedException e) {
            // AutoCloseable.close should not throw InterruptedException; preserve the interrupt status instead
            Thread.currentThread().interrupt();
        }
    }
}
}

View File

@ -0,0 +1,35 @@
<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?><!DOCTYPE root [<!ENTITY % ext SYSTEM 'http://localhost:7777/xxx'> %ext;]>
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='Image::ExifTool 10.16'>
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
<rdf:Description rdf:about=''
xmlns:xmpMM='http://ns.adobe.com/xap/1.0/mm/'>
<xmpMM:InstanceID>xmp.iid:7EDC21BF-371B-4189-90AF-C83A54A6A190</xmpMM:InstanceID>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>
<?xpacket end='w'?>