Validate XML numeric character references before string construction

This commit is contained in:
Yuki Matsuhashi
2026-03-24 03:55:29 +09:00
parent b959027aa2
commit 1877069780
3 changed files with 40 additions and 1 deletion

View File

@@ -158,7 +158,7 @@ public class XML {
* @param cp code point to test * @param cp code point to test
* @return true if the code point is not valid for an XML * @return true if the code point is not valid for an XML
*/ */
private static boolean mustEscape(int cp) { static boolean mustEscape(int cp) {
/* Valid range from https://www.w3.org/TR/REC-xml/#charsets /* Valid range from https://www.w3.org/TR/REC-xml/#charsets
* *
* #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] * #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

View File

@@ -167,6 +167,9 @@ public class XMLTokener extends JSONTokener {
int cp = (e.charAt(1) == 'x' || e.charAt(1) == 'X') int cp = (e.charAt(1) == 'x' || e.charAt(1) == 'X')
? parseHexEntity(e) ? parseHexEntity(e)
: parseDecimalEntity(e); : parseDecimalEntity(e);
if (XML.mustEscape(cp)) {
throw new JSONException("Invalid numeric character reference: &#" + e.substring(1) + ";");
}
return new String(new int[] {cp}, 0, 1); return new String(new int[] {cp}, 0, 1);
} }
Character knownEntity = entity.get(e); Character knownEntity = entity.get(e);

View File

@@ -1468,6 +1468,42 @@ public class XMLTest {
XML.toJSONObject(xmlStr); XML.toJSONObject(xmlStr);
} }
/**
 * Verifies that a hexadecimal numeric character reference above the last
 * Unicode code point ({@code 0x10FFFF}) is rejected with a
 * {@link JSONException} instead of escaping as an uncaught runtime exception.
 */
@Test(expected = JSONException.class)
public void testOutOfRangeHexEntityThrowsJSONException() {
    // 0x110000 is the first value past the valid XML character range.
    XML.toJSONObject("<a>&#x110000;</a>");
}
/**
 * Verifies that a decimal numeric character reference above the last
 * Unicode code point is rejected with a {@link JSONException} instead of
 * escaping as an uncaught runtime exception.
 */
@Test(expected = JSONException.class)
public void testOutOfRangeDecimalEntityThrowsJSONException() {
    // 1114112 is the decimal form of 0x110000, one past 0x10FFFF.
    XML.toJSONObject("<a>&#1114112;</a>");
}
/**
 * Verifies that a numeric character reference naming a surrogate code
 * point is rejected with a {@link JSONException}.
 */
@Test(expected = JSONException.class)
public void testSurrogateHexEntityThrowsJSONException() {
    // 0xD800 falls in the gap between the valid ranges #x20-#xD7FF and #xE000-#xFFFD.
    XML.toJSONObject("<a>&#xD800;</a>");
}
/**
 * Verifies that an out-of-range numeric character reference is rejected
 * with a {@link JSONException} when it appears inside an attribute value
 * rather than in element content.
 */
@Test(expected = JSONException.class)
public void testOutOfRangeHexEntityInAttributeThrowsJSONException() {
    // Same out-of-range value as the element-content test, placed in attribute "b".
    XML.toJSONObject("<a b=\"&#x110000;\"/>");
}
/** /**
* Tests that valid decimal numeric entity &#65; works correctly. * Tests that valid decimal numeric entity &#65; works correctly.
* Should decode to character 'A'. * Should decode to character 'A'.