package org.json; /* Public Domain. */ import java.io.Reader; import java.io.StringReader; import java.math.BigDecimal; import java.math.BigInteger; import java.util.Iterator; import java.util.NoSuchElementException; /** * This provides static methods to convert an XML text into a JSONObject, and to * covert a JSONObject into an XML text. * * @author JSON.org * @version 2016-08-10 */ @SuppressWarnings("boxing") public class XML { /** * Constructs a new XML object. */ public XML() { } /** The Character '&'. */ public static final Character AMP = '&'; /** The Character '''. */ public static final Character APOS = '\''; /** The Character '!'. */ public static final Character BANG = '!'; /** The Character '='. */ public static final Character EQ = '='; /** The Character
{@code '>'. }*/
public static final Character GT = '>';
/** The Character '<'. */
public static final Character LT = '<';
/** The Character '?'. */
public static final Character QUEST = '?';
/** The Character '"'. */
public static final Character QUOT = '"';
/** The Character '/'. */
public static final Character SLASH = '/';
/**
* Null attribute name
*/
public static final String NULL_ATTR = "xsi:nil";
/**
* Represents the XML attribute name for specifying type information.
*/
public static final String TYPE_ATTR = "xsi:type";
/**
* Creates an iterator for navigating Code Points in a string instead of
* characters. Once Java7 support is dropped, this can be replaced with
*
* string.codePoints()
*
* which is available in Java8 and above.
*
* @see http://stackoverflow.com/a/21791059/6030888
*/
private static Iterable{@code
* & (ampersand) is replaced by &
* < (less than) is replaced by <
* > (greater than) is replaced by >
* " (double quote) is replaced by "
* ' (single quote / apostrophe) is replaced by '
* }
*
* @param string
* The string to be escaped.
* @return The escaped string.
*/
public static String escape(String string) {
StringBuilder sb = new StringBuilder(string.length());
for (final int cp : codePointIterator(string)) {
switch (cp) {
case '&':
sb.append("&");
break;
case '<':
sb.append("<");
break;
case '>':
sb.append(">");
break;
case '"':
sb.append(""");
break;
case '\'':
sb.append("'");
break;
default:
if (mustEscape(cp)) {
sb.append("");
sb.append(Integer.toHexString(cp));
sb.append(';');
} else {
sb.appendCodePoint(cp);
}
}
}
return sb.toString();
}
/**
* @param cp code point to test
* @return true if the code point is not valid for an XML
*/
private static boolean mustEscape(int cp) {
/* Valid range from https://www.w3.org/TR/REC-xml/#charsets
*
* #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
*
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
*/
// isISOControl is true when (cp >= 0 && cp <= 0x1F) || (cp >= 0x7F && cp <= 0x9F)
// all ISO control characters are out of range except tabs and new lines
return (Character.isISOControl(cp)
&& cp != 0x9
&& cp != 0xA
&& cp != 0xD
) || !(
// valid the range of acceptable characters that aren't control
(cp >= 0x20 && cp <= 0xD7FF)
|| (cp >= 0xE000 && cp <= 0xFFFD)
|| (cp >= 0x10000 && cp <= 0x10FFFF)
)
;
}
/**
* Removes XML escapes from the string.
*
* @param string
* string to remove escapes from
* @return string with converted entities
*/
public static String unescape(String string) {
StringBuilder sb = new StringBuilder(string.length());
for (int i = 0, length = string.length(); i < length; i++) {
char c = string.charAt(i);
if (c == '&') {
final int semic = string.indexOf(';', i);
if (semic > i) {
final String entity = string.substring(i + 1, semic);
sb.append(XMLTokener.unescapeEntity(entity));
// skip past the entity we just parsed.
i += entity.length() + 1;
} else {
// this shouldn't happen in most cases since the parser
// errors on unclosed entries.
sb.append(c);
}
} else {
// not part of an entity
sb.append(c);
}
}
return sb.toString();
}
/**
* Throw an exception if the string contains whitespace. Whitespace is not
* allowed in tagNames and attributes.
*
* @param string
* A string.
* @throws JSONException Thrown if the string contains whitespace or is empty.
*/
public static void noSpace(String string) throws JSONException {
int i, length = string.length();
if (length == 0) {
throw new JSONException("Empty string.");
}
for (i = 0; i < length; i += 1) {
if (Character.isWhitespace(string.charAt(i))) {
throw new JSONException("'" + string
+ "' contains a space character.");
}
}
}
/**
* Scan the content following the named tag, attaching it to the context.
*
* @param x
* The XMLTokener containing the source string.
* @param context
* The JSONObject that will include the new material.
* @param name
* The tag name.
* @param config
* The XML parser configuration.
* @param currentNestingDepth
* The current nesting depth.
* @return true if the close tag is processed.
* @throws JSONException Thrown if any parsing error occurs.
*/
private static boolean parse(XMLTokener x, JSONObject context, String name, XMLParserConfiguration config, int currentNestingDepth)
throws JSONException {
char c;
int i;
JSONObject jsonObject = null;
String string;
String tagName;
Object token;
XMLXsiTypeConverter> xmlXsiTypeConverter;
// Test for and skip past these forms:
//
//
//
// ... ?>
// Report errors for these forms:
// <>
// <=
// <<
token = x.nextToken();
// ");
return false;
}
x.back();
} else if (c == '[') {
token = x.nextToken();
if ("CDATA".equals(token)) {
if (x.next() == '[') {
string = x.nextCDATA();
if (string.length() > 0) {
context.accumulate(config.getcDataTagName(), string);
}
return false;
}
}
throw x.syntaxError("Expected 'CDATA['");
}
i = 1;
do {
token = x.nextMeta();
if (token == null) {
throw x.syntaxError("Missing '>' after ' 0);
return false;
} else if (token == QUEST) {
//
x.skipPast("?>");
return false;
} else if (token == SLASH) {
// Close tag
token = x.nextToken();
if (name == null) {
throw x.syntaxError("Mismatched close tag " + token);
}
if (!token.equals(name)) {
throw x.syntaxError("Mismatched " + name + " and " + token);
}
if (x.nextToken() != GT) {
throw x.syntaxError("Misshaped close tag");
}
return true;
} else if (token instanceof Character) {
throw x.syntaxError("Misshaped tag");
// Open tag <
} else {
tagName = (String) token;
token = null;
jsonObject = new JSONObject();
boolean nilAttributeFound = false;
xmlXsiTypeConverter = null;
for (;;) {
if (token == null) {
token = x.nextToken();
}
// attribute = value
if (token instanceof String) {
string = (String) token;
token = x.nextToken();
if (token == EQ) {
token = x.nextToken();
if (!(token instanceof String)) {
throw x.syntaxError("Missing value");
}
if (config.isConvertNilAttributeToNull()
&& NULL_ATTR.equals(string)
&& Boolean.parseBoolean((String) token)) {
nilAttributeFound = true;
} else if(config.getXsiTypeMap() != null && !config.getXsiTypeMap().isEmpty()
&& TYPE_ATTR.equals(string)) {
xmlXsiTypeConverter = config.getXsiTypeMap().get(token);
} else if (!nilAttributeFound) {
Object obj = stringToValue((String) token);
if (obj instanceof Boolean) {
jsonObject.accumulate(string,
config.isKeepBooleanAsString()
? ((String) token)
: obj);
} else if (obj instanceof Number) {
jsonObject.accumulate(string,
config.isKeepNumberAsString()
? ((String) token)
: obj);
} else {
jsonObject.accumulate(string, stringToValue((String) token));
}
}
token = null;
} else {
jsonObject.accumulate(string, "");
}
} else if (token == SLASH) {
// Empty tag <.../>
if (x.nextToken() != GT) {
throw x.syntaxError("Misshaped tag");
}
if (config.getForceList().contains(tagName)) {
// Force the value to be an array
if (nilAttributeFound) {
context.append(tagName, JSONObject.NULL);
} else if (jsonObject.length() > 0) {
context.append(tagName, jsonObject);
} else {
context.put(tagName, new JSONArray());
}
} else {
if (nilAttributeFound) {
context.accumulate(tagName, JSONObject.NULL);
} else if (jsonObject.length() > 0) {
context.accumulate(tagName, jsonObject);
} else {
context.accumulate(tagName, "");
}
}
return false;
} else if (token == GT) {
// Content, between <...> and
for (;;) {
token = x.nextContent();
if (token == null) {
if (tagName != null) {
throw x.syntaxError("Unclosed tag " + tagName);
}
return false;
} else if (token instanceof String) {
string = (String) token;
if (string.length() > 0) {
if(xmlXsiTypeConverter != null) {
jsonObject.accumulate(config.getcDataTagName(),
stringToValue(string, xmlXsiTypeConverter));
} else {
Object obj = stringToValue((String) token);
if (obj instanceof Boolean) {
jsonObject.accumulate(config.getcDataTagName(),
config.isKeepBooleanAsString()
? ((String) token)
: obj);
} else if (obj instanceof Number) {
jsonObject.accumulate(config.getcDataTagName(),
config.isKeepNumberAsString()
? ((String) token)
: obj);
} else if (obj == JSONObject.NULL) {
jsonObject.accumulate(config.getcDataTagName(),
config.isKeepStrings() ? ((String) token) : obj);
} else {
jsonObject.accumulate(config.getcDataTagName(), stringToValue((String) token));
}
}
}
} else if (token == LT) {
// Nested element
if (currentNestingDepth == config.getMaxNestingDepth()) {
throw x.syntaxError("Maximum nesting depth of " + config.getMaxNestingDepth() + " reached");
}
if (parse(x, jsonObject, tagName, config, currentNestingDepth + 1)) {
if (config.getForceList().contains(tagName)) {
// Force the value to be an array
if (jsonObject.length() == 0) {
context.put(tagName, new JSONArray());
} else if (jsonObject.length() == 1
&& jsonObject.opt(config.getcDataTagName()) != null) {
context.append(tagName, jsonObject.opt(config.getcDataTagName()));
} else {
context.append(tagName, jsonObject);
}
} else {
if (jsonObject.length() == 0) {
context.accumulate(tagName, "");
} else if (jsonObject.length() == 1
&& jsonObject.opt(config.getcDataTagName()) != null) {
context.accumulate(tagName, jsonObject.opt(config.getcDataTagName()));
} else {
if (!config.shouldTrimWhiteSpace()) {
removeEmpty(jsonObject, config);
}
context.accumulate(tagName, jsonObject);
}
}
return false;
}
}
}
} else {
throw x.syntaxError("Misshaped tag");
}
}
}
}
/**
* This method removes any JSON entry which has the key set by XMLParserConfiguration.cDataTagName
* and contains whitespace as this is caused by whitespace between tags. See test XMLTest.testNestedWithWhitespaceTrimmingDisabled.
* @param jsonObject JSONObject which may require deletion
* @param config The XMLParserConfiguration which includes the cDataTagName
*/
private static void removeEmpty(final JSONObject jsonObject, final XMLParserConfiguration config) {
if (jsonObject.has(config.getcDataTagName())) {
final Object s = jsonObject.get(config.getcDataTagName());
if (s instanceof String) {
if (isStringAllWhiteSpace(s.toString())) {
jsonObject.remove(config.getcDataTagName());
}
}
else if (s instanceof JSONArray) {
final JSONArray sArray = (JSONArray) s;
for (int k = sArray.length()-1; k >= 0; k--){
final Object eachString = sArray.get(k);
if (eachString instanceof String) {
String s1 = (String) eachString;
if (isStringAllWhiteSpace(s1)) {
sArray.remove(k);
}
}
}
if (sArray.isEmpty()) {
jsonObject.remove(config.getcDataTagName());
}
}
}
}
private static boolean isStringAllWhiteSpace(final String s) {
for (int k = 0; k{@code
* <[ [ ]]>}
* are ignored.
*
* @param string
* The source string.
* @return A JSONObject containing the structured data from the XML string.
* @throws JSONException Thrown if there is an errors while parsing the string
*/
public static JSONObject toJSONObject(String string) throws JSONException {
return toJSONObject(string, XMLParserConfiguration.ORIGINAL);
}
/**
* Convert a well-formed (but not necessarily valid) XML into a
* JSONObject. Some information may be lost in this transformation because
* JSON is a data format and XML is a document format. XML uses elements,
* attributes, and content text, while JSON uses unordered collections of
* name/value pairs and arrays of values. JSON does not does not like to
* distinguish between elements and attributes. Sequences of similar
* elements are represented as JSONArrays. Content text may be placed in a
* "content" member. Comments, prologs, DTDs, and {@code
* <[ [ ]]>}
* are ignored.
*
* @param reader The XML source reader.
* @return A JSONObject containing the structured data from the XML string.
* @throws JSONException Thrown if there is an errors while parsing the string
*/
public static JSONObject toJSONObject(Reader reader) throws JSONException {
return toJSONObject(reader, XMLParserConfiguration.ORIGINAL);
}
/**
* Convert a well-formed (but not necessarily valid) XML into a
* JSONObject. Some information may be lost in this transformation because
* JSON is a data format and XML is a document format. XML uses elements,
* attributes, and content text, while JSON uses unordered collections of
* name/value pairs and arrays of values. JSON does not does not like to
* distinguish between elements and attributes. Sequences of similar
* elements are represented as JSONArrays. Content text may be placed in a
* "content" member. Comments, prologs, DTDs, and {@code
* <[ [ ]]>}
* are ignored.
*
* All values are converted as strings, for 1, 01, 29.0 will not be coerced to
* numbers but will instead be the exact value as seen in the XML document.
*
* @param reader The XML source reader.
* @param keepStrings If true, then values will not be coerced into boolean
* or numeric values and will instead be left as strings
* @return A JSONObject containing the structured data from the XML string.
* @throws JSONException Thrown if there is an errors while parsing the string
*/
public static JSONObject toJSONObject(Reader reader, boolean keepStrings) throws JSONException {
if(keepStrings) {
return toJSONObject(reader, XMLParserConfiguration.KEEP_STRINGS);
}
return toJSONObject(reader, XMLParserConfiguration.ORIGINAL);
}
/**
* Convert a well-formed (but not necessarily valid) XML into a
* JSONObject. Some information may be lost in this transformation because
* JSON is a data format and XML is a document format. XML uses elements,
* attributes, and content text, while JSON uses unordered collections of
* name/value pairs and arrays of values. JSON does not does not like to
* distinguish between elements and attributes. Sequences of similar
* elements are represented as JSONArrays. Content text may be placed in a
* "content" member. Comments, prologs, DTDs, and {@code
* <[ [ ]]>}
* are ignored.
*
* All numbers are converted as strings, for 1, 01, 29.0 will not be coerced to
* numbers but will instead be the exact value as seen in the XML document depending
* on how flag is set.
* All booleans are converted as strings, for true, false will not be coerced to
* booleans but will instead be the exact value as seen in the XML document depending
* on how flag is set.
*
* @param reader The XML source reader.
* @param keepNumberAsString If true, then numeric values will not be coerced into
* numeric values and will instead be left as strings
* @param keepBooleanAsString If true, then boolean values will not be coerced into
* * numeric values and will instead be left as strings
* @return A JSONObject containing the structured data from the XML string.
* @throws JSONException Thrown if there is an errors while parsing the string
*/
public static JSONObject toJSONObject(Reader reader, boolean keepNumberAsString, boolean keepBooleanAsString) throws JSONException {
XMLParserConfiguration xmlParserConfiguration = new XMLParserConfiguration();
if(keepNumberAsString) {
xmlParserConfiguration = xmlParserConfiguration.withKeepNumberAsString(keepNumberAsString);
}
if(keepBooleanAsString) {
xmlParserConfiguration = xmlParserConfiguration.withKeepBooleanAsString(keepBooleanAsString);
}
return toJSONObject(reader, xmlParserConfiguration);
}
/**
* Convert a well-formed (but not necessarily valid) XML into a
* JSONObject. Some information may be lost in this transformation because
* JSON is a data format and XML is a document format. XML uses elements,
* attributes, and content text, while JSON uses unordered collections of
* name/value pairs and arrays of values. JSON does not does not like to
* distinguish between elements and attributes. Sequences of similar
* elements are represented as JSONArrays. Content text may be placed in a
* "content" member. Comments, prologs, DTDs, and {@code
* <[ [ ]]>}
* are ignored.
*
* All values are converted as strings, for 1, 01, 29.0 will not be coerced to
* numbers but will instead be the exact value as seen in the XML document.
*
* @param reader The XML source reader.
* @param config Configuration options for the parser
* @return A JSONObject containing the structured data from the XML string.
* @throws JSONException Thrown if there is an errors while parsing the string
*/
public static JSONObject toJSONObject(Reader reader, XMLParserConfiguration config) throws JSONException {
JSONObject jo = new JSONObject();
XMLTokener x = new XMLTokener(reader, config);
while (x.more()) {
x.skipPast("<");
if(x.more()) {
parse(x, jo, null, config, 0);
}
}
return jo;
}
/**
* Convert a well-formed (but not necessarily valid) XML string into a
* JSONObject. Some information may be lost in this transformation because
* JSON is a data format and XML is a document format. XML uses elements,
* attributes, and content text, while JSON uses unordered collections of
* name/value pairs and arrays of values. JSON does not does not like to
* distinguish between elements and attributes. Sequences of similar
* elements are represented as JSONArrays. Content text may be placed in a
* "content" member. Comments, prologs, DTDs, and {@code
* <[ [ ]]>}
* are ignored.
*
* All values are converted as strings, for 1, 01, 29.0 will not be coerced to
* numbers but will instead be the exact value as seen in the XML document.
*
* @param string
* The source string.
* @param keepStrings If true, then values will not be coerced into boolean
* or numeric values and will instead be left as strings
* @return A JSONObject containing the structured data from the XML string.
* @throws JSONException Thrown if there is an errors while parsing the string
*/
public static JSONObject toJSONObject(String string, boolean keepStrings) throws JSONException {
return toJSONObject(new StringReader(string), keepStrings);
}
/**
* Convert a well-formed (but not necessarily valid) XML string into a
* JSONObject. Some information may be lost in this transformation because
* JSON is a data format and XML is a document format. XML uses elements,
* attributes, and content text, while JSON uses unordered collections of
* name/value pairs and arrays of values. JSON does not does not like to
* distinguish between elements and attributes. Sequences of similar
* elements are represented as JSONArrays. Content text may be placed in a
* "content" member. Comments, prologs, DTDs, and {@code
* <[ [ ]]>}
* are ignored.
*
* All numbers are converted as strings, for 1, 01, 29.0 will not be coerced to
* numbers but will instead be the exact value as seen in the XML document depending
* on how flag is set.
* All booleans are converted as strings, for true, false will not be coerced to
* booleans but will instead be the exact value as seen in the XML document depending
* on how flag is set.
*
* @param string
* The source string.
* @param keepNumberAsString If true, then numeric values will not be coerced into
* numeric values and will instead be left as strings
* @param keepBooleanAsString If true, then boolean values will not be coerced into
* numeric values and will instead be left as strings
* @return A JSONObject containing the structured data from the XML string.
* @throws JSONException Thrown if there is an errors while parsing the string
*/
public static JSONObject toJSONObject(String string, boolean keepNumberAsString, boolean keepBooleanAsString) throws JSONException {
return toJSONObject(new StringReader(string), keepNumberAsString, keepBooleanAsString);
}
/**
* Convert a well-formed (but not necessarily valid) XML string into a
* JSONObject. Some information may be lost in this transformation because
* JSON is a data format and XML is a document format. XML uses elements,
* attributes, and content text, while JSON uses unordered collections of
* name/value pairs and arrays of values. JSON does not does not like to
* distinguish between elements and attributes. Sequences of similar
* elements are represented as JSONArrays. Content text may be placed in a
* "content" member. Comments, prologs, DTDs, and {@code
* <[ [ ]]>}
* are ignored.
*
* All values are converted as strings, for 1, 01, 29.0 will not be coerced to
* numbers but will instead be the exact value as seen in the XML document.
*
* @param string
* The source string.
* @param config Configuration options for the parser.
* @return A JSONObject containing the structured data from the XML string.
* @throws JSONException Thrown if there is an errors while parsing the string
*/
public static JSONObject toJSONObject(String string, XMLParserConfiguration config) throws JSONException {
return toJSONObject(new StringReader(string), config);
}
/**
* Convert a JSONObject into a well-formed, element-normal XML string.
*
* @param object
* A JSONObject.
* @return A string.
* @throws JSONException Thrown if there is an error parsing the string
*/
public static String toString(Object object) throws JSONException {
return toString(object, null, XMLParserConfiguration.ORIGINAL);
}
/**
* Convert a JSONObject into a well-formed, element-normal XML string.
*
* @param object
* A JSONObject.
* @param tagName
* The optional name of the enclosing tag.
* @return A string.
* @throws JSONException Thrown if there is an error parsing the string
*/
public static String toString(final Object object, final String tagName) {
return toString(object, tagName, XMLParserConfiguration.ORIGINAL);
}
/**
* Convert a JSONObject into a well-formed, element-normal XML string.
*
* @param object
* A JSONObject.
* @param tagName
* The optional name of the enclosing tag.
* @param config
* Configuration that can control output to XML.
* @return A string.
* @throws JSONException Thrown if there is an error parsing the string
*/
public static String toString(final Object object, final String tagName, final XMLParserConfiguration config)
throws JSONException {
return toString(object, tagName, config, 0, 0);
}
/**
* Convert a JSONObject into a well-formed, element-normal XML string,
* either pretty print or single-lined depending on indent factor.
*
* @param object
* A JSONObject.
* @param tagName
* The optional name of the enclosing tag.
* @param config
* Configuration that can control output to XML.
* @param indentFactor
* The number of spaces to add to each level of indentation.
* @param indent
* The current ident level in spaces.
* @return
* @throws JSONException
*/
private static String toString(final Object object, final String tagName, final XMLParserConfiguration config, int indentFactor, int indent)
throws JSONException {
StringBuilder sb = new StringBuilder();
JSONArray ja;
JSONObject jo;
String string;
if (object instanceof JSONObject) {
// Emit