Add a config flag to disable whitespace trimming

This commit is contained in:
Keaton Taylor 2023-11-20 12:11:47 +02:00
parent 11c29c366d
commit 30f5b2de79
5 changed files with 160 additions and 7 deletions

View File

@ -431,6 +431,9 @@ public class XML {
&& jsonObject.opt(config.getcDataTagName()) != null) {
context.accumulate(tagName, jsonObject.opt(config.getcDataTagName()));
} else {
if (!config.shouldTrimWhiteSpace()) {
removeEmpty(jsonObject, config);
}
context.accumulate(tagName, jsonObject);
}
}
@ -445,6 +448,48 @@ public class XML {
}
}
}
/**
* This method removes any JSON entry which has the key set by XMLParserConfiguration.cDataTagName
* and contains whitespace as this is caused by whitespace between tags. See test XMLTest.testNestedWithWhitespaceTrimmingDisabled.
* @param jsonObject JSONObject which may require deletion
* @param config The XMLParserConfiguration which includes the cDataTagName
*/
private static void removeEmpty(final JSONObject jsonObject, final XMLParserConfiguration config) {
if (jsonObject.has(config.getcDataTagName())) {
final Object s = jsonObject.get(config.getcDataTagName());
if (s instanceof String) {
if (isStringAllWhiteSpace(s.toString())) {
jsonObject.remove(config.getcDataTagName());
}
}
else if (s instanceof JSONArray) {
final JSONArray sArray = (JSONArray) s;
for (int k = sArray.length()-1; k >= 0; k--){
final Object eachString = sArray.get(k);
if (eachString instanceof String) {
String s1 = (String) eachString;
if (isStringAllWhiteSpace(s1)) {
sArray.remove(k);
}
}
}
if (sArray.isEmpty()) {
jsonObject.remove(config.getcDataTagName());
}
}
}
}
private static boolean isStringAllWhiteSpace(final String s) {
for (int k = 0; k<s.length(); k++){
final char eachChar = s.charAt(k);
if (!Character.isWhitespace(eachChar)) {
return false;
}
}
return true;
}
/**
* This method tries to convert the given string value to the target object
@ -594,7 +639,7 @@ public class XML {
*/
public static JSONObject toJSONObject(Reader reader, XMLParserConfiguration config) throws JSONException {
JSONObject jo = new JSONObject();
XMLTokener x = new XMLTokener(reader);
XMLTokener x = new XMLTokener(reader, config);
while (x.more()) {
x.skipPast("<");
if(x.more()) {

View File

@ -61,9 +61,11 @@ public class XMLParserConfiguration extends ParserConfiguration {
*/
private Set<String> forceList;
private boolean shouldTrimWhiteSpace;
/**
* Default parser configuration. Does not keep strings (tries to implicitly convert
* values), and the CDATA Tag Name is "content".
* values), and the CDATA Tag Name is "content". Trims whitespace.
*/
public XMLParserConfiguration () {
super();
@ -71,6 +73,7 @@ public class XMLParserConfiguration extends ParserConfiguration {
this.convertNilAttributeToNull = false;
this.xsiTypeMap = Collections.emptyMap();
this.forceList = Collections.emptySet();
this.shouldTrimWhiteSpace = true;
}
/**
@ -153,13 +156,14 @@ public class XMLParserConfiguration extends ParserConfiguration {
*/
private XMLParserConfiguration (final boolean keepStrings, final String cDataTagName,
final boolean convertNilAttributeToNull, final Map<String, XMLXsiTypeConverter<?>> xsiTypeMap, final Set<String> forceList,
final int maxNestingDepth, final boolean closeEmptyTag) {
final int maxNestingDepth, final boolean closeEmptyTag, final boolean shouldTrimWhiteSpace) {
super(keepStrings, maxNestingDepth);
this.cDataTagName = cDataTagName;
this.convertNilAttributeToNull = convertNilAttributeToNull;
this.xsiTypeMap = Collections.unmodifiableMap(xsiTypeMap);
this.forceList = Collections.unmodifiableSet(forceList);
this.closeEmptyTag = closeEmptyTag;
this.shouldTrimWhiteSpace = shouldTrimWhiteSpace;
}
/**
@ -179,7 +183,8 @@ public class XMLParserConfiguration extends ParserConfiguration {
this.xsiTypeMap,
this.forceList,
this.maxNestingDepth,
this.closeEmptyTag
this.closeEmptyTag,
this.shouldTrimWhiteSpace
);
}
@ -325,7 +330,23 @@ public class XMLParserConfiguration extends ParserConfiguration {
return clonedConfiguration;
}
/**
* Sets whether whitespace should be trimmed inside of tags. *NOTE* Do not use this if
* you expect your XML tags to have names that are the same as cDataTagName as this is unsupported.
* cDataTagName should be set to a distinct value in these cases.
* @param shouldTrimWhiteSpace boolean to set trimming on or off. Off is default.
* @return same instance of configuration with empty tag config updated
*/
public XMLParserConfiguration withShouldTrimWhitespace(boolean shouldTrimWhiteSpace){
XMLParserConfiguration clonedConfiguration = this.clone();
clonedConfiguration.shouldTrimWhiteSpace = shouldTrimWhiteSpace;
return clonedConfiguration;
}
public boolean isCloseEmptyTag() {
return this.closeEmptyTag;
}
public boolean shouldTrimWhiteSpace() {
return this.shouldTrimWhiteSpace;
}
}

View File

@ -20,6 +20,8 @@ public class XMLTokener extends JSONTokener {
*/
public static final java.util.HashMap<String, Character> entity;
private static XMLParserConfiguration configuration = XMLParserConfiguration.ORIGINAL;;
static {
entity = new java.util.HashMap<String, Character>(8);
entity.put("amp", XML.AMP);
@ -45,6 +47,15 @@ public class XMLTokener extends JSONTokener {
super(s);
}
public XMLTokener(Reader r, XMLParserConfiguration configuration) {
super(r);
XMLTokener.configuration = configuration;
}
public XMLTokener(String s, XMLParserConfiguration configuration) {
super(s);
XMLTokener.configuration = configuration;
}
/**
* Get the text in the CDATA block.
* @return The string up to the <code>]]&gt;</code>.
@ -83,7 +94,7 @@ public class XMLTokener extends JSONTokener {
StringBuilder sb;
do {
c = next();
} while (Character.isWhitespace(c));
} while (Character.isWhitespace(c) && configuration.shouldTrimWhiteSpace());
if (c == 0) {
return null;
}
@ -97,7 +108,9 @@ public class XMLTokener extends JSONTokener {
}
if (c == '<') {
back();
return sb.toString().trim();
if (configuration.shouldTrimWhiteSpace()) {
return sb.toString().trim();
} else return sb.toString();
}
if (c == '&') {
sb.append(nextEntity(c));

View File

@ -1181,4 +1181,4 @@ public class XMLConfigurationTest {
assertTrue("Error: " +e.getMessage(), false);
}
}
}
}

View File

@ -1319,6 +1319,80 @@ public class XMLTest {
"parameter of the XMLParserConfiguration used");
}
}
@Test
public void testWithWhitespaceTrimmingDisabled() {
String originalXml = "<testXml> Test Whitespace String \t </testXml>";
JSONObject actualJson = XML.toJSONObject(originalXml, new XMLParserConfiguration().withShouldTrimWhitespace(false));
String expectedJsonString = "{\"testXml\":\" Test Whitespace String \t \"}";
JSONObject expectedJson = new JSONObject(expectedJsonString);
Util.compareActualVsExpectedJsonObjects(actualJson,expectedJson);
}
@Test
public void testNestedWithWhitespaceTrimmingDisabled() {
String originalXml =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
"<addresses>\n"+
" <address>\n"+
" <name> Sherlock Holmes </name>\n"+
" </address>\n"+
"</addresses>";
JSONObject actualJson = XML.toJSONObject(originalXml, new XMLParserConfiguration().withShouldTrimWhitespace(false));
String expectedJsonString = "{\"addresses\":{\"address\":{\"name\":\" Sherlock Holmes \"}}}";
JSONObject expectedJson = new JSONObject(expectedJsonString);
Util.compareActualVsExpectedJsonObjects(actualJson,expectedJson);
}
@Test
public void shouldTrimWhitespaceDoesNotSupportTagsEqualingCDataTagName() {
// When using withShouldTrimWhitespace = true, input containing tags with same name as cDataTagName is unsupported and should not be used in conjunction
String originalXml =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
"<addresses>\n"+
" <address>\n"+
" <content> Sherlock Holmes </content>\n"+
" </address>\n"+
"</addresses>";
JSONObject actualJson = XML.toJSONObject(originalXml, new XMLParserConfiguration().withShouldTrimWhitespace(false).withcDataTagName("content"));
String expectedJsonString = "{\"addresses\":{\"address\":[[\"\\n \",\" Sherlock Holmes \",\"\\n \"]]}}";
JSONObject expectedJson = new JSONObject(expectedJsonString);
Util.compareActualVsExpectedJsonObjects(actualJson,expectedJson);
}
@Test
public void shouldTrimWhitespaceEnabledDropsTagsEqualingCDataTagNameButValueRemains() {
String originalXml =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
"<addresses>\n"+
" <address>\n"+
" <content> Sherlock Holmes </content>\n"+
" </address>\n"+
"</addresses>";
JSONObject actualJson = XML.toJSONObject(originalXml, new XMLParserConfiguration().withShouldTrimWhitespace(true).withcDataTagName("content"));
String expectedJsonString = "{\"addresses\":{\"address\":\"Sherlock Holmes\"}}";
JSONObject expectedJson = new JSONObject(expectedJsonString);
Util.compareActualVsExpectedJsonObjects(actualJson,expectedJson);
}
@Test
public void testWithWhitespaceTrimmingEnabled() {
String originalXml = "<testXml> Test Whitespace String \t </testXml>";
JSONObject actualJson = XML.toJSONObject(originalXml, new XMLParserConfiguration().withShouldTrimWhitespace(true));
String expectedJsonString = "{\"testXml\":\"Test Whitespace String\"}";
JSONObject expectedJson = new JSONObject(expectedJsonString);
Util.compareActualVsExpectedJsonObjects(actualJson,expectedJson);
}
@Test
public void testWithWhitespaceTrimmingEnabledByDefault() {
String originalXml = "<testXml> Test Whitespace String \t </testXml>";
JSONObject actualJson = XML.toJSONObject(originalXml, new XMLParserConfiguration());
String expectedJsonString = "{\"testXml\":\"Test Whitespace String\"}";
JSONObject expectedJson = new JSONObject(expectedJsonString);
Util.compareActualVsExpectedJsonObjects(actualJson,expectedJson);
}
}