Merge pull request #832 from keatontaylor10/feature-disable-whitespace-trim

Add a config flag to disable whitespace trimming
This commit is contained in:
Sean Leary 2024-01-26 19:40:36 -06:00 committed by GitHub
commit f2d20988de
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 167 additions and 6 deletions

View File

@ -431,6 +431,9 @@ public class XML {
&& jsonObject.opt(config.getcDataTagName()) != null) {
context.accumulate(tagName, jsonObject.opt(config.getcDataTagName()));
} else {
if (!config.shouldTrimWhiteSpace()) {
removeEmpty(jsonObject, config);
}
context.accumulate(tagName, jsonObject);
}
}
@ -445,6 +448,48 @@ public class XML {
}
}
}
/**
 * Strips whitespace-only text stored under the key given by XMLParserConfiguration.cDataTagName.
 * Such entries are caused by whitespace between tags.
 * See test XMLTest.testNestedWithWhitespaceTrimmingDisabled.
 * <p>
 * A String value is removed when it is entirely whitespace. For a JSONArray value, every
 * all-whitespace String element is removed, and the key itself is removed if the array ends
 * up empty. Values of any other type are left untouched.
 *
 * @param jsonObject JSONObject to clean up in place
 * @param config The XMLParserConfiguration which supplies the cDataTagName
 */
private static void removeEmpty(final JSONObject jsonObject, final XMLParserConfiguration config) {
    final String textKey = config.getcDataTagName();
    if (!jsonObject.has(textKey)) {
        return;
    }

    final Object text = jsonObject.get(textKey);
    if (text instanceof String) {
        if (isStringAllWhiteSpace((String) text)) {
            jsonObject.remove(textKey);
        }
        return;
    }
    if (!(text instanceof JSONArray)) {
        return;
    }

    final JSONArray pieces = (JSONArray) text;
    // Walk backwards so that removing an element never shifts an index still to be visited.
    int idx = pieces.length();
    while (--idx >= 0) {
        final Object piece = pieces.get(idx);
        if (piece instanceof String && isStringAllWhiteSpace((String) piece)) {
            pieces.remove(idx);
        }
    }
    if (pieces.isEmpty()) {
        jsonObject.remove(textKey);
    }
}
/**
 * Tells whether a string consists solely of whitespace, as judged by
 * {@link Character#isWhitespace(char)}. An empty string counts as all whitespace.
 *
 * @param s the string to inspect
 * @return true if no character of s is non-whitespace, false otherwise
 */
private static boolean isStringAllWhiteSpace(final String s) {
    final int end = s.length();
    int pos = 0;
    // Skip over the leading run of whitespace; reaching the end means nothing else was found.
    while (pos < end && Character.isWhitespace(s.charAt(pos))) {
        pos++;
    }
    return pos == end;
}
/**
* This method tries to convert the given string value to the target object
@ -594,7 +639,7 @@ public class XML {
*/
public static JSONObject toJSONObject(Reader reader, XMLParserConfiguration config) throws JSONException {
JSONObject jo = new JSONObject();
XMLTokener x = new XMLTokener(reader);
XMLTokener x = new XMLTokener(reader, config);
while (x.more()) {
x.skipPast("<");
if(x.more()) {

View File

@ -61,9 +61,18 @@ public class XMLParserConfiguration extends ParserConfiguration {
*/
private Set<String> forceList;
/**
* Flag to indicate whether white space should be trimmed when parsing XML.
* The default behaviour is to trim white space. When this is set to false, inputting XML
* with tags that are the same as the value of cDataTagName is unsupported. It is recommended to set cDataTagName
* to a distinct value in this case.
*/
private boolean shouldTrimWhiteSpace;
/**
* Default parser configuration. Does not keep strings (tries to implicitly convert
* values), and the CDATA Tag Name is "content".
* values), and the CDATA Tag Name is "content". Trims whitespace.
*/
public XMLParserConfiguration () {
super();
@ -71,6 +80,7 @@ public class XMLParserConfiguration extends ParserConfiguration {
this.convertNilAttributeToNull = false;
this.xsiTypeMap = Collections.emptyMap();
this.forceList = Collections.emptySet();
this.shouldTrimWhiteSpace = true;
}
/**
@ -172,7 +182,7 @@ public class XMLParserConfiguration extends ParserConfiguration {
// item, a new map instance should be created and if possible each value in the
// map should be cloned as well. If the values of the map are known to also
// be immutable, then a shallow clone of the map is acceptable.
return new XMLParserConfiguration(
final XMLParserConfiguration config = new XMLParserConfiguration(
this.keepStrings,
this.cDataTagName,
this.convertNilAttributeToNull,
@ -181,6 +191,8 @@ public class XMLParserConfiguration extends ParserConfiguration {
this.maxNestingDepth,
this.closeEmptyTag
);
config.shouldTrimWhiteSpace = this.shouldTrimWhiteSpace;
return config;
}
/**
@ -327,7 +339,23 @@ public class XMLParserConfiguration extends ParserConfiguration {
return clonedConfiguration;
}
/**
 * Sets whether whitespace should be trimmed inside of tags. *NOTE* Do not disable trimming if
 * you expect your XML tags to have names that are the same as cDataTagName as this is unsupported.
 * cDataTagName should be set to a distinct value in these cases.
 * @param shouldTrimWhiteSpace boolean to set trimming on or off. On (true) is the default.
 * @return a clone of this configuration with the whitespace trimming flag updated;
 *         this instance is not modified
 */
public XMLParserConfiguration withShouldTrimWhitespace(boolean shouldTrimWhiteSpace){
    final XMLParserConfiguration updated = this.clone();
    updated.shouldTrimWhiteSpace = shouldTrimWhiteSpace;
    return updated;
}
/**
 * Reports the closeEmptyTag setting of this configuration.
 * @return the current value of the closeEmptyTag flag
 */
public boolean isCloseEmptyTag() {
    return closeEmptyTag;
}
/**
 * Reports whether white space should be trimmed when parsing XML.
 * @return the current value of the shouldTrimWhiteSpace flag
 */
public boolean shouldTrimWhiteSpace() {
    return shouldTrimWhiteSpace;
}
}

View File

@ -20,6 +20,8 @@ public class XMLTokener extends JSONTokener {
*/
public static final java.util.HashMap<String, Character> entity;
private XMLParserConfiguration configuration = XMLParserConfiguration.ORIGINAL;
static {
entity = new java.util.HashMap<String, Character>(8);
entity.put("amp", XML.AMP);
@ -45,6 +47,16 @@ public class XMLTokener extends JSONTokener {
super(s);
}
/**
 * Construct an XMLTokener from a Reader and an XMLParserConfiguration.
 * @param r A source reader.
 * @param configuration the configuration that can be used to set certain flags;
 *        when null, the tokener keeps the default configuration its field is initialised with
 */
public XMLTokener(Reader r, XMLParserConfiguration configuration) {
    super(r);
    // The configuration field already holds a non-null default at this point. Only replace
    // it with a real configuration so later flag lookups never dereference null.
    if (configuration != null) {
        this.configuration = configuration;
    }
}
/**
* Get the text in the CDATA block.
* @return The string up to the <code>]]&gt;</code>.
@ -83,7 +95,7 @@ public class XMLTokener extends JSONTokener {
StringBuilder sb;
do {
c = next();
} while (Character.isWhitespace(c));
} while (Character.isWhitespace(c) && configuration.shouldTrimWhiteSpace());
if (c == 0) {
return null;
}
@ -97,7 +109,9 @@ public class XMLTokener extends JSONTokener {
}
if (c == '<') {
back();
return sb.toString().trim();
if (configuration.shouldTrimWhiteSpace()) {
return sb.toString().trim();
} else return sb.toString();
}
if (c == '&') {
sb.append(nextEntity(c));

View File

@ -1181,4 +1181,4 @@ public class XMLConfigurationTest {
assertTrue("Error: " +e.getMessage(), false);
}
}
}
}

View File

@ -1323,6 +1323,80 @@ public class XMLTest {
"parameter of the XMLParserConfiguration used");
}
}
/**
 * With trimming disabled, leading and trailing whitespace of a tag's text is kept.
 */
@Test
public void testWithWhitespaceTrimmingDisabled() {
    final XMLParserConfiguration keepWhitespace = new XMLParserConfiguration().withShouldTrimWhitespace(false);
    final JSONObject parsed = XML.toJSONObject("<testXml> Test Whitespace String \t </testXml>", keepWhitespace);
    final JSONObject wanted = new JSONObject("{\"testXml\":\" Test Whitespace String \t \"}");
    Util.compareActualVsExpectedJsonObjects(parsed, wanted);
}
/**
 * With trimming disabled, whitespace between nested tags does not show up in the result,
 * while whitespace inside a text-only tag is kept.
 */
@Test
public void testNestedWithWhitespaceTrimmingDisabled() {
    final String xml =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
        "<addresses>\n"+
        " <address>\n"+
        " <name> Sherlock Holmes </name>\n"+
        " </address>\n"+
        "</addresses>";
    final XMLParserConfiguration keepWhitespace = new XMLParserConfiguration().withShouldTrimWhitespace(false);
    final JSONObject parsed = XML.toJSONObject(xml, keepWhitespace);
    final JSONObject wanted = new JSONObject("{\"addresses\":{\"address\":{\"name\":\" Sherlock Holmes \"}}}");
    Util.compareActualVsExpectedJsonObjects(parsed, wanted);
}
/**
 * Pins the current output for the unsupported combination of disabled whitespace trimming
 * and an input tag whose name equals cDataTagName.
 */
@Test
public void shouldTrimWhitespaceDoesNotSupportTagsEqualingCDataTagName() {
    // When using withShouldTrimWhitespace = false, input containing tags with same name as cDataTagName is unsupported and should not be used in conjunction
    String originalXml =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
        "<addresses>\n"+
        " <address>\n"+
        " <content> Sherlock Holmes </content>\n"+
        " </address>\n"+
        "</addresses>";
    JSONObject actualJson = XML.toJSONObject(originalXml, new XMLParserConfiguration().withShouldTrimWhitespace(false).withcDataTagName("content"));
    // The expected value documents what the parser currently produces, not a desirable result.
    String expectedJsonString = "{\"addresses\":{\"address\":[[\"\\n \",\" Sherlock Holmes \",\"\\n \"]]}}";
    JSONObject expectedJson = new JSONObject(expectedJsonString);
    Util.compareActualVsExpectedJsonObjects(actualJson,expectedJson);
}
/**
 * With trimming enabled, a tag named like cDataTagName is dropped from the result
 * but its trimmed text value is kept on the parent.
 */
@Test
public void shouldTrimWhitespaceEnabledDropsTagsEqualingCDataTagNameButValueRemains() {
    final String xml =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
        "<addresses>\n"+
        " <address>\n"+
        " <content> Sherlock Holmes </content>\n"+
        " </address>\n"+
        "</addresses>";
    final XMLParserConfiguration trimming =
        new XMLParserConfiguration().withShouldTrimWhitespace(true).withcDataTagName("content");
    final JSONObject parsed = XML.toJSONObject(xml, trimming);
    final JSONObject wanted = new JSONObject("{\"addresses\":{\"address\":\"Sherlock Holmes\"}}");
    Util.compareActualVsExpectedJsonObjects(parsed, wanted);
}
/**
 * With trimming explicitly enabled, surrounding whitespace of a tag's text is removed.
 */
@Test
public void testWithWhitespaceTrimmingEnabled() {
    final XMLParserConfiguration trimming = new XMLParserConfiguration().withShouldTrimWhitespace(true);
    final JSONObject parsed = XML.toJSONObject("<testXml> Test Whitespace String \t </testXml>", trimming);
    final JSONObject wanted = new JSONObject("{\"testXml\":\"Test Whitespace String\"}");
    Util.compareActualVsExpectedJsonObjects(parsed, wanted);
}
/**
 * A default-constructed configuration trims surrounding whitespace of a tag's text.
 */
@Test
public void testWithWhitespaceTrimmingEnabledByDefault() {
    final XMLParserConfiguration defaults = new XMLParserConfiguration();
    final JSONObject parsed = XML.toJSONObject("<testXml> Test Whitespace String \t </testXml>", defaults);
    final JSONObject wanted = new JSONObject("{\"testXml\":\"Test Whitespace String\"}");
    Util.compareActualVsExpectedJsonObjects(parsed, wanted);
}
}