Commit f8469e04 authored by Matija Obreza's avatar Matija Obreza
Browse files

Another implementation

parent 562ed4da
/*
* Copyright 2016 Global Crop Diversity Trust, www.croptrust.org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.croptrust.excel.templates.impl;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.croptrust.excel.templates.ExcelToJSON;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.Configuration.ConfigurationBuilder;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Option;
import com.jayway.jsonpath.PathNotFoundException;
import com.jayway.jsonpath.spi.json.JacksonJsonProvider;
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
/**
 * {@link ExcelToJSON} implementation that converts every row of a sheet into a JSON string by writing cell
 * values into a copy of a "defaults" JSON document at the JsonPath locations named by a column mapping.
 *
 * <p>
 * Missing intermediate objects and arrays referenced by a mapping (for example {@code itinerary[1].destination})
 * are created on demand by {@link #ensureParents(DocumentContext, JsonPath)}.
 * </p>
 */
public class ExcelToJSON2Impl implements ExcelToJSON {

    /** The Constant LOG. */
    private static final Logger LOG = LoggerFactory.getLogger(ExcelToJSON2Impl.class);

    /** Matches one segment of a bracket-notation path, capturing the text between the brackets. */
    private static final Pattern PATH_SEGMENT = Pattern.compile("\\[([^]]+)\\]");

    /**
     * JsonPath configuration: Jackson providers, and reads of a missing leaf yield {@code null} instead of
     * throwing.
     */
    private static final Configuration JSON_PATH_CONFIG = new ConfigurationBuilder()
            .jsonProvider(new JacksonJsonProvider()).mappingProvider(new JacksonMappingProvider())
            .options(Option.DEFAULT_PATH_LEAF_TO_NULL).build();

    /**
     * Reads the rows of {@code sheet} and converts each one to a JSON string.
     *
     * @param sheet the sheet to read
     * @param columnMapping JsonPath (relative to {@code $}) for each column index; blank or {@code null}
     *            entries mean the column is skipped
     * @param objectDefaults JSON document every row starts from; when blank, {@code EMPTY_JSON_OBJECT_STRING}
     *            is used
     * @param ignoreHeaderRow when {@code true}, row 0 is not converted
     * @param ignoreEmptyRows when {@code false}, a row that does not exist in the sheet contributes a
     *            {@code null} entry; when {@code true}, such a row is skipped
     * @return one entry per converted row; an entry is {@code null} when no column of the row was mapped
     *
     * @see org.croptrust.excel.templates.ExcelToJSON#readAsJson(org.apache.poi.ss.usermodel.Sheet, java.lang.String[],
     *      java.lang.String, boolean, boolean)
     */
    @Override
    public List<String> readAsJson(final Sheet sheet, final String[] columnMapping, String objectDefaults,
            final boolean ignoreHeaderRow, final boolean ignoreEmptyRows) {
        // getLastRowNum() is a 0-based index, so there are at most lastRowIndex + 1 rows.
        final int lastRowIndex = sheet.getLastRowNum();
        final List<String> results = new ArrayList<String>(Math.max(0, lastRowIndex + 1));

        if (StringUtils.isBlank(objectDefaults)) {
            LOG.trace("Blank defaults provided");
            objectDefaults = EMPTY_JSON_OBJECT_STRING;
        }

        // Parse the defaults once up front so that invalid JSON fails before any row is read, and keep the
        // re-serialized form as the per-row template.
        // TODO Expand defaults object with all mappings defined
        objectDefaults = JsonPath.using(JSON_PATH_CONFIG).parse(objectDefaults).jsonString();
        LOG.trace("Defaults: {}", objectDefaults);

        // Read rows; the bound is inclusive because lastRowIndex is the index of the last row itself.
        final int startAt = ignoreHeaderRow ? 1 : 0;
        for (int rowIndex = startAt; rowIndex <= lastRowIndex; rowIndex++) {
            final Row row = sheet.getRow(rowIndex);
            if (row == null) {
                // The row is not defined in the sheet: emit a placeholder unless told to ignore it.
                if (!ignoreEmptyRows) {
                    results.add(null);
                }
                continue;
            }

            // Every row gets its own fresh copy of the defaults
            final DocumentContext rowObject = JsonPath.using(JSON_PATH_CONFIG).parse(objectDefaults);
            final String updatedJson = rowToJson(row, columnMapping, rowObject);
            LOG.trace("Row JSON: {}", updatedJson);
            results.add(updatedJson);
        }
        return results;
    }

    /**
     * Writes the cells of {@code row} into {@code obj} according to {@code columnMapping}.
     *
     * @param row the row to read, must not be {@code null}
     * @param columnMapping JsonPath (relative to {@code $}) per column index; blank entries are skipped
     * @param obj the document to update in place
     * @return the JSON string of the updated document, or {@code null} when no mapped column was processed
     */
    private String rowToJson(final Row row, final String[] columnMapping, final DocumentContext obj) {
        final int columnCount = Math.min(columnMapping.length, row.getLastCellNum());
        int updates = 0;

        for (int i = 0; i < columnCount; i++) {
            if (StringUtils.isBlank(columnMapping[i])) {
                // Skip blank and null mappings
                continue;
            }

            final String jsonPath = "$." + columnMapping[i];
            final JsonPath compiledPath = JsonPath.compile(jsonPath);
            final Object cellValue = cellValue(row.getCell(i));
            LOG.trace("Mapping {} to {}", cellValue, jsonPath);

            try {
                // Reading is a probe: it throws when an intermediate element of the path is missing.
                final Object current = obj.read(compiledPath);
                LOG.trace("Updating {} old={} val={}", jsonPath, current, cellValue);
            } catch (final PathNotFoundException e) {
                LOG.trace("New element path={} val={}", jsonPath, cellValue);
                // TODO This should be done before the loop on the defaults object
                ensureParents(obj, compiledPath);
            }

            obj.set(compiledPath, cellValue);
            updates++;

            if (LOG.isDebugEnabled()) {
                LOG.debug("== {}", obj.jsonString());
            }
        }

        // When nothing was done, return null
        return updates == 0 ? null : obj.jsonString();
    }

    /**
     * Creates every object and array element along {@code compiledPath} that does not exist yet in
     * {@code obj}, so that a subsequent {@code set} on that path can succeed.
     *
     * <p>
     * The path is walked segment by segment in bracket notation. A quoted segment ({@code ['name']}) is a
     * property; an unquoted integer segment ({@code [3]}) is an array index. A missing property is created as
     * an empty map. For a missing array index the parent list is grown with empty maps up to that index; a
     * parent that is not a list is replaced by a new list.
     * </p>
     *
     * @param obj the document to update in place
     * @param compiledPath the path whose elements must exist
     */
    private void ensureParents(final DocumentContext obj, final JsonPath compiledPath) {
        final String bracketNotation = compiledPath.getPath();
        LOG.trace("Analyzing {} for parents", bracketNotation);

        final List<String> segments = new ArrayList<>();
        final Matcher matcher = PATH_SEGMENT.matcher(bracketNotation);
        while (matcher.find()) {
            segments.add(matcher.group(1));
        }

        final StringBuilder currentPath = new StringBuilder("$");
        for (final String segment : segments) {
            final boolean quoted = segment.length() > 1 && segment.startsWith("'") && segment.endsWith("'");
            final String fieldName = quoted ? segment.substring(1, segment.length() - 1) : segment;
            LOG.trace("Inspecting field {}", fieldName);

            // Only an unquoted token can be an array index; ['2016'] is a property that happens to be numeric.
            final Integer arrayRef = quoted ? null : parseArrayIndex(segment);

            final JsonPath parentPath = JsonPath.compile(currentPath.toString());
            if (arrayRef == null) {
                currentPath.append("['").append(fieldName).append("']");
            } else {
                currentPath.append("[").append(fieldName).append("]");
            }
            LOG.trace("Current path={}", currentPath);

            Object nodeAtPath = null;
            try {
                nodeAtPath = obj.read(currentPath.toString());
            } catch (final PathNotFoundException ignored) {
                // Expected: the element does not exist yet and is created below.
            }

            if (LOG.isTraceEnabled()) {
                LOG.trace(">> Node before path={} node={}", currentPath, obj.jsonString());
            }

            if (nodeAtPath != null) {
                // The element exists. When it was addressed by index the parent is necessarily a list already,
                // so there is nothing to create regardless of the element's own type.
                LOG.trace("Node at path={} is node={} class={}", currentPath, nodeAtPath,
                        nodeAtPath.getClass().getSimpleName());
            } else {
                LOG.debug("Making node at path={} arrayRef={}", currentPath, arrayRef);
                if (arrayRef == null) {
                    obj.set(currentPath.toString(), new HashMap<>());
                } else {
                    final Object parent = obj.read(parentPath);
                    if (parent instanceof List) {
                        LOG.trace("Updating list at path={} arrayRef={}", currentPath, arrayRef);
                        // Safe: elements are only ever added as Object.
                        @SuppressWarnings("unchecked")
                        final List<Object> siblings = (List<Object>) parent;
                        obj.set(parentPath, updateList(siblings, arrayRef));
                    } else {
                        // Parent must be array
                        LOG.trace("Making list at path={}", parentPath);
                        obj.set(parentPath, makeList(arrayRef));
                    }
                }
            }

            if (LOG.isTraceEnabled()) {
                LOG.trace("<< Node after path={} node={}", currentPath, obj.jsonString());
            }
        }
    }

    /**
     * Parses a path token as an array index.
     *
     * @param token the text between the brackets of a path segment
     * @return the index, or {@code null} when the token is not an integer
     */
    private static Integer parseArrayIndex(final String token) {
        try {
            return Integer.valueOf(token);
        } catch (final NumberFormatException ignored) {
            // Not an index (e.g. a wildcard or an unquoted name): the caller treats it as a property.
            return null;
        }
    }

    /**
     * Grows {@code l} with empty maps until index {@code arrayRef} is valid; existing elements are kept and a
     * list that is already long enough is left untouched.
     *
     * @param l the list to grow in place
     * @param arrayRef the index that must become addressable; {@code null} is treated as index 0
     * @return the same list instance
     */
    private List<?> updateList(final List<Object> l, final Integer arrayRef) {
        final int requiredSize = arrayRef == null ? 1 : arrayRef + 1;
        while (l.size() < requiredSize) {
            LOG.trace("Adding element to list");
            l.add(new HashMap<>());
        }
        return l;
    }

    /**
     * Creates a list of empty maps in which index {@code arrayRef} is valid.
     *
     * @param arrayRef the index that must be addressable; {@code null} is treated as index 0
     * @return a new list with {@code arrayRef + 1} empty maps
     */
    private List<?> makeList(final Integer arrayRef) {
        return updateList(new ArrayList<Object>(), arrayRef);
    }

    /**
     * Extracts the value of a cell as a plain Java object.
     *
     * <p>
     * NOTE(review): formula cells are evaluated with {@code evaluateInCell}, which replaces the formula in the
     * workbook by its result, and a new evaluator is created for every formula cell — confirm both are
     * acceptable for callers that reuse the workbook.
     * </p>
     *
     * @param cell the cell, may be {@code null}
     * @return a {@code Boolean}, {@code Double} or {@code String}; {@code null} for missing, blank, error and
     *         unsupported cells
     */
    private Object cellValue(final Cell cell) {
        if (cell == null) {
            return null;
        }

        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_BOOLEAN:
            return cell.getBooleanCellValue();
        case Cell.CELL_TYPE_NUMERIC:
            return cell.getNumericCellValue();
        case Cell.CELL_TYPE_STRING:
            return cell.getStringCellValue();
        case Cell.CELL_TYPE_BLANK:
            return null;
        case Cell.CELL_TYPE_ERROR:
            LOG.debug("Cell contains #ERROR: {}", cell.getErrorCellValue());
            return null;
        case Cell.CELL_TYPE_FORMULA:
            LOG.debug("Cell contains a formula");
            final FormulaEvaluator evaluator = cell.getSheet().getWorkbook().getCreationHelper()
                    .createFormulaEvaluator();
            // After in-cell evaluation the cell holds a plain value, so the recursion ends after one step.
            return cellValue(evaluator.evaluateInCell(cell));
        default:
            LOG.warn("Unsupported cell type {}", cell.getCellType());
            return null;
        }
    }
}
......@@ -32,7 +32,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.Configuration.ConfigurationBuilder;
import com.jayway.jsonpath.DocumentContext;
......@@ -150,16 +149,23 @@ public class ExcelToJSONImpl implements ExcelToJSON {
updates++;
} catch (final PathNotFoundException e) {
if (LOG.isTraceEnabled()) {
LOG.trace("Setting {} val={}", jsonPath, cellValue);
LOG.trace("New element path={} val={}", jsonPath, cellValue);
}
// TODO This should be done before the loop on the defaults object
// Add missing properties!
ensureParents(obj, compiledPath);
// System.err.println("1: " + obj.jsonString());
// System.err.println(compiledPath.getPath() + " = " + cellValue);
obj.set(compiledPath, cellValue);
// System.err.println("2: " + obj.jsonString());
updates++;
}
if (LOG.isDebugEnabled()) {
LOG.debug("== {}", obj.jsonString());
}
}
if (updates == 0) {
......@@ -184,14 +190,41 @@ public class ExcelToJSONImpl implements ExcelToJSON {
if (LOG.isTraceEnabled()) {
LOG.trace("Analyzing {} for parents", bracketNotation);
}
final Matcher matcher = Pattern.compile("\\[([^]]+)\\]").matcher(bracketNotation);
final List<String> fields = new ArrayList<>();
while (matcher.find()) {
final String field = matcher.group(1);
String fieldName = matcher.group(1);
if (fieldName.startsWith("'")) {
fieldName = fieldName.substring(1, fieldName.length() - 1);
}
fields.add(fieldName);
}
for (int i = 0; i < fields.size(); i++) {
final String fieldName = fields.get(i);
if (LOG.isTraceEnabled()) {
LOG.trace("Inspecting field {}", field);
LOG.trace("Inspecting field {}", fieldName);
}
final boolean isLast = i == fields.size() - 1;
Integer arrayRef = null;
try {
arrayRef = Integer.parseInt(fieldName);
} catch (final NumberFormatException e) {
}
final String parentPath = currentPath.toString();
currentPath.append("[").append(field).append("]");
if (arrayRef == null) {
currentPath.append("['").append(fieldName).append("']");
} else {
currentPath.append("[").append(fieldName).append("]");
}
if (LOG.isTraceEnabled()) {
LOG.trace("Current path={}", currentPath.toString());
}
try {
// System.err.println(currentPath);
final Object res = obj.read(currentPath.toString());
......@@ -204,7 +237,7 @@ public class ExcelToJSONImpl implements ExcelToJSON {
if (LOG.isTraceEnabled()) {
LOG.trace("No path found: {}", e.getMessage());
}
if (field.startsWith("'")) {
if (arrayRef==null) {
// Is a property: ['property']
if (LOG.isTraceEnabled()) {
LOG.trace("Creating property at {}", currentPath);
......@@ -226,17 +259,47 @@ public class ExcelToJSONImpl implements ExcelToJSON {
// make parent into array
final Object parent = obj.read(parentPath);
// System.err.println(parent);
if (parent == null || parent instanceof ObjectNode) {
if (parent == null || parent instanceof HashMap<?, ?>) {
// convert to array
obj.set(parentPath, new ArrayList<>());
// System.err.println("Converted "+ obj.jsonString());
ArrayList<Object> list = new ArrayList<>();
list.add(new HashMap<>());
obj.set(parentPath, list);
if (LOG.isTraceEnabled()) {
LOG.trace("Converted " + obj.jsonString() + " parent="
+ (parent == null ? "NULL" : parent.getClass()));
}
} else if (parent instanceof ArrayList<?>) {
// parent is array
if (LOG.isTraceEnabled()) {
LOG.trace("Parent is array={}", parent);
}
} else {
throw new UnsupportedOperationException("What now? class=" + parent.getClass());
}
// TODO make as many as needed!
if (LOG.isTraceEnabled()) {
LOG.trace("Creating new array item in {}", parentPath);
LOG.trace("Creating new array item={} in parentPath={} node={}", currentPath, parentPath, obj.jsonString());
}
if (isLast) {
// We're not doing anything here.
if (LOG.isTraceEnabled()) {
LOG.trace("LAST at currentPath={}", currentPath);
}
ArrayList<Object> list = new ArrayList<>();
list.add(new HashMap<>());
System.err.println(obj.jsonString());
obj.set(currentPath.toString(), list);
System.err.println(obj.jsonString());
} else {
obj.add(parentPath, new HashMap<>());
}
obj.add(parentPath, new HashMap<>());
// System.err.println(">> " + obj.jsonString());
}
if (LOG.isDebugEnabled()) {
LOG.debug(">> " + obj.jsonString());
}
}
}
......
......@@ -33,14 +33,15 @@ import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.croptrust.excel.templates.impl.ExcelToMapsJSONImpl;
import org.croptrust.excel.templates.impl.ExcelToJSON2Impl;
import org.junit.Test;
import com.jayway.jsonpath.JsonPath;
public class ExcelToJSONTest extends JsonPathTest {
private ExcelToJSON excelToJson = new ExcelToMapsJSONImpl(); // new ExcelToJSONImpl();
// private ExcelToJSON excelToJson = new ExcelToMapsJSONImpl();
private ExcelToJSON excelToJson = new ExcelToJSON2Impl();
/**
* Check that we can read an XLSX file.
......@@ -194,15 +195,39 @@ public class ExcelToJSONTest extends JsonPathTest {
// Expect converter to return a list of null objects with size matching number of rows
List<String> jsonStrings = excelToJson.readAsJson(sheet, columnMapping, "{}", true, true);
jsonStrings.stream().forEach(
json -> {
assertThat("$.timestamp should have a value", JsonPath.parse(json).read(columnMapping[0]),
notNullValue());
assertThat(columnMapping[1] + " should have a value",
JsonPath.parse(json).read(columnMapping[1]), notNullValue());
assertThat(columnMapping[3] + " should have a value",
JsonPath.parse(json).read(columnMapping[3]), notNullValue());
});
jsonStrings.stream().forEach(json -> {
for (String cm : columnMapping) {
if (cm == null) {
continue;
}
assertThat(cm + " should have a value", JsonPath.parse(json).read(cm), notNullValue());
}
});
}
}
@Test
public void arrayKnown() throws EncryptedDocumentException, InvalidFormatException, IOException, URISyntaxException {
try (Workbook wb = WorkbookFactory.create(BasicExcelTest.PARTICIPANTS_XSLX)) {
Sheet sheet = wb.getSheetAt(0);
String[] columnMapping = new String[] { "timestamp", "traveller.fullName", null, "traveller.organization",
"traveller.address", "traveller.email", "traveller.phone", "traveller.dateOfBirth",
"traveller.passportCountry", null, null, null, "itinerary[0].origin", "itinerary[1].destination" };
// Expect converter to return a list of null objects with size matching number of rows
List<String> jsonStrings = excelToJson.readAsJson(sheet, columnMapping, "{}", true, true);
jsonStrings.stream().forEach(json -> {
for (String cm : columnMapping) {
if (cm == null) {
continue;
}
assertThat(cm + " should have a value", JsonPath.parse(json).read(cm), notNullValue());
}
});
}
}
}
......@@ -24,35 +24,43 @@ import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.croptrust.excel.templates.impl.ExcelToJSON2Impl;
import org.croptrust.excel.templates.impl.ExcelToJSONImpl;
import org.croptrust.excel.templates.impl.ExcelToMapsJSONImpl;
import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@Ignore
public class PerformanceTest extends JsonPathTest {
private final static Logger LOG = LoggerFactory.getLogger(PerformanceTest.class);
private ExcelToJSON[] excelToJsonServices = new ExcelToJSON[] { new ExcelToJSONImpl(), new ExcelToMapsJSONImpl() };
private ExcelToJSON[] excelToJsonServices = new ExcelToJSON[] { new ExcelToJSONImpl(), new ExcelToJSON2Impl(),
new ExcelToMapsJSONImpl() };
@Test
public void testPerformance() throws IOException, EncryptedDocumentException, InvalidFormatException {
String[][] columnMappings = new String[][] {
new String[] { "timestamp" }, new String[] { "timestamp", "traveller.fullName", "", "traveller.organization" },
new String[] { "timestamp" },
new String[] { "timestamp", "traveller.fullName", "", "traveller.organization" },
new String[] { "timestamp", "itinerary[0].origin", null, "itinerary[1].destination" },
new String[] { "a", "b", "c", "d", "e", "f", "g", "h" },
new String[] { "timestamp", "traveller.fullName", null, "traveller.organization", "traveller.address",
"traveller.email", "traveller.phone", "traveller.dateOfBirth", "traveller.passportCountry",
null, null, null, "itinerary[0].origin", "itinerary[1].destination" }
};
StopWatch stopWatch = new StopWatch();
String defaults="{\"this\":[\"is a\", 1], \"test\":{\"bar\":{\"foo\":[1]}}}";
String defaults = "{\"this\":[\"is a\", 1], \"test\":{\"bar\":{\"foo\":[1]}}}";
try (Workbook wb = WorkbookFactory.create(BasicExcelTest.PARTICIPANTS_XSLX)) {
Sheet sheet = wb.getSheetAt(0);
int numberOfRepeats1 = LOG.isDebugEnabled() ? 100 : 20;
int numberOfRepeats1 = LOG.isDebugEnabled() ? 100 : 30;
while (numberOfRepeats1-- > 0) {