Commit 92389cf6 authored by Artem Hrybeniuk, committed by Matija Obreza
Browse files

Detecting data structure

parent ce1d1179
......@@ -458,6 +458,29 @@ paths:
'404':
description: Not Found
deprecated: false
# POST endpoint that generates descriptors for all PreviewSheets of the Preview
# identified by the {uuid} path parameter (operationId: generateDescriptors).
'/api/v0/preview/{uuid}/generate-descriptors':
post:
tags:
- preview
summary: Generate descriptors for all PreviewSheets of given Preview
operationId: generateDescriptors
parameters:
# The only input is the reference UUID, passed in the path.
- name: uuid
in: path
description: Your reference UUID
required: true
type: string
format: uuid
# No response schema is declared for any status code; only descriptions are given.
responses:
'200':
description: OK
'401':
description: Unauthorized
'403':
description: Forbidden
'404':
description: Not Found
deprecated: false
definitions:
Observation:
type: object
......
......@@ -90,6 +90,7 @@ public class PreviewApi {
@ApiOperation(nickname="getPreview", value = "Get the overview of the parsed dataset", notes = "Use the same reference UUID as provided when ingesting a dataset")
public Preview get(@ApiParam(value = "Your reference UUID", required = true) @PathVariable UUID uuid) {
Preview preview = previewService.getPreview(uuid);
previewService.generateDescriptors(uuid);
if (preview == null) {
throw new NoSuchThingException("No preview for reference UUID=" + uuid);
}
......@@ -128,4 +129,14 @@ public class PreviewApi {
return previewService.getStatisticsData(uuid, sheet, startRow, limit.orElse(100), fields);
}
/**
 * Generates descriptors for all sheets of the preview with the given reference UUID.
 *
 * @param uuid reference UUID of the preview
 * @throws NoSuchThingException if no preview is registered for {@code uuid}
 */
@PostMapping(path = "/{uuid}/generate-descriptors")
@ApiOperation(nickname="generateDescriptors", value = "Generate descriptors for all PreviewSheets of given Preview")
public void generateDescriptors(@ApiParam(value = "Your reference UUID", required = true) @PathVariable UUID uuid) {
    // Reject unknown references before any generation work is started.
    if (previewService.getPreview(uuid) == null) {
        throw new NoSuchThingException("No preview for reference UUID=" + uuid);
    }

    previewService.generateDescriptors(uuid);
}
}
......@@ -21,6 +21,7 @@ import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
......@@ -28,6 +29,7 @@ import org.springframework.data.mongodb.MongoDbFactory;
import org.springframework.data.mongodb.MongoTransactionManager;
import org.springframework.data.mongodb.config.AbstractMongoConfiguration;
import org.springframework.data.mongodb.config.EnableMongoAuditing;
import org.springframework.data.mongodb.core.convert.MappingMongoConverter;
import org.springframework.data.mongodb.repository.config.EnableMongoRepositories;
import com.mongodb.MongoClient;
......@@ -79,4 +81,5 @@ public class DatabaseConfig extends AbstractMongoConfiguration {
MongoClient mongoClient = new MongoClient(servers, credential, options);
return mongoClient;
}
}
......@@ -15,10 +15,14 @@
*/
package org.genesys.amphibian.model;
import java.util.List;
import java.util.Map;
public class PreviewSheet {
public String name;
public int index;
public int rowCount;
public List<Descriptor> descriptors;
public PreviewSheet() {
}
......@@ -28,4 +32,14 @@ public class PreviewSheet {
name = sheetName;
rowCount = -1;
}
/**
 * Describes one column of a sheet: the column name, its data type and,
 * optionally, a map of codes.
 */
public static class Descriptor {
/** Name of the column this descriptor applies to. */
public String columnName;
/** Data type assigned to the column. */
public DataType dataType;
/**
 * Codes of the column.
 * NOTE(review): presumably only set for {@link DataType#CODED} columns; the meaning of
 * keys and values is not defined here -- confirm against the code that fills it.
 */
public Map<String, String> codes;
/** The kinds of column content a descriptor can declare. */
public enum DataType {
CODED, TEXT, NUMERIC
}
}
}
......@@ -44,6 +44,8 @@ public interface PreviewService {
Preview registerSheet(String id, PreviewSheet sheet);
/**
 * Generates descriptors for all sheets of the preview with the given reference UUID.
 *
 * @param referenceUuid reference UUID of the preview
 */
void generateDescriptors(UUID referenceUuid);
class StatisticsData implements Serializable {
private static final long serialVersionUID = 3826590456962597612L;
......
......@@ -21,13 +21,16 @@ import static org.springframework.data.mongodb.core.query.Query.*;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.bson.Document;
import org.genesys.amphibian.model.Preview;
import org.genesys.amphibian.model.Preview.State;
......@@ -193,6 +196,58 @@ public class PreviewServiceImpl implements PreviewService, InitializingBean {
return statistics;
}
/**
 * Generates a descriptor for every column of every sheet of a preview and saves
 * the updated preview.
 *
 * <p>For each sheet the first raw-data row (lowest row index that is {@code >= 0})
 * is read as the row of column names. Column statistics are then fetched with
 * {@code getStatisticsData} and each column is classified as NUMERIC, CODED or TEXT.</p>
 *
 * <p>Nothing is done when no preview exists for {@code referenceUuid} or when the
 * preview has no sheets. A sheet without raw data keeps its current descriptors.</p>
 *
 * @param referenceUuid reference UUID of the preview
 */
@Override
public void generateDescriptors(UUID referenceUuid) {
    Preview preview = previewRepository.findByReferenceUuid(referenceUuid);
    if (preview == null || preview.getSheets() == null) {
        // Nothing to describe. Reporting a missing preview is left to the caller.
        return;
    }
    List<PreviewSheet> previewSheets = preview.getSheets();

    // generate descriptors for all sheets
    for (PreviewSheet previewSheet : previewSheets) {
        // get columns: fetch only the first row, without the bookkeeping fields
        Query q = query(where(RAWDATA_DATASET).is(referenceUuid).and(RAWDATA_SHEET).is(previewSheet.index).and(RAWDATA_ROW).gte(0)).with(Sort.by(RAWDATA_ROW));
        q.limit(1);
        q.fields().exclude(RAWDATA_DATASET).exclude(RAWDATA_SHEET).exclude(RAWDATA_ROW).exclude(MONGO_ID);

        List<Document> headerRows = mongoTemplate.find(q, Document.class, MONGO_RAWDATA);
        if (headerRows.isEmpty()) {
            // No raw data stored for this sheet: there is no header row to read
            continue;
        }
        Document namesRow = headerRows.get(0);

        Set<String> columns = namesRow.keySet();
        // toString() instead of a cast: a header cell is not guaranteed to be stored as a String
        List<String> columnNames = namesRow.values().stream()
            .map(value -> value == null ? null : value.toString())
            .collect(Collectors.toList());

        // get statistics data for columns
        // NOTE(review): assumes one StatisticsData per requested column, in request order -- confirm
        List<StatisticsData> statisticsData = getStatisticsData(referenceUuid, previewSheet.index, 1, 100, columns.toArray(String[]::new));

        List<PreviewSheet.Descriptor> descriptors = new ArrayList<>();
        for (int i = 0; i < columns.size(); i++) {
            descriptors.add(describeColumn(columnNames.get(i), statisticsData.get(i)));
        }
        previewSheet.descriptors = descriptors;
    }

    preview.setSheets(previewSheets);
    previewRepository.save(preview);
}

/**
 * Builds the descriptor of a single column from its statistics.
 *
 * @param columnName name of the column
 * @param statistic statistics gathered for the column
 * @return the descriptor with its data type set, and its codes when the column is CODED
 */
private PreviewSheet.Descriptor describeColumn(String columnName, StatisticsData statistic) {
    PreviewSheet.Descriptor descriptor = new PreviewSheet.Descriptor();
    descriptor.columnName = columnName;

    if (checkIsNumericValues(statistic.distinctValues)) {
        descriptor.dataType = PreviewSheet.Descriptor.DataType.NUMERIC;
    } else if (((float) statistic.numberOfDistinct / statistic.totalValues) < 0.2) {
        // Few distinct values relative to the total: treat the column as coded
        descriptor.dataType = PreviewSheet.Descriptor.DataType.CODED;
        descriptor.codes = new HashMap<>();
        for (Object value : statistic.distinctValues) {
            // Key each entry by its own value; a constant key would keep only the last one
            String code = String.valueOf(value);
            descriptor.codes.put(code, code);
        }
    } else {
        descriptor.dataType = PreviewSheet.Descriptor.DataType.TEXT;
    }
    return descriptor;
}
/**
 * Tells whether all given values can be read as numbers.
 *
 * @param values values to check, may be {@code null} or empty
 * @return {@code true} only if there is at least one value and every value,
 *         converted with {@link String#valueOf(Object)}, is accepted by
 *         {@code NumberUtils.isCreatable}
 */
private boolean checkIsNumericValues(Set<Object> values) {
    if (values == null || values.isEmpty()) {
        // allMatch is true for an empty stream; a column without values is not numeric
        return false;
    }
    return values.stream().allMatch(value -> NumberUtils.isCreatable(String.valueOf(value)));
}
public void removeMongoRawdata(UUID referenceUuid) {
Preview preview = previewRepository.findByReferenceUuid(referenceUuid);
if (preview != null) {
......
......@@ -8,6 +8,8 @@ import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.genesys.amphibian.api.v0.PreviewApi;
import org.genesys.amphibian.model.Preview;
import org.genesys.amphibian.model.PreviewSheet;
import org.genesys.amphibian.service.MetadataService;
import org.genesys.amphibian.service.PreviewService;
import org.genesys.amphibian.test.base.AbstractApiTest;
import org.junit.Test;
......@@ -16,6 +18,7 @@ import org.springframework.http.MediaType;
import org.springframework.mock.web.MockMultipartFile;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
......@@ -33,6 +36,9 @@ public class PreviewApiTest extends AbstractApiTest {
@Autowired
private PreviewService previewService;
@Autowired
private MetadataService metadataService;
@Test
public void ingestCsvTest() throws Exception {
UUID uuid = UUID.randomUUID();
......@@ -41,7 +47,7 @@ public class PreviewApiTest extends AbstractApiTest {
CSVWriter writer = new CSVWriter(new StringBuilderWriter(content));
//todo insert data
//insert data
String[] row1 = new String[]{"INSTCODE", "ACCENUMB", "GENUS", "SEEDWGT"};
String[] row2 = new String[]{"SYR002", "IG 137552", "Lens", "0.8799999952316284"};
List<String[]> rows = new ArrayList<>();
......@@ -88,7 +94,7 @@ public class PreviewApiTest extends AbstractApiTest {
XSSFSheet spreadsheet = workbook.createSheet("test");
XSSFRow row;
//todo insert data
//insert data
Map<String, Object[]> data = new TreeMap<String, Object[]>();
data.put("1", new Object[] {"INSTCODE", "ACCENUMB", "GENUS", "SEEDWGT"});
......@@ -140,4 +146,60 @@ public class PreviewApiTest extends AbstractApiTest {
assertThat(preview.getSheets(), hasSize(1));
assertThat(preview.getSheets().get(0).rowCount, is(2));
}
/**
 * Ingests a four-row CSV (one header row, three data rows), calls the
 * generate-descriptors endpoint and checks the name and data type of
 * every generated descriptor.
 */
@Test
public void generateDescriptorsTest() throws Exception {
    final UUID uuid = UUID.randomUUID();

    // Header row followed by three data rows
    List<String[]> rows = new ArrayList<>();
    rows.add(new String[]{"INSTCODE", "ACCENUMB", "GENUS", "SEEDWGT"});
    rows.add(new String[]{"SYR002", "IG 137552", "Lens", "0.8799999952316284"});
    rows.add(new String[]{"SYR002", "IG 137553", "Banana", "0.9799999952316284"});
    rows.add(new String[]{"SYR002", "IG 137554", "Lens", "0.5799999952316284"});

    StringBuilder content = new StringBuilder();
    CSVWriter writer = new CSVWriter(new StringBuilderWriter(content));
    writer.writeAll(rows);

    MockMultipartFile file = new MockMultipartFile("file", "preview.csv", "text/csv", content.toString().getBytes());

    metadataService.previewFromCSV(uuid, null, file, null, null);
    // waiting for data inserting
    Thread.sleep(500);

    /*@formatter:off*/
    mockMvc
        .perform(multipart(PreviewApi.CONTROLLER_URL.concat("/{uuid}/generate-descriptors"), uuid.toString())
            .file(file))
        .andExpect(status().isOk())
    ;
    /*@formatter:on*/

    Preview preview = previewService.getPreview(uuid);
    assertThat(preview, is(notNullValue()));
    assertThat(preview.getSheets(), hasSize(1));

    PreviewSheet sheet = preview.getSheets().get(0);
    assertThat(sheet.rowCount, is(4));

    // Expected descriptor per column, in column order
    String[] expectedNames = {"INSTCODE", "ACCENUMB", "GENUS", "SEEDWGT"};
    PreviewSheet.Descriptor.DataType[] expectedTypes = {
        PreviewSheet.Descriptor.DataType.TEXT,
        PreviewSheet.Descriptor.DataType.TEXT,
        PreviewSheet.Descriptor.DataType.TEXT,
        PreviewSheet.Descriptor.DataType.NUMERIC
    };
    for (int column = 0; column < expectedNames.length; column++) {
        PreviewSheet.Descriptor descriptor = sheet.descriptors.get(column);
        assertThat(descriptor.columnName, is(expectedNames[column]));
        assertThat(descriptor.dataType, is(expectedTypes[column]));
    }
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment