Commit 4cdf9e55 authored by Matija Obreza

Download worldclim.org dataset as Excel

parent 553f019e
...@@ -16,7 +16,11 @@ ...@@ -16,7 +16,11 @@
package org.genesys2.server.mvc.admin; package org.genesys2.server.mvc.admin;
import java.io.EOFException;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream;
import javax.servlet.http.HttpServletResponse;
import org.genesys2.server.model.dataset.DS; import org.genesys2.server.model.dataset.DS;
import org.genesys2.server.model.dataset.DSColumn; import org.genesys2.server.model.dataset.DSColumn;
...@@ -68,4 +72,25 @@ public class DS2Controller { ...@@ -68,4 +72,25 @@ public class DS2Controller {
} }
return "redirect:/admin/ds2/"; return "redirect:/admin/ds2/";
} }
/**
 * Streams the worldclim.org dataset to the client as an Excel (.xlsx) attachment.
 *
 * @param response servlet response that receives the download headers and the workbook bytes
 * @throws IOException if writing to the response fails for a reason other than an early end of stream
 */
@RequestMapping(value = "/worldclim/download", method = RequestMethod.POST)
public void worldclimDownload(HttpServletResponse response) throws IOException {
    final DS ds = dsService.loadDatasetByUuid(WorldClimUpdater.WORLDCLIM_DATASET);
    if (ds == null) {
        // NOTE(review): assumes loadDatasetByUuid returns null when the dataset does not exist -- confirm.
        // Answer with 404 before any download headers are written instead of failing inside the export.
        response.sendError(HttpServletResponse.SC_NOT_FOUND);
        return;
    }

    // Write the worldclim dataset to the response as an Excel workbook.
    response.setContentType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
    // The timestamp is a long, so %d is the matching conversion; the resulting file name is unchanged.
    response.addHeader("Content-Disposition", String.format("attachment; filename=\"worldclim-%d.xlsx\"", System.currentTimeMillis()));

    final OutputStream outputStream = response.getOutputStream();
    try {
        dsService.download(ds, outputStream);
        response.flushBuffer();
    } catch (EOFException e) {
        // The stream ended early (logged as an aborted download); nothing more can be sent.
        LOG.warn("Download was aborted: {}", e.getMessage());
    }
}
} }
...@@ -18,8 +18,10 @@ import java.util.regex.Pattern; ...@@ -18,8 +18,10 @@ import java.util.regex.Pattern;
import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.streaming.SXSSFSheet; import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFWorkbook; import org.apache.poi.xssf.streaming.SuperSXSSFSheet;
import org.apache.poi.xssf.streaming.SuperSXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.genesys2.server.model.dataset.DS; import org.genesys2.server.model.dataset.DS;
import org.genesys2.server.model.dataset.DSColumn; import org.genesys2.server.model.dataset.DSColumn;
...@@ -413,29 +415,32 @@ public class DSServiceImpl implements DSService { ...@@ -413,29 +415,32 @@ public class DSServiceImpl implements DSService {
@Override @Override
public void download(DS ds, List<DSColumn> dsds, OutputStream outputStream) throws IOException { public void download(DS ds, List<DSColumn> dsds, OutputStream outputStream) throws IOException {
XSSFWorkbook template = new XSSFWorkbook();
// keep 1000 rows in memory, exceeding rows will be flushed to disk
SXSSFWorkbook wb = new SXSSFWorkbook(template, 100);
SXSSFSheet sheet = (SXSSFSheet) wb.createSheet("worldclim.org");
int rowIndex = 0, cellIndex = 0;
Row r = sheet.createRow(rowIndex++);
r.createCell(cellIndex++).setCellValue("row_id");
for (DSQualifier dsq : ds.getQualifiers()) {
r.createCell(cellIndex++).setCellValue(dsq.getDescriptor().getCode());
}
// Prepare descriptor mapping
Long[] columnDescriptors = new Long[dsds.size()]; Long[] columnDescriptors = new Long[dsds.size()];
XSSFWorkbook template = new XSSFWorkbook();
{ {
int columnIndex = 0; XSSFSheet sheet = template.createSheet("worldclim.org");
for (DSColumn dsd : dsds) { int rowIndex = 0, cellIndex = 0;
r.createCell(cellIndex++).setCellValue(dsd.getDescriptor().getCode()); Row r = sheet.createRow(rowIndex++);
columnDescriptors[columnIndex++] = dsd.getId(); r.createCell(cellIndex++).setCellValue("row_id");
for (DSQualifier dsq : ds.getQualifiers()) {
r.createCell(cellIndex++).setCellValue(dsq.getDescriptor().getCode());
}
// Prepare descriptor mapping
{
int columnIndex = 0;
for (DSColumn dsd : dsds) {
r.createCell(cellIndex++).setCellValue(dsd.getDescriptor().getCode());
columnDescriptors[columnIndex++] = dsd.getId();
}
} }
} }
// keep 50 rows in memory, exceeding rows will be flushed to disk
SuperSXSSFWorkbook wb = new SuperSXSSFWorkbook(template, 50);
SuperSXSSFSheet sheet = wb.getStreamingSheet("worldclim.org");
// List<DSRow> rows = dsRowRepo.findByDataset(ds); // List<DSRow> rows = dsRowRepo.findByDataset(ds);
// LOG.info("Retrieved DS rows " +rows.size()); // LOG.info("Retrieved DS rows " +rows.size());
...@@ -443,43 +448,47 @@ public class DSServiceImpl implements DSService { ...@@ -443,43 +448,47 @@ public class DSServiceImpl implements DSService {
List<Object[]> allDsrq = dsRowRepo.getQualifiers(ds); List<Object[]> allDsrq = dsRowRepo.getQualifiers(ds);
LOG.info("Got {} row qualifiers", allDsrq.size()); LOG.info("Got {} row qualifiers", allDsrq.size());
int batchSize = 100; sheet.setRowGenerator((sheet2) -> {
int batchSize = 100;
for (int fromIndex = 0; fromIndex < allDsrq.size(); fromIndex += batchSize) { int rowIndex = 1, cellIndex = 0;
List<Object[]> batch = allDsrq.subList(fromIndex, Math.min(fromIndex + batchSize, allDsrq.size()));
LOG.info("Processing position: {} of {}", fromIndex, allDsrq.size()); for (int fromIndex = 0; fromIndex < allDsrq.size(); fromIndex += batchSize) {
List<Object[]> batch = allDsrq.subList(fromIndex, Math.min(fromIndex + batchSize, allDsrq.size()));
List<Long> rowIds = new ArrayList<Long>(batchSize); LOG.info("Processing position: {} of {}", fromIndex, allDsrq.size());
for (Object[] x : batch) {
rowIds.add((Long) x[0]); List<Long> rowIds = new ArrayList<Long>(batchSize);
} for (Object[] x : batch) {
List<Object[]> allValues = dsRowRepo.getRowValues(rowIds, columnDescriptors); rowIds.add((Long) x[0]);
int batchPos = 0;
for (Object[] x : batch) {
r = sheet.createRow(rowIndex++);
cellIndex = 0;
for (Object v : x) {
addCell(r, cellIndex++, v);
} }
List<Object[]> allValues = dsRowRepo.getRowValues(rowIds, columnDescriptors);
Object[] values = allValues.get(batchPos++); int batchPos = 0;
if (values != null) { for (Object[] x : batch) {
for (Object v : values) { SXSSFRow r = sheet.createRow(rowIndex++);
cellIndex = 0;
for (Object v : x) {
addCell(r, cellIndex++, v); addCell(r, cellIndex++, v);
} }
Object[] values = allValues.get(batchPos++);
if (values != null) {
for (Object v : values) {
addCell(r, cellIndex++, v);
}
}
} }
// if (rowIndex > 10000) {
// LOG.warn("Breaking");
// break;
// }
} }
});
// if (rowIndex > 10000) {
// LOG.warn("Breaking");
// break;
// }
}
sheet.flushRows();
LOG.info("Writing to output stream"); LOG.info("Writing to output stream");
wb.write(outputStream); wb.write(outputStream);
wb.close();
wb.dispose(); wb.dispose();
outputStream.flush();
LOG.info("Done"); LOG.info("Done");
} }
......
...@@ -15,12 +15,19 @@ ...@@ -15,12 +15,19 @@
<input type="hidden" name="${_csrf.parameterName}" value="${_csrf.token}"/> <input type="hidden" name="${_csrf.parameterName}" value="${_csrf.token}"/>
</form> </form>
<form method="post" action="<c:url value="/admin/ds2/worldclim/download" />">
<input type="submit" class="btn btn-default" value="Download worldclim.org" />
<!-- CSRF protection -->
<input type="hidden" name="${_csrf.parameterName}" value="${_csrf.token}"/>
</form>
<form method="post" action="<c:url value="/admin/ds2/worldclim/delete" />"> <form method="post" action="<c:url value="/admin/ds2/worldclim/delete" />">
<input type="submit" class="btn btn-default" value="Delete worldclim.org" /> <input type="submit" class="btn btn-default" value="Delete worldclim.org" />
<!-- CSRF protection --> <!-- CSRF protection -->
<input type="hidden" name="${_csrf.parameterName}" value="${_csrf.token}"/> <input type="hidden" name="${_csrf.parameterName}" value="${_csrf.token}"/>
</form> </form>
<content tag="javascript"> <content tag="javascript">
<script type="text/javascript"> <script type="text/javascript">
$(document).ready(function() { $(document).ready(function() {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment