FileLoader.java
/*
* This file is part of Indicators.
*
* Indicators is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Indicators is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Indicators. If not, see <https://www.gnu.org/licenses/>.
*/
package fr.inrae.agroclim.indicators.model.data;
import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import fr.inrae.agroclim.indicators.model.data.climate.ClimaticDailyData;
import fr.inrae.agroclim.indicators.util.PathUtils;
import fr.inrae.agroclim.indicators.util.StringUtils;
import jakarta.xml.bind.annotation.XmlAccessType;
import jakarta.xml.bind.annotation.XmlAccessorType;
import jakarta.xml.bind.annotation.XmlAttribute;
import jakarta.xml.bind.annotation.XmlTransient;
import lombok.AccessLevel;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NonNull;
import lombok.Setter;
import lombok.extern.log4j.Log4j2;
import tools.jackson.databind.MappingIterator;
import tools.jackson.databind.ObjectReader;
import tools.jackson.dataformat.csv.CsvMapper;
import tools.jackson.dataformat.csv.CsvReadFeature;
import tools.jackson.dataformat.csv.CsvSchema;
/**
 * Common methods to load data from CSV/TXT file.
 *
 * The loader keeps the location of the CSV file as a {@code path} string
 * (the only property used for {@code equals}/{@code hashCode}) and resolves
 * it against an optional base directory.
 *
 * Last changed : $Date$
 *
 * @author $Author$
 * @version $Revision$
 */
@Log4j2
@EqualsAndHashCode(
        callSuper = false,
        of = {"path"}
        )
@XmlAccessorType(XmlAccessType.FIELD)
public class FileLoader extends DataLoadingListenerHandler {
    /**
     * UID for serialization.
     */
    private static final long serialVersionUID = -2427563621931015669L;

    /**
     * Create a schema builder configured to use the header line and the given
     * column separator.
     *
     * @param separator CSV separator, exactly one character long
     * @return schema builder with header usage enabled and the separator set
     * @throws NullPointerException if separator is null
     * @throws IllegalArgumentException if separator is not exactly 1 char
     */
    protected static CsvSchema.Builder getCsvSchemaBuilder(
            final String separator) {
        Objects.requireNonNull(separator, "Null separator not handled!");
        if (separator.length() != 1) {
            throw new IllegalArgumentException("separator must be 1 char");
        }
        return CsvSchema.builder()
                .setUseHeader(true)
                .setColumnSeparator(separator.charAt(0));
    }

    /**
     * Read the first line of a CSV file as an array of column names.
     *
     * @param file CSV file
     * @param sep column separator
     * @return columns names from the first line, or an empty array when the
     * file does not contain any line
     */
    public static String[] getHeaders(final File file, final char sep) {
        final CsvSchema schema = CsvSchema.emptySchema()
                .withColumnSeparator(sep);
        final CsvMapper mapper = CsvMapper.builder()
                .configure(CsvReadFeature.WRAP_AS_ARRAY, true)
                .build();
        final ObjectReader objReader = mapper.readerFor(String[].class).with(schema);
        try (MappingIterator<String[]> it = objReader.readValues(file)) {
            // An empty file has no first row: calling next() would throw
            // NoSuchElementException, so report "no headers" instead.
            if (!it.hasNext()) {
                return new String[0];
            }
            return it.next();
        }
    }

    /**
     * Build the schema used to parse the CSV file: one number column per
     * non-empty entry of {@code variables}, in the given order.
     *
     * NOTE(review): null or empty entries are skipped and do not reserve a
     * column slot. If the parser applies schema columns by position, a CSV
     * column with an unrecognized header would shift the following columns.
     * Confirm against the Jackson CSV header handling and the data files.
     *
     * @param separator CSV separator
     * @param variables headers for the columns used in the CSV file, null or
     * empty entries are ignored
     * @return schema to parse the CSV file
     */
    private static CsvSchema getSchema(
            final String separator,
            final String[] variables) {
        final CsvSchema.Builder builder = getCsvSchemaBuilder(separator);
        for (final String variable : variables) {
            if (variable != null && !variable.isEmpty()) {
                builder.addNumberColumn(variable);
            }
        }
        return builder.build();
    }

    /**
     * Directory of evaluation (.gri file).
     */
    private transient Path baseDirectory;

    /**
     * Type of event to fire.
     */
    @Getter(AccessLevel.PROTECTED)
    @Setter(AccessLevel.PROTECTED)
    private transient DataLoadingListener.DataFile dataFile;

    /**
     * File instance for the relative path of CSV file.
     *
     * Lazily created by {@link #getFile()} and reset by
     * {@link #setFile(File)}.
     */
    @XmlTransient
    private File file;

    /**
     * Relative path of CSV file.
     */
    @Getter
    @Setter
    @XmlAttribute
    private String path;

    /**
     * Transform path which is relative to directory of evaluation into absolute
     * path.
     *
     * @param relativePath relative path
     * @return absolute path, or the given path unchanged when no base
     * directory is set
     */
    public final String absolutize(@NonNull final String relativePath) {
        if (baseDirectory == null) {
            return relativePath;
        }
        return PathUtils.resolve(baseDirectory.toString(), relativePath);
    }

    /**
     * @return File instance of relative path for the CSV file, created on
     * first access from {@code path} and then cached.
     */
    public final File getFile() {
        if (file == null) {
            file = new File(absolutize(path));
        }
        return file;
    }

    /**
     * Convert CSV headers into variables. Blank headers and the date columns
     * ("year", "month", "day", case-insensitive) are ignored; headers which do
     * not match any variable are logged and skipped.
     *
     * @param headers Headers of CSV file.
     * @return variables provided by the loader
     */
    protected Set<Variable> getProvidedVariables(final String[] headers) {
        final Set<Variable> variables = new HashSet<>();
        if (headers == null) {
            return variables;
        }
        for (final String header : headers) {
            if (StringUtils.isBlank(header)) {
                continue;
            }
            if ("year".equalsIgnoreCase(header) || "month".equalsIgnoreCase(header)
                    || "day".equalsIgnoreCase(header)) {
                continue;
            }
            try {
                // Locale.ROOT: the default locale must not alter the name
                // (e.g. Turkish upper-cases "i" to a dotted capital I).
                variables.add(Variable.valueOf(header.toUpperCase(Locale.ROOT)));
            } catch (final IllegalArgumentException e) {
                LOGGER.warn("Strange, header {} does not match any variable ({})", header, Variable.values());
            }
        }
        return variables;
    }

    /**
     * Generic method to load data from file.
     *
     * User headers are matched case-insensitively against the allowed
     * headers; headers without match (or null) are not added to the schema.
     * Loading events are fired at start, for each row and at the end.
     *
     * @param csvFile CSV file
     * @param separator CSV separator
     * @param headers user defined headers
     * @param allowedHeaders allowed CSV headers
     * @param clazz data class
     * @param <T> data class
     * @return data from file.
     * @throws UnsupportedOperationException if clazz is ClimaticDailyData
     * @throws NullPointerException if headers or allowedHeaders is null
     */
    public final <T extends Data> List<T> load(final File csvFile,
            final String separator, final String[] headers,
            final List<String> allowedHeaders, final Class<T> clazz) {
        if (clazz == ClimaticDailyData.class) {
            throw new UnsupportedOperationException("Use ClimaticDailyData!");
        }
        Objects.requireNonNull(allowedHeaders, "allowedHeaders must not be null!");
        Objects.requireNonNull(headers, "headers must not be null!");

        // Lower-cased copy, same order as allowedHeaders, used for lookups.
        final List<String> lcAllowedHeaders = new ArrayList<>(allowedHeaders.size());
        for (final String allowed : allowedHeaders) {
            lcAllowedHeaders.add(allowed.toLowerCase(Locale.ROOT));
        }

        // Entries stay null for headers which are null or not allowed.
        final String[] userHeaders = new String[headers.length];
        for (int i = 0; i < headers.length; i++) {
            final String header = headers[i];
            if (header == null) {
                continue;
            }
            final int index = lcAllowedHeaders.indexOf(header.toLowerCase(Locale.ROOT));
            if (index > -1) {
                // Use the spelling of the allowed header, not the user's one.
                userHeaders[i] = allowedHeaders.get(index);
            }
        }

        final CsvSchema schema = getSchema(separator, userHeaders);
        final CsvMapper mapper = new CsvMapper();
        final List<T> data = new ArrayList<>();
        try (MappingIterator<T> it = mapper.readerFor(clazz)
                .with(schema).readValues(csvFile)) {
            final String absolutePath = csvFile.getAbsolutePath();
            fireDataLoadingStartEvent(absolutePath);
            while (it.hasNextValue()) {
                final T aData = it.nextValue();
                fireDataLoadingAddEvent(aData);
                data.add(aData);
            }
            fireDataLoadingEndEvent(absolutePath);
        }
        return data;
    }

    /**
     * @param absolutePath path to relativize according to evaluation directory.
     * @return relative path
     * @throws NullPointerException if the base directory is not set or if
     * absolutePath is null
     */
    public final String relativize(final Path absolutePath) {
        Objects.requireNonNull(baseDirectory, "baseDirectory must be set!");
        Objects.requireNonNull(absolutePath, "absolutePath must not be null!");
        return PathUtils.relativize(baseDirectory.toString(), absolutePath.toString());
    }

    /**
     * Set the directory of evaluation and, when a File instance is already
     * cached, recompute {@code path} according to the new directory.
     *
     * @param dir Directory of evaluation (.gri file), null to unset.
     */
    public final void setBaseDirectory(final Path dir) {
        baseDirectory = dir;
        if (file == null) {
            return;
        }
        if (dir == null) {
            // No directory to relativize against: keep the absolute path, as
            // setFile() does, instead of failing after the field was changed.
            path = file.getAbsolutePath();
        } else {
            path = relativize(file.toPath());
        }
    }

    /**
     * Set the CSV file: store its path (relative to the base directory when
     * one is set, absolute otherwise) and fire the data set event.
     *
     * @param csvfile CSV file
     */
    public final void setFile(@NonNull final File csvfile) {
        LOGGER.traceEntry("file={}", csvfile);
        // Drop the cached instance, getFile() rebuilds it from the new path.
        this.file = null;
        if (baseDirectory == null) {
            this.path = csvfile.getAbsolutePath();
        } else {
            this.path = relativize(Paths.get(csvfile.toURI()));
        }
        fireDataSetEvent(getDataFile());
        LOGGER.traceExit();
    }
}