ClimateFileLoader.java
/*
* This file is part of Indicators.
*
* Indicators is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Indicators is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Indicators. If not, see <https://www.gnu.org/licenses/>.
*/
package fr.inrae.agroclim.indicators.model.data.climate;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import fr.inrae.agroclim.indicators.model.TimeScale;
import fr.inrae.agroclim.indicators.model.data.DataLoadingListener;
import fr.inrae.agroclim.indicators.model.data.FileLoader;
import fr.inrae.agroclim.indicators.model.data.Resource;
import fr.inrae.agroclim.indicators.model.data.Variable;
import fr.inrae.agroclim.indicators.resources.I18n;
import fr.inrae.agroclim.indicators.resources.Messages;
import fr.inrae.agroclim.indicators.util.DateUtils;
import fr.inrae.agroclim.indicators.util.StringUtils;
import jakarta.xml.bind.annotation.XmlAccessType;
import jakarta.xml.bind.annotation.XmlAccessorType;
import jakarta.xml.bind.annotation.XmlElement;
import jakarta.xml.bind.annotation.XmlTransient;
import jakarta.xml.bind.annotation.XmlType;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.log4j.Log4j2;
import tools.jackson.databind.MappingIterator;
import tools.jackson.databind.ObjectReader;
import tools.jackson.dataformat.csv.CsvMapper;
import tools.jackson.dataformat.csv.CsvReadFeature;
import tools.jackson.dataformat.csv.CsvSchema;
/**
* Load climate data from file.
*
* Last changed : $Date$
*
* @author $Author$
* @version $Revision$
*/
@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(propOrder = {"separator", "headers", "midnight", "endYear", "startYear"})
@Log4j2
public final class ClimateFileLoader extends FileLoader implements ClimateLoader {
/**
* Serialization version identifier.
*/
private static final long serialVersionUID = 1913730755957817418L;
/**
* Short date format used to render dates in the error messages built by
* {@code checkDate} for daily data.
*
* Replaced by {@link #setLocale(Locale)}; not bound to XML.
*/
@XmlTransient
private DateFormat dateFormat = DateFormat.getDateInstance(DateFormat.SHORT);
/**
* Short date and time format used to render dates in the error messages
* built by {@code checkDate} for hourly data.
*
* Replaced by {@link #setLocale(Locale)}; not bound to XML.
*/
@XmlTransient
private DateFormat dateTimeFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT);
/**
* Hour value representing midnight in the file (0 when hours are numbered
* 0-23, 24 when they are numbered 1-24).
*
* When set to 24, {@code load()} stores a row read with hour 24 as hour 0
* of the following day.
*/
@Getter
@Setter
@XmlElement
private Integer midnight = 0;
/**
* Headers of the CSV file, one per column, in the order of the columns.
*
* When null, {@code load()} uses the headers read from the file itself.
* Bound to repeated XML elements named "header".
*/
@Getter
@Setter
@XmlElement(name = "header")
private String[] headers;
/**
* CSV separator; only its first character is used to read the file.
*/
@Getter
@Setter
@XmlElement
private String separator = Resource.DEFAULT_SEP;
/**
* Calculator to compute ETP from climatic daily data.
*
* Handed by {@code load()} to every data read at daily time scale; not
* bound to XML.
*/
@Setter
@XmlTransient
private EtpCalculator etpCalculator;
/**
* End year of data filtering (included): rows of later years are skipped
* by {@code load()}. No upper limit when null.
*/
@Getter
@Setter
private Integer endYear;
/**
* Start year of data filtering (included): rows of earlier years are
* skipped by {@code load()}. No lower limit when null.
*/
@Getter
@Setter
private Integer startYear;
/**
* Time scale of the data in the file, daily by default; not bound to XML.
*/
@Getter
@Setter
@XmlTransient
private TimeScale timeScale = TimeScale.DAILY;
/**
 * Builds a loader tagged with the climatic data file kind.
 *
 * Path, headers and separator keep their default values until set.
 */
public ClimateFileLoader() {
    this.setDataFile(DataLoadingListener.DataFile.CLIMATIC);
}
/**
 * Builds a loader for the given CSV file.
 *
 * @param csvFile
 *            relative path of CSV file
 * @param csvHeaders
 *            CSV headers, in the order of the columns; the array is stored as is, not copied
 * @param csvSeparator
 *            CSV separator
 */
public ClimateFileLoader(final String csvFile, final String[] csvHeaders,
        final String csvSeparator) {
    this();
    this.setPath(csvFile);
    headers = csvHeaders;
    separator = csvSeparator;
}
/**
 * Checks that {@code current} comes exactly one time step after {@code previous}.
 *
 * The step is one day at daily time scale and one hour at hourly time scale. Any problem
 * (data out of order, duplicated date, gap, unreadable date) is appended to the errors of
 * {@code current}; nothing is done when one of the data is null.
 *
 * @param previous previous data
 * @param current current data to check
 * @param line line number
 * @param path file path
 * @throws IllegalStateException when the time scale is null or not handled
 */
void checkDate(final ClimaticDailyData previous, final ClimaticDailyData current, final int line,
        final String path) {
    if (previous == null || current == null) {
        return;
    }
    if (timeScale == null) {
        throw new IllegalStateException("timeScale must not be null!");
    }
    // Formatter for the messages and expected gap between two consecutive data.
    final DateFormat formatter;
    final long expectedStep;
    switch (timeScale) {
        case DAILY -> {
            formatter = dateFormat;
            expectedStep = DateUtils.NB_OF_MS_IN_DAY;
        }
        case HOURLY -> {
            formatter = dateTimeFormat;
            expectedStep = DateUtils.NB_OF_MS_IN_HOUR;
        }
        default -> throw new IllegalStateException("TimeScale not handled: " + timeScale);
    }
    final Date previousDate = previous.getDate();
    final Date currentDate = current.getDate();
    if (previousDate == null || currentDate == null) {
        current.getErrors().add(Messages.format("error.date.notread"));
        return;
    }
    final long elapsed = currentDate.getTime() - previousDate.getTime();
    if (elapsed < 0) {
        // Current data is older than the previous one.
        current.getErrors().add(
                Messages.format("error.day.succession", path, line,
                        formatter.format(currentDate),
                        formatter.format(previousDate)
                )
        );
    } else if (elapsed == 0) {
        // Same instant read twice.
        current.getErrors().add(Messages.format("error.day.duplicate", path, line,
                formatter.format(previousDate)));
    } else if (elapsed > expectedStep) {
        // At least one step is missing between the two data.
        current.getErrors().add(Messages.format("error.day.missing", path, line, formatter.format(currentDate)));
    }
}
/**
 * Creates an independent copy of this loader.
 *
 * The copy has the same path, headers, separator, midnight hour, year filter, time scale and
 * localized formats, so that {@code clone().equals(this)} holds. The headers array, the ETP
 * calculator and the formats are duplicated, not shared.
 *
 * @return copy of this loader
 */
@Override
public ClimateFileLoader clone() {
    final ClimateFileLoader clone = new ClimateFileLoader();
    // The calculator is optional (e.g. hourly data): do not fail when it is not set.
    if (etpCalculator != null) {
        clone.etpCalculator = etpCalculator.clone();
    }
    clone.setPath(getPath());
    if (headers != null) {
        clone.headers = headers.clone();
    }
    clone.separator = separator;
    clone.midnight = midnight;
    clone.startYear = startYear;
    clone.endYear = endYear;
    clone.timeScale = timeScale;
    // DateFormat is mutable: give the copy its own instances, keeping the locale in use.
    clone.dateFormat = (DateFormat) dateFormat.clone();
    clone.dateTimeFormat = (DateFormat) dateTimeFormat.clone();
    return clone;
}
/**
 * Compares separator, path, headers, ETP calculator and year filter.
 *
 * The midnight hour, the time scale and the date formats are not part of the comparison.
 *
 * @param obj object to compare with
 * @return true when {@code obj} is a loader with the same values for the compared properties
 */
@Override
public boolean equals(final Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
        return false;
    }
    final ClimateFileLoader that = (ClimateFileLoader) obj;
    return Objects.equals(separator, that.separator)
            && Objects.equals(getPath(), that.getPath())
            && Arrays.deepEquals(headers, that.headers)
            && Objects.equals(etpCalculator, that.etpCalculator)
            && Objects.equals(endYear, that.endYear)
            && Objects.equals(startYear, that.startYear);
}
/**
 * Checks that the loader is configured enough to load the file.
 *
 * Checked: file defined, existing and not empty; separator defined and not empty; headers
 * defined; ETP calculator defined for daily data.
 *
 * @return error message keys by configuration key, or null when there is no error
 */
@Override
public Map<String, String> getConfigurationErrors() {
    final Map<String, String> errors = new HashMap<>();
    // Only one error is reported for the file: without a file, its existence cannot be checked.
    if (getPath() == null || getFile() == null) {
        errors.put("climate.file", "error.evaluation.climate.file.missing");
    } else if (!getFile().exists()) {
        errors.put("climate.file", "error.evaluation.climate.file.doesnotexist");
    } else if (getFile().length() == 0) {
        errors.put("climate.file", "error.evaluation.climate.file.empty");
    }
    if (separator == null) {
        errors.put("climate.separator", "error.evaluation.climate.separator.missing");
    } else if (separator.isEmpty()) {
        errors.put("climate.separator", "error.evaluation.climate.separator.empty");
    }
    if (headers == null) {
        errors.put("climate.header", "error.evaluation.climate.header.missing");
    }
    if (timeScale == TimeScale.DAILY && etpCalculator == null) {
        errors.put("climate.etpCalculator", "error.evaluation.climate.etpCalculator.missing");
    }
    if (errors.isEmpty()) {
        // Callers get null, not an empty map, when the configuration is valid.
        return null;
    }
    return errors;
}
/**
 * Gives the ETP calculator, which only makes sense for daily data.
 *
 * @return Calculator to compute ETP from climatic daily data.
 * @throws UnsupportedOperationException when the time scale is not daily
 * @throws RuntimeException when the calculator is not set
 */
private EtpCalculator getEtpCalculator() {
    if (timeScale == TimeScale.DAILY) {
        if (etpCalculator != null) {
            return etpCalculator;
        }
        throw new RuntimeException("EtpCalculator not set!");
    }
    throw new UnsupportedOperationException("Only daily data should have ETP!");
}
/**
 * Lists the column names of the time scale which are not among the headers.
 *
 * Headers are compared in lower case, independently of the default locale of the JVM.
 * All column names are returned when no header is defined.
 *
 * @return Missing climatic variables, to check in aggregation indicators.
 */
@Override
public Collection<String> getMissingVariables() {
    final List<String> missing = new ArrayList<>(ClimaticDailyData.getAllColumnNames(timeScale));
    if (headers != null) {
        for (final String header : headers) {
            if (header != null) {
                // Locale.ROOT: the default locale must not alter the names (e.g. Turkish dotless i).
                missing.remove(header.toLowerCase(Locale.ROOT));
            }
        }
    }
    return missing;
}
/**
 * @return variables provided by the file, as resolved by the parent class from the headers
 */
@Override
public Set<Variable> getProvidedVariables() {
    final Set<Variable> provided = super.getProvidedVariables(headers);
    return provided;
}
/**
 * @return variables of the ETP calculator, or a new empty set when no calculator is set
 */
@Override
public Set<Variable> getVariables() {
    if (etpCalculator != null) {
        return etpCalculator.getVariables();
    }
    return new HashSet<>();
}
/**
 * Hash code built from the properties compared in {@code equals}: path, headers, separator,
 * ETP calculator, end year and start year.
 *
 * @return hash code
 */
@Override
public int hashCode() {
    final int multiplier = 71;
    // Same order as the historical computation, to keep the same values.
    final int[] fieldHashes = {
        Objects.hashCode(getPath()),
        Arrays.deepHashCode(headers),
        Objects.hashCode(separator),
        Objects.hashCode(etpCalculator),
        Objects.hashCode(endYear),
        Objects.hashCode(startYear),
    };
    int result = 7;
    for (final int fieldHash : fieldHashes) {
        result = multiplier * result + fieldHash;
    }
    return result;
}
/**
 * Reads the CSV file and builds one data per row.
 *
 * The first line of the file is skipped as header line. Columns are identified by the
 * configured headers or, when none is configured, by the headers read from the file; header
 * names are matched case-insensitively against the column names of the time scale. Rows whose
 * year is outside [startYear, endYear] are skipped. Each data is checked, then compared to the
 * previous one with {@code checkDate}; problems are stored in the errors of the data.
 *
 * @return loaded data, in the order of the file
 * @throws RuntimeException when no file or no separator is defined, when the number of headers
 *         differs from the number of columns of the file, or when a date column is missing
 */
@Override
public List<ClimaticDailyData> load() {
    LOGGER.trace("start");
    if (getPath() == null || getFile() == null) {
        throw new RuntimeException("no file defined for climate.");
    }
    if (separator == null || separator.isEmpty()) {
        throw new RuntimeException("no separator defined for climate.");
    }
    final char columnSeparator = separator.charAt(0);
    LOGGER.trace("headers: {}", StringUtils.join(headers, ","));
    final List<ClimaticDailyData> data = new ArrayList<>();
    final List<String> headerFiltered = new ArrayList<>();
    final Map<Variable, Integer> valuesCol = new EnumMap<>(Variable.class);
    int yearCol = -1;
    int monthCol = -1;
    int dayCol = -1;
    int hourCol = -1;
    final String[] headersFromFile = getHeaders(getFile(), columnSeparator);
    final String[] usedHeaders = headers == null ? headersFromFile : headers;
    final List<String> knownColumns = ClimaticDailyData.getAllColumnNames(timeScale);
    for (int i = 0; i < usedHeaders.length; i++) {
        final String header = usedHeaders[i];
        if (header == null) {
            headerFiltered.add(null);
            continue;
        }
        // Locale.ROOT: the default locale must not alter the names (e.g. Turkish dotless i).
        final String lcHeader = header.toLowerCase(Locale.ROOT);
        if (!knownColumns.contains(lcHeader)) {
            // Unknown column: ignored.
            headerFiltered.add(null);
            continue;
        }
        headerFiltered.add(header.substring(0, 1).toUpperCase(Locale.ROOT) + lcHeader.substring(1));
        // Date columns are matched on the lower case name, as for the lookup above.
        switch (lcHeader) {
            case "year" -> yearCol = i;
            case "month" -> monthCol = i;
            case "day" -> dayCol = i;
            case "hour" -> hourCol = i;
            default -> valuesCol.put(Variable.getByName(header), i);
        }
    }
    LOGGER.trace("userHeadersArray: {}", StringUtils.join(headerFiltered, ","));
    LOGGER.trace("year: {}, month: {}, day: {}", yearCol, monthCol, dayCol);
    LOGGER.trace("variables: {}", valuesCol);
    if (usedHeaders.length != headersFromFile.length) {
        final I18n i18n = new I18n("fr.inrae.agroclim.indicators.resources.messages", Locale.getDefault());
        final String msg = i18n.format("error.climate.wrong.headers", headersFromFile.length,
                StringUtils.join(headersFromFile, ", "), usedHeaders.length, StringUtils.join(usedHeaders, ", "));
        throw new RuntimeException(msg);
    }
    // Fail with an explicit message instead of an index error on the first row.
    if (yearCol == -1 || monthCol == -1 || dayCol == -1) {
        throw new RuntimeException("no year, month or day column defined for climate.");
    }
    if (timeScale == TimeScale.HOURLY && hourCol == -1) {
        throw new RuntimeException("no hour column defined for hourly climate.");
    }
    fireDataLoadingStartEvent("Start of reading file: " + getFile().getName());
    final CsvSchema schema = CsvSchema.emptySchema().withSkipFirstDataRow(true)
            .withColumnSeparator(columnSeparator);
    final CsvMapper mapper = CsvMapper.builder()
            .configure(CsvReadFeature.WRAP_AS_ARRAY, true)
            .build();
    final ObjectReader objReader = mapper.readerFor(String[].class).with(schema);
    try (MappingIterator<String[]> it = objReader.readValues(getFile())) {
        ClimaticDailyData previous = null;
        while (it.hasNext()) {
            final int lineNumber = it.currentLocation().getLineNr();
            final String[] row = it.next();
            final Integer year = this.parseInt(valueAt(row, yearCol), null);
            // A row with an unreadable year is kept, so that the problem is reported on the data.
            if (year != null
                    && (startYear != null && year < startYear || endYear != null && year > endYear)) {
                continue;
            }
            final ClimaticDailyData dailyData = new ClimaticDailyData();
            dailyData.setTimescale(timeScale);
            dailyData.setYear(year);
            dailyData.setMonth(this.parseInt(valueAt(row, monthCol), null));
            dailyData.setDay(this.parseInt(valueAt(row, dayCol), null));
            if (timeScale == TimeScale.HOURLY) {
                final int hour = Integer.parseInt(valueAt(row, hourCol));
                if (midnight != null && midnight == DateUtils.NB_OF_HOURS_IN_DAY
                        && hour == DateUtils.NB_OF_HOURS_IN_DAY) {
                    // Hour 24 of a day is stored as hour 0 of the following day.
                    // NOTE(review): adding a fixed number of milliseconds assumes days without
                    // daylight saving shift - confirm the time zone used by getDate().
                    final long newTime = dailyData.getDate().getTime() + DateUtils.NB_OF_MS_IN_DAY;
                    final Date newDate = new Date(newTime);
                    dailyData.setYear(DateUtils.getYear(newDate));
                    dailyData.setMonth(DateUtils.getMonth(newDate));
                    dailyData.setDay(DateUtils.getDom(newDate));
                    dailyData.setHour(0);
                } else {
                    dailyData.setHour(hour);
                }
            }
            valuesCol.forEach((variable, index) -> {
                final String raw = valueAt(row, index);
                // Empty or absent cell: the variable has no value for this row.
                if (raw != null && !raw.isEmpty()) {
                    dailyData.setValue(variable, Double.valueOf(raw));
                }
            });
            if (timeScale == TimeScale.DAILY) {
                dailyData.setEtpCalculator(getEtpCalculator());
            }
            dailyData.check(lineNumber, getFile().getName());
            checkDate(previous, dailyData, lineNumber, getFile().getName());
            fireDataLoadingAddEvent(dailyData);
            data.add(dailyData);
            previous = dailyData;
        }
        fireDataLoadingEndEvent("End of reading " + getFile().getName());
    }
    return data;
}

/**
 * Reads a cell of a CSV row, tolerating rows shorter than the header line.
 *
 * @param row cells of the row
 * @param column index of the cell
 * @return the cell, or null when the row has no cell at this index
 */
private static String valueAt(final String[] row, final int column) {
    if (column < 0 || column >= row.length) {
        return null;
    }
    return row[column];
}
/**
 * Lenient variant of {@code Integer.valueOf(String)}: gives a fallback instead of failing.
 *
 * @param value the text to parse, a null text is handled as unparsable
 * @param defaultValue value returned when the text is not a valid integer
 * @return value parsed or defaultValue
 */
private Integer parseInt(final String value, final Integer defaultValue) {
    try {
        return Integer.valueOf(value);
    } catch (final NumberFormatException ignored) {
        // Not an integer: the caller decides what it means through the default value.
        return defaultValue;
    }
}
/**
 * Rebuilds the short date and date-time formats used in error messages for a locale.
 *
 * @param locale locale for date formatter
 */
public void setLocale(final Locale locale) {
    final int style = DateFormat.SHORT;
    this.dateFormat = DateFormat.getDateInstance(style, locale);
    this.dateTimeFormat = DateFormat.getDateTimeInstance(style, style, locale);
}
}