ClimateFileLoader.java

/*
 * This file is part of Indicators.
 *
 * Indicators is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Indicators is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Indicators. If not, see <https://www.gnu.org/licenses/>.
 */
package fr.inrae.agroclim.indicators.model.data.climate;

import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import fr.inrae.agroclim.indicators.model.TimeScale;
import fr.inrae.agroclim.indicators.model.data.DataLoadingListener;
import fr.inrae.agroclim.indicators.model.data.FileLoader;
import fr.inrae.agroclim.indicators.model.data.Resource;
import fr.inrae.agroclim.indicators.model.data.Variable;
import fr.inrae.agroclim.indicators.resources.I18n;
import fr.inrae.agroclim.indicators.resources.Messages;
import fr.inrae.agroclim.indicators.util.DateUtils;
import fr.inrae.agroclim.indicators.util.StringUtils;
import jakarta.xml.bind.annotation.XmlAccessType;
import jakarta.xml.bind.annotation.XmlAccessorType;
import jakarta.xml.bind.annotation.XmlElement;
import jakarta.xml.bind.annotation.XmlTransient;
import jakarta.xml.bind.annotation.XmlType;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.log4j.Log4j2;
import tools.jackson.databind.MappingIterator;
import tools.jackson.databind.ObjectReader;
import tools.jackson.dataformat.csv.CsvMapper;
import tools.jackson.dataformat.csv.CsvReadFeature;
import tools.jackson.dataformat.csv.CsvSchema;

/**
 * Load climate data from file.
 *
 * Last changed : $Date$
 *
 * @author $Author$
 * @version $Revision$
 */
@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(propOrder = {"separator", "headers", "midnight", "endYear", "startYear"})
@Log4j2
public final class ClimateFileLoader extends FileLoader implements ClimateLoader {
    /**
     * UUID for Serializable.
     */
    private static final long serialVersionUID = 1913730755957817418L;

    /**
     * Localized date format for log message.
     */
    @XmlTransient
    private DateFormat dateFormat = DateFormat.getDateInstance(DateFormat.SHORT);

    /**
     * Localized datetime format for log message.
     */
    @XmlTransient
    private DateFormat dateTimeFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT);

    /**
     * Hour of midnight (0 for 0-23 or 24 for 1-24).
     */
    @Getter
    @Setter
    @XmlElement
    private Integer midnight = 0;

    /**
     * Headers of CSV file.
     */
    @Getter
    @Setter
    @XmlElement(name = "header")
    private String[] headers;

    /**
     * CSV separator.
     */
    @Getter
    @Setter
    @XmlElement
    private String separator = Resource.DEFAULT_SEP;

    /**
     * Calculator to compute ETP from climatic daily data.
     */
    @Setter
    @XmlTransient
    private EtpCalculator etpCalculator;

    /**
     * End year of data filtering (included).
     */
    @Getter
    @Setter
    private Integer endYear;

    /**
     * Start year of data filtering (included).
     */
    @Getter
    @Setter
    private Integer startYear;

    /**
     * Related time scales.
     */
    @Getter
    @Setter
    @XmlTransient
    private TimeScale timeScale = TimeScale.DAILY;

    /**
     * Constructor.
     */
    public ClimateFileLoader() {
        setDataFile(DataLoadingListener.DataFile.CLIMATIC);
    }

    /**
     * Constructor.
     *
     * @param csvFile
     *            relative path of CSV file
     * @param csvHeaders
     *            CSV headers
     * @param csvSeparator
     *            CSV separator
     */
    public ClimateFileLoader(final String csvFile, final String[] csvHeaders,
            final String csvSeparator) {
        this();
        setPath(csvFile);
        this.headers = csvHeaders;
        this.separator = csvSeparator;
    }

    /**
     * Ensure climatic data are ordered and there is not any missing.
     *
     * @param previous previous data
     * @param current current data to check
     * @param line line number
     * @param path file path
     */
    void checkDate(final ClimaticDailyData previous, final ClimaticDailyData current, final int line,
            final String path) {
        if (previous == null || current == null) {
            return;
        }
        final DateFormat df;
        if (timeScale == null) {
            throw new IllegalStateException("timeScale must not be null!");
        }
        final long delta;
        switch (timeScale) {
        case DAILY -> {
            df = dateFormat;
            delta = DateUtils.NB_OF_MS_IN_DAY;
        }
        case HOURLY -> {
            df = dateTimeFormat;
            delta = DateUtils.NB_OF_MS_IN_HOUR;
        }
        default -> throw new IllegalStateException("TimeScale not handled: " + timeScale);
        }
        if (previous.getDate() != null && current.getDate() != null) {
            final long previousTime = previous.getDate().getTime();
            final long currentTime = current.getDate().getTime();
            final long interval = currentTime - previousTime;
            if (interval < 0) {
                current.getErrors().add(
                        Messages.format("error.day.succession", path, line,
                                df.format(current.getDate()),
                                df.format(previous.getDate())
                                )
                        );
                return;
            }
            if (interval == 0) {
                current.getErrors().add(Messages.format("error.day.duplicate", path, line,
                        df.format(previous.getDate())));
                return;
            }
            if (interval > delta) {
                current.getErrors().add(Messages.format("error.day.missing", path, line, df.format(current.getDate())));
            }
        } else {
            current.getErrors().add(Messages.format("error.date.notread"));
        }
    }

    @Override
    public ClimateFileLoader clone() {
        final ClimateFileLoader clone = new ClimateFileLoader();
        clone.etpCalculator = etpCalculator.clone();
        clone.setPath(getPath());
        clone.headers = headers;
        clone.separator = separator;
        return clone;
    }

    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final ClimateFileLoader other = (ClimateFileLoader) obj;
        if (!Objects.equals(this.separator, other.separator)) {
            return false;
        }
        if (!Objects.equals(this.getPath(), other.getPath())) {
            return false;
        }
        if (!Arrays.deepEquals(this.headers, other.headers)) {
            return false;
        }
        if (!Objects.equals(this.etpCalculator, other.etpCalculator)) {
            return false;
        }
        if (!Objects.equals(this.endYear, other.endYear)) {
            return false;
        }
        return Objects.equals(this.startYear, other.startYear);
    }

    @Override
    public Map<String, String> getConfigurationErrors() {
        final Map<String, String> errors = new HashMap<>();
        if (getPath() == null) {
            errors.put("climate.file", "error.evaluation.climate.file.missing");
        }
        if (!getFile().exists()) {
            errors.put("climate.file", "error.evaluation.climate.file.doesnotexist");
        } else if (getFile().length() == 0) {
            errors.put("climate.file", "error.evaluation.climate.file.empty");
        }
        if (separator == null) {
            errors.put("climate.separator", "error.evaluation.climate.separator.missing");
        } else if (separator.isEmpty()) {
            errors.put("climate.separator", "error.evaluation.climate.separator.empty");
        }
        if (headers == null) {
            errors.put("climate.header", "error.evaluation.climate.header.missing");
        }
        if (timeScale == TimeScale.DAILY && etpCalculator == null) {
            errors.put("climate.etpCalculator", "error.evaluation.climate.etpCalculator.missing");
        }
        if (errors.isEmpty()) {
            return null;
        }
        return errors;
    }

    /**
     * @return Calculator to compute ETP from climatic daily data.
     */
    private EtpCalculator getEtpCalculator() {
        if (timeScale != TimeScale.DAILY) {
            throw new UnsupportedOperationException("Only daily data should have ETP!");
        }
        if (etpCalculator == null) {
            throw new RuntimeException("EtpCalculator not set!");
        }
        return etpCalculator;
    }

    /**
     * @return Missing climatic variables, to check in aggregation indicators.
     */
    @Override
    public Collection<String> getMissingVariables() {
        final List<String> all = new ArrayList<>(ClimaticDailyData.getAllColumnNames(timeScale));
        if (headers != null) {
            for (final String header : headers) {
                all.remove(header.toLowerCase());
            }
        }
        return all;
    }

    @Override
    public Set<Variable> getProvidedVariables() {
        return super.getProvidedVariables(headers);
    }

    @Override
    public Set<Variable> getVariables() {
        if (etpCalculator == null) {
            return new HashSet<>();
        }
        return etpCalculator.getVariables();
    }

    @Override
    public int hashCode() {
        final int prime1 = 7;
        final int prime = 71;
        int hash = prime1;
        hash = prime * hash + Objects.hashCode(this.getPath());
        hash = prime * hash + Arrays.deepHashCode(this.headers);
        hash = prime * hash + Objects.hashCode(this.separator);
        hash = prime * hash + Objects.hashCode(this.etpCalculator);
        hash = prime * hash + Objects.hashCode(this.endYear);
        hash = prime * hash + Objects.hashCode(this.startYear);
        return hash;
    }

    @Override
    public List<ClimaticDailyData> load() {
        LOGGER.trace("start");
        if (getPath() == null || getFile() == null) {
            throw new RuntimeException("no file defined for climate.");
        }
        if (separator == null) {
            throw new RuntimeException("no separator defined for climate.");
        }
        LOGGER.trace("headers: {}", StringUtils.join(headers, ","));
        final List<ClimaticDailyData> data = new ArrayList<>();
        final List<String> headerFiltered = new ArrayList<>();
        final Map<Variable, Integer> valuesCol = new EnumMap<>(Variable.class);
        int yearCol = -1;
        int monthCol = -1;
        int dayCol = -1;
        int hourCol = -1;
        final String[] headersFromFile = getHeaders(getFile(), separator.charAt(0));
        String[] usedHeaders;

        if (headers == null) {
            usedHeaders = headersFromFile;
        } else {
            usedHeaders = headers;
        }
        for (int i = 0; i < usedHeaders.length; i++) {
            final String header = usedHeaders[i];
            final String lcHeader = header.toLowerCase();
            final int index = ClimaticDailyData.getAllColumnNames(timeScale).indexOf(lcHeader);
            if (index != -1) {
                headerFiltered.add(header.substring(0, 1).toUpperCase() + lcHeader.substring(1));
                if (header.equals("year")) {
                    yearCol = i;
                    continue;
                }
                if (header.equals("month")) {
                    monthCol = i;
                    continue;
                }
                if (header.equals("day")) {
                    dayCol = i;
                    continue;
                }
                if (header.equals("hour")) {
                    hourCol = i;
                    continue;
                }
                valuesCol.put(Variable.getByName(header), i);
            } else {
                headerFiltered.add(null);
            }
        }

        LOGGER.trace("userHeadersArray: {}", StringUtils.join(headerFiltered, ","));

        LOGGER.trace("year: {}, month: {}, day: {}", yearCol, monthCol, dayCol);
        LOGGER.trace("variables: {}", valuesCol);
        if (usedHeaders.length != headersFromFile.length) {
            final I18n i18n = new I18n("fr.inrae.agroclim.indicators.resources.messages", Locale.getDefault());
            final String msg = i18n.format("error.climate.wrong.headers", headersFromFile.length,
                    StringUtils.join(headersFromFile, ", "), usedHeaders.length, StringUtils.join(usedHeaders, ", "));
            throw new RuntimeException(msg);
        }
        fireDataLoadingStartEvent("Start of reading file: " + getFile().getName());

        final CsvSchema schema = CsvSchema.emptySchema().withSkipFirstDataRow(true)//
                .withColumnSeparator(separator.charAt(0));
        final CsvMapper mapper = CsvMapper.builder() //
                .configure(CsvReadFeature.WRAP_AS_ARRAY, true) //
                .build();

        final ObjectReader objReader = mapper.readerFor(String[].class).with(schema);
        try (MappingIterator<String[]> it = objReader.readValues(getFile())) {
            ClimaticDailyData previous = null;
            while (it.hasNext()) {
                final int lineNumber = it.currentLocation().getLineNr();
                final String[] row = it.next();
                final Integer year = this.parseInt(row[yearCol], null);
                if (startYear != null && year < startYear || endYear != null && year > endYear) {
                    continue;
                }
                final ClimaticDailyData dailyData = new ClimaticDailyData();
                dailyData.setTimescale(timeScale);
                dailyData.setYear(year);
                dailyData.setMonth(this.parseInt(row[monthCol], null));
                dailyData.setDay(this.parseInt(row[dayCol], null));
                if (timeScale == TimeScale.HOURLY) {
                    final int hour = Integer.parseInt(row[hourCol]);
                    if (midnight == DateUtils.NB_OF_HOURS_IN_DAY && hour == DateUtils.NB_OF_HOURS_IN_DAY) {
                        final long newTime = dailyData.getDate().getTime() + DateUtils.NB_OF_MS_IN_DAY;
                        final Date newDate = new Date(newTime);
                        dailyData.setYear(DateUtils.getYear(newDate));
                        dailyData.setMonth(DateUtils.getMonth(newDate));
                        dailyData.setDay(DateUtils.getDom(newDate));
                        dailyData.setHour(0);
                    } else {
                        dailyData.setHour(hour);
                    }
                }
                valuesCol.forEach((variable, index) -> {
                    if (row[index] != null && !row[index].isEmpty()) {
                        dailyData.setValue(variable, Double.valueOf(row[index]));
                    }
                });
                if (timeScale == TimeScale.DAILY) {
                    dailyData.setEtpCalculator(getEtpCalculator());
                }
                dailyData.check(lineNumber, getFile().getName());
                checkDate(previous, dailyData, lineNumber, getFile().getName());
                fireDataLoadingAddEvent(dailyData);
                data.add(dailyData);
                previous = dailyData;
            }
            fireDataLoadingEndEvent("End of reading " + getFile().getName());
        }
        return data;
    }

    /**
     * Parse String to Integer, like {@code Integer.parseInt} method.
     * @param value the string value to parsing
     * @param defaultValue if not possible to parse, this value will be returned
     * @return value parsed or defaultValue
     */
    private Integer parseInt(final String value, final Integer defaultValue) {
        Integer ret;
        try {
            ret = Integer.valueOf(value);
        } catch (final NumberFormatException e) {
            ret = defaultValue;
        }
        return ret;
    }
    /**
     * @param locale locale for date formatter
     */
    public void setLocale(final Locale locale) {
        dateFormat = DateFormat.getDateInstance(DateFormat.SHORT, locale);
        dateTimeFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT, locale);
    }
}