view pkg/imports/agm.go @ 2624:9dbaf69c7a66

Improve geoserver config to better calculate bounding boxes * Disable the use of estimated extents for the postgis storage configuration for geoserver, which is set via the gemma middleware. This way we are able to get better bounding boxes for many layers where the postgis function `ST_EstimatedExtent()` would be far off.
author Bernhard Reiter <bernhard@intevation.de>
date Wed, 13 Mar 2019 16:18:39 +0100
parents ce39e9954e85
children 6d4f361c36e8
line wrap: on
line source

// This is Free Software under GNU Affero General Public License v >= 3.0
// without warranty, see README.md and license for details.
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// License-Filename: LICENSES/AGPL-3.0.txt
//
// Copyright (C) 2018 by via donau
//   – Österreichische Wasserstraßen-Gesellschaft mbH
// Software engineering by Intevation GmbH
//
// Author(s):
//  * Sascha L. Teichmann <sascha.teichmann@intevation.de>
//  * Sascha Wilde <wilde@intevation.de>

package imports

import (
	"bufio"
	"context"
	"database/sql"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"gemma.intevation.de/gemma/pkg/misc"
	"gemma.intevation.de/gemma/pkg/models"
)

// ApprovedGaugeMeasurements is the configuration of an import job
// for approved gauge measurements.
//
// Dir is the folder holding the "agm.csv" file read by Do; the whole
// folder is removed by CleanUp. Originator is stored as sender and as
// source organization of every measurement inserted by Do.
type ApprovedGaugeMeasurements struct {
	Dir        string `json:"dir"`
	Originator string `json:"originator"`
}

// AGMJobKind is the unique name of an approved gauge measurements import job.
const AGMJobKind JobKind = "agm"

// agmJobCreator is the stateless job creator which is registered
// for the AGMJobKind in init.
type agmJobCreator struct{}

func init() {
	RegisterJobCreator(AGMJobKind, agmJobCreator{})
}

// AutoAccept always reports false for approved gauge measurements.
// NOTE(review): presumably this means the import has to be
// reviewed before it is accepted - confirm against the job
// creator interface.
func (agmJobCreator) AutoAccept() bool {
	return false
}

// Description returns the human readable name of this kind of import.
func (agmJobCreator) Description() string {
	const description = "approved gauge measurements"
	return description
}

// Create returns a fresh, empty ApprovedGaugeMeasurements job.
func (agmJobCreator) Create() Job {
	return &ApprovedGaugeMeasurements{}
}

// Depends lists the names this import declares as dependencies:
// "gauges" and "gauge_measurements".
// NOTE(review): presumably these are the database relations
// the import relies on - confirm against the job creator interface.
func (agmJobCreator) Depends() []string {
	deps := make([]string, 0, 2)
	deps = append(deps, "gauges", "gauge_measurements")
	return deps
}

const (
	// agmStageDoneDeleteSQL deletes the old measurements and keeps the
	// new ones: every gauge measurement which was not tracked for the
	// import $1 but shares gauge and measure date with a measurement
	// tracked for this import is removed.
	agmStageDoneDeleteSQL = `
WITH staged AS (
  SELECT key
  FROM import.track_imports
  WHERE import_id = $1 AND
        relation = 'waterway.gauge_measurements'::regclass
),
to_delete AS (
  SELECT o.id AS id
  FROM waterway.gauge_measurements o
  JOIN waterway.gauge_measurements n
    ON n.fk_gauge_id = o.fk_gauge_id AND n.measure_date = o.measure_date
    WHERE n.id     IN (SELECT key FROM staged)
	  AND o.id NOT IN (SELECT key FROM staged)
)
DELETE FROM waterway.gauge_measurements WHERE id IN (SELECT id from to_delete)`

	// agmStageDoneSQL sets staging_done on all gauge measurements
	// which were tracked for the import $1.
	agmStageDoneSQL = `
UPDATE waterway.gauge_measurements SET staging_done = true
WHERE id IN (
  SELECT key FROM import.track_imports
  WHERE import_id = $1 AND
    relation = 'waterway.gauge_measurements'::regclass)`
)

// StageDone finishes the staging of the import with the given id.
// Inside the given transaction it first deletes the measurements
// which are replaced by measurements of this import and afterwards
// flags the measurements of this import as staging done. The second
// statement is only executed if the first one succeeded; the first
// error encountered is returned.
func (agmJobCreator) StageDone(
	ctx context.Context,
	tx *sql.Tx,
	id int64,
) error {
	if _, err := tx.ExecContext(ctx, agmStageDoneDeleteSQL, id); err != nil {
		return err
	}
	_, err := tx.ExecContext(ctx, agmStageDoneSQL, id)
	return err
}

// CleanUp deletes the import folder agm.Dir including everything
// in it, i.e. the CSV file with the approved gauge measurements.
// The error of the removal is handed back unchanged.
func (agm *ApprovedGaugeMeasurements) CleanUp() error {
	dir := agm.Dir
	return os.RemoveAll(dir)
}

// guessDate parses the time stamps found in the CSV file.
// Two layouts are configured: "day.month.year hour:minute" and
// the ISO like layout with a numeric zone offset which is also
// written by timetz.MarshalJSON.
// NOTE(review): presumably misc.TimeGuesser tries the layouts
// in the given order - confirm in package misc.
var guessDate = misc.TimeGuesser([]string{
	"02.01.2006 15:04",
	"2006-01-02T15:04:05-07:00",
}).Guess

// timetz is a time.Time which is serialized to JSON with second
// precision and a numeric time zone offset.
type timetz struct{ time.Time }

// MarshalJSON encodes the time as a JSON string in the layout
// "2006-01-02T15:04:05-07:00".
//
// The method has a value receiver on purpose: this way both timetz
// and *timetz implement json.Marshaler. With a pointer receiver a
// non-addressable timetz (e.g. a map value or a value handed directly
// to json.Marshal) would not be recognized as a marshaler and would
// be encoded as an empty JSON object instead.
func (ttz timetz) MarshalJSON() ([]byte, error) {
	return json.Marshal(ttz.Time.Format("2006-01-02T15:04:05-07:00"))
}

// agmLine holds the attributes of one version of a gauge measurement
// as they are reported in the summary of an import.
// ValueMin and ValueMax are optional and nil if no value is present.
type agmLine struct {
	CountryCode        string   `json:"country-code"`
	Sender             string   `json:"sender"`
	LanguageCode       string   `json:"language-code"`
	DateIssue          timetz   `json:"date-issue"`
	ReferenceCode      string   `json:"reference-code"`
	WaterLevel         float64  `json:"water-level"`
	Predicted          bool     `json:"predicted"`
	ValueMin           *float64 `json:"value-min"`
	ValueMax           *float64 `json:"value-max"`
	DateInfo           timetz   `json:"date-info"`
	SourceOrganization string   `json:"source-organization"`
}

// hasDiff reports whether the two measurement versions a and b differ.
// Strings and the predicted flag are compared exactly, numeric values
// with a tolerance of 0.00001. An optional value which is present on
// only one side counts as a difference. The time stamps DateIssue and
// DateInfo do not take part in the comparison.
func (a *agmLine) hasDiff(b *agmLine) bool {
	const eps = 0.00001

	// optDiffers compares two optional values.
	optDiffers := func(p, q *float64) bool {
		switch {
		case p == nil && q == nil:
			return false
		case p == nil || q == nil:
			return true
		default:
			return math.Abs(*p-*q) > eps
		}
	}

	// The cases are evaluated from left to right and stop
	// at the first difference found.
	switch {
	case a.CountryCode != b.CountryCode,
		a.Sender != b.Sender,
		a.LanguageCode != b.LanguageCode,
		a.ReferenceCode != b.ReferenceCode,
		math.Abs(a.WaterLevel-b.WaterLevel) > eps,
		a.Predicted != b.Predicted,
		optDiffers(a.ValueMin, b.ValueMin),
		optDiffers(a.ValueMax, b.ValueMax),
		a.SourceOrganization != b.SourceOrganization:
		return true
	}
	return false
}

// agmSummaryEntry is one entry of the summary returned by the import.
// It identifies a measurement by gauge and measure date. Versions
// holds either only the new version or the old version followed by
// the new one if an older measurement is going to be replaced.
type agmSummaryEntry struct {
	FKGaugeID   models.Isrs `json:"fk-gauge-id"`
	MeasureDate timetz      `json:"measure-date"`
	Versions    []*agmLine  `json:"versions"`
}

const (
	// agmSelectSQL loads the measurement with staging done for the
	// gauge given by its ISRS parts ($1 to $5) and the measure
	// date ($6).
	agmSelectSQL = `
SELECT
  id,
  country_code,
  sender,
  language_code,
  date_issue,
  reference_code,
  water_level,
  predicted,
  value_min,
  value_max,
  date_info,
  source_organization
FROM waterway.gauge_measurements
WHERE
  fk_gauge_id = ($1::char(2), $2::char(3), $3::char(5), $4::char(5), $5::int) AND
  measure_date = $6 AND staging_done`

	// agmInsertSQL inserts a new water level measurement for the gauge
	// given by its ISRS parts ($1 to $5). The new row is not marked
	// as staging done. The id of the new row is returned.
	agmInsertSQL = `
INSERT INTO waterway.gauge_measurements (
  fk_gauge_id,
  measure_date,
  country_code,
  sender,
  language_code,
  date_issue,
  reference_code,
  water_level,
  predicted,
  value_min,
  value_max,
  date_info,
  source_organization,
  is_waterlevel,
  staging_done
) VALUES(
  ($1::char(2), $2::char(3), $3::char(5), $4::char(5), $5::int),
  $6,
  $7,
  $8,
  $9,
  $10,
  $11,
  $12,
  $13,
  $14,
  $15,
  $16,
  $17,
  true,
  false
)
RETURNING id`

	// agmGaugeCheckSQL checks if there is a gauge at the location
	// given by its ISRS parts ($1 to $5).
	agmGaugeCheckSQL = `
SELECT EXISTS(
  SELECT 1 FROM waterway.gauges
  WHERE location = ($1::char(2), $2::char(3), $3::char(5), $4::char(5), $5::int))
`
)

// Do executes the actual approved gauge measurements import.
//
// It reads the semicolon separated file "agm.csv" from the folder
// agm.Dir. The file needs the columns "fk_gauge_id", "measure_date"
// and "value". Column names are matched after trimming, lower casing
// and replacing spaces by underscores. Rows of gauges which are not
// in the database are skipped with a warning. Every other row is
// inserted as a new measurement without staging done and is tracked
// for the import with the id importID.
//
// All database work happens in a single transaction on conn which is
// only committed if the whole file could be processed. Any error
// aborts the import and rolls the transaction back.
//
// On success the result is a []*agmSummaryEntry with one entry for
// each measurement which is new or differs from the measurement with
// staging done of the same gauge and measure date.
func (agm *ApprovedGaugeMeasurements) Do(
	ctx context.Context,
	importID int64,
	conn *sql.Conn,
	feedback Feedback,
) (interface{}, error) {

	start := time.Now()

	f, err := os.Open(filepath.Join(agm.Dir, "agm.csv"))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	r := csv.NewReader(bufio.NewReader(f))
	r.Comma = ';'
	// The rows are consumed immediately, so the reader is
	// allowed to recycle the slice it hands out.
	r.ReuseRecord = true

	headers, err := r.Read()
	if err != nil {
		return nil, err
	}

	// Column positions of the needed fields; -1 means not found (yet).
	var (
		fkGaugeIDIdx   = -1
		measureDateIdx = -1
		valueIdx       = -1
	)

	headerFields := []struct {
		idx  *int
		name string
	}{
		{&fkGaugeIDIdx, "fk_gauge_id"},
		{&measureDateIdx, "measure_date"},
		{&valueIdx, "value"}, // "water_level",
	}

	// Map the header names to the column positions.
	// Columns with unknown names are ignored.
nextHeader:
	for i, f := range headers {
		h := strings.Replace(strings.ToLower(
			strings.TrimSpace(f)), " ", "_", -1)

		for j := range headerFields {
			if headerFields[j].name == h {
				if *headerFields[j].idx != -1 {
					return nil, fmt.Errorf(
						"There is more than one column namend '%s'", h)
				}
				*headerFields[j].idx = i
				continue nextHeader
			}
		}
	}

	// All needed columns have to be there. There is currently no
	// field named "unit" in headerFields, so the name filter below
	// does not exclude anything.
	var missing []string
	for i := range headerFields {
		if headerFields[i].name != "unit" && *headerFields[i].idx == -1 {
			missing = append(missing, headerFields[i].name)
		}
	}
	if len(missing) > 0 {
		return nil, fmt.Errorf("Missing columns: %s", strings.Join(missing, ", "))
	}

	tx, err := conn.BeginTx(ctx, nil)
	if err != nil {
		return nil, err
	}
	// Undo everything on the error paths. This call is issued
	// after a successful commit, too; its result is ignored.
	defer tx.Rollback()

	gaugeCheckStmt, err := tx.PrepareContext(ctx, agmGaugeCheckSQL)
	if err != nil {
		return nil, err
	}
	defer gaugeCheckStmt.Close()
	selectStmt, err := tx.PrepareContext(ctx, agmSelectSQL)
	if err != nil {
		return nil, err
	}
	defer selectStmt.Close()
	insertStmt, err := tx.PrepareContext(ctx, agmInsertSQL)
	if err != nil {
		return nil, err
	}
	defer insertStmt.Close()
	trackStmt, err := tx.PrepareContext(ctx, trackImportSQL)
	if err != nil {
		return nil, err
	}
	defer trackStmt.Close()

	entries := []*agmSummaryEntry{}

	// Caches for each gauge seen so far if it exists in the database.
	checkedGauges := map[models.Isrs]bool{}

	// line counts the data rows starting with 1;
	// the header row is not counted.
lines:
	for line := 1; ; line++ {

		row, err := r.Read()
		// The error has to be checked before the length of the row:
		// Read may hand back an empty record together with a parse
		// error (e.g. a quoting error in the first field). Such a
		// failure must not be mistaken for the end of the input as
		// this would silently commit a partial import.
		switch {
		case err == io.EOF:
			break lines
		case err != nil:
			return nil, fmt.Errorf("CSV parsing failed: %v", err)
		case len(row) == 0:
			break lines
		}

		gids := row[fkGaugeIDIdx]
		gid, err := models.IsrsFromString(gids)
		if err != nil {
			return nil, fmt.Errorf("Invalid ISRS code line %d: %v", line, err)
		}

		if exists, found := checkedGauges[*gid]; found {
			if !exists {
				feedback.Warn("Ignoring data for unknown gauge %s", gid.String())
				continue lines
			}
		} else { // not found in gauge cache
			if err := gaugeCheckStmt.QueryRowContext(
				ctx,
				gid.CountryCode,
				gid.LoCode,
				gid.FairwaySection,
				gid.Orc,
				gid.Hectometre,
			).Scan(&exists); err != nil {
				return nil, err
			}
			checkedGauges[*gid] = exists
			if !exists {
				feedback.Warn("Ignoring data for unknown gauge %s", gid.String())
				continue lines
			}
		}

		md, err := guessDate(row[measureDateIdx])
		if err != nil {
			return nil, fmt.Errorf("Invalid 'measure_date' line %d: %v", line, err)
		}

		// Attributes of a measurement with staging done
		// for the same gauge and measure date (if any).
		var (
			oldID                 int64
			oldCountryCode        string
			oldSender             string
			oldLanguageCode       string
			oldDateIssue          time.Time
			oldReferenceCode      string
			oldValue              float64
			oldPredicted          bool
			oldValueMin           sql.NullFloat64
			oldValueMax           sql.NullFloat64
			oldDateInfo           time.Time
			oldSourceOrganization string
		)

		err = selectStmt.QueryRowContext(
			ctx,
			gid.CountryCode,
			gid.LoCode,
			gid.FairwaySection,
			gid.Orc,
			gid.Hectometre,
			md,
		).Scan(
			&oldID,
			&oldCountryCode,
			&oldSender,
			&oldLanguageCode,
			&oldDateIssue,
			&oldReferenceCode,
			&oldValue,
			&oldPredicted,
			&oldValueMin,
			&oldValueMax,
			&oldDateInfo,
			&oldSourceOrganization,
		)

		var newEntry bool
		switch {
		case err == sql.ErrNoRows:
			// Complete new one
			newEntry = true
		case err != nil:
			return nil, err
		}

		// Only the value comes from the CSV row. The other
		// attributes are derived from the job configuration,
		// the gauge and the current time or are fixed.
		newSender := agm.Originator
		newCountryCode := gid.CountryCode
		newLanguageCode := misc.CCtoLang[gid.CountryCode]
		newDateIssue := time.Now()
		newReferenceCode := "ZPG"

		// The value is parsed with 32 bit precision.
		value, err := strconv.ParseFloat(row[valueIdx], 32)
		if err != nil {
			return nil, fmt.Errorf("Invalid 'value' line %d: %v", line, err)
		}
		newValue := value

		newPredicted := false

		// Minimum and maximum are stored as valid zero values.
		newValueMin := sql.NullFloat64{
			Float64: 0,
			Valid:   true,
		}
		newValueMax := sql.NullFloat64{
			Float64: 0,
			Valid:   true,
		}

		newDateInfo := newDateIssue

		newSourceOrganization := newSender

		var newID int64

		if err := insertStmt.QueryRowContext(
			ctx,
			gid.CountryCode,
			gid.LoCode,
			gid.FairwaySection,
			gid.Orc,
			gid.Hectometre,
			md,
			newCountryCode,
			newSender,
			newLanguageCode,
			newDateIssue,
			newReferenceCode,
			newValue,
			newPredicted,
			newValueMin,
			newValueMax,
			newDateInfo,
			newSourceOrganization,
		).Scan(&newID); err != nil {
			return nil, err
		}
		// Remember the new row as a part of this import.
		if _, err := trackStmt.ExecContext(
			ctx, importID, "waterway.gauge_measurements", newID,
		); err != nil {
			return nil, err
		}

		n := newAGMLine(
			newCountryCode,
			newSender,
			newLanguageCode,
			newDateIssue,
			newReferenceCode,
			newValue,
			newPredicted,
			newValueMin,
			newValueMax,
			newDateInfo,
			newSourceOrganization,
		)

		ase := &agmSummaryEntry{
			FKGaugeID:   *gid,
			MeasureDate: timetz{md},
		}

		if newEntry {
			ase.Versions = []*agmLine{n}
		} else {
			o := newAGMLine(
				oldCountryCode,
				oldSender,
				oldLanguageCode,
				oldDateIssue,
				oldReferenceCode,
				oldValue,
				oldPredicted,
				oldValueMin,
				oldValueMax,
				oldDateInfo,
				oldSourceOrganization,
			)
			// Ignore if there is no diff. Only the summary entry
			// is skipped here; the row itself was already inserted
			// and tracked above.
			if !n.hasDiff(o) {
				continue
			}
			ase.Versions = []*agmLine{o, n}
		}
		entries = append(entries, ase)
	}

	if err := tx.Commit(); err != nil {
		return nil, fmt.Errorf("Commit failed: %v", err)
	}

	feedback.Info("Importing approved gauge measurements took %s",
		time.Since(start))

	return entries, nil
}

// newAGMLine builds an agmLine from the single attributes of a
// gauge measurement. The time stamps are wrapped into timetz values.
// valueMin and valueMax are turned into pointers which are nil if
// the respective value is not valid.
func newAGMLine(
	countryCode string,
	sender string,
	languageCode string,
	dateIssue time.Time,
	referenceCode string,
	waterLevel float64,
	predicted bool,
	valueMin sql.NullFloat64,
	valueMax sql.NullFloat64,
	dateInfo time.Time,
	sourceOrganization string,
) *agmLine {
	// optional maps an invalid value to nil and a valid one
	// to a pointer to a private copy of the number.
	optional := func(nf sql.NullFloat64) *float64 {
		if !nf.Valid {
			return nil
		}
		return &nf.Float64
	}

	line := new(agmLine)
	line.CountryCode = countryCode
	line.Sender = sender
	line.LanguageCode = languageCode
	line.DateIssue = timetz{dateIssue}
	line.ReferenceCode = referenceCode
	line.WaterLevel = waterLevel
	line.Predicted = predicted
	line.ValueMin = optional(valueMin)
	line.ValueMax = optional(valueMax)
	line.DateInfo = timetz{dateInfo}
	line.SourceOrganization = sourceOrganization
	return line
}