view cmd/soundingresults/main.go @ 603:3d33c53db1e3

Sounding results: Read point data from xyz files.
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Mon, 10 Sep 2018 12:10:10 +0200
parents 10f898bbe50f
children 4d97066c311c
line wrap: on
line source

package main

import (
	"bufio"
	"compress/bzip2"
	"compress/gzip"
	"database/sql"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/jackc/pgx"
	"github.com/jackc/pgx/stdlib"
)

// Command line flags. Apart from insecure they describe how to reach the
// database and are consumed by run.
var (
	insecure   = flag.Bool("insecure", false, "skip SSL verification")
	dbhost     = flag.String("dbhost", "localhost", "database host")
	dbport     = flag.Uint("dbport", 5432, "database port")
	dbname     = flag.String("dbname", "gemma", "database name")
	dbuser     = flag.String("dbuser", "scott", "database user")
	dbpassword = flag.String("dbpw", "tiger", "database password")
	dbssl      = flag.String("dbssl", "prefer", "database SSL mode")
)

func run(fn func(*sql.DB) error) error {

	// To ease SSL config ride a bit on parsing.
	cc, err := pgx.ParseConnectionString("sslmode=" + *dbssl)
	if err != nil {
		return err
	}

	// Do the rest manually to allow whitespace in user/password.
	cc.Host = *dbhost
	cc.Port = uint16(*dbport)
	cc.User = *dbuser
	cc.Password = *dbpassword
	cc.Database = *dbname

	db := stdlib.OpenDB(cc)
	defer db.Close()

	return fn(db)
}

// meta holds the information parseFilename derives from the name of a
// sounding result file.
type meta struct {
	// date is the leading date component of the file name.
	date time.Time
	// name is the name component (possibly replaced via INFO.txt) and
	// depthReference the component following the _MB marker.
	name, depthReference string
}

// wrap returns a reader for the content of f, chosen by the extension of
// fname (compared case-insensitively): ".gz" yields a gzip reader, ".bz2"
// a bzip2 reader and everything else a plain buffered reader. Only the
// gzip case can fail.
func wrap(fname string, f io.Reader) (io.Reader, error) {

	ext := strings.ToLower(filepath.Ext(fname))

	if ext == ".gz" {
		return gzip.NewReader(f)
	}

	if ext == ".bz2" {
		return bzip2.NewReader(f), nil
	}

	return bufio.NewReader(f), nil
}

// point3d is a single point as read from an xyz file: the three comma
// separated values of a line, in file order.
type point3d struct {
	x, y, z float64
}

// parseXYZ reads the points of the xyz file fname. The file may be gzip or
// bzip2 compressed (see wrap). The first line is treated as a header and
// skipped; every following line is expected to hold three comma separated
// floating point values. Lines that do not match are logged together with
// their line number and skipped. The returned error is an open/decompress
// error or the error of the underlying line scanner.
func parseXYZ(fname string) ([]*point3d, error) {
	f, err := os.Open(fname)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	r, err := wrap(fname, f)
	if err != nil {
		return nil, err
	}

	// Alloc in larger chunks to reduce pressure on memory management.
	// A slot is only taken once a line has been parsed completely, so
	// malformed lines neither waste slots nor leave partial data behind.
	var chunk []point3d
	alloc := func(x, y, z float64) *point3d {
		if len(chunk) == 0 {
			chunk = make([]point3d, 8*1024)
		}
		p := &chunk[0]
		chunk = chunk[1:]
		p.x, p.y, p.z = x, y, z
		return p
	}

	var points []*point3d

	s := bufio.NewScanner(r)
	if !s.Scan() { // Skip header line.
		return points, s.Err()
	}

	// The header was line 1, so data starts at line 2.
	for line := 2; s.Scan(); line++ {
		text := s.Text()
		// fmt.Sscanf(text, "%f,%f,%f") is 4 times slower.
		idx := strings.IndexByte(text, ',')
		if idx == -1 {
			log.Printf("format error in line %d\n", line)
			continue
		}
		x, err := strconv.ParseFloat(text[:idx], 64)
		if err != nil {
			log.Printf("format error in line %d: %v\n", line, err)
			continue
		}
		text = text[idx+1:]
		if idx = strings.IndexByte(text, ','); idx == -1 {
			log.Printf("format error in line %d\n", line)
			continue
		}
		y, err := strconv.ParseFloat(text[:idx], 64)
		if err != nil {
			log.Printf("format error in line %d: %v\n", line, err)
			continue
		}
		z, err := strconv.ParseFloat(text[idx+1:], 64)
		if err != nil {
			log.Printf("format error in line %d: %v\n", line, err)
			continue
		}
		points = append(points, alloc(x, y, z))
	}

	return points, s.Err()
}

// substituteName looks for a replacement of name in the file INFO.txt
// located in the directory of fname. INFO.txt is read line by line; blank
// lines and lines starting with '#' are ignored, all others are split at
// the first '='. If the part before the '=' equals name (ignoring case and
// surrounding whitespace) the trimmed part after it is returned.
// If INFO.txt cannot be opened or contains no matching entry, name is
// returned unchanged; problems are only logged.
func substituteName(fname, name string) string {
	infoPath := filepath.Join(filepath.Dir(fname), "INFO.txt")

	file, err := os.Open(infoPath)
	if err != nil {
		log.Printf("warn: %v\n", err)
		return name
	}
	defer file.Close()

	wanted := strings.ToLower(name)

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		entry := strings.TrimSpace(scanner.Text())
		if len(entry) == 0 || entry[0] == '#' {
			continue
		}

		// Only "key=value" lines are of interest.
		eq := strings.IndexByte(entry, '=')
		if eq < 0 {
			continue
		}

		key := strings.TrimSpace(strings.ToLower(entry[:eq]))
		if key == wanted {
			return strings.TrimSpace(entry[eq+1:])
		}
	}

	if err := scanner.Err(); err != nil {
		log.Printf("error: %v\n", err)
	}

	return name
}

// parseFilename extracts the meta data encoded in the base name of fname.
// After removing an optional ".gz"/".bz2" extension and the remaining
// file extension, the name has to be of the form
//
//	<YYYYMMDD>_<name>_MB_<depth reference>_WGS84
//
// where the "_MB" and "_WGS84" markers are matched case-insensitively and
// <name> may itself contain underscores. The name is finally passed through
// substituteName, so an INFO.txt next to the file can replace it.
// An error describing the first violated expectation is returned if the
// file name does not follow this scheme.
func parseFilename(fname string) (meta, error) {

	base := filepath.Base(fname)

	// Strip an optional compression extension first.
	compressed := strings.ToLower(filepath.Ext(base))
	for _, ext := range []string{".gz", ".bz2"} {
		if ext == compressed {
			base = base[:len(base)-len(ext)]
			break
		}
	}

	// Cut .txt
	base = base[:len(base)-len(filepath.Ext(base))]

	if !strings.HasSuffix(strings.ToUpper(base), "_WGS84") {
		return meta{}, fmt.Errorf("%s is not in WGS84", base)
	}

	base = base[:len(base)-len("_WGS84")]

	idx := strings.IndexRune(base, '_')
	if idx == -1 {
		return meta{}, fmt.Errorf("%s has no date", base)
	}

	datePart := base[:idx]

	date, err := time.Parse("20060102", datePart)
	if err != nil {
		return meta{}, fmt.Errorf("%s has invalid date %q: %v", base, datePart, err)
	}

	rest := base[idx+1:]

	// The depth reference is the last underscore separated component.
	if idx = strings.LastIndex(rest, "_"); idx == -1 {
		return meta{}, fmt.Errorf("%s has no depth reference", base)
	}

	depthReference := rest[idx+1:]

	rest = rest[:idx]

	if !strings.HasSuffix(strings.ToUpper(rest), "_MB") {
		return meta{}, fmt.Errorf("%s has no _MB marker", base)
	}

	name := rest[:len(rest)-len("_MB")]

	name = substituteName(fname, name)

	return meta{
		name:           name,
		depthReference: depthReference,
		date:           date,
	}, nil
}

// processor is a worker: it takes file names from fnames until the channel
// is closed, checks each name with parseFilename, reads the points with
// parseXYZ and prints how many were found. Files that fail either step are
// logged and skipped. wg is signalled when the worker ends.
func processor(fnames <-chan string, wg *sync.WaitGroup) {
	defer wg.Done()

	for {
		path, more := <-fnames
		if !more {
			return
		}

		log.Printf("Processing %s\n", path)

		// The meta data itself is not used yet; only the validation
		// of the file name matters here.
		if _, err := parseFilename(path); err != nil {
			log.Printf("error: %v\n", err)
			continue
		}

		pts, err := parseXYZ(path)
		if err != nil {
			log.Printf("error: %v\n", err)
			continue
		}

		fmt.Printf("Number of points: %d\n", len(pts))
	}
}

// main parses the command line, starts one processor per CPU and feeds
// them the file names given as positional arguments. It returns once all
// workers have drained the queue.
func main() {
	flag.Parse()

	jobs := make(chan string)

	var workers sync.WaitGroup
	for left := runtime.NumCPU(); left > 0; left-- {
		workers.Add(1)
		go processor(jobs, &workers)
	}

	for _, arg := range flag.Args() {
		jobs <- arg
	}

	// Closing the channel tells the workers that no more files follow.
	close(jobs)

	workers.Wait()
}